Use lemmy structs
This commit is contained in:
parent
8509c19f50
commit
08ffa07a6d
7 changed files with 421 additions and 546 deletions
762
Cargo.lock
generated
762
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
17
Cargo.toml
17
Cargo.toml
|
@ -5,12 +5,13 @@ authors = ["Felix Ableitner"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
reqwest = { version = "0.10.10", default-features = false, features = ["json", "rustls-tls"] }
|
reqwest = { version = "0.11.10", default-features = false, features = ["json", "rustls-tls"] }
|
||||||
serde = { version = "1.0.123", features = ["derive"] }
|
serde = { version = "1.0.137", features = ["derive"] }
|
||||||
anyhow = "1.0.38"
|
anyhow = "1.0.57"
|
||||||
tokio = { version = "0.2.25", features = ["rt-threaded", "macros"] }
|
tokio = { version = "1.18.1", features = ["macros", "rt-multi-thread"] }
|
||||||
futures = "0.3.13"
|
futures = "0.3.21"
|
||||||
serde_json = "1.0.64"
|
serde_json = "1.0.81"
|
||||||
clap = "2.33.3"
|
clap = "3.1.15"
|
||||||
semver = "1.0.7"
|
semver = "1.0.9"
|
||||||
once_cell = "1.10.0"
|
once_cell = "1.10.0"
|
||||||
|
lemmy_api_common = { git = "https://github.com/LemmyNet/lemmy.git", branch = "api-derive-default" }
|
||||||
|
|
85
src/crawl.rs
85
src/crawl.rs
|
@ -1,9 +1,9 @@
|
||||||
use crate::federated_instances::GetSiteResponse;
|
|
||||||
use crate::node_info::NodeInfo;
|
|
||||||
use crate::REQUEST_TIMEOUT;
|
use crate::REQUEST_TIMEOUT;
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use anyhow::Error;
|
use anyhow::Error;
|
||||||
use futures::try_join;
|
use futures::try_join;
|
||||||
|
use lemmy_api_common::node_info::NodeInfo;
|
||||||
|
use lemmy_api_common::site::GetSiteResponse;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
use semver::Version;
|
use semver::Version;
|
||||||
|
@ -33,7 +33,7 @@ pub async fn crawl(
|
||||||
match fetch_instance_details(&current_instance.domain, &min_lemmy_version).await {
|
match fetch_instance_details(&current_instance.domain, &min_lemmy_version).await {
|
||||||
Ok(details) => {
|
Ok(details) => {
|
||||||
instance_details.push(details.to_owned());
|
instance_details.push(details.to_owned());
|
||||||
for i in details.linked_instances {
|
for i in details.site_info.federated_instances.unwrap().linked {
|
||||||
let is_in_crawled = crawled_instances.contains(&i);
|
let is_in_crawled = crawled_instances.contains(&i);
|
||||||
let is_in_pending = pending_instances.iter().any(|p| p.domain == i);
|
let is_in_pending = pending_instances.iter().any(|p| p.domain == i);
|
||||||
if !is_in_crawled && !is_in_pending {
|
if !is_in_crawled && !is_in_pending {
|
||||||
|
@ -50,7 +50,7 @@ pub async fn crawl(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort by active monthly users descending
|
// Sort by active monthly users descending
|
||||||
instance_details.sort_by_key(|i| i.users_active_month);
|
instance_details.sort_by_key(|i| i.node_info.usage.users.active_month);
|
||||||
instance_details.reverse();
|
instance_details.reverse();
|
||||||
|
|
||||||
Ok((instance_details, failed_instances))
|
Ok((instance_details, failed_instances))
|
||||||
|
@ -59,20 +59,8 @@ pub async fn crawl(
|
||||||
#[derive(Serialize, Clone)]
|
#[derive(Serialize, Clone)]
|
||||||
pub struct InstanceDetails {
|
pub struct InstanceDetails {
|
||||||
pub domain: String,
|
pub domain: String,
|
||||||
pub name: String,
|
pub node_info: NodeInfo,
|
||||||
pub description: Option<String>,
|
pub site_info: GetSiteResponse,
|
||||||
pub version: String,
|
|
||||||
pub icon: Option<String>,
|
|
||||||
pub online_users: i32,
|
|
||||||
pub total_users: i64,
|
|
||||||
pub users_active_halfyear: i64,
|
|
||||||
pub users_active_month: i64,
|
|
||||||
pub open_registrations: bool,
|
|
||||||
pub linked_instances_count: i32,
|
|
||||||
pub require_application: bool,
|
|
||||||
// The following fields are only used for aggregation, but not shown in output
|
|
||||||
#[serde(skip)]
|
|
||||||
pub linked_instances: Vec<String>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct CrawlInstance {
|
struct CrawlInstance {
|
||||||
|
@ -95,23 +83,13 @@ async fn fetch_instance_details(
|
||||||
let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
|
let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
|
||||||
let node_info_request = client.get(&node_info_url).timeout(REQUEST_TIMEOUT).send();
|
let node_info_request = client.get(&node_info_url).timeout(REQUEST_TIMEOUT).send();
|
||||||
|
|
||||||
let site_info_url_v2 = format!("https://{}/api/v2/site", domain);
|
let site_info_url = format!("https://{}/api/v3/site", domain);
|
||||||
let site_info_request_v2 = client
|
let site_info_request = client.get(&site_info_url).timeout(REQUEST_TIMEOUT).send();
|
||||||
.get(&site_info_url_v2)
|
|
||||||
.timeout(REQUEST_TIMEOUT)
|
|
||||||
.send();
|
|
||||||
let site_info_url_v3 = format!("https://{}/api/v3/site", domain);
|
|
||||||
let site_info_request_v3 = client
|
|
||||||
.get(&site_info_url_v3)
|
|
||||||
.timeout(REQUEST_TIMEOUT)
|
|
||||||
.send();
|
|
||||||
|
|
||||||
let (node_info, site_info_v2, site_info_v3) = try_join!(
|
let (node_info, site_info) = try_join!(node_info_request, site_info_request)?;
|
||||||
node_info_request,
|
|
||||||
site_info_request_v2,
|
|
||||||
site_info_request_v3
|
|
||||||
)?;
|
|
||||||
let node_info: NodeInfo = node_info.json().await?;
|
let node_info: NodeInfo = node_info.json().await?;
|
||||||
|
let site_info = site_info.json::<GetSiteResponse>().await?;
|
||||||
|
|
||||||
if node_info.software.name != "lemmy" {
|
if node_info.software.name != "lemmy" {
|
||||||
return Err(anyhow!("not a lemmy instance"));
|
return Err(anyhow!("not a lemmy instance"));
|
||||||
}
|
}
|
||||||
|
@ -119,48 +97,11 @@ async fn fetch_instance_details(
|
||||||
if &version < min_lemmy_version {
|
if &version < min_lemmy_version {
|
||||||
return Err(anyhow!("lemmy version is too old ({})", version));
|
return Err(anyhow!("lemmy version is too old ({})", version));
|
||||||
}
|
}
|
||||||
let site_info_v2 = site_info_v2.json::<GetSiteResponse>().await.ok();
|
|
||||||
let site_info_v3 = site_info_v3.json::<GetSiteResponse>().await.ok();
|
|
||||||
let mut site_info: GetSiteResponse = if let Some(site_info_v2) = site_info_v2 {
|
|
||||||
site_info_v2
|
|
||||||
} else if let Some(site_info_v3) = site_info_v3 {
|
|
||||||
site_info_v3
|
|
||||||
} else {
|
|
||||||
return Err(anyhow!("Failed to read site_info"));
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(description) = &site_info.site_view.site.description {
|
|
||||||
if description.len() > 150 {
|
|
||||||
site_info.site_view.site.description = None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let require_application = site_info
|
|
||||||
.site_view
|
|
||||||
.site
|
|
||||||
.require_application
|
|
||||||
.unwrap_or(false);
|
|
||||||
let linked_instances: Vec<String> = site_info
|
|
||||||
.federated_instances
|
|
||||||
.map(|f| f.linked)
|
|
||||||
.unwrap_or_default()
|
|
||||||
.iter()
|
|
||||||
.map(|l| l.to_lowercase())
|
|
||||||
.collect();
|
|
||||||
Ok(InstanceDetails {
|
Ok(InstanceDetails {
|
||||||
domain: domain.to_owned(),
|
domain: domain.to_owned(),
|
||||||
name: site_info.site_view.site.name,
|
node_info,
|
||||||
description: site_info.site_view.site.description,
|
site_info,
|
||||||
version: node_info.software.version,
|
|
||||||
icon: site_info.site_view.site.icon,
|
|
||||||
online_users: site_info.online as i32,
|
|
||||||
total_users: node_info.usage.users.total,
|
|
||||||
users_active_halfyear: node_info.usage.users.active_halfyear,
|
|
||||||
users_active_month: node_info.usage.users.active_month,
|
|
||||||
open_registrations: node_info.open_registrations,
|
|
||||||
linked_instances_count: linked_instances.len() as i32,
|
|
||||||
require_application,
|
|
||||||
linked_instances,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,28 +0,0 @@
|
||||||
use serde::Deserialize;
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Clone)]
|
|
||||||
pub struct GetSiteResponse {
|
|
||||||
pub site_view: SiteView,
|
|
||||||
pub online: usize,
|
|
||||||
pub federated_instances: Option<FederatedInstances>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Clone)]
|
|
||||||
pub struct FederatedInstances {
|
|
||||||
pub linked: Vec<String>,
|
|
||||||
pub allowed: Option<Vec<String>>,
|
|
||||||
pub blocked: Option<Vec<String>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Clone)]
|
|
||||||
pub struct SiteView {
|
|
||||||
pub site: Site,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Clone)]
|
|
||||||
pub struct Site {
|
|
||||||
pub name: String,
|
|
||||||
pub icon: Option<String>,
|
|
||||||
pub description: Option<String>,
|
|
||||||
pub require_application: Option<bool>,
|
|
||||||
}
|
|
|
@ -1,8 +1,6 @@
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
pub mod crawl;
|
pub mod crawl;
|
||||||
pub mod federated_instances;
|
|
||||||
pub mod node_info;
|
|
||||||
|
|
||||||
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
|
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
pub const DEFAULT_START_INSTANCES: &str = "lemmy.ml";
|
pub const DEFAULT_START_INSTANCES: &str = "lemmy.ml";
|
||||||
|
|
40
src/main.rs
40
src/main.rs
|
@ -1,20 +1,20 @@
|
||||||
use anyhow::Error;
|
use anyhow::Error;
|
||||||
use clap::{App, Arg};
|
use clap::{Arg, Command};
|
||||||
use lemmy_stats_crawler::crawl::{crawl, InstanceDetails};
|
use lemmy_stats_crawler::crawl::{crawl, InstanceDetails};
|
||||||
use lemmy_stats_crawler::{DEFAULT_MAX_CRAWL_DEPTH, DEFAULT_START_INSTANCES, EXCLUDE_INSTANCES};
|
use lemmy_stats_crawler::{DEFAULT_MAX_CRAWL_DEPTH, DEFAULT_START_INSTANCES, EXCLUDE_INSTANCES};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
pub async fn main() -> Result<(), Error> {
|
pub async fn main() -> Result<(), Error> {
|
||||||
let matches = App::new("Lemmy Stats Crawler")
|
let matches = Command::new("Lemmy Stats Crawler")
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("start-instances")
|
Arg::new("start-instances")
|
||||||
.long("start-instances")
|
.long("start-instances")
|
||||||
.takes_value(true),
|
.takes_value(true),
|
||||||
)
|
)
|
||||||
.arg(Arg::with_name("exclude").long("exclude").takes_value(true))
|
.arg(Arg::new("exclude").long("exclude").takes_value(true))
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("max-crawl-depth")
|
Arg::new("max-crawl-depth")
|
||||||
.long("max-crawl-depth")
|
.long("max-crawl-depth")
|
||||||
.takes_value(true),
|
.takes_value(true),
|
||||||
)
|
)
|
||||||
|
@ -49,25 +49,43 @@ pub async fn main() -> Result<(), Error> {
|
||||||
struct TotalStats {
|
struct TotalStats {
|
||||||
crawled_instances: i32,
|
crawled_instances: i32,
|
||||||
failed_instances: i32,
|
failed_instances: i32,
|
||||||
|
online_users: usize,
|
||||||
total_users: i64,
|
total_users: i64,
|
||||||
total_online_users: i32,
|
users_active_day: i64,
|
||||||
|
users_active_week: i64,
|
||||||
|
users_active_month: i64,
|
||||||
|
users_active_halfyear: i64,
|
||||||
instance_details: Vec<InstanceDetails>,
|
instance_details: Vec<InstanceDetails>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn aggregate(instance_details: Vec<InstanceDetails>, failed_instances: i32) -> TotalStats {
|
fn aggregate(instance_details: Vec<InstanceDetails>, failed_instances: i32) -> TotalStats {
|
||||||
let mut crawled_instances = 0;
|
let mut online_users = 0;
|
||||||
let mut total_users = 0;
|
let mut total_users = 0;
|
||||||
let mut total_online_users = 0;
|
let mut users_active_day = 0;
|
||||||
|
let mut users_active_week = 0;
|
||||||
|
let mut users_active_month = 0;
|
||||||
|
let mut users_active_halfyear = 0;
|
||||||
|
let mut crawled_instances = 0;
|
||||||
for i in &instance_details {
|
for i in &instance_details {
|
||||||
crawled_instances += 1;
|
crawled_instances += 1;
|
||||||
total_users += i.total_users;
|
online_users += i.site_info.online;
|
||||||
total_online_users += i.online_users;
|
if let Some(site_view) = &i.site_info.site_view {
|
||||||
|
total_users += site_view.counts.users;
|
||||||
|
users_active_day += site_view.counts.users_active_day;
|
||||||
|
users_active_week += site_view.counts.users_active_week;
|
||||||
|
users_active_month += site_view.counts.users_active_month;
|
||||||
|
users_active_halfyear += site_view.counts.users_active_half_year;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
TotalStats {
|
TotalStats {
|
||||||
crawled_instances,
|
crawled_instances,
|
||||||
failed_instances,
|
failed_instances,
|
||||||
|
online_users,
|
||||||
total_users,
|
total_users,
|
||||||
total_online_users,
|
users_active_day,
|
||||||
|
users_active_week,
|
||||||
|
users_active_halfyear,
|
||||||
|
users_active_month,
|
||||||
instance_details,
|
instance_details,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,33 +0,0 @@
|
||||||
use serde::Deserialize;
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct NodeInfo {
|
|
||||||
pub version: String,
|
|
||||||
pub software: NodeInfoSoftware,
|
|
||||||
pub protocols: Vec<String>,
|
|
||||||
pub usage: NodeInfoUsage,
|
|
||||||
pub open_registrations: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
|
||||||
pub struct NodeInfoSoftware {
|
|
||||||
pub name: String,
|
|
||||||
pub version: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct NodeInfoUsage {
|
|
||||||
pub users: NodeInfoUsers,
|
|
||||||
pub local_posts: i64,
|
|
||||||
pub local_comments: i64,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct NodeInfoUsers {
|
|
||||||
pub total: i64,
|
|
||||||
pub active_halfyear: i64,
|
|
||||||
pub active_month: i64,
|
|
||||||
}
|
|
Loading…
Reference in a new issue