diff --git a/Cargo.lock b/Cargo.lock index 72a0cf1..bd4244a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -417,6 +417,7 @@ dependencies = [ "futures", "reqwest", "serde", + "serde_json", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 45494ab..6162361 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,4 @@ serde = { version = "1.0.123", features = ["derive"] } anyhow = "1.0.38" tokio = { version = "0.2.25", features = ["full"] } futures = "0.3.13" +serde_json = "1.0.64" diff --git a/README.md b/README.md new file mode 100644 index 0000000..8ae8f3f --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# Lemmy-Stats-Crawler + +Crawls Lemmy instances using nodeinfo and API endpoints, to generate a list of instances and overall details. + +## Usage + +For testing: +``` +cargo run +``` + +For production (hide debug logs): +``` +cargo run 2>/dev/null +``` \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index c79cabe..10ce7c4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ use anyhow::Error; use lemmy_stats_crawler::federated_instances::GetSiteResponse; use lemmy_stats_crawler::node_info::NodeInfo; +use serde::Serialize; #[derive(Default, Debug)] struct TotalStats { @@ -12,20 +13,22 @@ struct TotalStats { pub async fn main() -> Result<(), Error> { let mut pending_instances = vec!["lemmy.ml".to_string()]; let mut crawled_instances = vec![]; + let mut instance_details = vec![]; let mut total_stats = TotalStats::default(); while let Some(pi) = pending_instances.iter().next() { crawled_instances.push(pi.to_owned()); - let instance_details = fetch_instance_details(&pi).await.ok(); + let current_instance_details = fetch_instance_details(&pi).await.ok(); pending_instances = pending_instances .iter() .filter(|i| i != &pi) .map(|i| i.to_owned()) .collect(); - if let Some(details) = instance_details { + if let Some(details) = current_instance_details { + instance_details.push(details.to_owned()); total_stats.online_users += details.online_users; total_stats.users += details.total_users; - // remove all which are in crawled_instances + // add all unknown, linked instances to pending for ci in details.linked_instances { if !crawled_instances.contains(&ci) { pending_instances.push(ci); @@ -33,14 +36,26 @@ pub async fn main() -> Result<(), Error> { } } } + instance_details = instance_details + .iter() + .filter(|i| i.open_registrations) + .map(|i| i.to_owned()) + .collect(); + instance_details.sort_by(|a, b| b.users_active_halfyear.cmp(&a.users_active_halfyear)); + print!("{}", serde_json::to_string(&instance_details)?); dbg!(total_stats); Ok(()) } +#[derive(Serialize, Clone)] struct InstanceDetails { domain: String, online_users: i32, total_users: i64, + users_active_halfyear: i64, + users_active_month: i64, + open_registrations: bool, + #[serde(skip)] linked_instances: Vec, } @@ -57,6 +72,9 @@ async fn fetch_instance_details(domain: &str) -> Result domain: domain.to_owned(), online_users: site_info.online as i32, total_users: node_info.usage.users.total, + users_active_halfyear: node_info.usage.users.active_halfyear, + users_active_month: node_info.usage.users.active_month, + open_registrations: node_info.open_registrations, linked_instances: site_info .federated_instances .map(|f| f.linked)