From 27c23bd5d83a057a4c48fc917555112933f920ab Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Thu, 11 Mar 2021 02:25:10 +0100 Subject: [PATCH] Improve output, refactor code, add timeout --- src/federated_instances.rs | 2 - src/main.rs | 96 ++++++++++++++++++++++++++++---------- 2 files changed, 71 insertions(+), 27 deletions(-) diff --git a/src/federated_instances.rs b/src/federated_instances.rs index b98d246..018f20e 100644 --- a/src/federated_instances.rs +++ b/src/federated_instances.rs @@ -17,12 +17,10 @@ pub struct FederatedInstances { #[derive(Deserialize, Debug, Clone)] pub struct SiteView { pub site: Site, - } #[derive(Deserialize, Debug, Clone)] pub struct Site { pub name: String, pub icon: String, - } diff --git a/src/main.rs b/src/main.rs index b180fea..ec4169b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,20 +1,56 @@ use anyhow::Error; use lemmy_stats_crawler::federated_instances::GetSiteResponse; use lemmy_stats_crawler::node_info::NodeInfo; +use reqwest::Client; use serde::Serialize; - -#[derive(Default, Debug)] -struct TotalStats { - users: i64, - online_users: i32, -} +use tokio::time::Duration; #[tokio::main] pub async fn main() -> Result<(), Error> { - let mut pending_instances = vec!["lemmy.ml".to_string()]; + let start_instances = vec!["lemmy.ml".to_string()]; + let instance_details = crawl(start_instances).await?; + let instance_details = cleanup(instance_details); + let total_stats = aggregate(instance_details); + + print!("{}", serde_json::to_string(&total_stats)?); + Ok(()) +} + +#[derive(Serialize)] +struct TotalStats { + total_users: i64, + total_online_users: i32, + instance_details: Vec, +} + +fn aggregate(instance_details: Vec) -> TotalStats { + let mut total_users = 0; + let mut total_online_users = 0; + for i in &instance_details { + total_users += i.total_users; + total_online_users += i.online_users; + } + TotalStats { + total_users, + total_online_users, + instance_details, + } +} + +fn cleanup(instance_details: Vec) -> Vec { + let mut instance_details: Vec = instance_details + .iter() + .filter(|i| i.open_registrations) + .map(|i| i.to_owned()) + .collect(); + instance_details.sort_by(|a, b| b.users_active_halfyear.cmp(&a.users_active_halfyear)); + instance_details +} + +async fn crawl(start_instances: Vec) -> Result, Error> { + let mut pending_instances = start_instances; let mut crawled_instances = vec![]; let mut instance_details = vec![]; - let mut total_stats = TotalStats::default(); while let Some(pi) = pending_instances.iter().next() { crawled_instances.push(pi.to_owned()); let current_instance_details = fetch_instance_details(&pi).await.ok(); @@ -26,8 +62,6 @@ pub async fn main() -> Result<(), Error> { if let Some(details) = current_instance_details { instance_details.push(details.to_owned()); - total_stats.online_users += details.online_users; - total_stats.users += details.total_users; // add all unknown, linked instances to pending for ci in details.linked_instances { if !crawled_instances.contains(&ci) { @@ -36,15 +70,8 @@ pub async fn main() -> Result<(), Error> { } } } - instance_details = instance_details - .iter() - .filter(|i| i.open_registrations) - .map(|i| i.to_owned()) - .collect(); - instance_details.sort_by(|a, b| b.users_active_halfyear.cmp(&a.users_active_halfyear)); - print!("{}", serde_json::to_string(&instance_details)?); - dbg!(total_stats); - Ok(()) + + Ok(instance_details) } #[derive(Serialize, Clone)] @@ -57,6 +84,8 @@ struct InstanceDetails { users_active_halfyear: i64, users_active_month: i64, open_registrations: bool, + linked_instances_count: i32, + // The following fields are only used for aggregation, but not shown in output #[serde(skip)] linked_instances: Vec, } @@ -64,12 +93,31 @@ struct InstanceDetails { async fn fetch_instance_details(domain: &str) -> Result { dbg!(domain); + let client = Client::default(); + let timeout = Duration::from_secs(10); + let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain); - let node_info: NodeInfo = reqwest::get(&node_info_url).await?.json().await?; + let node_info: NodeInfo = client + .get(&node_info_url) + .timeout(timeout) + .send() + .await? + .json() + .await?; let site_info_url = format!("https://{}/api/v2/site", domain); - let site_info: GetSiteResponse = reqwest::get(&site_info_url).await?.json().await?; + let site_info: GetSiteResponse = client + .get(&site_info_url) + .timeout(timeout) + .send() + .await? + .json() + .await?; + let linked_instances = site_info + .federated_instances + .map(|f| f.linked) + .unwrap_or(vec![]); Ok(InstanceDetails { domain: domain.to_owned(), name: site_info.site_view.site.name, @@ -79,9 +127,7 @@ async fn fetch_instance_details(domain: &str) -> Result users_active_halfyear: node_info.usage.users.active_halfyear, users_active_month: node_info.usage.users.active_month, open_registrations: node_info.open_registrations, - linked_instances: site_info - .federated_instances - .map(|f| f.linked) - .unwrap_or(vec![]), + linked_instances_count: linked_instances.len() as i32, + linked_instances, }) }