From 45ec994271aacad1868e008cd7a606d6ea75b60a Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Tue, 9 Mar 2021 19:45:49 +0100 Subject: [PATCH] Crawl for number of online and total users --- Cargo.lock | 1 - Cargo.toml | 1 - src/federated_instances.rs | 16 ++++----- src/lib.rs | 2 +- src/main.rs | 69 ++++++++++++++++++++++++++++++++------ src/node_info.rs | 28 ++++++++-------- 6 files changed, 81 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33d75f9..72a0cf1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -418,7 +418,6 @@ dependencies = [ "reqwest", "serde", "tokio", - "url", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 277fbbe..45494ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ edition = "2018" [dependencies] reqwest = { version = "0.10.10", features = ["json"] } -url = "2.2.1" serde = { version = "1.0.123", features = ["derive"] } anyhow = "1.0.38" tokio = { version = "0.2.25", features = ["full"] } diff --git a/src/federated_instances.rs b/src/federated_instances.rs index d16e114..9f0be83 100644 --- a/src/federated_instances.rs +++ b/src/federated_instances.rs @@ -1,14 +1,14 @@ use serde::Deserialize; -#[derive(Deserialize, Debug)] +#[derive(Deserialize, Debug, Clone)] pub struct GetSiteResponse { - pub online: usize, - pub federated_instances: Option, + pub online: usize, + pub federated_instances: Option, } -#[derive(Deserialize, Debug)] +#[derive(Deserialize, Debug, Clone)] pub struct FederatedInstances { - pub linked: Vec, - pub allowed: Option>, - pub blocked: Option>, -} \ No newline at end of file + pub linked: Vec, + pub allowed: Option>, + pub blocked: Option>, +} diff --git a/src/lib.rs b/src/lib.rs index dfda550..34f82e2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,2 @@ +pub mod federated_instances; pub mod node_info; -pub mod federated_instances; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 88018d2..c79cabe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,18 +1,65 @@ use anyhow::Error; -use url::Url; -use lemmy_stats_crawler::node_info::NodeInfo; use lemmy_stats_crawler::federated_instances::GetSiteResponse; +use lemmy_stats_crawler::node_info::NodeInfo; + +#[derive(Default, Debug)] +struct TotalStats { + users: i64, + online_users: i32, +} #[tokio::main] pub async fn main() -> Result<(), Error> { - let url = Url::parse("https://lemmy.ml/nodeinfo/2.0.json")?; - let node_info: NodeInfo = reqwest::get(url).await?.json().await?; + let mut pending_instances = vec!["lemmy.ml".to_string()]; + let mut crawled_instances = vec![]; + let mut total_stats = TotalStats::default(); + while let Some(pi) = pending_instances.iter().next() { + crawled_instances.push(pi.to_owned()); + let instance_details = fetch_instance_details(&pi).await.ok(); + pending_instances = pending_instances + .iter() + .filter(|i| i != &pi) + .map(|i| i.to_owned()) + .collect(); - dbg!(node_info); - - let url = Url::parse("https://lemmy.ml/api/v2/site")?; - let site_info: GetSiteResponse = reqwest::get(url).await?.json().await?; - - dbg!(site_info); + if let Some(details) = instance_details { + total_stats.online_users += details.online_users; + total_stats.users += details.total_users; + // remove all which are in crawled_instances + for ci in details.linked_instances { + if !crawled_instances.contains(&ci) { + pending_instances.push(ci); + } + } + } + } + dbg!(total_stats); Ok(()) -} \ No newline at end of file +} + +struct InstanceDetails { + domain: String, + online_users: i32, + total_users: i64, + linked_instances: Vec, +} + +async fn fetch_instance_details(domain: &str) -> Result { + dbg!(domain); + + let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain); + let node_info: NodeInfo = reqwest::get(&node_info_url).await?.json().await?; + + let site_info_url = format!("https://{}/api/v2/site", domain); + let site_info: GetSiteResponse = reqwest::get(&site_info_url).await?.json().await?; + + Ok(InstanceDetails { + domain: domain.to_owned(), + online_users: site_info.online as i32, + total_users: node_info.usage.users.total, + linked_instances: site_info + .federated_instances + .map(|f| f.linked) + .unwrap_or(vec![]), + }) +} diff --git a/src/node_info.rs b/src/node_info.rs index 41f532a..72440a2 100644 --- a/src/node_info.rs +++ b/src/node_info.rs @@ -3,31 +3,31 @@ use serde::Deserialize; #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct NodeInfo { - pub version: String, - pub software: NodeInfoSoftware, - pub protocols: Vec, - pub usage: NodeInfoUsage, - pub open_registrations: bool, + pub version: String, + pub software: NodeInfoSoftware, + pub protocols: Vec, + pub usage: NodeInfoUsage, + pub open_registrations: bool, } #[derive(Deserialize, Debug)] pub struct NodeInfoSoftware { - pub name: String, - pub version: String, + pub name: String, + pub version: String, } #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct NodeInfoUsage { - pub users: NodeInfoUsers, - pub local_posts: i64, - pub local_comments: i64, + pub users: NodeInfoUsers, + pub local_posts: i64, + pub local_comments: i64, } #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct NodeInfoUsers { - pub total: i64, - pub active_halfyear: i64, - pub active_month: i64, -} \ No newline at end of file + pub total: i64, + pub active_halfyear: i64, + pub active_month: i64, +}