From 3fe9e0f6b917deadf4b3520325b303fe997b61be Mon Sep 17 00:00:00 2001
From: Felix Ableitner
Date: Mon, 5 Dec 2022 16:01:37 +0100
Subject: [PATCH] Also calculate counts including federated instances

---
 Cargo.lock  |  1 +
 Cargo.toml  |  1 +
 src/lib.rs  | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/main.rs |  7 ++---
 4 files changed, 88 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 325b632..2bb7dac 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1380,6 +1380,7 @@ dependencies = [
  "derive-new",
  "futures",
  "lemmy_api_common",
+ "lemmy_db_schema",
  "log",
  "once_cell",
  "reqwest",
diff --git a/Cargo.toml b/Cargo.toml
index 4c446d7..ee3c163 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,6 +14,7 @@ serde_json = "1.0.89"
 semver = "1.0.14"
 once_cell = "1.16.0"
 lemmy_api_common = "=0.16.0"
+lemmy_db_schema = "=0.16.0"
 async-recursion = "1.0.0"
 log = "0.4.17"
 derive-new = "0.5.9"
diff --git a/src/lib.rs b/src/lib.rs
index 245a8b3..e7e4deb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,10 +4,13 @@ extern crate derive_new;
 use crate::crawl::{CrawlJob, CrawlParams, InstanceDetails};
 use anyhow::Error;
 use futures::future::join_all;
+use lemmy_api_common::site::GetSiteResponse;
+use lemmy_db_schema::aggregates::site_aggregates::SiteAggregates;
 use log::warn;
 use once_cell::sync::Lazy;
 use reqwest::{Client, ClientBuilder};
 use semver::Version;
+use serde::Serialize;
 use std::collections::HashSet;
 use std::sync::Arc;
 use std::time::Duration;
@@ -25,11 +28,18 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
         .unwrap()
 });
 
+#[derive(Serialize, Debug)]
+pub struct InstanceDetails2 {
+    pub domain: String,
+    pub site_info: GetSiteResponse,
+    pub federated_counts: Option<SiteAggregates>,
+}
+
 pub async fn start_crawl(
     start_instances: Vec<String>,
     exclude_domains: Vec<String>,
     max_distance: i32,
-) -> Result<Vec<InstanceDetails>, Error> {
+) -> Result<Vec<InstanceDetails2>, Error> {
     let params = Arc::new(CrawlParams::new(
         min_lemmy_version().await?,
         exclude_domains,
@@ -65,7 +75,7 @@ pub async fn start_crawl(
     });
     instance_details.reverse();
 
-    Ok(instance_details)
+    calculate_federated_site_aggregates(instance_details)
 }
 
 /// calculate minimum allowed lemmy version based on current version. in case of current version
@@ -82,3 +92,74 @@ async fn min_lemmy_version() -> Result<Version, Error> {
     version.minor -= 1;
     Ok(version)
 }
+
+fn calculate_federated_site_aggregates(
+    instance_details: Vec<InstanceDetails>,
+) -> Result<Vec<InstanceDetails2>, Error> {
+    let mut ret = vec![];
+    for instance in &instance_details {
+        let federated_counts =
+            if let Some(federated_instances) = &instance.site_info.federated_instances {
+                let federated_counts = instance_details
+                    .iter()
+                    .filter(|i| federated_instances.linked.contains(&i.domain))
+                    .inspect(|i| {
+                        if instance.domain == "lemmy.ml" {
+                            warn!("{}", &i.domain);
+                        }
+                    })
+                    .filter_map(|i| i.site_info.site_view.clone())
+                    .map(|s| s.counts)
+                    .reduce(|a, b| SiteAggregates {
+                        id: 0,
+                        site_id: 0,
+                        users: a.users + b.users,
+                        posts: a.posts + b.posts,
+                        comments: a.comments + b.comments,
+                        communities: a.communities + b.communities,
+                        users_active_day: a.users_active_day + b.users_active_day,
+                        users_active_week: a.users_active_week + b.users_active_week,
+                        users_active_month: a.users_active_month + b.users_active_month,
+                        users_active_half_year: a.users_active_half_year + b.users_active_half_year,
+                    });
+
+                // also add local site counts to federated counts
+                // TODO: this will be easier (using iter) once GetSiteResponse derives Clone
+                if let Some(local_site_view) = &instance.site_info.site_view {
+                    if let Some(federated_counts) = &federated_counts {
+                        Some(SiteAggregates {
+                            id: 0,
+                            site_id: 0,
+                            users: federated_counts.users + local_site_view.counts.users,
+                            posts: federated_counts.posts + local_site_view.counts.posts,
+                            comments: federated_counts.comments + local_site_view.counts.comments,
+                            communities: federated_counts.communities
+                                + local_site_view.counts.communities,
+                            users_active_day: federated_counts.users_active_day
+                                + local_site_view.counts.users_active_day,
+                            users_active_week: federated_counts.users_active_week
+                                + local_site_view.counts.users_active_week,
+                            users_active_month: federated_counts.users_active_month
+                                + local_site_view.counts.users_active_month,
+                            users_active_half_year: federated_counts.users_active_half_year
+                                + local_site_view.counts.users_active_half_year,
+                        })
+                    } else {
+                        federated_counts
+                    }
+                } else {
+                    federated_counts
+                }
+            } else {
+                None
+            };
+        // TODO: workaround because GetSiteResponse doesnt implement clone
+        let site_info = serde_json::from_str(&serde_json::to_string(&instance.site_info)?)?;
+        ret.push(InstanceDetails2 {
+            domain: instance.domain.clone(),
+            site_info,
+            federated_counts,
+        });
+    }
+    Ok(ret)
+}
diff --git a/src/main.rs b/src/main.rs
index a8447c8..a6d893d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,5 @@
 use anyhow::Error;
-use lemmy_stats_crawler::crawl::InstanceDetails;
-use lemmy_stats_crawler::start_crawl;
+use lemmy_stats_crawler::{start_crawl, InstanceDetails2};
 use serde::Serialize;
 use structopt::StructOpt;
 
@@ -57,10 +56,10 @@ struct TotalStats {
     users_active_week: i64,
     users_active_month: i64,
     users_active_halfyear: i64,
-    instance_details: Vec<InstanceDetails>,
+    instance_details: Vec<InstanceDetails2>,
 }
 
-fn aggregate(instance_details: Vec<InstanceDetails>) -> TotalStats {
+fn aggregate(instance_details: Vec<InstanceDetails2>) -> TotalStats {
     let mut online_users = 0;
     let mut total_users = 0;
     let mut users_active_day = 0;