Also calculate counts including federated instances

This commit is contained in:
Felix Ableitner 2022-12-05 16:01:37 +01:00
parent 1343932b2b
commit 3fe9e0f6b9
4 changed files with 88 additions and 6 deletions

1
Cargo.lock generated
View file

@ -1380,6 +1380,7 @@ dependencies = [
"derive-new", "derive-new",
"futures", "futures",
"lemmy_api_common", "lemmy_api_common",
"lemmy_db_schema",
"log", "log",
"once_cell", "once_cell",
"reqwest", "reqwest",

View file

@ -14,6 +14,7 @@ serde_json = "1.0.89"
semver = "1.0.14" semver = "1.0.14"
once_cell = "1.16.0" once_cell = "1.16.0"
lemmy_api_common = "=0.16.0" lemmy_api_common = "=0.16.0"
lemmy_db_schema = "=0.16.0"
async-recursion = "1.0.0" async-recursion = "1.0.0"
log = "0.4.17" log = "0.4.17"
derive-new = "0.5.9" derive-new = "0.5.9"

View file

@ -4,10 +4,13 @@ extern crate derive_new;
use crate::crawl::{CrawlJob, CrawlParams, InstanceDetails}; use crate::crawl::{CrawlJob, CrawlParams, InstanceDetails};
use anyhow::Error; use anyhow::Error;
use futures::future::join_all; use futures::future::join_all;
use lemmy_api_common::site::GetSiteResponse;
use lemmy_db_schema::aggregates::site_aggregates::SiteAggregates;
use log::warn; use log::warn;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use reqwest::{Client, ClientBuilder}; use reqwest::{Client, ClientBuilder};
use semver::Version; use semver::Version;
use serde::Serialize;
use std::collections::HashSet; use std::collections::HashSet;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
@ -25,11 +28,18 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
.unwrap() .unwrap()
}); });
/// Crawl result for a single instance, extended with counts summed across its federation.
#[derive(Serialize, Debug)]
pub struct InstanceDetails2 {
/// Domain of the instance, carried over from the corresponding `InstanceDetails`.
pub domain: String,
/// Site information of the instance, carried over from the corresponding `InstanceDetails`.
pub site_info: GetSiteResponse,
/// Counts summed over this instance and the crawled instances it links to. `None` when no
/// such sum could be computed, e.g. because the site info carries no `federated_instances`.
pub federated_counts: Option<SiteAggregates>,
}
pub async fn start_crawl( pub async fn start_crawl(
start_instances: Vec<String>, start_instances: Vec<String>,
exclude_domains: Vec<String>, exclude_domains: Vec<String>,
max_distance: i32, max_distance: i32,
) -> Result<Vec<InstanceDetails>, Error> { ) -> Result<Vec<InstanceDetails2>, Error> {
let params = Arc::new(CrawlParams::new( let params = Arc::new(CrawlParams::new(
min_lemmy_version().await?, min_lemmy_version().await?,
exclude_domains, exclude_domains,
@ -65,7 +75,7 @@ pub async fn start_crawl(
}); });
instance_details.reverse(); instance_details.reverse();
Ok(instance_details) calculate_federated_site_aggregates(instance_details)
} }
/// calculate minimum allowed lemmy version based on current version. in case of current version /// calculate minimum allowed lemmy version based on current version. in case of current version
@ -82,3 +92,74 @@ async fn min_lemmy_version() -> Result<Version, Error> {
version.minor -= 1; version.minor -= 1;
Ok(version) Ok(version)
} }
fn calculate_federated_site_aggregates(
instance_details: Vec<InstanceDetails>,
) -> Result<Vec<InstanceDetails2>, Error> {
let mut ret = vec![];
for instance in &instance_details {
let federated_counts =
if let Some(federated_instances) = &instance.site_info.federated_instances {
let federated_counts = instance_details
.iter()
.filter(|i| federated_instances.linked.contains(&i.domain))
.inspect(|i| {
if instance.domain == "lemmy.ml" {
warn!("{}", &i.domain);
}
})
.filter_map(|i| i.site_info.site_view.clone())
.map(|s| s.counts)
.reduce(|a, b| SiteAggregates {
id: 0,
site_id: 0,
users: a.users + b.users,
posts: a.posts + b.posts,
comments: a.comments + b.comments,
communities: a.communities + b.communities,
users_active_day: a.users_active_day + b.users_active_day,
users_active_week: a.users_active_week + b.users_active_week,
users_active_month: a.users_active_month + b.users_active_month,
users_active_half_year: a.users_active_half_year + b.users_active_half_year,
});
// also add local site counts to federated counts
// TODO: this will be easier (using iter) once GetSiteResponse derives Clone
if let Some(local_site_view) = &instance.site_info.site_view {
if let Some(federated_counts) = &federated_counts {
Some(SiteAggregates {
id: 0,
site_id: 0,
users: federated_counts.users + local_site_view.counts.users,
posts: federated_counts.posts + local_site_view.counts.posts,
comments: federated_counts.comments + local_site_view.counts.comments,
communities: federated_counts.communities
+ local_site_view.counts.communities,
users_active_day: federated_counts.users_active_day
+ local_site_view.counts.users_active_day,
users_active_week: federated_counts.users_active_week
+ local_site_view.counts.users_active_week,
users_active_month: federated_counts.users_active_month
+ local_site_view.counts.users_active_month,
users_active_half_year: federated_counts.users_active_half_year
+ local_site_view.counts.users_active_half_year,
})
} else {
federated_counts
}
} else {
federated_counts
}
} else {
None
};
// TODO: workaround because GetSiteResponse doesnt implement clone
let site_info = serde_json::from_str(&serde_json::to_string(&instance.site_info)?)?;
ret.push(InstanceDetails2 {
domain: instance.domain.clone(),
site_info,
federated_counts,
});
}
Ok(ret)
}

View file

@ -1,6 +1,5 @@
use anyhow::Error; use anyhow::Error;
use lemmy_stats_crawler::crawl::InstanceDetails; use lemmy_stats_crawler::{start_crawl, InstanceDetails2};
use lemmy_stats_crawler::start_crawl;
use serde::Serialize; use serde::Serialize;
use structopt::StructOpt; use structopt::StructOpt;
@ -57,10 +56,10 @@ struct TotalStats {
users_active_week: i64, users_active_week: i64,
users_active_month: i64, users_active_month: i64,
users_active_halfyear: i64, users_active_halfyear: i64,
instance_details: Vec<InstanceDetails>, instance_details: Vec<InstanceDetails2>,
} }
fn aggregate(instance_details: Vec<InstanceDetails>) -> TotalStats { fn aggregate(instance_details: Vec<InstanceDetails2>) -> TotalStats {
let mut online_users = 0; let mut online_users = 0;
let mut total_users = 0; let mut total_users = 0;
let mut users_active_day = 0; let mut users_active_day = 0;