Also calculate counts including federated instances
This commit is contained in:
parent
1343932b2b
commit
3fe9e0f6b9
4 changed files with 88 additions and 6 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1380,6 +1380,7 @@ dependencies = [
|
||||||
"derive-new",
|
"derive-new",
|
||||||
"futures",
|
"futures",
|
||||||
"lemmy_api_common",
|
"lemmy_api_common",
|
||||||
|
"lemmy_db_schema",
|
||||||
"log",
|
"log",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
|
|
|
@ -14,6 +14,7 @@ serde_json = "1.0.89"
|
||||||
semver = "1.0.14"
|
semver = "1.0.14"
|
||||||
once_cell = "1.16.0"
|
once_cell = "1.16.0"
|
||||||
lemmy_api_common = "=0.16.0"
|
lemmy_api_common = "=0.16.0"
|
||||||
|
lemmy_db_schema = "=0.16.0"
|
||||||
async-recursion = "1.0.0"
|
async-recursion = "1.0.0"
|
||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
derive-new = "0.5.9"
|
derive-new = "0.5.9"
|
||||||
|
|
85
src/lib.rs
85
src/lib.rs
|
@ -4,10 +4,13 @@ extern crate derive_new;
|
||||||
use crate::crawl::{CrawlJob, CrawlParams, InstanceDetails};
|
use crate::crawl::{CrawlJob, CrawlParams, InstanceDetails};
|
||||||
use anyhow::Error;
|
use anyhow::Error;
|
||||||
use futures::future::join_all;
|
use futures::future::join_all;
|
||||||
|
use lemmy_api_common::site::GetSiteResponse;
|
||||||
|
use lemmy_db_schema::aggregates::site_aggregates::SiteAggregates;
|
||||||
use log::warn;
|
use log::warn;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use reqwest::{Client, ClientBuilder};
|
use reqwest::{Client, ClientBuilder};
|
||||||
use semver::Version;
|
use semver::Version;
|
||||||
|
use serde::Serialize;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
@ -25,11 +28,18 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/// Per-instance crawl result extended with counts summed across linked instances.
///
/// Values of this type are built by `calculate_federated_site_aggregates` from the crawled
/// `InstanceDetails` and are what `start_crawl` returns.
#[derive(Serialize, Debug)]
|
||||||
|
pub struct InstanceDetails2 {
|
||||||
|
/// Domain of the instance, copied from the crawled `InstanceDetails`.
pub domain: String,
|
||||||
|
/// Site response of the instance, carried over from the crawled `InstanceDetails`.
pub site_info: GetSiteResponse,
|
||||||
|
/// Sum of the site counts of all crawled instances listed in this instance's
/// `federated_instances.linked`, plus this instance's own counts when it has a site view.
/// `None` when the instance reports no federated instances or when none of the linked
/// instances that were crawled has a site view.
pub federated_counts: Option<SiteAggregates>,
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn start_crawl(
|
pub async fn start_crawl(
|
||||||
start_instances: Vec<String>,
|
start_instances: Vec<String>,
|
||||||
exclude_domains: Vec<String>,
|
exclude_domains: Vec<String>,
|
||||||
max_distance: i32,
|
max_distance: i32,
|
||||||
) -> Result<Vec<InstanceDetails>, Error> {
|
) -> Result<Vec<InstanceDetails2>, Error> {
|
||||||
let params = Arc::new(CrawlParams::new(
|
let params = Arc::new(CrawlParams::new(
|
||||||
min_lemmy_version().await?,
|
min_lemmy_version().await?,
|
||||||
exclude_domains,
|
exclude_domains,
|
||||||
|
@ -65,7 +75,7 @@ pub async fn start_crawl(
|
||||||
});
|
});
|
||||||
instance_details.reverse();
|
instance_details.reverse();
|
||||||
|
|
||||||
Ok(instance_details)
|
calculate_federated_site_aggregates(instance_details)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// calculate minimum allowed lemmy version based on current version. in case of current version
|
/// calculate minimum allowed lemmy version based on current version. in case of current version
|
||||||
|
@ -82,3 +92,74 @@ async fn min_lemmy_version() -> Result<Version, Error> {
|
||||||
version.minor -= 1;
|
version.minor -= 1;
|
||||||
Ok(version)
|
Ok(version)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn calculate_federated_site_aggregates(
|
||||||
|
instance_details: Vec<InstanceDetails>,
|
||||||
|
) -> Result<Vec<InstanceDetails2>, Error> {
|
||||||
|
let mut ret = vec![];
|
||||||
|
for instance in &instance_details {
|
||||||
|
let federated_counts =
|
||||||
|
if let Some(federated_instances) = &instance.site_info.federated_instances {
|
||||||
|
let federated_counts = instance_details
|
||||||
|
.iter()
|
||||||
|
.filter(|i| federated_instances.linked.contains(&i.domain))
|
||||||
|
.inspect(|i| {
|
||||||
|
if instance.domain == "lemmy.ml" {
|
||||||
|
warn!("{}", &i.domain);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.filter_map(|i| i.site_info.site_view.clone())
|
||||||
|
.map(|s| s.counts)
|
||||||
|
.reduce(|a, b| SiteAggregates {
|
||||||
|
id: 0,
|
||||||
|
site_id: 0,
|
||||||
|
users: a.users + b.users,
|
||||||
|
posts: a.posts + b.posts,
|
||||||
|
comments: a.comments + b.comments,
|
||||||
|
communities: a.communities + b.communities,
|
||||||
|
users_active_day: a.users_active_day + b.users_active_day,
|
||||||
|
users_active_week: a.users_active_week + b.users_active_week,
|
||||||
|
users_active_month: a.users_active_month + b.users_active_month,
|
||||||
|
users_active_half_year: a.users_active_half_year + b.users_active_half_year,
|
||||||
|
});
|
||||||
|
|
||||||
|
// also add local site counts to federated counts
|
||||||
|
// TODO: this will be easier (using iter) once GetSiteResponse derives Clone
|
||||||
|
if let Some(local_site_view) = &instance.site_info.site_view {
|
||||||
|
if let Some(federated_counts) = &federated_counts {
|
||||||
|
Some(SiteAggregates {
|
||||||
|
id: 0,
|
||||||
|
site_id: 0,
|
||||||
|
users: federated_counts.users + local_site_view.counts.users,
|
||||||
|
posts: federated_counts.posts + local_site_view.counts.posts,
|
||||||
|
comments: federated_counts.comments + local_site_view.counts.comments,
|
||||||
|
communities: federated_counts.communities
|
||||||
|
+ local_site_view.counts.communities,
|
||||||
|
users_active_day: federated_counts.users_active_day
|
||||||
|
+ local_site_view.counts.users_active_day,
|
||||||
|
users_active_week: federated_counts.users_active_week
|
||||||
|
+ local_site_view.counts.users_active_week,
|
||||||
|
users_active_month: federated_counts.users_active_month
|
||||||
|
+ local_site_view.counts.users_active_month,
|
||||||
|
users_active_half_year: federated_counts.users_active_half_year
|
||||||
|
+ local_site_view.counts.users_active_half_year,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
federated_counts
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
federated_counts
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
// TODO: workaround because GetSiteResponse doesnt implement clone
|
||||||
|
let site_info = serde_json::from_str(&serde_json::to_string(&instance.site_info)?)?;
|
||||||
|
ret.push(InstanceDetails2 {
|
||||||
|
domain: instance.domain.clone(),
|
||||||
|
site_info,
|
||||||
|
federated_counts,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Ok(ret)
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
use anyhow::Error;
|
use anyhow::Error;
|
||||||
use lemmy_stats_crawler::crawl::InstanceDetails;
|
use lemmy_stats_crawler::{start_crawl, InstanceDetails2};
|
||||||
use lemmy_stats_crawler::start_crawl;
|
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
|
||||||
|
@ -57,10 +56,10 @@ struct TotalStats {
|
||||||
users_active_week: i64,
|
users_active_week: i64,
|
||||||
users_active_month: i64,
|
users_active_month: i64,
|
||||||
users_active_halfyear: i64,
|
users_active_halfyear: i64,
|
||||||
instance_details: Vec<InstanceDetails>,
|
instance_details: Vec<InstanceDetails2>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn aggregate(instance_details: Vec<InstanceDetails>) -> TotalStats {
|
fn aggregate(instance_details: Vec<InstanceDetails2>) -> TotalStats {
|
||||||
let mut online_users = 0;
|
let mut online_users = 0;
|
||||||
let mut total_users = 0;
|
let mut total_users = 0;
|
||||||
let mut users_active_day = 0;
|
let mut users_active_day = 0;
|
||||||
|
|
Loading…
Reference in a new issue