Improve output, refactor code, add timeout

This commit is contained in:
Felix Ableitner 2021-03-11 02:25:10 +01:00
parent c45771d9e7
commit 27c23bd5d8
2 changed files with 71 additions and 27 deletions

View file

@ -17,12 +17,10 @@ pub struct FederatedInstances {
#[derive(Deserialize, Debug, Clone)] #[derive(Deserialize, Debug, Clone)]
pub struct SiteView { pub struct SiteView {
pub site: Site, pub site: Site,
} }
#[derive(Deserialize, Debug, Clone)] #[derive(Deserialize, Debug, Clone)]
pub struct Site { pub struct Site {
pub name: String, pub name: String,
pub icon: String, pub icon: String,
} }

View file

@ -1,20 +1,56 @@
use anyhow::Error; use anyhow::Error;
use lemmy_stats_crawler::federated_instances::GetSiteResponse; use lemmy_stats_crawler::federated_instances::GetSiteResponse;
use lemmy_stats_crawler::node_info::NodeInfo; use lemmy_stats_crawler::node_info::NodeInfo;
use reqwest::Client;
use serde::Serialize; use serde::Serialize;
use tokio::time::Duration;
/// Running user totals accumulated across instances; starts at zero via `Default`.
// NOTE(review): a second `TotalStats` definition appears further down in this diff view;
// this one looks like the pre-refactor form — confirm which side of the diff it belongs to.
#[derive(Default, Debug)]
struct TotalStats {
/// Sum of each instance's `total_users`.
users: i64,
/// Sum of each instance's `online_users`.
online_users: i32,
}
#[tokio::main] #[tokio::main]
pub async fn main() -> Result<(), Error> { pub async fn main() -> Result<(), Error> {
let mut pending_instances = vec!["lemmy.ml".to_string()]; let start_instances = vec!["lemmy.ml".to_string()];
let instance_details = crawl(start_instances).await?;
let instance_details = cleanup(instance_details);
let total_stats = aggregate(instance_details);
print!("{}", serde_json::to_string(&total_stats)?);
Ok(())
}
/// Aggregated result of a crawl; `main` serializes this to JSON and prints it.
#[derive(Serialize)]
struct TotalStats {
/// Sum of `total_users` over every entry in `instance_details`.
total_users: i64,
/// Sum of `online_users` over every entry in `instance_details`.
total_online_users: i32,
/// The per-instance records the totals above were computed from.
instance_details: Vec<InstanceDetails>,
}
/// Sums the user counts of all given instances and bundles the totals with the records.
///
/// Takes ownership of `instance_details` and moves it, unchanged, into the returned
/// `TotalStats`.
fn aggregate(instance_details: Vec<InstanceDetails>) -> TotalStats {
    let total_users: i64 = instance_details
        .iter()
        .map(|details| details.total_users)
        .sum();
    let total_online_users: i32 = instance_details
        .iter()
        .map(|details| details.online_users)
        .sum();

    TotalStats {
        total_users,
        total_online_users,
        instance_details,
    }
}
/// Keeps only instances with open registrations and orders them by half-year active
/// users, most active first.
///
/// The vector is filtered and sorted in place. The sort is stable, so instances with an
/// equal `users_active_halfyear` keep their relative input order.
fn cleanup(mut instance_details: Vec<InstanceDetails>) -> Vec<InstanceDetails> {
    // The vector is already owned, so drop unwanted entries in place instead of cloning
    // every entry that is kept.
    instance_details.retain(|details| details.open_registrations);
    // `Reverse` flips the key comparison to produce descending order.
    instance_details.sort_by_key(|details| std::cmp::Reverse(details.users_active_halfyear));
    instance_details
}
async fn crawl(start_instances: Vec<String>) -> Result<Vec<InstanceDetails>, Error> {
let mut pending_instances = start_instances;
let mut crawled_instances = vec![]; let mut crawled_instances = vec![];
let mut instance_details = vec![]; let mut instance_details = vec![];
let mut total_stats = TotalStats::default();
while let Some(pi) = pending_instances.iter().next() { while let Some(pi) = pending_instances.iter().next() {
crawled_instances.push(pi.to_owned()); crawled_instances.push(pi.to_owned());
let current_instance_details = fetch_instance_details(&pi).await.ok(); let current_instance_details = fetch_instance_details(&pi).await.ok();
@ -26,8 +62,6 @@ pub async fn main() -> Result<(), Error> {
if let Some(details) = current_instance_details { if let Some(details) = current_instance_details {
instance_details.push(details.to_owned()); instance_details.push(details.to_owned());
total_stats.online_users += details.online_users;
total_stats.users += details.total_users;
// add all unknown, linked instances to pending // add all unknown, linked instances to pending
for ci in details.linked_instances { for ci in details.linked_instances {
if !crawled_instances.contains(&ci) { if !crawled_instances.contains(&ci) {
@ -36,15 +70,8 @@ pub async fn main() -> Result<(), Error> {
} }
} }
} }
instance_details = instance_details
.iter() Ok(instance_details)
.filter(|i| i.open_registrations)
.map(|i| i.to_owned())
.collect();
instance_details.sort_by(|a, b| b.users_active_halfyear.cmp(&a.users_active_halfyear));
print!("{}", serde_json::to_string(&instance_details)?);
dbg!(total_stats);
Ok(())
} }
#[derive(Serialize, Clone)] #[derive(Serialize, Clone)]
@ -57,6 +84,8 @@ struct InstanceDetails {
users_active_halfyear: i64, users_active_halfyear: i64,
users_active_month: i64, users_active_month: i64,
open_registrations: bool, open_registrations: bool,
linked_instances_count: i32,
// The following fields are only used for aggregation, but not shown in output
#[serde(skip)] #[serde(skip)]
linked_instances: Vec<String>, linked_instances: Vec<String>,
} }
@ -64,12 +93,31 @@ struct InstanceDetails {
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> { async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
dbg!(domain); dbg!(domain);
let client = Client::default();
let timeout = Duration::from_secs(10);
let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain); let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
let node_info: NodeInfo = reqwest::get(&node_info_url).await?.json().await?; let node_info: NodeInfo = client
.get(&node_info_url)
.timeout(timeout)
.send()
.await?
.json()
.await?;
let site_info_url = format!("https://{}/api/v2/site", domain); let site_info_url = format!("https://{}/api/v2/site", domain);
let site_info: GetSiteResponse = reqwest::get(&site_info_url).await?.json().await?; let site_info: GetSiteResponse = client
.get(&site_info_url)
.timeout(timeout)
.send()
.await?
.json()
.await?;
let linked_instances = site_info
.federated_instances
.map(|f| f.linked)
.unwrap_or(vec![]);
Ok(InstanceDetails { Ok(InstanceDetails {
domain: domain.to_owned(), domain: domain.to_owned(),
name: site_info.site_view.site.name, name: site_info.site_view.site.name,
@ -79,9 +127,7 @@ async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error>
users_active_halfyear: node_info.usage.users.active_halfyear, users_active_halfyear: node_info.usage.users.active_halfyear,
users_active_month: node_info.usage.users.active_month, users_active_month: node_info.usage.users.active_month,
open_registrations: node_info.open_registrations, open_registrations: node_info.open_registrations,
linked_instances: site_info linked_instances_count: linked_instances.len() as i32,
.federated_instances linked_instances,
.map(|f| f.linked)
.unwrap_or(vec![]),
}) })
} }