Improve output, refactor code, add timeout
This commit is contained in:
parent
c45771d9e7
commit
27c23bd5d8
2 changed files with 71 additions and 27 deletions
|
@ -17,12 +17,10 @@ pub struct FederatedInstances {
|
|||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct SiteView {
|
||||
pub site: Site,
|
||||
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct Site {
|
||||
pub name: String,
|
||||
pub icon: String,
|
||||
|
||||
}
|
||||
|
|
96
src/main.rs
96
src/main.rs
|
@ -1,20 +1,56 @@
|
|||
use anyhow::Error;
|
||||
use lemmy_stats_crawler::federated_instances::GetSiteResponse;
|
||||
use lemmy_stats_crawler::node_info::NodeInfo;
|
||||
use reqwest::Client;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
struct TotalStats {
|
||||
users: i64,
|
||||
online_users: i32,
|
||||
}
|
||||
use tokio::time::Duration;
|
||||
|
||||
#[tokio::main]
|
||||
pub async fn main() -> Result<(), Error> {
|
||||
let mut pending_instances = vec!["lemmy.ml".to_string()];
|
||||
let start_instances = vec!["lemmy.ml".to_string()];
|
||||
let instance_details = crawl(start_instances).await?;
|
||||
let instance_details = cleanup(instance_details);
|
||||
let total_stats = aggregate(instance_details);
|
||||
|
||||
print!("{}", serde_json::to_string(&total_stats)?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TotalStats {
|
||||
total_users: i64,
|
||||
total_online_users: i32,
|
||||
instance_details: Vec<InstanceDetails>,
|
||||
}
|
||||
|
||||
fn aggregate(instance_details: Vec<InstanceDetails>) -> TotalStats {
|
||||
let mut total_users = 0;
|
||||
let mut total_online_users = 0;
|
||||
for i in &instance_details {
|
||||
total_users += i.total_users;
|
||||
total_online_users += i.online_users;
|
||||
}
|
||||
TotalStats {
|
||||
total_users,
|
||||
total_online_users,
|
||||
instance_details,
|
||||
}
|
||||
}
|
||||
|
||||
/// Prepares crawled instance details for output.
///
/// Drops every instance whose `open_registrations` flag is `false`, then
/// sorts the remaining ones by `users_active_halfyear` in descending order.
/// The sort is stable, so instances with equal activity keep their relative
/// crawl order.
fn cleanup(mut instance_details: Vec<InstanceDetails>) -> Vec<InstanceDetails> {
    // The vector is owned, so filter it in place instead of cloning every
    // surviving element into a second allocation.
    instance_details.retain(|details| details.open_registrations);
    instance_details.sort_by(|a, b| b.users_active_halfyear.cmp(&a.users_active_halfyear));
    instance_details
}
|
||||
|
||||
async fn crawl(start_instances: Vec<String>) -> Result<Vec<InstanceDetails>, Error> {
|
||||
let mut pending_instances = start_instances;
|
||||
let mut crawled_instances = vec![];
|
||||
let mut instance_details = vec![];
|
||||
let mut total_stats = TotalStats::default();
|
||||
while let Some(pi) = pending_instances.iter().next() {
|
||||
crawled_instances.push(pi.to_owned());
|
||||
let current_instance_details = fetch_instance_details(&pi).await.ok();
|
||||
|
@ -26,8 +62,6 @@ pub async fn main() -> Result<(), Error> {
|
|||
|
||||
if let Some(details) = current_instance_details {
|
||||
instance_details.push(details.to_owned());
|
||||
total_stats.online_users += details.online_users;
|
||||
total_stats.users += details.total_users;
|
||||
// add all unknown, linked instances to pending
|
||||
for ci in details.linked_instances {
|
||||
if !crawled_instances.contains(&ci) {
|
||||
|
@ -36,15 +70,8 @@ pub async fn main() -> Result<(), Error> {
|
|||
}
|
||||
}
|
||||
}
|
||||
instance_details = instance_details
|
||||
.iter()
|
||||
.filter(|i| i.open_registrations)
|
||||
.map(|i| i.to_owned())
|
||||
.collect();
|
||||
instance_details.sort_by(|a, b| b.users_active_halfyear.cmp(&a.users_active_halfyear));
|
||||
print!("{}", serde_json::to_string(&instance_details)?);
|
||||
dbg!(total_stats);
|
||||
Ok(())
|
||||
|
||||
Ok(instance_details)
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
|
@ -57,6 +84,8 @@ struct InstanceDetails {
|
|||
users_active_halfyear: i64,
|
||||
users_active_month: i64,
|
||||
open_registrations: bool,
|
||||
linked_instances_count: i32,
|
||||
// The following fields are only used for aggregation, but not shown in output
|
||||
#[serde(skip)]
|
||||
linked_instances: Vec<String>,
|
||||
}
|
||||
|
@ -64,12 +93,31 @@ struct InstanceDetails {
|
|||
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
|
||||
dbg!(domain);
|
||||
|
||||
let client = Client::default();
|
||||
let timeout = Duration::from_secs(10);
|
||||
|
||||
let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
|
||||
let node_info: NodeInfo = reqwest::get(&node_info_url).await?.json().await?;
|
||||
let node_info: NodeInfo = client
|
||||
.get(&node_info_url)
|
||||
.timeout(timeout)
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
.await?;
|
||||
|
||||
let site_info_url = format!("https://{}/api/v2/site", domain);
|
||||
let site_info: GetSiteResponse = reqwest::get(&site_info_url).await?.json().await?;
|
||||
let site_info: GetSiteResponse = client
|
||||
.get(&site_info_url)
|
||||
.timeout(timeout)
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
.await?;
|
||||
|
||||
let linked_instances = site_info
|
||||
.federated_instances
|
||||
.map(|f| f.linked)
|
||||
.unwrap_or(vec![]);
|
||||
Ok(InstanceDetails {
|
||||
domain: domain.to_owned(),
|
||||
name: site_info.site_view.site.name,
|
||||
|
@ -79,9 +127,7 @@ async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error>
|
|||
users_active_halfyear: node_info.usage.users.active_halfyear,
|
||||
users_active_month: node_info.usage.users.active_month,
|
||||
open_registrations: node_info.open_registrations,
|
||||
linked_instances: site_info
|
||||
.federated_instances
|
||||
.map(|f| f.linked)
|
||||
.unwrap_or(vec![]),
|
||||
linked_instances_count: linked_instances.len() as i32,
|
||||
linked_instances,
|
||||
})
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue