Crawl for number of online and total users

This commit is contained in:
Felix Ableitner 2021-03-09 19:45:49 +01:00
parent 21be5c58ee
commit 45ec994271
6 changed files with 81 additions and 36 deletions

1
Cargo.lock generated
View file

@ -418,7 +418,6 @@ dependencies = [
"reqwest", "reqwest",
"serde", "serde",
"tokio", "tokio",
"url",
] ]
[[package]] [[package]]

View file

@ -6,7 +6,6 @@ edition = "2018"
[dependencies] [dependencies]
reqwest = { version = "0.10.10", features = ["json"] } reqwest = { version = "0.10.10", features = ["json"] }
url = "2.2.1"
serde = { version = "1.0.123", features = ["derive"] } serde = { version = "1.0.123", features = ["derive"] }
anyhow = "1.0.38" anyhow = "1.0.38"
tokio = { version = "0.2.25", features = ["full"] } tokio = { version = "0.2.25", features = ["full"] }

View file

@ -1,14 +1,14 @@
use serde::Deserialize; use serde::Deserialize;
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug, Clone)]
pub struct GetSiteResponse { pub struct GetSiteResponse {
pub online: usize, pub online: usize,
pub federated_instances: Option<FederatedInstances>, pub federated_instances: Option<FederatedInstances>,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug, Clone)]
pub struct FederatedInstances { pub struct FederatedInstances {
pub linked: Vec<String>, pub linked: Vec<String>,
pub allowed: Option<Vec<String>>, pub allowed: Option<Vec<String>>,
pub blocked: Option<Vec<String>>, pub blocked: Option<Vec<String>>,
} }

View file

@ -1,2 +1,2 @@
pub mod node_info;
pub mod federated_instances; pub mod federated_instances;
pub mod node_info;

View file

@ -1,18 +1,65 @@
use anyhow::Error; use anyhow::Error;
use url::Url;
use lemmy_stats_crawler::node_info::NodeInfo;
use lemmy_stats_crawler::federated_instances::GetSiteResponse; use lemmy_stats_crawler::federated_instances::GetSiteResponse;
use lemmy_stats_crawler::node_info::NodeInfo;
/// Running totals summed over every instance that was crawled successfully.
///
/// Starts at zero via `Default`; `main` adds one instance's numbers at a time
/// and prints the final value with `dbg!`.
#[derive(Debug, Default)]
struct TotalStats {
    /// Sum of the total user counts reported by the crawled instances.
    users: i64,
    /// Sum of the online user counts reported by the crawled instances.
    online_users: i32,
}
#[tokio::main] #[tokio::main]
pub async fn main() -> Result<(), Error> { pub async fn main() -> Result<(), Error> {
let url = Url::parse("https://lemmy.ml/nodeinfo/2.0.json")?; let mut pending_instances = vec!["lemmy.ml".to_string()];
let node_info: NodeInfo = reqwest::get(url).await?.json().await?; let mut crawled_instances = vec![];
let mut total_stats = TotalStats::default();
while let Some(pi) = pending_instances.iter().next() {
crawled_instances.push(pi.to_owned());
let instance_details = fetch_instance_details(&pi).await.ok();
pending_instances = pending_instances
.iter()
.filter(|i| i != &pi)
.map(|i| i.to_owned())
.collect();
dbg!(node_info); if let Some(details) = instance_details {
total_stats.online_users += details.online_users;
let url = Url::parse("https://lemmy.ml/api/v2/site")?; total_stats.users += details.total_users;
let site_info: GetSiteResponse = reqwest::get(url).await?.json().await?; // remove all which are in crawled_instances
for ci in details.linked_instances {
dbg!(site_info); if !crawled_instances.contains(&ci) {
pending_instances.push(ci);
}
}
}
}
dbg!(total_stats);
Ok(()) Ok(())
} }
/// Statistics and federation links collected for a single instance by
/// `fetch_instance_details`.
struct InstanceDetails {
    /// Domain the details were fetched from.
    domain: String,
    /// Online user count, taken from the `online` field of the site response.
    online_users: i32,
    /// Total user count, taken from `usage.users.total` of the nodeinfo response.
    total_users: i64,
    /// Linked instances listed by the site response; empty when the response
    /// carries no federation data.
    linked_instances: Vec<String>,
}
/// Fetches the user statistics and federation links of one instance.
///
/// Two JSON documents are requested from `domain` over HTTPS:
/// `/nodeinfo/2.0.json` for the total user count, and `/api/v2/site` for the
/// online user count and the list of linked instances.
///
/// # Errors
///
/// Returns an error if either request fails, if a response body cannot be
/// deserialized into the expected type, or if the reported online user count
/// does not fit into an `i32`.
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
    // `TryFrom` is not part of the 2018-edition prelude, so bring it in locally.
    use std::convert::TryFrom;

    dbg!(domain);

    let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
    let node_info: NodeInfo = reqwest::get(&node_info_url).await?.json().await?;

    let site_info_url = format!("https://{}/api/v2/site", domain);
    let site_info: GetSiteResponse = reqwest::get(&site_info_url).await?.json().await?;

    Ok(InstanceDetails {
        domain: domain.to_owned(),
        // Checked conversion: an `as` cast would silently wrap an oversized
        // count reported by a remote server into a bogus (possibly negative) value.
        online_users: i32::try_from(site_info.online)?,
        total_users: node_info.usage.users.total,
        // An instance without federation data simply has no linked instances.
        linked_instances: site_info
            .federated_instances
            .map(|f| f.linked)
            .unwrap_or_default(),
    })
}

View file

@ -3,31 +3,31 @@ use serde::Deserialize;
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct NodeInfo { pub struct NodeInfo {
pub version: String, pub version: String,
pub software: NodeInfoSoftware, pub software: NodeInfoSoftware,
pub protocols: Vec<String>, pub protocols: Vec<String>,
pub usage: NodeInfoUsage, pub usage: NodeInfoUsage,
pub open_registrations: bool, pub open_registrations: bool,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
pub struct NodeInfoSoftware { pub struct NodeInfoSoftware {
pub name: String, pub name: String,
pub version: String, pub version: String,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct NodeInfoUsage { pub struct NodeInfoUsage {
pub users: NodeInfoUsers, pub users: NodeInfoUsers,
pub local_posts: i64, pub local_posts: i64,
pub local_comments: i64, pub local_comments: i64,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct NodeInfoUsers { pub struct NodeInfoUsers {
pub total: i64, pub total: i64,
pub active_halfyear: i64, pub active_halfyear: i64,
pub active_month: i64, pub active_month: i64,
} }