Crawl for number of online and total users

This commit is contained in:
Felix Ableitner 2021-03-09 19:45:49 +01:00
parent 21be5c58ee
commit 45ec994271
6 changed files with 81 additions and 36 deletions

1
Cargo.lock generated
View file

@ -418,7 +418,6 @@ dependencies = [
"reqwest",
"serde",
"tokio",
"url",
]
[[package]]

View file

@ -6,7 +6,6 @@ edition = "2018"
[dependencies]
reqwest = { version = "0.10.10", features = ["json"] }
url = "2.2.1"
serde = { version = "1.0.123", features = ["derive"] }
anyhow = "1.0.38"
tokio = { version = "0.2.25", features = ["full"] }

View file

@ -1,12 +1,12 @@
use serde::Deserialize;
#[derive(Deserialize, Debug)]
#[derive(Deserialize, Debug, Clone)]
pub struct GetSiteResponse {
pub online: usize,
pub federated_instances: Option<FederatedInstances>,
}
#[derive(Deserialize, Debug)]
#[derive(Deserialize, Debug, Clone)]
pub struct FederatedInstances {
pub linked: Vec<String>,
pub allowed: Option<Vec<String>>,

View file

@ -1,2 +1,2 @@
pub mod node_info;
pub mod federated_instances;
pub mod node_info;

View file

@ -1,18 +1,65 @@
use anyhow::Error;
use lemmy_stats_crawler::federated_instances::GetSiteResponse;
use lemmy_stats_crawler::node_info::NodeInfo;
use url::Url;
/// Aggregate user counts accumulated across every instance the crawler visits.
#[derive(Default, Debug)]
struct TotalStats {
    // Sum of each instance's total registered users (from NodeInfo
    // `usage.users.total`).
    users: i64,
    // Sum of each instance's currently-online count (from the site API's
    // `online` field).
    online_users: i32,
}
/// Crawl the Lemmy federation graph starting from lemmy.ml and print the
/// total and online user counts over all reachable instances.
#[tokio::main]
pub async fn main() -> Result<(), Error> {
    // Domains discovered but not yet fetched; seeded with the start instance.
    let mut pending_instances = vec!["lemmy.ml".to_string()];
    // Domains already fetched (or attempted), so they are never re-queued.
    let mut crawled_instances: Vec<String> = vec![];
    let mut total_stats = TotalStats::default();

    // `pop()` takes an owned domain off the worklist, replacing the original
    // `iter().next()` + filter/rebuild, which both conflicted with the borrow
    // checker and rebuilt the whole vector on every iteration. Stray leftover
    // statements from the pre-refactor version (a second fetch of lemmy.ml and
    // intermediate `dbg!` dumps) are removed.
    while let Some(domain) = pending_instances.pop() {
        // Mark as crawled before fetching so a failing instance is not retried
        // when another instance links back to it.
        crawled_instances.push(domain.clone());

        // Unreachable or schema-incompatible instances are simply skipped.
        if let Ok(details) = fetch_instance_details(&domain).await {
            total_stats.online_users += details.online_users;
            total_stats.users += details.total_users;

            // Queue newly discovered instances. Also check the pending list:
            // the original only checked `crawled_instances`, so the same
            // domain could be queued twice and its users counted twice.
            for linked in details.linked_instances {
                if !crawled_instances.contains(&linked)
                    && !pending_instances.contains(&linked)
                {
                    pending_instances.push(linked);
                }
            }
        }
    }
    dbg!(total_stats);
    Ok(())
}
/// Per-instance statistics gathered by `fetch_instance_details`.
struct InstanceDetails {
    // Bare hostname of the instance, e.g. "lemmy.ml".
    // NOTE(review): currently only stored, not read anywhere in this file.
    domain: String,
    // Currently-online users, from the site API's `online` field.
    online_users: i32,
    // Total registered users, from NodeInfo `usage.users.total`.
    total_users: i64,
    // Domains of federated instances; empty when federation info is absent.
    linked_instances: Vec<String>,
}
/// Fetch user statistics for one instance by querying its NodeInfo endpoint
/// and its Lemmy site API.
///
/// # Errors
/// Returns an error if either HTTP request fails or a response body does not
/// match the expected JSON schema.
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
    // Acts as a crawl progress indicator (prints file:line plus the domain).
    dbg!(domain);
    let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
    let node_info: NodeInfo = reqwest::get(&node_info_url).await?.json().await?;
    let site_info_url = format!("https://{}/api/v2/site", domain);
    let site_info: GetSiteResponse = reqwest::get(&site_info_url).await?.json().await?;
    Ok(InstanceDetails {
        domain: domain.to_owned(),
        // `online` is a usize; real-world counts are far below i32::MAX,
        // but this cast would silently truncate if that ever changed.
        online_users: site_info.online as i32,
        total_users: node_info.usage.users.total,
        linked_instances: site_info
            .federated_instances
            .map(|f| f.linked)
            // Idiomatic replacement for `unwrap_or(vec![])` (clippy or_fun_call).
            .unwrap_or_default(),
    })
}