Crawl for number of online and total users
This commit is contained in:
parent
21be5c58ee
commit
45ec994271
6 changed files with 81 additions and 36 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -418,7 +418,6 @@ dependencies = [
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
"tokio",
|
"tokio",
|
||||||
"url",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -6,7 +6,6 @@ edition = "2018"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
reqwest = { version = "0.10.10", features = ["json"] }
|
reqwest = { version = "0.10.10", features = ["json"] }
|
||||||
url = "2.2.1"
|
|
||||||
serde = { version = "1.0.123", features = ["derive"] }
|
serde = { version = "1.0.123", features = ["derive"] }
|
||||||
anyhow = "1.0.38"
|
anyhow = "1.0.38"
|
||||||
tokio = { version = "0.2.25", features = ["full"] }
|
tokio = { version = "0.2.25", features = ["full"] }
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
pub struct GetSiteResponse {
|
pub struct GetSiteResponse {
|
||||||
pub online: usize,
|
pub online: usize,
|
||||||
pub federated_instances: Option<FederatedInstances>,
|
pub federated_instances: Option<FederatedInstances>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Debug)]
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
pub struct FederatedInstances {
|
pub struct FederatedInstances {
|
||||||
pub linked: Vec<String>,
|
pub linked: Vec<String>,
|
||||||
pub allowed: Option<Vec<String>>,
|
pub allowed: Option<Vec<String>>,
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
pub mod node_info;
|
|
||||||
pub mod federated_instances;
|
pub mod federated_instances;
|
||||||
|
pub mod node_info;
|
||||||
|
|
67
src/main.rs
67
src/main.rs
|
@ -1,18 +1,65 @@
|
||||||
use anyhow::Error;
|
use anyhow::Error;
|
||||||
use url::Url;
|
|
||||||
use lemmy_stats_crawler::node_info::NodeInfo;
|
|
||||||
use lemmy_stats_crawler::federated_instances::GetSiteResponse;
|
use lemmy_stats_crawler::federated_instances::GetSiteResponse;
|
||||||
|
use lemmy_stats_crawler::node_info::NodeInfo;
|
||||||
|
|
||||||
|
/// Running totals accumulated over every instance that was crawled successfully.
///
/// `Default` yields all-zero counters, which is the starting state of a crawl;
/// `Debug` is what allows the totals to be printed with `dbg!`.
#[derive(Default, Debug)]
struct TotalStats {
    /// Sum of the total user counts reported by the crawled instances.
    users: i64,
    /// Sum of the online user counts reported by the crawled instances.
    online_users: i32,
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
pub async fn main() -> Result<(), Error> {
|
pub async fn main() -> Result<(), Error> {
|
||||||
let url = Url::parse("https://lemmy.ml/nodeinfo/2.0.json")?;
|
let mut pending_instances = vec!["lemmy.ml".to_string()];
|
||||||
let node_info: NodeInfo = reqwest::get(url).await?.json().await?;
|
let mut crawled_instances = vec![];
|
||||||
|
let mut total_stats = TotalStats::default();
|
||||||
|
while let Some(pi) = pending_instances.iter().next() {
|
||||||
|
crawled_instances.push(pi.to_owned());
|
||||||
|
let instance_details = fetch_instance_details(&pi).await.ok();
|
||||||
|
pending_instances = pending_instances
|
||||||
|
.iter()
|
||||||
|
.filter(|i| i != &pi)
|
||||||
|
.map(|i| i.to_owned())
|
||||||
|
.collect();
|
||||||
|
|
||||||
dbg!(node_info);
|
if let Some(details) = instance_details {
|
||||||
|
total_stats.online_users += details.online_users;
|
||||||
let url = Url::parse("https://lemmy.ml/api/v2/site")?;
|
total_stats.users += details.total_users;
|
||||||
let site_info: GetSiteResponse = reqwest::get(url).await?.json().await?;
|
// remove all which are in crawled_instances
|
||||||
|
for ci in details.linked_instances {
|
||||||
dbg!(site_info);
|
if !crawled_instances.contains(&ci) {
|
||||||
|
pending_instances.push(ci);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dbg!(total_stats);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// What the crawler learned about a single instance.
#[derive(Debug)]
struct InstanceDetails {
    /// Domain the details were fetched from.
    domain: String,
    /// Number of users reported as online by the instance.
    online_users: i32,
    /// Total number of users reported by the instance.
    total_users: i64,
    /// Domains this instance reports as linked; empty when it reports none.
    linked_instances: Vec<String>,
}
|
||||||
|
|
||||||
|
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
|
||||||
|
dbg!(domain);
|
||||||
|
|
||||||
|
let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
|
||||||
|
let node_info: NodeInfo = reqwest::get(&node_info_url).await?.json().await?;
|
||||||
|
|
||||||
|
let site_info_url = format!("https://{}/api/v2/site", domain);
|
||||||
|
let site_info: GetSiteResponse = reqwest::get(&site_info_url).await?.json().await?;
|
||||||
|
|
||||||
|
Ok(InstanceDetails {
|
||||||
|
domain: domain.to_owned(),
|
||||||
|
online_users: site_info.online as i32,
|
||||||
|
total_users: node_info.usage.users.total,
|
||||||
|
linked_instances: site_info
|
||||||
|
.federated_instances
|
||||||
|
.map(|f| f.linked)
|
||||||
|
.unwrap_or(vec![]),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue