Output instance info as json

This commit is contained in:
Felix Ableitner 2021-03-10 18:06:21 +01:00
parent cb4e6e221a
commit 9cbed8f2a9
4 changed files with 38 additions and 3 deletions

1
Cargo.lock generated
View file

@ -417,6 +417,7 @@ dependencies = [
"futures", "futures",
"reqwest", "reqwest",
"serde", "serde",
"serde_json",
"tokio", "tokio",
] ]

View file

@ -10,3 +10,4 @@ serde = { version = "1.0.123", features = ["derive"] }
anyhow = "1.0.38" anyhow = "1.0.38"
tokio = { version = "0.2.25", features = ["full"] } tokio = { version = "0.2.25", features = ["full"] }
futures = "0.3.13" futures = "0.3.13"
serde_json = "1.0.64"

15
README.md Normal file
View file

@ -0,0 +1,15 @@
# Lemmy-Stats-Crawler
Crawls Lemmy instances through their nodeinfo and API endpoints to generate a list of instances together with aggregate statistics.
## Usage
For testing:
```
cargo run
```
For production (discards the debug output written to stderr, leaving only the JSON on stdout):
```
cargo run 2>/dev/null
```

View file

@ -1,6 +1,7 @@
use anyhow::Error; use anyhow::Error;
use lemmy_stats_crawler::federated_instances::GetSiteResponse; use lemmy_stats_crawler::federated_instances::GetSiteResponse;
use lemmy_stats_crawler::node_info::NodeInfo; use lemmy_stats_crawler::node_info::NodeInfo;
use serde::Serialize;
#[derive(Default, Debug)] #[derive(Default, Debug)]
struct TotalStats { struct TotalStats {
@ -12,20 +13,22 @@ struct TotalStats {
pub async fn main() -> Result<(), Error> { pub async fn main() -> Result<(), Error> {
let mut pending_instances = vec!["lemmy.ml".to_string()]; let mut pending_instances = vec!["lemmy.ml".to_string()];
let mut crawled_instances = vec![]; let mut crawled_instances = vec![];
let mut instance_details = vec![];
let mut total_stats = TotalStats::default(); let mut total_stats = TotalStats::default();
while let Some(pi) = pending_instances.iter().next() { while let Some(pi) = pending_instances.iter().next() {
crawled_instances.push(pi.to_owned()); crawled_instances.push(pi.to_owned());
let instance_details = fetch_instance_details(&pi).await.ok(); let current_instance_details = fetch_instance_details(&pi).await.ok();
pending_instances = pending_instances pending_instances = pending_instances
.iter() .iter()
.filter(|i| i != &pi) .filter(|i| i != &pi)
.map(|i| i.to_owned()) .map(|i| i.to_owned())
.collect(); .collect();
if let Some(details) = instance_details { if let Some(details) = current_instance_details {
instance_details.push(details.to_owned());
total_stats.online_users += details.online_users; total_stats.online_users += details.online_users;
total_stats.users += details.total_users; total_stats.users += details.total_users;
// remove all which are in crawled_instances // add all unknown, linked instances to pending
for ci in details.linked_instances { for ci in details.linked_instances {
if !crawled_instances.contains(&ci) { if !crawled_instances.contains(&ci) {
pending_instances.push(ci); pending_instances.push(ci);
@ -33,14 +36,26 @@ pub async fn main() -> Result<(), Error> {
} }
} }
} }
instance_details = instance_details
.iter()
.filter(|i| i.open_registrations)
.map(|i| i.to_owned())
.collect();
instance_details.sort_by(|a, b| b.users_active_halfyear.cmp(&a.users_active_halfyear));
print!("{}", serde_json::to_string(&instance_details)?);
dbg!(total_stats); dbg!(total_stats);
Ok(()) Ok(())
} }
#[derive(Serialize, Clone)]
struct InstanceDetails { struct InstanceDetails {
domain: String, domain: String,
online_users: i32, online_users: i32,
total_users: i64, total_users: i64,
users_active_halfyear: i64,
users_active_month: i64,
open_registrations: bool,
#[serde(skip)]
linked_instances: Vec<String>, linked_instances: Vec<String>,
} }
@ -57,6 +72,9 @@ async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error>
domain: domain.to_owned(), domain: domain.to_owned(),
online_users: site_info.online as i32, online_users: site_info.online as i32,
total_users: node_info.usage.users.total, total_users: node_info.usage.users.total,
users_active_halfyear: node_info.usage.users.active_halfyear,
users_active_month: node_info.usage.users.active_month,
open_registrations: node_info.open_registrations,
linked_instances: site_info linked_instances: site_info
.federated_instances .federated_instances
.map(|f| f.linked) .map(|f| f.linked)