Merge pull request 'Add min version check to avoid including old instances' (#9) from min-version into main
Reviewed-on: https://yerbamate.ml/LemmyNet/lemmy-stats-crawler/pulls/9
This commit is contained in:
commit
8509c19f50
3 changed files with 44 additions and 4 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
@ -418,7 +418,9 @@ dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap",
|
"clap",
|
||||||
"futures",
|
"futures",
|
||||||
|
"once_cell",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
|
"semver",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
@ -521,9 +523,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "once_cell"
|
name = "once_cell"
|
||||||
version = "1.7.2"
|
version = "1.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
|
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
|
@ -680,6 +682,12 @@ dependencies = [
|
||||||
"untrusted",
|
"untrusted",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "semver"
|
||||||
|
version = "1.0.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d65bd28f48be7196d222d95b9243287f48d27aca604e08497513019ff0502cc4"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde"
|
name = "serde"
|
||||||
version = "1.0.124"
|
version = "1.0.124"
|
||||||
|
|
|
@ -12,3 +12,5 @@ tokio = { version = "0.2.25", features = ["rt-threaded", "macros"] }
|
||||||
futures = "0.3.13"
|
futures = "0.3.13"
|
||||||
serde_json = "1.0.64"
|
serde_json = "1.0.64"
|
||||||
clap = "2.33.3"
|
clap = "2.33.3"
|
||||||
|
semver = "1.0.7"
|
||||||
|
once_cell = "1.10.0"
|
||||||
|
|
34
src/crawl.rs
34
src/crawl.rs
|
@ -4,10 +4,14 @@ use crate::REQUEST_TIMEOUT;
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use anyhow::Error;
|
use anyhow::Error;
|
||||||
use futures::try_join;
|
use futures::try_join;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
use reqwest::Client;
|
use reqwest::Client;
|
||||||
|
use semver::Version;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
|
static CLIENT: Lazy<Client> = Lazy::new(Client::default);
|
||||||
|
|
||||||
pub async fn crawl(
|
pub async fn crawl(
|
||||||
start_instances: Vec<String>,
|
start_instances: Vec<String>,
|
||||||
exclude: Vec<String>,
|
exclude: Vec<String>,
|
||||||
|
@ -17,6 +21,7 @@ pub async fn crawl(
|
||||||
.iter()
|
.iter()
|
||||||
.map(|s| CrawlInstance::new(s.to_string(), 0))
|
.map(|s| CrawlInstance::new(s.to_string(), 0))
|
||||||
.collect();
|
.collect();
|
||||||
|
let min_lemmy_version = min_lemmy_version().await?;
|
||||||
let mut crawled_instances = vec![];
|
let mut crawled_instances = vec![];
|
||||||
let mut instance_details = vec![];
|
let mut instance_details = vec![];
|
||||||
let mut failed_instances = 0;
|
let mut failed_instances = 0;
|
||||||
|
@ -25,7 +30,7 @@ pub async fn crawl(
|
||||||
if current_instance.depth > max_depth || exclude.contains(¤t_instance.domain) {
|
if current_instance.depth > max_depth || exclude.contains(¤t_instance.domain) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
match fetch_instance_details(¤t_instance.domain).await {
|
match fetch_instance_details(¤t_instance.domain, &min_lemmy_version).await {
|
||||||
Ok(details) => {
|
Ok(details) => {
|
||||||
instance_details.push(details.to_owned());
|
instance_details.push(details.to_owned());
|
||||||
for i in details.linked_instances {
|
for i in details.linked_instances {
|
||||||
|
@ -81,7 +86,10 @@ impl CrawlInstance {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
|
async fn fetch_instance_details(
|
||||||
|
domain: &str,
|
||||||
|
min_lemmy_version: &Version,
|
||||||
|
) -> Result<InstanceDetails, Error> {
|
||||||
let client = Client::default();
|
let client = Client::default();
|
||||||
|
|
||||||
let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
|
let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
|
||||||
|
@ -104,6 +112,13 @@ async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error>
|
||||||
site_info_request_v3
|
site_info_request_v3
|
||||||
)?;
|
)?;
|
||||||
let node_info: NodeInfo = node_info.json().await?;
|
let node_info: NodeInfo = node_info.json().await?;
|
||||||
|
if node_info.software.name != "lemmy" {
|
||||||
|
return Err(anyhow!("not a lemmy instance"));
|
||||||
|
}
|
||||||
|
let version = Version::parse(&node_info.software.version)?;
|
||||||
|
if &version < min_lemmy_version {
|
||||||
|
return Err(anyhow!("lemmy version is too old ({})", version));
|
||||||
|
}
|
||||||
let site_info_v2 = site_info_v2.json::<GetSiteResponse>().await.ok();
|
let site_info_v2 = site_info_v2.json::<GetSiteResponse>().await.ok();
|
||||||
let site_info_v3 = site_info_v3.json::<GetSiteResponse>().await.ok();
|
let site_info_v3 = site_info_v3.json::<GetSiteResponse>().await.ok();
|
||||||
let mut site_info: GetSiteResponse = if let Some(site_info_v2) = site_info_v2 {
|
let mut site_info: GetSiteResponse = if let Some(site_info_v2) = site_info_v2 {
|
||||||
|
@ -148,3 +163,18 @@ async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error>
|
||||||
linked_instances,
|
linked_instances,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// calculate minimum allowed lemmy version based on current version. in case of current version
|
||||||
|
/// 0.16.3, the minimum from this function is 0.15.3. this is to avoid rejecting all instances on
|
||||||
|
/// the previous version when a major lemmy release is published.
|
||||||
|
async fn min_lemmy_version() -> Result<Version, Error> {
|
||||||
|
let lemmy_version_url = "https://raw.githubusercontent.com/LemmyNet/lemmy-ansible/main/VERSION";
|
||||||
|
let req = CLIENT
|
||||||
|
.get(lemmy_version_url)
|
||||||
|
.timeout(REQUEST_TIMEOUT)
|
||||||
|
.send()
|
||||||
|
.await?;
|
||||||
|
let mut version = Version::parse(req.text().await?.trim())?;
|
||||||
|
version.minor -= 1;
|
||||||
|
Ok(version)
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue