Merge pull request 'Add min version check to avoid including old instances' (#9) from min-version into main
Reviewed-on: https://yerbamate.ml/LemmyNet/lemmy-stats-crawler/pulls/9
This commit is contained in:
commit
8509c19f50
3 changed files with 44 additions and 4 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
@ -418,7 +418,9 @@ dependencies = [
|
|||
"anyhow",
|
||||
"clap",
|
||||
"futures",
|
||||
"once_cell",
|
||||
"reqwest",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
|
@ -521,9 +523,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.7.2"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
|
||||
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
|
@ -680,6 +682,12 @@ dependencies = [
|
|||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d65bd28f48be7196d222d95b9243287f48d27aca604e08497513019ff0502cc4"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.124"
|
||||
|
|
|
@ -12,3 +12,5 @@ tokio = { version = "0.2.25", features = ["rt-threaded", "macros"] }
|
|||
futures = "0.3.13"
|
||||
serde_json = "1.0.64"
|
||||
clap = "2.33.3"
|
||||
semver = "1.0.7"
|
||||
once_cell = "1.10.0"
|
||||
|
|
34
src/crawl.rs
34
src/crawl.rs
|
@ -4,10 +4,14 @@ use crate::REQUEST_TIMEOUT;
|
|||
use anyhow::anyhow;
|
||||
use anyhow::Error;
|
||||
use futures::try_join;
|
||||
use once_cell::sync::Lazy;
|
||||
use reqwest::Client;
|
||||
use semver::Version;
|
||||
use serde::Serialize;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
/// Shared HTTP client, built with `Client::default` the first time it is dereferenced.
static CLIENT: Lazy<Client> = Lazy::new(Client::default);
|
||||
|
||||
pub async fn crawl(
|
||||
start_instances: Vec<String>,
|
||||
exclude: Vec<String>,
|
||||
|
@ -17,6 +21,7 @@ pub async fn crawl(
|
|||
.iter()
|
||||
.map(|s| CrawlInstance::new(s.to_string(), 0))
|
||||
.collect();
|
||||
let min_lemmy_version = min_lemmy_version().await?;
|
||||
let mut crawled_instances = vec![];
|
||||
let mut instance_details = vec![];
|
||||
let mut failed_instances = 0;
|
||||
|
@ -25,7 +30,7 @@ pub async fn crawl(
|
|||
if current_instance.depth > max_depth || exclude.contains(¤t_instance.domain) {
|
||||
continue;
|
||||
}
|
||||
match fetch_instance_details(¤t_instance.domain).await {
|
||||
match fetch_instance_details(¤t_instance.domain, &min_lemmy_version).await {
|
||||
Ok(details) => {
|
||||
instance_details.push(details.to_owned());
|
||||
for i in details.linked_instances {
|
||||
|
@ -81,7 +86,10 @@ impl CrawlInstance {
|
|||
}
|
||||
}
|
||||
|
||||
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
|
||||
async fn fetch_instance_details(
|
||||
domain: &str,
|
||||
min_lemmy_version: &Version,
|
||||
) -> Result<InstanceDetails, Error> {
|
||||
let client = Client::default();
|
||||
|
||||
let node_info_url = format!("https://{}/nodeinfo/2.0.json", domain);
|
||||
|
@ -104,6 +112,13 @@ async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error>
|
|||
site_info_request_v3
|
||||
)?;
|
||||
let node_info: NodeInfo = node_info.json().await?;
|
||||
if node_info.software.name != "lemmy" {
|
||||
return Err(anyhow!("not a lemmy instance"));
|
||||
}
|
||||
let version = Version::parse(&node_info.software.version)?;
|
||||
if &version < min_lemmy_version {
|
||||
return Err(anyhow!("lemmy version is too old ({})", version));
|
||||
}
|
||||
let site_info_v2 = site_info_v2.json::<GetSiteResponse>().await.ok();
|
||||
let site_info_v3 = site_info_v3.json::<GetSiteResponse>().await.ok();
|
||||
let mut site_info: GetSiteResponse = if let Some(site_info_v2) = site_info_v2 {
|
||||
|
@ -148,3 +163,18 @@ async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error>
|
|||
linked_instances,
|
||||
})
|
||||
}
|
||||
|
||||
/// calculate minimum allowed lemmy version based on current version. in case of current version
|
||||
/// 0.16.3, the minimum from this function is 0.15.3. this is to avoid rejecting all instances on
|
||||
/// the previous version when a major lemmy release is published.
|
||||
async fn min_lemmy_version() -> Result<Version, Error> {
|
||||
let lemmy_version_url = "https://raw.githubusercontent.com/LemmyNet/lemmy-ansible/main/VERSION";
|
||||
let req = CLIENT
|
||||
.get(lemmy_version_url)
|
||||
.timeout(REQUEST_TIMEOUT)
|
||||
.send()
|
||||
.await?;
|
||||
let mut version = Version::parse(req.text().await?.trim())?;
|
||||
version.minor -= 1;
|
||||
Ok(version)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue