From 60563fc7d9665591a28c5a8b6f1fbddea73bb921 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Tue, 27 Apr 2021 15:09:02 +0200 Subject: [PATCH] Exclude test instances from crawl --- src/crawl.rs | 6 ++++-- src/lib.rs | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/crawl.rs b/src/crawl.rs index f7833fd..018ee4f 100644 --- a/src/crawl.rs +++ b/src/crawl.rs @@ -1,6 +1,6 @@ use crate::federated_instances::GetSiteResponse; use crate::node_info::NodeInfo; -use crate::REQUEST_TIMEOUT; +use crate::{EXCLUDE_INSTANCES, REQUEST_TIMEOUT}; use anyhow::anyhow; use anyhow::Error; use futures::try_join; @@ -21,7 +21,9 @@ pub async fn crawl( let mut failed_instances = 0; while let Some(current_instance) = pending_instances.pop_back() { crawled_instances.push(current_instance.domain.clone()); - if current_instance.depth > max_depth { + if current_instance.depth > max_depth + || EXCLUDE_INSTANCES.contains(&&**&current_instance.domain) + { continue; } match fetch_instance_details(&current_instance.domain).await { diff --git a/src/lib.rs b/src/lib.rs index bf7e6e8..ac7af40 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,3 +7,9 @@ pub mod node_info; pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(10); pub const DEFAULT_START_INSTANCES: &str = "lemmy.ml"; pub const DEFAULT_MAX_CRAWL_DEPTH: &str = "1"; +pub const EXCLUDE_INSTANCES: &'static [&str] = &[ + "ds9.lemmy.ml", + "enterprise.lemmy.ml", + "voyager.lemmy.ml", + "test.lemmy.ml", +];