Exclude test instances from crawl
This commit is contained in:
parent
d92de61d9c
commit
60563fc7d9
2 changed files with 10 additions and 2 deletions
|
@@ -1,6 +1,6 @@
|
||||||
use crate::federated_instances::GetSiteResponse;
|
use crate::federated_instances::GetSiteResponse;
|
||||||
use crate::node_info::NodeInfo;
|
use crate::node_info::NodeInfo;
|
||||||
use crate::REQUEST_TIMEOUT;
|
use crate::{EXCLUDE_INSTANCES, REQUEST_TIMEOUT};
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
use anyhow::Error;
|
use anyhow::Error;
|
||||||
use futures::try_join;
|
use futures::try_join;
|
||||||
|
@@ -21,7 +21,9 @@ pub async fn crawl(
|
||||||
let mut failed_instances = 0;
|
let mut failed_instances = 0;
|
||||||
while let Some(current_instance) = pending_instances.pop_back() {
|
while let Some(current_instance) = pending_instances.pop_back() {
|
||||||
crawled_instances.push(current_instance.domain.clone());
|
crawled_instances.push(current_instance.domain.clone());
|
||||||
if current_instance.depth > max_depth {
|
if current_instance.depth > max_depth
|
||||||
|
|| EXCLUDE_INSTANCES.contains(&&**&current_instance.domain)
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
match fetch_instance_details(&current_instance.domain).await {
|
match fetch_instance_details(&current_instance.domain).await {
|
||||||
|
|
|
@@ -7,3 +7,9 @@ pub mod node_info;
|
||||||
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
|
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
pub const DEFAULT_START_INSTANCES: &str = "lemmy.ml";
|
pub const DEFAULT_START_INSTANCES: &str = "lemmy.ml";
|
||||||
pub const DEFAULT_MAX_CRAWL_DEPTH: &str = "1";
|
pub const DEFAULT_MAX_CRAWL_DEPTH: &str = "1";
|
||||||
|
pub const EXCLUDE_INSTANCES: &'static [&str] = &[
|
||||||
|
"ds9.lemmy.ml",
|
||||||
|
"enterprise.lemmy.ml",
|
||||||
|
"voyager.lemmy.ml",
|
||||||
|
"test.lemmy.ml",
|
||||||
|
];
|
||||||
|
|
Loading…
Reference in a new issue