From ce909d5e72f2cbf60b1f788c956b7bd9e7884939 Mon Sep 17 00:00:00 2001
From: Felix Ableitner
Date: Thu, 11 Mar 2021 14:30:30 +0100
Subject: [PATCH] Simplify crawl by using queue

---
 src/main.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 3c7b1f8..b5ac432 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ use lemmy_stats_crawler::federated_instances::GetSiteResponse;
 use lemmy_stats_crawler::node_info::NodeInfo;
 use reqwest::Client;
 use serde::Serialize;
+use std::collections::VecDeque;
 use tokio::time::Duration;
 
 const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
@@ -47,12 +48,9 @@ fn aggregate(instance_details: Vec<InstanceDetails>) -> TotalStats {
 }
 
 async fn crawl(start_instances: Vec<String>) -> Result<Vec<InstanceDetails>, Error> {
-    let mut pending_instances = start_instances;
+    let mut pending_instances = VecDeque::from(start_instances);
     let mut instance_details = vec![];
-    while let Some(current_instance) = pending_instances.to_owned().first() {
-        // remove current instance from pending
-        pending_instances.remove(0);
-
+    while let Some(current_instance) = pending_instances.pop_back() {
         match fetch_instance_details(&current_instance).await {
             Ok(details) => {
                 instance_details.push(details.to_owned());
@@ -61,7 +59,7 @@ async fn crawl(start_instances: Vec<String>) -> Result<Vec<InstanceDetails>, Er
                     &instance_details.iter().map(|i| i.domain.as_ref()).collect();
                 for i in details.linked_instances {
                     if !crawled_instances.contains(&&*i) && !pending_instances.contains(&i) {
-                        pending_instances.push(i);
+                        pending_instances.push_back(i);
                     }
                 }
             }
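
For reference, here is a minimal, synchronous sketch of the VecDeque-based crawl pattern this patch introduces, runnable in isolation. The linked_instances helper and the sample domains are hypothetical stand-ins for the crawler's async fetch_instance_details; error handling and the real data types are omitted.

    use std::collections::VecDeque;

    // Hypothetical stand-in for fetch_instance_details(): returns the
    // domains an instance links to. Not part of the patch itself.
    fn linked_instances(domain: &str) -> Vec<String> {
        match domain {
            "lemmy.ml" => vec!["lemmy.ca".to_string(), "lemmygrad.ml".to_string()],
            "lemmy.ca" => vec!["lemmy.ml".to_string()],
            _ => vec![],
        }
    }

    fn crawl(start_instances: Vec<String>) -> Vec<String> {
        // Same pattern as the patch: seed a VecDeque from the start list and
        // pop domains off the back instead of calling remove(0) on a Vec.
        let mut pending_instances = VecDeque::from(start_instances);
        let mut crawled = Vec::new();

        while let Some(current) = pending_instances.pop_back() {
            let linked = linked_instances(&current);
            crawled.push(current);

            for i in linked {
                // Skip anything already crawled or already queued.
                if !crawled.contains(&i) && !pending_instances.contains(&i) {
                    pending_instances.push_back(i);
                }
            }
        }
        crawled
    }

    fn main() {
        println!("{:?}", crawl(vec!["lemmy.ml".to_string()]));
    }

Note that pushing with push_back and popping with pop_back traverses the pending list in LIFO order; swapping pop_back for pop_front would give strict FIFO, breadth-first crawling.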