Simplify crawl by using queue

Felix Ableitner 2021-03-11 14:30:30 +01:00
parent 0690824e9e
commit ce909d5e72


@@ -4,6 +4,7 @@ use lemmy_stats_crawler::federated_instances::GetSiteResponse;
 use lemmy_stats_crawler::node_info::NodeInfo;
 use reqwest::Client;
 use serde::Serialize;
+use std::collections::VecDeque;
 use tokio::time::Duration;
 
 const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
@@ -47,12 +48,9 @@ fn aggregate(instance_details: Vec<InstanceDetails>) -> TotalStats {
 }
 
 async fn crawl(start_instances: Vec<String>) -> Result<Vec<InstanceDetails>, Error> {
-    let mut pending_instances = start_instances;
+    let mut pending_instances = VecDeque::from(start_instances);
     let mut instance_details = vec![];
-    while let Some(current_instance) = pending_instances.to_owned().first() {
-        // remove current instance from pending
-        pending_instances.remove(0);
+    while let Some(current_instance) = pending_instances.pop_back() {
         match fetch_instance_details(&current_instance).await {
             Ok(details) => {
                 instance_details.push(details.to_owned());
@@ -61,7 +59,7 @@ async fn crawl(start_instances: Vec<String>) -> Result<Vec<InstanceDetails>, Err
                     &instance_details.iter().map(|i| i.domain.as_ref()).collect();
                 for i in details.linked_instances {
                     if !crawled_instances.contains(&&*i) && !pending_instances.contains(&i) {
-                        pending_instances.push(i);
+                        pending_instances.push_back(i);
                     }
                 }
             }
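
For context, here is a minimal, self-contained sketch of the queue-based crawl loop this commit introduces. The crawl and fetch_instance_details names come from the diff, but the InstanceDetails struct shown here is trimmed down, fetch_instance_details is a hypothetical synchronous stub with a fixed example network (the real one is async and queries each instance over HTTP with reqwest), and the domain strings are just placeholder values.

use std::collections::{HashSet, VecDeque};

// Simplified stand-in for the crawler's InstanceDetails; the real struct
// carries more fields than these two.
#[derive(Clone, Debug)]
struct InstanceDetails {
    domain: String,
    linked_instances: Vec<String>,
}

// Hypothetical stub with a small fixed "network" so the example terminates;
// the real fetch_instance_details is async and does an HTTP request.
fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, String> {
    let linked_instances = match domain {
        "lemmy.ml" => vec!["lemmy.ca".to_string(), "feddit.de".to_string()],
        "lemmy.ca" => vec!["lemmy.ml".to_string()],
        _ => vec![],
    };
    Ok(InstanceDetails {
        domain: domain.to_string(),
        linked_instances,
    })
}

fn crawl(start_instances: Vec<String>) -> Vec<InstanceDetails> {
    // Seed the work queue with the starting instances.
    let mut pending_instances = VecDeque::from(start_instances);
    let mut instance_details = vec![];

    // pop_back() both yields the next instance and removes it from the
    // queue, so no separate remove(0) bookkeeping is needed.
    while let Some(current_instance) = pending_instances.pop_back() {
        if let Ok(details) = fetch_instance_details(&current_instance) {
            instance_details.push(details.clone());

            // Enqueue newly discovered instances that are neither crawled
            // already nor queued yet.
            let crawled_instances: HashSet<&str> =
                instance_details.iter().map(|i| i.domain.as_ref()).collect();
            for i in details.linked_instances {
                if !crawled_instances.contains(&*i) && !pending_instances.contains(&i) {
                    pending_instances.push_back(i);
                }
            }
        }
    }
    instance_details
}

fn main() {
    for details in crawl(vec!["lemmy.ml".to_string()]) {
        println!("{}: {} linked", details.domain, details.linked_instances.len());
    }
}

One design note: pop_back() paired with push_back() takes and adds work at the same end of the deque, so the crawl visits instances in LIFO (roughly depth-first) order; using pop_front() instead would give FIFO (breadth-first) order. Either way the pop removes the element, which is what lets this commit drop the separate pending_instances.remove(0) step from the old loop.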