From 71cdf40baecafe88dc03278ab2ec1a0423d3e4a2 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Thu, 11 Mar 2021 03:24:05 +0100 Subject: [PATCH] Simplified crawl loop, improved output --- src/main.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/main.rs b/src/main.rs index e2323f8..bdb75a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,11 +12,13 @@ const START_INSTANCES: [&'static str; 1] = ["lemmy.ml"]; #[tokio::main] pub async fn main() -> Result<(), Error> { let start_instances = START_INSTANCES.iter().map(|s| s.to_string()).collect(); + + eprintln!("Crawling..."); let instance_details = crawl(start_instances).await?; let instance_details = cleanup(instance_details); let total_stats = aggregate(instance_details); - print!("{}", serde_json::to_string(&total_stats)?); + println!("{}", serde_json::to_string(&total_stats)?); Ok(()) } @@ -57,23 +59,19 @@ fn cleanup(instance_details: Vec) -> Vec { async fn crawl(start_instances: Vec) -> Result, Error> { let mut pending_instances = start_instances; - let mut crawled_instances = vec![]; let mut instance_details = vec![]; while let Some(current_instance) = pending_instances.to_owned().first() { - crawled_instances.push(current_instance.to_owned()); - // remove curent instance from pending - pending_instances = pending_instances - .iter() - .filter(|i| i != &current_instance) - .map(|i| i.to_owned()) - .collect(); + // remove current instance from pending + pending_instances.remove(0); match fetch_instance_details(&current_instance).await { Ok(details) => { instance_details.push(details.to_owned()); // add all unknown, linked instances to pending + let crawled_instances: &Vec<&str> = + &instance_details.iter().map(|i| i.domain.as_ref()).collect(); for i in details.linked_instances { - if !crawled_instances.contains(&i) { + if !crawled_instances.contains(&&*i) && !pending_instances.contains(&i) { pending_instances.push(i); } }