From 638fd0df5737ce920470ac41ddb844207e150c49 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Tue, 11 Jul 2023 12:59:06 +0200 Subject: [PATCH] Use jq to filter instance list and avoid js crashes (fixes #216) --- crawl.mjs | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/crawl.mjs b/crawl.mjs index 8d0e328..2f519e9 100644 --- a/crawl.mjs +++ b/crawl.mjs @@ -19,16 +19,19 @@ try { all_recommended.push(...recommended_instances[k]); } } - const run = spawn( - "cargo", + // Run Rust crawler with given params. Then pipe output directly into jq, to filter + // out fields with lots of data which we don't need. This is necessary because otherwise + // JavaScript may crash when loading the crawl output. + const run = spawn("sh", [ - "run", - "--", - "--json", - "--start-instances", - all_recommended, - "--exclude-instances", - recommended_instances.exclude, + "-c", + `cargo run -- --json --start-instances ${all_recommended} \ + --exclude-instances ${recommended_instances.exclude} | \ + jq 'del(.instance_details[].federated_instances, \ + .instance_details[].site_info.all_languages, \ + .instance_details[].site_info.discussion_languages, \ + .instance_details[].site_info.admins, .instance_details[].site_info.taglines, \ + .instance_details[].site_info.custom_emojis)'` ], { cwd: "lemmy-stats-crawler", @@ -62,15 +65,6 @@ try { i => i.site_info.site_view.counts.users_active_month > min_monthly_users, ); - // Exclude unnecessary data - stats.instance_details.forEach(i => { - delete i.site_info.admins; - delete i.site_info.all_languages; - delete i.site_info.discussion_languages; - delete i.site_info.taglines; - delete i.site_info.custom_emojis; - delete i.federated_instances; - }); let stats2 = { stats: stats,