joinlemmy-site/crawl.mjs

90 lines
2.8 KiB
JavaScript
Raw Permalink Normal View History

import fs from "fs";
import { spawn } from "child_process";
// Directory that is created (recursively) as soon as this script starts.
const outDir = "src/shared/translations/";
// JSON file whose keys each hold a list of instances to start crawling from;
// its `exclude` key is passed to the crawler separately as instances to skip.
const recommendationsFile = "recommended-instances.json";
// Path of the generated module that exports `instance_stats`.
const instanceStatsFile = "src/shared/instance_stats.ts";
// An instance is only kept if it has strictly more monthly active users than this.
const min_monthly_users = 5;
// `recursive: true` makes this a no-op when the directory already exists.
fs.mkdirSync(outDir, { recursive: true });
// Crawl instance stats and regenerate the instance stats module.

/**
 * Builds the shell pipeline that runs the Rust crawler and pipes its JSON output
 * directly into jq.
 *
 * jq deletes fields with lots of data which we don't need. This is necessary
 * because otherwise Javascript may crash when loading the crawl output.
 *
 * NOTE(review): both arguments are interpolated into the command unquoted, so
 * this assumes the recommendations file is trusted input.
 *
 * @param {string[]} startInstances - Instances to start crawling from; an array
 *   is stringified as a comma-separated list.
 * @param {*} excludeInstances - Value of the `exclude` key, stringified the same way.
 * @returns {string} Command line suitable for `sh -c`.
 */
function buildCrawlCommand(startInstances, excludeInstances) {
  const droppedFields = [
    "federated_instances",
    "site_info.all_languages",
    "site_info.discussion_languages",
    "site_info.admins",
    "site_info.taglines",
    "site_info.custom_emojis",
  ];
  const jqFilter = `del(${droppedFields
    .map(field => `.instance_details[].${field}`)
    .join(", ")})`;
  return `cargo run -- --json --start-instances ${startInstances} --exclude-instances ${excludeInstances} | jq '${jqFilter}'`;
}

/**
 * Runs `command` through `sh -c` inside the crawler checkout, echoing the
 * child's stdout and stderr to this process' stdout while it runs.
 *
 * @param {string} command - Shell command line to execute.
 * @returns {Promise<string>} Everything the child wrote to stdout.
 *   Rejects if the process cannot be spawned or does not exit with code 0.
 */
function runCrawler(command) {
  return new Promise((resolve, reject) => {
    const child = spawn("sh", ["-c", command], { cwd: "lemmy-stats-crawler" });
    // Decode at the stream level: converting each Buffer chunk on its own would
    // corrupt multi-byte UTF-8 characters that straddle a chunk boundary.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    let collected = "";
    child.stdout.on("data", chunk => {
      process.stdout.write(chunk);
      collected += chunk;
    });
    child.stderr.on("data", chunk => {
      process.stdout.write(chunk);
    });

    // Emitted when the process could not be spawned at all.
    child.on("error", reject);
    // "close" fires once the process has ended and its stdio streams are closed.
    child.on("close", (exitCode, signal) => {
      if (exitCode === 0) {
        resolve(collected);
      } else {
        reject(
          new Error(
            `Crawler pipeline failed (exit code: ${exitCode}, signal: ${signal})`,
          ),
        );
      }
    });
  });
}

/**
 * Selects the instances that should be kept in the generated stats.
 *
 * Crawl results from all instances include tons of data which needs to be compiled.
 * If it is too much data it breaks the build, so we need to exclude as much as possible.
 *
 * @param {object} stats - Parsed crawler output; reads `instance_details` and
 *   the top-level `users_active_month`.
 * @param {number} minMonthlyUsers - Instances need strictly more monthly active
 *   users than this to be kept.
 * @returns {object[]} The instance details that pass every check.
 */
function selectListedInstances(stats, minMonthlyUsers) {
  return stats.instance_details.filter(instance => {
    const siteView = instance.site_info.site_view;
    // Exclude instances with closed registration
    if (siteView.local_site.registration_mode === "closed") {
      return false;
    }
    // Exclude instances with few active users
    const activeUsers = siteView.counts.users_active_month;
    if (!(activeUsers > minMonthlyUsers)) {
      return false;
    }
    // Exclude large instances which represent more than 30% of all active users
    return activeUsers / stats.users_active_month < 0.3;
  });
}

try {
  const recommended_instances = JSON.parse(
    fs.readFileSync(recommendationsFile, "utf8"),
  );

  // Every key except "exclude" contributes its instances to the crawl start list.
  const all_recommended = [];
  for (const [group, instances] of Object.entries(recommended_instances)) {
    if (group !== "exclude") {
      all_recommended.push(...instances);
    }
  }

  // Wait for the crawler here so that any failure, including unparseable
  // output, is handled by the catch below.
  const savedOutput = await runCrawler(
    buildCrawlCommand(all_recommended, recommended_instances.exclude),
  );

  const stats = JSON.parse(savedOutput);
  stats.instance_details = selectListedInstances(stats, min_monthly_users);

  const payload = {
    stats,
    recommended: recommended_instances,
  };
  const moduleSource = `export const instance_stats = \n ${JSON.stringify(
    payload,
    null,
    2,
  )};`;
  fs.writeFileSync(instanceStatsFile, moduleSource);
} catch (err) {
  console.error(err);
  // Exit non-zero so that a failed crawl is not mistaken for success.
  process.exitCode = 1;
}