2023-06-08 20:09:25 +00:00
|
|
|
import fs from "fs";
|
|
|
|
import { spawn } from "child_process";
|
2022-05-03 12:23:37 +00:00
|
|
|
|
|
|
|
// Directory created up front (recursively) before the crawl starts.
const outDir = "src/shared/translations/";
// JSON file mapping group names to instance lists, plus an "exclude" list.
const recommendationsFile = "recommended-instances.json";
// Generated module that receives the filtered crawl result.
const instanceStatsFile = "src/shared/instance_stats.ts";
// Instances must have strictly MORE monthly active users than this to be kept.
const min_monthly_users = 5;

fs.mkdirSync(outDir, { recursive: true });

/**
 * Flattens every instance list of the recommendations object into one array,
 * skipping the "exclude" entry.
 *
 * @param {Object<string, string[]>} recommendedInstances - parsed recommendations file
 * @returns {string[]} all recommended instances, in key order
 */
function collectStartInstances(recommendedInstances) {
  return Object.entries(recommendedInstances)
    .filter(([group]) => group !== "exclude")
    .flatMap(([, instances]) => instances);
}

/**
 * Builds the shell pipeline that runs the Rust crawler and pipes its JSON
 * output through jq, deleting large fields that are not needed. This is
 * necessary because otherwise Javascript may crash when loading the crawl
 * output.
 *
 * Arrays are interpolated via their default string conversion, i.e. as a
 * comma-separated list.
 *
 * @param {string[]} startInstances - instances the crawl starts from
 * @param {string[]} excludeInstances - instances passed to --exclude-instances
 * @returns {string} single-line command suitable for `sh -c`
 */
function buildCrawlCommand(startInstances, excludeInstances) {
  const droppedFields = [
    ".instance_details[].federated_instances",
    ".instance_details[].site_info.all_languages",
    ".instance_details[].site_info.discussion_languages",
    ".instance_details[].site_info.admins",
    ".instance_details[].site_info.taglines",
    ".instance_details[].site_info.custom_emojis",
  ].join(", ");
  return (
    `cargo run -- --json --start-instances ${startInstances} ` +
    `--exclude-instances ${excludeInstances} | ` +
    `jq 'del(${droppedFields})'`
  );
}

/**
 * Keeps only instances whose registration is not closed and which have more
 * than `minMonthlyUsers` monthly active users.
 *
 * @param {object[]} instanceDetails - `instance_details` array from the crawl output
 * @param {number} minMonthlyUsers - exclusive lower bound on `users_active_month`
 * @returns {object[]} the filtered array (input is not modified)
 */
function filterInstanceDetails(instanceDetails, minMonthlyUsers) {
  return instanceDetails.filter(({ site_info }) => {
    const { local_site, counts } = site_info.site_view;
    return (
      local_site.registration_mode !== "closed" &&
      counts.users_active_month > minMonthlyUsers
    );
  });
}

/**
 * Turns the raw crawl output into the source text of the generated module.
 *
 * Crawl results from all instances include tons of data which needs to be
 * compiled. If it is too much data it breaks the build, so as much as
 * possible is excluded before serializing.
 *
 * @param {string} crawlOutput - JSON text produced by the crawl pipeline
 * @param {object} recommendedInstances - parsed recommendations file, embedded as `recommended`
 * @param {number} [minMonthlyUsers=min_monthly_users] - see filterInstanceDetails
 * @returns {string} `export const instance_stats = ...;` module source
 * @throws {SyntaxError} if `crawlOutput` is not valid JSON
 */
function buildStatsModule(
  crawlOutput,
  recommendedInstances,
  minMonthlyUsers = min_monthly_users,
) {
  const stats = JSON.parse(crawlOutput);
  stats.instance_details = filterInstanceDetails(
    stats.instance_details,
    minMonthlyUsers,
  );
  const payload = { stats, recommended: recommendedInstances };
  return `export const instance_stats = \n ${JSON.stringify(payload, null, 2)};`;
}

// crawl instance stats
try {
  const recommendedInstances = JSON.parse(
    fs.readFileSync(recommendationsFile, "utf8"),
  );
  const startInstances = collectStartInstances(recommendedInstances);

  const run = spawn(
    "sh",
    ["-c", buildCrawlCommand(startInstances, recommendedInstances.exclude)],
    { cwd: "lemmy-stats-crawler" },
  );

  // Decode at the stream level so a multi-byte UTF-8 character that is split
  // across two chunks is not corrupted (per-chunk toString() would break it).
  run.stdout.setEncoding("utf8");
  run.stderr.setEncoding("utf8");

  let savedOutput = "";
  run.stdout.on("data", chunk => {
    process.stdout.write(chunk);
    savedOutput += chunk;
  });
  run.stderr.on("data", chunk => {
    process.stderr.write(chunk);
  });

  // Emitted when the child could not be spawned at all; without a listener
  // this would surface as an unhandled "error" event.
  run.on("error", err => {
    console.error(err);
    process.exitCode = 1;
  });

  run.on("close", (exitCode, signal) => {
    // This callback runs after the surrounding try/catch has already
    // finished, so failures have to be handled here.
    try {
      if (exitCode !== 0) {
        throw new Error(
          `Crawl pipeline failed (exit code ${exitCode}, signal ${signal})`,
        );
      }
      if (savedOutput.trim() === "") {
        throw new Error("Crawl pipeline produced no output");
      }
      fs.writeFileSync(
        instanceStatsFile,
        buildStatsModule(savedOutput, recommendedInstances),
      );
    } catch (err) {
      console.error(err);
      process.exitCode = 1;
    }
  });
} catch (err) {
  console.error(err);
}
|