Filter instances at crawl time (#206)

* Filter instance list at crawl time

* fixes

* exclude more data

* no max crawl

* prettier

* update instance stats
This commit is contained in:
Nutomic 2023-06-29 22:59:38 +02:00 committed by GitHub
parent 3e23e6b8fa
commit 1d190e0362
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 6854 additions and 214220 deletions

View file

@ -1,11 +1,10 @@
import fs from "fs"; import fs from "fs";
import path from "path";
import { exit } from "process";
import { spawn } from "child_process"; import { spawn } from "child_process";
const outDir = "src/shared/translations/"; const outDir = "src/shared/translations/";
const recommendationsFile = "recommended-instances.json"; const recommendationsFile = "recommended-instances.json";
const instanceStatsFile = "src/shared/instance_stats.ts"; const instanceStatsFile = "src/shared/instance_stats.ts";
const min_monthly_users = 5;
fs.mkdirSync(outDir, { recursive: true }); fs.mkdirSync(outDir, { recursive: true });
@ -50,7 +49,27 @@ try {
}); });
run.on("close", exitCode => { run.on("close", exitCode => {
const stats = JSON.parse(savedOutput); var stats = JSON.parse(savedOutput);
// Crawl results from all instances include tons of data, all of which needs to be compiled.
// Too much data breaks the build, so we need to exclude as much as possible.
stats.instance_details = stats.instance_details
// Exclude instances with closed registration
.filter(
i => i.site_info.site_view.local_site.registration_mode != "closed"
)
// Exclude instances with few active users
.filter(
i => i.site_info.site_view.counts.users_active_month > min_monthly_users
);
// Exclude unnecessary data
stats.instance_details.forEach(i => {
delete i.site_info.admins;
delete i.site_info.all_languages;
delete i.site_info.discussion_languages;
delete i.site_info.taglines;
delete i.site_info.custom_emojis;
delete i.federated_instances;
});
let stats2 = { let stats2 = {
stats: stats, stats: stats,

@ -1 +1 @@
Subproject commit 2eac19b2a21d77d711b8a0b0a534affe0d4d6851 Subproject commit 4553c749cb0fb74d62fd156ebd119dc479693dfd

@ -1 +1 @@
Subproject commit ed6a393329b79fd23964231e26cfa00347b8fc83 Subproject commit 788a3dd6e02fbe153e6d7c6315601ade15637f8c

@ -1 +1 @@
Subproject commit a241fe1255a6363c7ae1ec5a09520c066745e6ce Subproject commit 5a9d44656e2658ab7cb2dbec3fd1bfaf57654533

View file

@ -4,8 +4,6 @@ import { i18n } from "../i18next";
import { instance_stats } from "../instance_stats"; import { instance_stats } from "../instance_stats";
import { numToSI } from "../utils"; import { numToSI } from "../utils";
const min_monthly_users = 5;
export class Instances extends Component<any, any> { export class Instances extends Component<any, any> {
constructor(props: any, context: any) { constructor(props: any, context: any) {
super(props, context); super(props, context);
@ -101,43 +99,33 @@ export class Instances extends Component<any, any> {
<div> <div>
<h2>{header}</h2> <h2>{header}</h2>
<div class="row"> <div class="row">
{instances {instances.map(instance => {
.filter( let domain = instance.domain;
i => let description = instance.site_info.site_view.site.description;
i.site_info.site_view.local_site.registration_mode != "closed" let icon = instance.site_info.site_view.site.icon;
) return (
.filter( <div class="card col-6">
i => <header>
i.site_info.site_view.counts.users_active_month > <div class="row">
min_monthly_users <h4 class="col">{domain}</h4>
)
.map(instance => {
let domain = instance.domain;
let description = instance.site_info.site_view.site.description;
let icon = instance.site_info.site_view.site.icon;
return (
<div class="card col-6">
<header>
<div class="row">
<h4 class="col">{domain}</h4>
</div>
</header>
<div class="is-center">
<img
class="join-banner"
src={icon || "/static/assets/images/lemmy.svg"}
/>
</div> </div>
<br /> </header>
<p class="join-desc">{description}</p> <div class="is-center">
<footer> <img
<a class="button primary" href={`https://${domain}`}> class="join-banner"
{i18n.t("browse_instance")} src={icon || "/static/assets/images/lemmy.svg"}
</a> />
</footer>
</div> </div>
); <br />
})} <p class="join-desc">{description}</p>
<footer>
<a class="button primary" href={`https://${domain}`}>
{i18n.t("browse_instance")}
</a>
</footer>
</div>
);
})}
</div> </div>
</div> </div>
); );

File diff suppressed because one or more lines are too long