Add support for command line parameters
This commit is contained in:
parent
f01e077020
commit
9621b91f7c
6 changed files with 99 additions and 17 deletions
63
Cargo.lock
generated
63
Cargo.lock
generated
|
@ -2,12 +2,32 @@
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ansi_term"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||||
|
dependencies = [
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anyhow"
|
name = "anyhow"
|
||||||
version = "1.0.38"
|
version = "1.0.38"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1"
|
checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "atty"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
|
@ -68,6 +88,21 @@ version = "1.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "2.33.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
|
||||||
|
dependencies = [
|
||||||
|
"ansi_term",
|
||||||
|
"atty",
|
||||||
|
"bitflags",
|
||||||
|
"strsim",
|
||||||
|
"textwrap",
|
||||||
|
"unicode-width",
|
||||||
|
"vec_map",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encoding_rs"
|
name = "encoding_rs"
|
||||||
version = "0.8.28"
|
version = "0.8.28"
|
||||||
|
@ -381,6 +416,7 @@ name = "lemmy-stats-crawler"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"clap",
|
||||||
"futures",
|
"futures",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
|
@ -710,6 +746,12 @@ version = "0.5.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strsim"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.62"
|
version = "1.0.62"
|
||||||
|
@ -721,6 +763,15 @@ dependencies = [
|
||||||
"unicode-xid",
|
"unicode-xid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "textwrap"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-width",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tinyvec"
|
name = "tinyvec"
|
||||||
version = "1.1.1"
|
version = "1.1.1"
|
||||||
|
@ -862,6 +913,12 @@ dependencies = [
|
||||||
"tinyvec",
|
"tinyvec",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-width"
|
||||||
|
version = "0.1.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-xid"
|
name = "unicode-xid"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
|
@ -886,6 +943,12 @@ dependencies = [
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "vec_map"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "version_check"
|
name = "version_check"
|
||||||
version = "0.9.2"
|
version = "0.9.2"
|
||||||
|
|
|
@ -11,3 +11,4 @@ anyhow = "1.0.38"
|
||||||
tokio = { version = "0.2.25", features = ["rt-threaded", "macros"] }
|
tokio = { version = "0.2.25", features = ["rt-threaded", "macros"] }
|
||||||
futures = "0.3.13"
|
futures = "0.3.13"
|
||||||
serde_json = "1.0.64"
|
serde_json = "1.0.64"
|
||||||
|
clap = "2.33.3"
|
||||||
|
|
|
@ -4,12 +4,6 @@ Crawls Lemmy instances using nodeinfo and API endpoints, to generate a list of i
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
For testing:
|
|
||||||
```
|
```
|
||||||
cargo run
|
cargo run -- --start-instances baraza.africa,lemmy.ml
|
||||||
```
|
|
||||||
|
|
||||||
For production (hide debug logs):
|
|
||||||
```
|
|
||||||
cargo run 2>/dev/null
|
|
||||||
```
|
```
|
10
src/crawl.rs
10
src/crawl.rs
|
@ -59,14 +59,14 @@ pub struct InstanceDetails {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct CrawlInstance {
|
struct CrawlInstance {
|
||||||
domain: String,
|
domain: String,
|
||||||
depth: i32,
|
depth: i32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CrawlInstance {
|
impl CrawlInstance {
|
||||||
pub fn new(domain: String, depth: i32) -> CrawlInstance {
|
pub fn new(domain: String, depth: i32) -> CrawlInstance {
|
||||||
CrawlInstance { domain, depth }
|
CrawlInstance { domain, depth }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
|
async fn fetch_instance_details(domain: &str) -> Result<InstanceDetails, Error> {
|
||||||
|
|
|
@ -5,5 +5,5 @@ pub mod federated_instances;
|
||||||
pub mod node_info;
|
pub mod node_info;
|
||||||
|
|
||||||
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
|
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
pub const START_INSTANCES: [&'static str; 1] = ["lemmy.ml"];
|
pub const DEFAULT_START_INSTANCES: &'static str = "lemmy.ml";
|
||||||
pub const MAX_CRAWL_DEPTH: i32 = 2;
|
pub const DEFAULT_MAX_CRAWL_DEPTH: &'static str = "1";
|
||||||
|
|
30
src/main.rs
30
src/main.rs
|
@ -1,14 +1,38 @@
|
||||||
use anyhow::Error;
|
use anyhow::Error;
|
||||||
|
use clap::{App, Arg};
|
||||||
use lemmy_stats_crawler::crawl::{crawl, InstanceDetails};
|
use lemmy_stats_crawler::crawl::{crawl, InstanceDetails};
|
||||||
use lemmy_stats_crawler::{MAX_CRAWL_DEPTH, START_INSTANCES};
|
use lemmy_stats_crawler::{DEFAULT_MAX_CRAWL_DEPTH, DEFAULT_START_INSTANCES};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
pub async fn main() -> Result<(), Error> {
|
pub async fn main() -> Result<(), Error> {
|
||||||
let start_instances = START_INSTANCES.iter().map(|s| s.to_string()).collect();
|
let matches = App::new("Lemmy Stats Crawler")
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("start-instances")
|
||||||
|
.long("start-instances")
|
||||||
|
.takes_value(true),
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("max-crawl-depth")
|
||||||
|
.long("max-crawl-depth")
|
||||||
|
.takes_value(true),
|
||||||
|
)
|
||||||
|
.get_matches();
|
||||||
|
let trusted_instances: Vec<String> = matches
|
||||||
|
.value_of("start-instances")
|
||||||
|
.unwrap_or(DEFAULT_START_INSTANCES)
|
||||||
|
.split(",")
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect();
|
||||||
|
let max_crawl_depth: i32 = matches
|
||||||
|
.value_of("max-crawl-depth")
|
||||||
|
.unwrap_or(DEFAULT_MAX_CRAWL_DEPTH)
|
||||||
|
.parse()?;
|
||||||
|
|
||||||
|
let start_instances = trusted_instances.iter().map(|s| s.to_string()).collect();
|
||||||
|
|
||||||
eprintln!("Crawling...");
|
eprintln!("Crawling...");
|
||||||
let instance_details = crawl(start_instances, MAX_CRAWL_DEPTH).await?;
|
let instance_details = crawl(start_instances, max_crawl_depth).await?;
|
||||||
let total_stats = aggregate(instance_details);
|
let total_stats = aggregate(instance_details);
|
||||||
|
|
||||||
println!("{}", serde_json::to_string(&total_stats)?);
|
println!("{}", serde_json::to_string(&total_stats)?);
|
||||||
|
|
Loading…
Reference in a new issue