diff --git a/.drone.yml b/.drone.yml index d929325..2541bef 100644 --- a/.drone.yml +++ b/.drone.yml @@ -24,8 +24,6 @@ steps: - rustup component add clippy - cargo clippy --no-default-features -- -D warnings - cargo clippy --no-default-features --features io-uring -- -D warnings - - cargo clippy --no-default-features --features object-storage -- -D warnings - - cargo clippy --no-default-features --features object-storage,io-uring -- -D warnings trigger: event: diff --git a/Cargo.lock b/Cargo.lock index 33c3d1d..6458c45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -219,6 +219,15 @@ dependencies = [ "url", ] +[[package]] +name = "addr2line" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b" +dependencies = [ + "gimli", +] + [[package]] name = "adler" version = "1.0.2" @@ -366,6 +375,21 @@ dependencies = [ "anyhow", ] +[[package]] +name = "backtrace" +version = "0.3.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e121dee8023ce33ab248d9ce1493df03c3b38a659b240096fcbd7048ff9c31f" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "base64" version = "0.13.0" @@ -464,17 +488,59 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "2.34.0" +version = "3.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +checksum = "71c47df61d9e16dc010b55dba1952a57d8c215dbb533fd13cdd13369aac73b1c" dependencies = [ - "ansi_term", "atty", "bitflags", + "clap_derive", + "indexmap", + "lazy_static", + "os_str_bytes", "strsim", + "termcolor", "textwrap", - "unicode-width", - "vec_map", +] + +[[package]] +name = "clap_derive" +version = "3.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3aab4734e083b809aaf5794e14e756d1c798d2c69c7f7de7a09a2f5214993c1" +dependencies = [ + "heck 0.4.0", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "color-eyre" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ebf286c900a6d5867aeff75cfee3192857bb7f24b547d4f0df2ed6baa812c90" +dependencies = [ + "backtrace", + "color-spantrace", + "eyre", + "indenter", + "once_cell", + "owo-colors", + "tracing-error", +] + +[[package]] +name = "color-spantrace" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba75b3d9449ecdccb27ecbc479fdc0b87fa2dd43d2f8298f9bf0e59aacc8dce" +dependencies = [ + "once_cell", + "owo-colors", + "tracing-core", + "tracing-error", ] [[package]] @@ -704,6 +770,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "eyre" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9289ed2c0440a6536e65119725cf91fc2c6b5e513bfd2e36e1134d7cca6ca12f" +dependencies = [ + "indenter", + "once_cell", +] + [[package]] name = "fake-simd" version = "0.1.2" @@ -896,6 +972,12 @@ dependencies = [ "wasi 0.10.2+wasi-snapshot-preview1", ] +[[package]] +name = "gimli" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" + [[package]] name = "h2" version = "0.3.13" @@ -952,6 +1034,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] 
+name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -1077,6 +1165,12 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + [[package]] name = "indexmap" version = "1.8.1" @@ -1359,6 +1453,15 @@ dependencies = [ "libc", ] +[[package]] +name = "object" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.10.0" @@ -1426,6 +1529,21 @@ dependencies = [ "hashbrown 0.12.0", ] +[[package]] +name = "os_str_bytes" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64" +dependencies = [ + "memchr", +] + +[[package]] +name = "owo-colors" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e72e30578e0d0993c8ae20823dd9cff2bc5517d2f586a8aef462a581e8a03eb" + [[package]] name = "parking_lot" version = "0.11.2" @@ -1547,7 +1665,7 @@ dependencies = [ [[package]] name = "pict-rs" -version = "0.3.0" +version = "0.4.0-alpha.1" dependencies = [ "actix-form-data", "actix-rt", @@ -1557,6 +1675,8 @@ dependencies = [ "async-trait", "awc", "base64", + "clap", + "color-eyre", "config", "console-subscriber", "dashmap", @@ -1574,12 +1694,12 @@ dependencies = [ "sha2 0.10.2", "sled", "storage-path-generator", - "structopt", "thiserror", "time", "tokio", "tokio-uring", "tokio-util 0.7.1", + "toml", "tracing", "tracing-actix-web", "tracing-awc", @@ -1680,7 +1800,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62941722fb675d463659e49c4f3fe1fe792ff24fe5bbaa9c08cd3b98a1c354f5" dependencies = [ "bytes", - "heck", + "heck 0.3.3", "itertools", "lazy_static", "log", @@ -1923,6 +2043,12 @@ dependencies = [ "url", ] +[[package]] +name = "rustc-demangle" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" + [[package]] name = "rustc_version" version = "0.4.0" @@ -2163,33 +2289,9 @@ dependencies = [ [[package]] name = "strsim" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - -[[package]] -name = "structopt" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" -dependencies = [ - "clap", - "lazy_static", - "structopt-derive", -] - -[[package]] -name = "structopt-derive" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" -dependencies = [ - "heck", - "proc-macro-error", - "proc-macro2", - "quote", - "syn", -] +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "subtle" @@ -2223,14 +2325,20 @@ dependencies = [ ] [[package]] -name = "textwrap" -version = "0.11.0" +name = "termcolor" 
+version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" dependencies = [ - "unicode-width", + "winapi-util", ] +[[package]] +name = "textwrap" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" + [[package]] name = "thiserror" version = "1.0.30" @@ -2595,6 +2703,16 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-serde" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.10" @@ -2605,12 +2723,15 @@ dependencies = [ "lazy_static", "matchers", "regex", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -2668,12 +2789,6 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" -[[package]] -name = "unicode-width" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" - [[package]] name = "unicode-xid" version = "0.2.2" @@ -2715,12 +2830,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" version = "0.9.4" @@ -2871,6 +2980,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index c8859c8..63b2f5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pict-rs" description = "A simple image hosting service" -version = "0.3.0" +version = "0.4.0-alpha.1" authors = ["asonix "] license = "AGPL-3.0" readme = "README.md" @@ -10,8 +10,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["object-storage"] -object-storage = ["reqwest", "rust-s3"] +default = [] io-uring = [ "actix-rt/io-uring", "actix-server/io-uring", @@ -28,6 +27,8 @@ anyhow = "1.0" async-trait = "0.1.51" awc = { version = "3.0.0", default-features = false, features = ["rustls"] } base64 = "0.13.0" +clap = { version = "3.1.6", features = ["derive"] } +color-eyre = "0.6" config = "0.13.0" console-subscriber = "0.1" dashmap = "5.1.0" @@ -41,30 +42,33 @@ pin-project-lite = "0.2.7" reqwest = { version = "0.11.5", default-features = false, features = [ "rustls-tls", "stream", -], optional = true } +] } rust-s3 
= { version = "0.29.0", default-features = false, features = [ "fail-on-err", "with-reqwest", -], optional = true, git = "https://github.com/asonix/rust-s3", branch = "asonix/generic-client" } +], git = "https://github.com/asonix/rust-s3", branch = "asonix/generic-client" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" sha2 = "0.10.0" sled = { version = "0.34.7" } storage-path-generator = "0.1.0" -structopt = "0.3.14" thiserror = "1.0" time = { version = "0.3.0", features = ["serde"] } tokio = { version = "1", features = ["full", "tracing"] } tokio-uring = { version = "0.3", optional = true, features = ["bytes"] } tokio-util = { version = "0.7", default-features = false, features = ["codec"] } +toml = "0.5.8" tracing = "0.1.15" tracing-error = "0.2.0" tracing-futures = "0.2.4" tracing-log = "0.1.2" tracing-opentelemetry = "0.17" tracing-subscriber = { version = "0.3.0", features = [ + "ansi", "env-filter", "fmt", + "json", + "registry", "tracing-log", ] } url = { version = "2.2", features = ["serde"] } @@ -73,9 +77,9 @@ uuid = { version = "0.8.2", features = ["v4", "serde"] } [dependencies.tracing-actix-web] version = "0.5.0" default-features = false -features = ["emit_event_on_error", "opentelemetry_0_17"] +features = ["opentelemetry_0_17"] [dependencies.tracing-awc] version = "0.1.0" default-features = false -features = ["emit_event_on_error", "opentelemetry_0_17"] +features = ["opentelemetry_0_17"] diff --git a/README.md b/README.md index a8d1465..e970c18 100644 --- a/README.md +++ b/README.md @@ -9,109 +9,142 @@ _a simple image hosting service_ ## Usage ### Running ``` -pict-rs 0.3.0 +pict-rs 0.4.0-alpha.1 +asonix +A simple image hosting service USAGE: - pict-rs [FLAGS] [OPTIONS] [SUBCOMMAND] - -FLAGS: - -h, --help Prints help information - -s, --skip-validate-imports Whether to skip validating images uploaded via the internal import API - -V, --version Prints version information + pict-rs [OPTIONS] OPTIONS: - -a, --addr The address and port the server binds to. - --api-key - An optional string to be checked on requests to privileged endpoints + -c, --config-file + Path to the pict-rs configuration file - -c, --config-file Path to the pict-rs configuration file - --console-buffer-capacity - Specify the number of events the console subscriber is allowed to buffer + --console-address + Address and port to expose tokio-console metrics - -f, --filters ... - An optional list of filters to permit, supports 'identity', 'thumbnail', 'resize', 'crop', and 'blur' + --console-buffer-capacity + Capacity of the console-subscriber Event Buffer - -i, --image-format - An optional image format to convert all uploaded files into, supports 'jpg', 'png', and 'webp' + -h, --help + Print help information - -m, --max-file-size - Specify the maximum allowed uploaded file size (in Megabytes) + --log-format + Format of logs printed to stdout - --max-image-area Specify the maximum area in pixels allowed in an image - --max-image-height Specify the maximum width in pixels allowed on an image - --max-image-width Specify the maximum width in pixels allowed on an image - --migrate-file Path to a file defining a store migration - -o, --opentelemetry-url - Enable OpenTelemetry Tracing exports to the given OpenTelemetry collector + --log-targets + Log levels to print to stdout, respects RUST_LOG formatting - -p, --path The path to the data directory, e.g. 
data/ - --sled-cache-capacity - Specify the number of bytes sled is allowed to use for it's cache + --old-db-path + Path to the old pict-rs sled database + --opentelemetry-service-name + Service Name to use for OpenTelemetry + + --opentelemetry-targets + Log levels to use for OpenTelemetry, respects RUST_LOG formatting + + --opentelemetry-url + URL to send OpenTelemetry metrics + + --save-to + File to save the current configuration for reproducible runs + + -V, --version + Print version information SUBCOMMANDS: - file-store - help Prints this message or the help of the given subcommand(s) - s3-store + filesystem Migrate from the provided filesystem storage + help Print this message or the help of the given subcommand(s) + object-storage Migrate from the provided object storage + run Runs the pict-rs web server ``` ``` -pict-rs-file-store 0.3.0 +pict-rs-run +Runs the pict-rs web server USAGE: - pict-rs file-store [OPTIONS] - -FLAGS: - -h, --help Prints help information - -V, --version Prints version information + pict-rs run [OPTIONS] [SUBCOMMAND] OPTIONS: - --path Path in which pict-rs will create it's 'files' directory + -a, --address
+ The address and port to bind the pict-rs web server + + --api-key + The API KEY required to access restricted routes + + -h, --help + Print help information + + --media-enable-silent-video + Whether to enable GIF and silent MP4 uploads. Full videos are unsupported + + --media-filters + Which media filters should be enabled on the `process` endpoint + + --media-format + Enforce uploaded media is transcoded to the provided format + + --media-max-area + The maximum area, in pixels, for uploaded media + + --media-max-file-size + The maximum size, in megabytes, for uploaded media + + --media-max-height + The maximum height, in pixels, for uploaded media + + --media-max-width + The maximum width, in pixels, for uploaded media + + --media-skip-validate-imports + Whether to validate media on the "import" endpoint + + --worker-id + + +SUBCOMMANDS: + filesystem Run pict-rs with filesystem storage + help Print this message or the help of the given subcommand(s) + object-storage Run pict-rs with object storage ``` +Try running `help` commands for more runtime configuration options ``` -pict-rs-s3-store 0.3.0 - -USAGE: - pict-rs s3-store [OPTIONS] --bucket-name --region - -FLAGS: - -h, --help Prints help information - -V, --version Prints version information - -OPTIONS: - --access-key - --bucket-name Name of the bucket in which pict-rs will store images - --region Region in which the bucket exists, can be an http endpoint - --secret-key - --security-token - --session-token +$ pict-rs run filesystem -h +$ pict-rs run object-storage -h +$ pict-rs run filesystem sled -h +$ pict-rs run object-storage sled -h ``` -See [`pict-rs.toml`](https://git.asonix.dog/asonix/pict-rs/src/branch/main/pict-rs.toml) and -[`migrate.toml`](https://git.asonix.dog/asonix/pict-rs/src/branch/main/migrate.toml) for more +See [`pict-rs.toml`](https://git.asonix.dog/asonix/pict-rs/src/branch/main/pict-rs.toml) for more configuration #### Example: Running on all interfaces, port 8080, storing data in /opt/data ``` -$ ./pict-rs -a 0.0.0.0:8080 -p /opt/data +$ ./pict-rs -a 0.0.0.0:8080 -p /opt/data run ``` Running locally, port 9000, storing data in data/, and converting all uploads to PNG ``` -$ ./pict-rs -a 127.0.0.1:9000 -p data/ -f png +$ ./pict-rs -a 127.0.0.1:9000 -p data/ -f png run ``` Running locally, port 8080, storing data in data/, and only allowing the `thumbnail` and `identity` filters ``` -$ ./pict-rs -a 127.0.0.1:8080 -p data/ -w thumbnail identity +$ ./pict-rs -a 127.0.0.1:8080 -p data/ -w thumbnail identity run ``` Running from a configuration file ``` -$ ./pict-rs -c ./pict-rs.toml +$ ./pict-rs -c ./pict-rs.toml run ``` -Migrating between storage backends +Migrating to object storage from filesystem storage (both storages must be configured in pict-rs.toml) ``` -$ ./pict-rs -p ./data --migrate-file ./migrate.toml +$ ./pict-rs -c ./pict-rs.toml --store filesystem migrate-store object-storage +``` +Dumping commandline flags to a toml file +``` +$ ./pict-rs -p data/ --store object-storage --object-storage-bucket-name pict-rs --object-storage-region us-east-1 dump pict-rs.toml ``` #### Docker diff --git a/client-examples/bash/upload.sh b/client-examples/bash/upload.sh new file mode 100755 index 0000000..0520ac0 --- /dev/null +++ b/client-examples/bash/upload.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -xe + +upload_ids=$( + curl \ + -F "images[]=@../cat.jpg" \ + -F "images[]=@../earth.gif" \ + -F "images[]=@../scene.webp" \ + -F "images[]=@../test.png" \ + -F "images[]=@../earth.gif" \ + -F 
"images[]=@../test.png" \ + -F "images[]=@../cat.jpg" \ + -F "images[]=@../scene.webp" \ + 'http://localhost:8080/image/backgrounded' | \ + jq '.uploads[].upload_id' | \ + sed 's/"//g' +) + +for upload in $(echo $upload_ids) +do + echo "Processing for $upload" + + json=$(curl "http://localhost:8080/image/backgrounded/claim?upload_id=$upload") + delete_token=$(echo $json | jq '.files[0].delete_token' | sed 's/"//g') + filename=$(echo $json | jq '.files[0].file' | sed 's/"//g') + + details=$(curl "http://localhost:8080/image/details/original/$filename") + mime_type=$(echo $details | jq '.content_type' | sed 's/"//g') + + echo "Original mime: $mime_type" + + curl "http://localhost:8080/image/process_backgrounded.webp?src=$filename&resize=200" + sleep 1 + details=$(curl "http://localhost:8080/image/details/process.webp?src=$filename&resize=200") + mime_type=$(echo $details | jq '.content_type' | sed 's/"//g') + + echo "Processed mime: $mime_type" + + curl "http://localhost:8080/image/delete/$delete_token/$filename" +done diff --git a/defaults.toml b/defaults.toml new file mode 100644 index 0000000..c43d4f2 --- /dev/null +++ b/defaults.toml @@ -0,0 +1,40 @@ +[server] +address = '0.0.0.0:8080' +worker_id = 'pict-rs-1' +[tracing.logging] +format = 'normal' +targets = 'warn,tracing_actix_web=info,actix_server=info,actix_web=info' + +[tracing.console] +buffer_capacity = 102400 + +[tracing.opentelemetry] +service_name = 'pict-rs' +targets = 'info' + +[old_db] +path = '/mnt' + +[media] +max_width = 10000 +max_height = 10000 +max_area = 40000000 +max_file_size = 40 +enable_silent_video = true +filters = [ + 'blur', + 'crop', + 'identity', + 'resize', + 'thumbnail', +] +skip_validate_imports = false + +[repo] +type = 'sled' +path = '/mnt/sled-repo' +cache_capacity = 67108864 + +[store] +type = 'filesystem' +path = '/mnt/files' diff --git a/dev.toml b/dev.toml new file mode 100644 index 0000000..04e9397 --- /dev/null +++ b/dev.toml @@ -0,0 +1,40 @@ +[server] +address = '0.0.0.0:8080' +worker_id = 'pict-rs-1' +[tracing.logging] +format = 'normal' +targets = 'warn,tracing_actix_web=info,actix_server=info,actix_web=info' + +[tracing.console] +buffer_capacity = 102400 + +[tracing.opentelemetry] +service_name = 'pict-rs' +targets = 'info' + +[old_db] +path = 'data/' + +[media] +max_width = 10000 +max_height = 10000 +max_area = 40000000 +max_file_size = 40 +enable_silent_video = true +filters = [ + 'blur', + 'crop', + 'identity', + 'resize', + 'thumbnail', +] +skip_validate_imports = false + +[repo] +type = 'sled' +path = 'data/sled-repo' +cache_capacity = 67108864 + +[store] +type = 'filesystem' +path = 'data/files' diff --git a/docker/object-storage/Dockerfile b/docker/object-storage/Dockerfile index 9b07035..fcb932e 100644 --- a/docker/object-storage/Dockerfile +++ b/docker/object-storage/Dockerfile @@ -1,25 +1,17 @@ -FROM archlinux:latest +FROM alpine:edge ARG UID=1000 ARG GID=1000 RUN \ - pacman -Syu --noconfirm \ - perl-image-exiftool \ - imagemagick \ - ffmpeg && \ - groupadd -g 1000 app && \ - useradd -m \ - -d /opt/app \ - -u $UID \ - -g $GID \ - app + apk add exiftool imagemagick ffmpeg && \ + addgroup -g $GID app && \ + adduser -h /opt/app -g "" -G app -u $UID -D app && \ + chown -R app:app /mnt COPY root/ / COPY ./pict-rs.toml /etc/pict-rs.toml -ENV PATH=$PATH:/usr/bin/vendor_perl - WORKDIR /opt/app USER app diff --git a/docker/object-storage/pict-rs.toml b/docker/object-storage/pict-rs.toml index 7d4090c..f710493 100644 --- a/docker/object-storage/pict-rs.toml +++ 
b/docker/object-storage/pict-rs.toml @@ -1,8 +1,42 @@ +[server] +address = '0.0.0.0:8080' +worker_id = 'pict-rs-1' +[tracing.logging] +format = 'normal' +targets = 'warn,tracing_actix_web=info,actix_server=info,actix_web=info' + +[tracing.console] +buffer_capacity = 102400 + +[tracing.opentelemetry] +service_name = 'pict-rs' +targets = 'info' + +[old_db] path = '/mnt' -addr = '0.0.0.0:8080' + +[media] +max_width = 10000 +max_height = 10000 +max_area = 40000000 +max_file_size = 40 +enable_silent_video = true +filters = [ + 'blur', + 'crop', + 'identity', + 'resize', + 'thumbnail', +] +skip_validate_imports = false + +[repo] +type = 'sled' +path = '/mnt/sled-repo' +cache_capacity = 67108864 [store] -type = 's3_store' +type = 'object_storage' bucket_name = 'pict-rs' region = 'http://minio:9000' access_key = 'Q7Z3AY3JO01N27UNH5IR' diff --git a/pict-rs.toml b/pict-rs.toml index 645d002..16c3182 100644 --- a/pict-rs.toml +++ b/pict-rs.toml @@ -43,13 +43,6 @@ max_image_area = 40_000_000 # in Pixels # default: false skip_validate_imports = false -## Optional: set sled's cache capacity to a given number of bytes -# environment variable: PICTRS_SLED_CACHE_CAPACITY -# default: 67_108_864 (1024 * 1024 * 64) e.g. 64MB -# -# Increasing this value can improve performance by keeping more of the database in RAM -sled_cache_capacity = 67_108_864 # in bytes - ## Optional: enable tokio-console and set the event buffer size # environment variable: PICTRS_CONSOLE_BUFFER_CAPACITY # default: empty @@ -95,58 +88,65 @@ api_key = 'API_KEY' # Not specifying opentelemetry_url means no traces will be exported opentelemetry_url = 'http://localhost:4317/' -## Optional: store definition -# default store: file_store -# -# Not specifying a store means a file_store will be used with the top-level pict-rs' path -[store] -type = "file_store" +## Optional: the data repository to use +# environment variable: PICTRS_REPO +# default: 'sled' +# available options: 'sled' +repo = 'sled' -## Example file store -# [store] -# -# # environment variable: PICTRS_STORE__TYPE -# type = 'file_store' -# -# # Optional: file path -# # environment variable: PICTRS_STORE__PATH -# # default: empty -# # -# # Not specifying path means pict-rs' top-level `path` config is used -# path = './data' +## Optional: the file storage to use +# environment variable: PICTRS_STORE +# default: 'filesystem' +# available options: 'filesystem', 'object_storage' +store = 'filesystem' -## Example s3 store -# [store] + +## Optional: Sled store configration definition +[sled] +## Optional: set sled's cache capacity to a given number of bytes +# environment variable: PICTRS_SLED__SLED_CACHE_CAPACITY +# default: 67_108_864 (1024 * 1024 * 64) e.g. 
64MB # -# # environment variable: PICTRS_STORE__TYPE -# type = 's3_store' +# Increasing this value can improve performance by keeping more of the database in RAM +sled_cache_capacity = 67_108_864 # in bytes + + +## Optional: Filesystem storage configuration +[filesystem_storage] +## Optional: set the path for pict-rs filesystem file storage +# environment variable: PICTRS_FILESYSTEM_STORAGE__FILESYSTEM_STORAGE_PATH +# default '${path}/files' +filesystem_storage_path = 'data/files' + + +## Optional: Object Storage configuration +[object_storage] +## Required: bucket name +# environment variable: PICTRS_OBJECT_STORAGE__OBJECT_STORE_BUCKET_NAME +object_store_bucket_name = 'pict-rs' + +## Required: bucket region +# environment variable: PICTRS_OBJECT_STORAGE__OBJECT_STORE_REGION # -# # Required: bucket name -# # environment variable: PICTRS_STORE__BUCKET_NAME -# bucket_name = 'rust_s3' -# -# # Required: bucket region -# # environment variable: PICTRS_STORE__REGION -# # -# # can also be endpoint of local s3 store, e.g. 'http://minio:9000' -# region = 'eu-central-1' -# -# # Optional: bucket access key -# # environment variable: PICTRS_STORE__ACCESS_KEY -# # default: empty -# access_key = 'ACCESS_KEY' -# -# # Optional: bucket secret key -# # environment variable: PICTRS_STORE__SECRET_KEY -# # default: empty -# secret_key = 'SECRET_KEY' -# -# # Optional: bucket security token -# # environment variable: PICTRS_STORE__SECURITY_TOKEN -# # default: empty -# security_token = 'SECURITY_TOKEN' -# -# # Optional: bucket session token -# # environment variable: PICTRS_STORE__SESSION_TOKEN -# # default: empty -# session_token = 'SESSION_TOKEN' +# can also be endpoint of local s3 store, e.g. 'http://minio:9000' +object_store_region = 'eu-central-1' + +## Optional: bucket access key +# environment variable: PICTRS_OBJECT_STORAGE__OBJECT_STORE_ACCESS_KEY +# default: empty +object_store_access_key = '09ODZ3BGBISV4U92JLIM' + +## Optional: bucket secret key +# environment variable: PICTRS_OBJECT_STORAGE__OBJECT_STORE_SECRET_KEY +# default: empty +object_store_secret_key = 'j35YE9RrxhBP0dpiD5mmdXRXvPkEJR4k6zK12q3o' + +## Optional: bucket security token +# environment variable: PICTRS_OBJECT_STORAGE__OBJECT_STORE_SECURITY_TOKEN +# default: empty +object_store_security_token = 'SECURITY_TOKEN' + +## Optional: bucket session token +# environment variable: PICTRS_OBJECT_STORAGE__OBJECT_STORE_SESSION_TOKEN +# default: empty +object_store_session_token = 'SESSION_TOKEN' diff --git a/src/backgrounded.rs b/src/backgrounded.rs new file mode 100644 index 0000000..e696161 --- /dev/null +++ b/src/backgrounded.rs @@ -0,0 +1,92 @@ +use crate::{ + error::Error, + repo::{FullRepo, UploadId, UploadRepo}, + store::Store, +}; +use actix_web::web::Bytes; +use futures_util::{Stream, TryStreamExt}; +use tokio_util::io::StreamReader; + +pub(crate) struct Backgrounded +where + R: FullRepo + 'static, + S: Store, +{ + repo: R, + identifier: Option, + upload_id: Option, +} + +impl Backgrounded +where + R: FullRepo + 'static, + S: Store, +{ + pub(crate) fn disarm(mut self) { + let _ = self.identifier.take(); + let _ = self.upload_id.take(); + } + + pub(crate) fn upload_id(&self) -> Option { + self.upload_id + } + + pub(crate) fn identifier(&self) -> Option<&S::Identifier> { + self.identifier.as_ref() + } + + pub(crate) async fn proxy
<P>(repo: R, store: S, stream: P) -> Result<Self, Error> + where + P: Stream<Item = Result<Bytes, Error>>, + { + let mut this = Self { + repo, + identifier: None, + upload_id: Some(UploadId::generate()), + }; + + this.do_proxy(store, stream).await?; + + Ok(this) + } + + async fn do_proxy<P>
(&mut self, store: S, stream: P) -> Result<(), Error> + where + P: Stream>, + { + UploadRepo::create(&self.repo, self.upload_id.expect("Upload id exists")).await?; + + let stream = stream.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)); + let mut reader = StreamReader::new(Box::pin(stream)); + + let identifier = store.save_async_read(&mut reader).await?; + + self.identifier = Some(identifier); + + Ok(()) + } +} + +impl Drop for Backgrounded +where + R: FullRepo + 'static, + S: Store, +{ + fn drop(&mut self) { + if let Some(identifier) = self.identifier.take() { + let repo = self.repo.clone(); + + actix_rt::spawn(async move { + let _ = crate::queue::cleanup_identifier(&repo, identifier).await; + }); + } + + if let Some(upload_id) = self.upload_id { + let repo = self.repo.clone(); + + actix_rt::spawn(async move { + let _ = repo.claim(upload_id).await; + }); + } + } +} diff --git a/src/concurrent_processor.rs b/src/concurrent_processor.rs index abaeaca..92a8cf5 100644 --- a/src/concurrent_processor.rs +++ b/src/concurrent_processor.rs @@ -1,6 +1,6 @@ use crate::{ + details::Details, error::{Error, UploadError}, - upload_manager::Details, }; use actix_web::web; use dashmap::{mapref::entry::Entry, DashMap}; @@ -16,13 +16,15 @@ use tracing::Span; type OutcomeSender = Sender<(Details, web::Bytes)>; -type ProcessMap = DashMap>; +type ProcessMapKey = (Vec, PathBuf); + +type ProcessMap = DashMap>; static PROCESS_MAP: Lazy = Lazy::new(DashMap::new); struct CancelToken { span: Span, - path: PathBuf, + key: ProcessMapKey, receiver: Option>, } @@ -39,14 +41,17 @@ impl CancelSafeProcessor where F: Future>, { - pub(super) fn new(path: PathBuf, fut: F) -> Self { - let entry = PROCESS_MAP.entry(path.clone()); + pub(super) fn new(hash: &[u8], path: PathBuf, fut: F) -> Self { + let key = (hash.to_vec(), path.clone()); + + let entry = PROCESS_MAP.entry(key.clone()); let (receiver, span) = match entry { Entry::Vacant(vacant) => { vacant.insert(Vec::new()); let span = tracing::info_span!( "Processing image", + hash = &tracing::field::debug(&hash), path = &tracing::field::debug(&path), completed = &tracing::field::Empty, ); @@ -57,6 +62,7 @@ where occupied.get_mut().push(tx); let span = tracing::info_span!( "Waiting for processed image", + hash = &tracing::field::debug(&hash), path = &tracing::field::debug(&path), ); (Some(rx), span) @@ -66,7 +72,7 @@ where CancelSafeProcessor { cancel_token: CancelToken { span, - path, + key, receiver, }, fut, @@ -85,7 +91,7 @@ where let span = &this.cancel_token.span; let receiver = &mut this.cancel_token.receiver; - let path = &this.cancel_token.path; + let key = &this.cancel_token.key; let fut = this.fut; span.in_scope(|| { @@ -95,7 +101,7 @@ where .map(|res| res.map_err(|_| UploadError::Canceled.into())) } else { fut.poll(cx).map(|res| { - let opt = PROCESS_MAP.remove(path); + let opt = PROCESS_MAP.remove(key); res.map(|tup| { if let Some((_, vec)) = opt { for sender in vec { @@ -113,7 +119,7 @@ where impl Drop for CancelToken { fn drop(&mut self) { if self.receiver.is_none() { - let completed = PROCESS_MAP.remove(&self.path).is_none(); + let completed = PROCESS_MAP.remove(&self.key).is_none(); self.span.record("completed", &completed); } } diff --git a/src/config.rs b/src/config.rs index be1e09c..93e8e6a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,369 +1,45 @@ -use std::{collections::HashSet, net::SocketAddr, path::PathBuf}; -use structopt::StructOpt; -use url::Url; +use clap::Parser; -use crate::magick::ValidInputType; +mod commandline; +mod 
defaults; +mod file; +mod primitives; -#[derive(Clone, Debug, StructOpt)] -pub(crate) struct Args { - #[structopt(short, long, help = "Path to the pict-rs configuration file")] - config_file: Option, +use commandline::{Args, Output}; +use config::Config; +use defaults::Defaults; - #[structopt(long, help = "Path to a file defining a store migration")] - migrate_file: Option, +pub(crate) use commandline::Operation; +pub(crate) use file::{ConfigFile as Configuration, OpenTelemetry, Repo, Sled, Tracing}; +pub(crate) use primitives::{Filesystem, ImageFormat, LogFormat, ObjectStorage, Store}; - #[structopt(flatten)] - overrides: Overrides, -} - -fn is_false(b: &bool) -> bool { - !b -} - -#[derive(Clone, Debug, serde::Serialize, structopt::StructOpt)] -#[serde(rename_all = "snake_case")] -pub(crate) struct Overrides { - #[structopt( - short, - long, - help = "Whether to skip validating images uploaded via the internal import API" - )] - #[serde(skip_serializing_if = "is_false")] - skip_validate_imports: bool, - - #[structopt(short, long, help = "The address and port the server binds to.")] - #[serde(skip_serializing_if = "Option::is_none")] - addr: Option, - - #[structopt(short, long, help = "The path to the data directory, e.g. data/")] - #[serde(skip_serializing_if = "Option::is_none")] - path: Option, - - #[structopt( - short, - long, - help = "An optional image format to convert all uploaded files into, supports 'jpg', 'png', and 'webp'" - )] - #[serde(skip_serializing_if = "Option::is_none")] - image_format: Option, - - #[structopt( - short, - long, - help = "An optional list of filters to permit, supports 'identity', 'thumbnail', 'resize', 'crop', and 'blur'" - )] - #[serde(skip_serializing_if = "Option::is_none")] - filters: Option>, - - #[structopt( - short, - long, - help = "Specify the maximum allowed uploaded file size (in Megabytes)" - )] - #[serde(skip_serializing_if = "Option::is_none")] - max_file_size: Option, - - #[structopt(long, help = "Specify the maximum width in pixels allowed on an image")] - #[serde(skip_serializing_if = "Option::is_none")] - max_image_width: Option, - - #[structopt(long, help = "Specify the maximum width in pixels allowed on an image")] - #[serde(skip_serializing_if = "Option::is_none")] - max_image_height: Option, - - #[structopt(long, help = "Specify the maximum area in pixels allowed in an image")] - #[serde(skip_serializing_if = "Option::is_none")] - max_image_area: Option, - - #[structopt( - long, - help = "Specify the number of bytes sled is allowed to use for it's cache" - )] - #[serde(skip_serializing_if = "Option::is_none")] - sled_cache_capacity: Option, - - #[structopt( - long, - help = "Specify the number of events the console subscriber is allowed to buffer" - )] - #[serde(skip_serializing_if = "Option::is_none")] - console_buffer_capacity: Option, - - #[structopt( - long, - help = "An optional string to be checked on requests to privileged endpoints" - )] - #[serde(skip_serializing_if = "Option::is_none")] - api_key: Option, - - #[structopt( - short, - long, - help = "Enable OpenTelemetry Tracing exports to the given OpenTelemetry collector" - )] - #[serde(skip_serializing_if = "Option::is_none")] - opentelemetry_url: Option, - - #[structopt(subcommand)] - #[serde(skip_serializing_if = "Option::is_none")] - store: Option, -} - -impl Overrides { - fn is_default(&self) -> bool { - !self.skip_validate_imports - && self.addr.is_none() - && self.path.is_none() - && self.image_format.is_none() - && self.filters.is_none() - && 
self.max_file_size.is_none() - && self.max_image_width.is_none() - && self.max_image_height.is_none() - && self.max_image_area.is_none() - && self.sled_cache_capacity.is_none() - && self.console_buffer_capacity.is_none() - && self.api_key.is_none() - && self.opentelemetry_url.is_none() - && self.store.is_none() - } -} - -#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] -#[serde(rename_all = "snake_case")] -pub(crate) struct Migrate { - from: Store, - to: Store, -} - -impl Migrate { - pub(crate) fn from(&self) -> &Store { - &self.from - } - - pub(crate) fn to(&self) -> &Store { - &self.to - } -} - -#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, structopt::StructOpt)] -#[serde(rename_all = "snake_case")] -#[serde(tag = "type")] -pub(crate) enum Store { - FileStore { - // defaults to {config.path} - #[structopt( - long, - help = "Path in which pict-rs will create it's 'files' directory" - )] - #[serde(skip_serializing_if = "Option::is_none")] - path: Option, - }, - #[cfg(feature = "object-storage")] - S3Store { - #[structopt(long, help = "Name of the bucket in which pict-rs will store images")] - bucket_name: String, - - #[structopt( - long, - help = "Region in which the bucket exists, can be an http endpoint" - )] - region: crate::serde_str::Serde, - - #[serde(skip_serializing_if = "Option::is_none")] - #[structopt(long)] - access_key: Option, - - #[structopt(long)] - #[serde(skip_serializing_if = "Option::is_none")] - secret_key: Option, - - #[structopt(long)] - #[serde(skip_serializing_if = "Option::is_none")] - security_token: Option, - - #[structopt(long)] - #[serde(skip_serializing_if = "Option::is_none")] - session_token: Option, - }, -} - -#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] -#[serde(rename_all = "snake_case")] -pub(crate) struct Config { - skip_validate_imports: bool, - addr: SocketAddr, - path: PathBuf, - image_format: Option, - filters: Option>, - max_file_size: usize, - max_image_width: usize, - max_image_height: usize, - max_image_area: usize, - sled_cache_capacity: u64, - console_buffer_capacity: Option, - api_key: Option, - opentelemetry_url: Option, - store: Store, -} - -#[derive(serde::Serialize)] -#[serde(rename_all = "snake_case")] -pub(crate) struct Defaults { - skip_validate_imports: bool, - addr: SocketAddr, - max_file_size: usize, - max_image_width: usize, - max_image_height: usize, - max_image_area: usize, - sled_cache_capacity: u64, - store: Store, -} - -impl Defaults { - fn new() -> Self { - Defaults { - skip_validate_imports: false, - addr: ([0, 0, 0, 0], 8080).into(), - max_file_size: 40, - max_image_width: 10_000, - max_image_height: 10_000, - max_image_area: 40_000_000, - sled_cache_capacity: 1024 * 1024 * 64, // 16 times smaller than sled's default of 1GB - store: Store::FileStore { path: None }, - } - } -} - -impl Config { - pub(crate) fn build() -> anyhow::Result { - let args = Args::from_args(); - - if let Some(path) = args.migrate_file { - let migrate_config = config::Config::builder() - .add_source(config::File::from(path)) - .build()?; - let migrate: Migrate = migrate_config.try_deserialize()?; - - crate::MIGRATE.set(migrate).unwrap(); - } - - let mut base_config = - config::Config::builder().add_source(config::Config::try_from(&Defaults::new())?); - - if let Some(path) = args.config_file { - base_config = base_config.add_source(config::File::from(path)); - }; - - if !args.overrides.is_default() { - let merging = config::Config::try_from(&args.overrides)?; - - base_config = 
base_config.add_source(merging); - } - - let config: Self = base_config - .add_source(config::Environment::with_prefix("PICTRS").separator("__")) - .build()? - .try_deserialize()?; - - Ok(config) - } - - pub(crate) fn store(&self) -> &Store { - &self.store - } - - pub(crate) fn bind_address(&self) -> SocketAddr { - self.addr - } - - pub(crate) fn data_dir(&self) -> PathBuf { - self.path.clone() - } - - pub(crate) fn sled_cache_capacity(&self) -> u64 { - self.sled_cache_capacity - } - - pub(crate) fn console_buffer_capacity(&self) -> Option { - self.console_buffer_capacity - } - - pub(crate) fn format(&self) -> Option { - self.image_format - } - - pub(crate) fn allowed_filters(&self) -> Option> { - self.filters.as_ref().map(|wl| wl.iter().cloned().collect()) - } - - pub(crate) fn validate_imports(&self) -> bool { - !self.skip_validate_imports - } - - pub(crate) fn max_file_size(&self) -> usize { - self.max_file_size - } - - pub(crate) fn max_width(&self) -> usize { - self.max_image_width - } - - pub(crate) fn max_height(&self) -> usize { - self.max_image_height - } - - pub(crate) fn max_area(&self) -> usize { - self.max_image_area - } - - pub(crate) fn api_key(&self) -> Option<&str> { - self.api_key.as_deref() - } - - pub(crate) fn opentelemetry_url(&self) -> Option<&Url> { - self.opentelemetry_url.as_ref() - } -} - -#[derive(Debug, thiserror::Error)] -#[error("Invalid format supplied, {0}")] -pub(crate) struct FormatError(String); - -#[derive(Copy, Clone, Debug, serde::Deserialize, serde::Serialize)] -#[serde(rename_all = "snake_case")] -pub(crate) enum Format { - Jpeg, - Png, - Webp, -} - -impl Format { - pub(crate) fn as_magick_format(&self) -> &'static str { - match self { - Format::Jpeg => "JPEG", - Format::Png => "PNG", - Format::Webp => "WEBP", - } - } - - pub(crate) fn as_hint(&self) -> Option { - match self { - Format::Jpeg => Some(ValidInputType::Jpeg), - Format::Png => Some(ValidInputType::Png), - Format::Webp => Some(ValidInputType::Webp), - } - } -} - -impl std::str::FromStr for Format { - type Err = FormatError; - - fn from_str(s: &str) -> Result { - match s { - "png" => Ok(Format::Png), - "jpg" => Ok(Format::Jpeg), - "webp" => Ok(Format::Webp), - other => Err(FormatError(other.to_string())), - } - } +pub(crate) fn configure() -> color_eyre::Result<(Configuration, Operation)> { + let Output { + config_format, + operation, + save_to, + config_file, + } = Args::parse().into_output(); + + let config = Config::builder().add_source(config::Config::try_from(&Defaults::default())?); + + let config = if let Some(config_file) = config_file { + config.add_source(config::File::from(config_file)) + } else { + config + }; + + let built = config + .add_source(config::Environment::with_prefix("PICTRS").separator("__")) + .add_source(config::Config::try_from(&config_format)?) 
+ .build()?; + + let config: Configuration = built.try_deserialize()?; + + if let Some(save_to) = save_to { + let output = toml::to_string_pretty(&config)?; + std::fs::write(save_to, output)?; + } + + Ok((config, operation)) } diff --git a/src/config/commandline.rs b/src/config/commandline.rs new file mode 100644 index 0000000..c335cbb --- /dev/null +++ b/src/config/commandline.rs @@ -0,0 +1,576 @@ +use crate::{ + config::primitives::{ImageFormat, LogFormat, Targets}, + serde_str::Serde, +}; +use clap::{Parser, Subcommand}; +use std::{net::SocketAddr, path::PathBuf}; +use url::Url; + +impl Args { + pub(super) fn into_output(self) -> Output { + let Args { + config_file, + old_db_path, + log_format, + log_targets, + console_address, + console_buffer_capacity, + opentelemetry_url, + opentelemetry_service_name, + opentelemetry_targets, + save_to, + command, + } = self; + + let old_db = OldDb { path: old_db_path }; + + let tracing = Tracing { + logging: Logging { + format: log_format, + targets: log_targets.map(Serde::new), + }, + console: Console { + address: console_address, + buffer_capacity: console_buffer_capacity, + }, + opentelemetry: OpenTelemetry { + url: opentelemetry_url, + service_name: opentelemetry_service_name, + targets: opentelemetry_targets.map(Serde::new), + }, + }; + + match command { + Command::Run(Run { + address, + api_key, + worker_id, + media_skip_validate_imports, + media_max_width, + media_max_height, + media_max_area, + media_max_file_size, + media_enable_silent_video, + media_filters, + media_format, + store, + }) => { + let server = Server { + address, + api_key, + worker_id, + }; + let media = Media { + skip_validate_imports: media_skip_validate_imports, + max_width: media_max_width, + max_height: media_max_height, + max_area: media_max_area, + max_file_size: media_max_file_size, + enable_silent_video: media_enable_silent_video, + filters: media_filters, + format: media_format, + }; + let operation = Operation::Run; + + match store { + Some(RunStore::Filesystem(RunFilesystem { system, repo })) => { + let store = Some(Store::Filesystem(system)); + Output { + config_format: ConfigFormat { + server, + old_db, + tracing, + media, + store, + repo, + }, + operation, + config_file, + save_to, + } + } + Some(RunStore::ObjectStorage(RunObjectStorage { storage, repo })) => { + let store = Some(Store::ObjectStorage(storage)); + Output { + config_format: ConfigFormat { + server, + old_db, + tracing, + media, + store, + repo, + }, + operation, + config_file, + save_to, + } + } + None => Output { + config_format: ConfigFormat { + server, + old_db, + tracing, + media, + store: None, + repo: None, + }, + operation, + config_file, + save_to, + }, + } + } + Command::MigrateStore(migrate_store) => { + let server = Server::default(); + let media = Media::default(); + + match migrate_store { + MigrateStore::Filesystem(MigrateFilesystem { from, to }) => match to { + MigrateStoreInner::Filesystem(MigrateFilesystemInner { to, repo }) => { + Output { + config_format: ConfigFormat { + server, + old_db, + tracing, + media, + store: None, + repo, + }, + operation: Operation::MigrateStore { + from: from.into(), + to: to.into(), + }, + config_file, + save_to, + } + } + MigrateStoreInner::ObjectStorage(MigrateObjectStorageInner { + to, + repo, + }) => Output { + config_format: ConfigFormat { + server, + old_db, + tracing, + media, + store: None, + repo, + }, + operation: Operation::MigrateStore { + from: from.into(), + to: to.into(), + }, + config_file, + save_to, + }, + }, + 
MigrateStore::ObjectStorage(MigrateObjectStorage { from, to }) => match to { + MigrateStoreInner::Filesystem(MigrateFilesystemInner { to, repo }) => { + Output { + config_format: ConfigFormat { + server, + old_db, + tracing, + media, + store: None, + repo, + }, + operation: Operation::MigrateStore { + from: from.into(), + to: to.into(), + }, + config_file, + save_to, + } + } + MigrateStoreInner::ObjectStorage(MigrateObjectStorageInner { + to, + repo, + }) => Output { + config_format: ConfigFormat { + server, + old_db, + tracing, + media, + store: None, + repo, + }, + operation: Operation::MigrateStore { + from: from.into(), + to: to.into(), + }, + config_file, + save_to, + }, + }, + } + } + } + } +} + +pub(super) struct Output { + pub(super) config_format: ConfigFormat, + pub(super) operation: Operation, + pub(super) save_to: Option, + pub(super) config_file: Option, +} + +#[allow(clippy::large_enum_variant)] +#[derive(Clone)] +pub(crate) enum Operation { + Run, + MigrateStore { + from: crate::config::primitives::Store, + to: crate::config::primitives::Store, + }, +} + +#[derive(Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(super) struct ConfigFormat { + server: Server, + old_db: OldDb, + tracing: Tracing, + media: Media, + #[serde(skip_serializing_if = "Option::is_none")] + repo: Option, + #[serde(skip_serializing_if = "Option::is_none")] + store: Option, +} + +#[derive(Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct Server { + #[serde(skip_serializing_if = "Option::is_none")] + address: Option, + #[serde(skip_serializing_if = "Option::is_none")] + worker_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + api_key: Option, +} + +#[derive(Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct Tracing { + logging: Logging, + console: Console, + opentelemetry: OpenTelemetry, +} + +#[derive(Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct Logging { + #[serde(skip_serializing_if = "Option::is_none")] + format: Option, + #[serde(skip_serializing_if = "Option::is_none")] + targets: Option>, +} + +#[derive(Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct Console { + #[serde(skip_serializing_if = "Option::is_none")] + address: Option, + #[serde(skip_serializing_if = "Option::is_none")] + buffer_capacity: Option, +} + +#[derive(Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct OpenTelemetry { + #[serde(skip_serializing_if = "Option::is_none")] + url: Option, + #[serde(skip_serializing_if = "Option::is_none")] + service_name: Option, + #[serde(skip_serializing_if = "Option::is_none")] + targets: Option>, +} + +#[derive(Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct OldDb { + #[serde(skip_serializing_if = "Option::is_none")] + path: Option, +} + +#[derive(Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct Media { + #[serde(skip_serializing_if = "Option::is_none")] + max_width: Option, + #[serde(skip_serializing_if = "Option::is_none")] + max_height: Option, + #[serde(skip_serializing_if = "Option::is_none")] + max_area: Option, + #[serde(skip_serializing_if = "Option::is_none")] + max_file_size: Option, + #[serde(skip_serializing_if = "Option::is_none")] + enable_silent_video: Option, + #[serde(skip_serializing_if = "Option::is_none")] + filters: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + format: Option, + 
#[serde(skip_serializing_if = "Option::is_none")] + skip_validate_imports: Option, +} + +/// Run the pict-rs application +#[derive(Debug, Parser)] +#[clap(author, version, about, long_about = None)] +pub(super) struct Args { + /// Path to the pict-rs configuration file + #[clap(short, long)] + config_file: Option, + + /// Path to the old pict-rs sled database + #[clap(long)] + old_db_path: Option, + + /// Format of logs printed to stdout + #[clap(long)] + log_format: Option, + /// Log levels to print to stdout, respects RUST_LOG formatting + #[clap(long)] + log_targets: Option, + + /// Address and port to expose tokio-console metrics + #[clap(long)] + console_address: Option, + /// Capacity of the console-subscriber Event Buffer + #[clap(long)] + console_buffer_capacity: Option, + + /// URL to send OpenTelemetry metrics + #[clap(long)] + opentelemetry_url: Option, + /// Service Name to use for OpenTelemetry + #[clap(long)] + opentelemetry_service_name: Option, + /// Log levels to use for OpenTelemetry, respects RUST_LOG formatting + #[clap(long)] + opentelemetry_targets: Option, + + /// File to save the current configuration for reproducible runs + #[clap(long)] + save_to: Option, + + #[clap(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + /// Runs the pict-rs web server + Run(Run), + + /// Migrates from one provided media store to another + #[clap(flatten)] + MigrateStore(MigrateStore), +} + +#[derive(Debug, Parser)] +struct Run { + /// The address and port to bind the pict-rs web server + #[clap(short, long)] + address: Option, + + /// The API KEY required to access restricted routes + #[clap(long)] + api_key: Option, + + #[clap(long)] + worker_id: Option, + + /// Whether to validate media on the "import" endpoint + #[clap(long)] + media_skip_validate_imports: Option, + /// The maximum width, in pixels, for uploaded media + #[clap(long)] + media_max_width: Option, + /// The maximum height, in pixels, for uploaded media + #[clap(long)] + media_max_height: Option, + /// The maximum area, in pixels, for uploaded media + #[clap(long)] + media_max_area: Option, + /// The maximum size, in megabytes, for uploaded media + #[clap(long)] + media_max_file_size: Option, + /// Whether to enable GIF and silent MP4 uploads. 
Full videos are unsupported + #[clap(long)] + media_enable_silent_video: Option, + /// Which media filters should be enabled on the `process` endpoint + #[clap(long)] + media_filters: Option>, + /// Enforce uploaded media is transcoded to the provided format + #[clap(long)] + media_format: Option, + + #[clap(subcommand)] + store: Option, +} + +/// Configure the provided storage +#[derive(Clone, Debug, Subcommand, serde::Serialize)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type")] +enum Store { + /// configure filesystem storage + Filesystem(Filesystem), + + /// configure object storage + ObjectStorage(ObjectStorage), +} + +/// Run pict-rs with the provided storage +#[derive(Debug, Subcommand)] +enum RunStore { + /// Run pict-rs with filesystem storage + Filesystem(RunFilesystem), + + /// Run pict-rs with object storage + ObjectStorage(RunObjectStorage), +} + +/// Configure the pict-rs storage migration +#[derive(Debug, Subcommand)] +enum MigrateStore { + /// Migrate from the provided filesystem storage + Filesystem(MigrateFilesystem), + + /// Migrate from the provided object storage + ObjectStorage(MigrateObjectStorage), +} + +/// Configure the destination storage for pict-rs storage migration +#[derive(Debug, Subcommand)] +enum MigrateStoreInner { + /// Migrate to the provided filesystem storage + Filesystem(MigrateFilesystemInner), + + /// Migrate to the provided object storage + ObjectStorage(MigrateObjectStorageInner), +} + +/// Migrate pict-rs' storage from the provided filesystem storage +#[derive(Debug, Parser)] +struct MigrateFilesystem { + #[clap(flatten)] + from: crate::config::primitives::Filesystem, + + #[clap(subcommand)] + to: MigrateStoreInner, +} + +/// Migrate pict-rs' storage to the provided filesystem storage +#[derive(Debug, Parser)] +struct MigrateFilesystemInner { + #[clap(flatten)] + to: crate::config::primitives::Filesystem, + + #[clap(subcommand)] + repo: Option, +} + +/// Migrate pict-rs' storage from the provided object storage +#[derive(Debug, Parser)] +struct MigrateObjectStorage { + #[clap(flatten)] + from: crate::config::primitives::ObjectStorage, + + #[clap(subcommand)] + to: MigrateStoreInner, +} + +/// Migrate pict-rs' storage to the provided object storage +#[derive(Debug, Parser)] +struct MigrateObjectStorageInner { + #[clap(flatten)] + to: crate::config::primitives::ObjectStorage, + + #[clap(subcommand)] + repo: Option, +} + +/// Run pict-rs with the provided filesystem storage +#[derive(Debug, Parser)] +struct RunFilesystem { + #[clap(flatten)] + system: Filesystem, + + #[clap(subcommand)] + repo: Option, +} + +/// Run pict-rs with the provided object storage +#[derive(Debug, Parser)] +struct RunObjectStorage { + #[clap(flatten)] + storage: ObjectStorage, + + #[clap(subcommand)] + repo: Option, +} + +/// Configuration for data repositories +#[derive(Debug, Subcommand, serde::Serialize)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type")] +enum Repo { + /// Run pict-rs with the provided sled-backed data repository + Sled(Sled), +} + +/// Configuration for filesystem media storage +#[derive(Clone, Debug, Parser, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct Filesystem { + /// The path to store uploaded media + #[clap(short, long)] + path: Option, +} + +/// Configuration for Object Storage +#[derive(Clone, Debug, Parser, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct ObjectStorage { + /// The bucket in which to store media + #[clap(short, long)] + bucket_name: Option, + + /// The region the bucket is 
located in + #[clap(short, long)] + region: Option>, + + /// The Access Key for the user accessing the bucket + #[clap(short, long)] + access_key: Option, + + /// The secret key for the user accessing the bucket + #[clap(short, long)] + secret_key: Option, + + /// The security token for accessing the bucket + #[clap(long)] + security_token: Option, + + /// The session token for accessing the bucket + #[clap(long)] + session_token: Option, +} + +/// Configuration for the sled-backed data repository +#[derive(Debug, Parser, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct Sled { + /// The path to store the sled database + #[clap(short, long)] + #[serde(skip_serializing_if = "Option::is_none")] + path: Option, + + /// The cache capacity, in bytes, allowed to sled for in-memory operations + #[clap(short, long)] + #[serde(skip_serializing_if = "Option::is_none")] + cache_capacity: Option, +} diff --git a/src/config/defaults.rs b/src/config/defaults.rs new file mode 100644 index 0000000..bcedb0d --- /dev/null +++ b/src/config/defaults.rs @@ -0,0 +1,192 @@ +use crate::{ + config::primitives::{LogFormat, Targets}, + serde_str::Serde, +}; +use std::{net::SocketAddr, path::PathBuf}; + +#[derive(Clone, Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct Defaults { + server: ServerDefaults, + tracing: TracingDefaults, + old_db: OldDbDefaults, + media: MediaDefaults, + repo: RepoDefaults, + store: StoreDefaults, +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct ServerDefaults { + address: SocketAddr, + worker_id: String, +} + +#[derive(Clone, Debug, Default, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct TracingDefaults { + logging: LoggingDefaults, + + console: ConsoleDefaults, + + opentelemetry: OpenTelemetryDefaults, +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct LoggingDefaults { + format: LogFormat, + targets: Serde, +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct ConsoleDefaults { + buffer_capacity: usize, +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct OpenTelemetryDefaults { + service_name: String, + targets: Serde, +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct OldDbDefaults { + path: PathBuf, +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct MediaDefaults { + max_width: usize, + max_height: usize, + max_area: usize, + max_file_size: usize, + enable_silent_video: bool, + filters: Vec, + skip_validate_imports: bool, +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type")] +enum RepoDefaults { + Sled(SledDefaults), +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct SledDefaults { + path: PathBuf, + cache_capacity: u64, +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type")] +enum StoreDefaults { + Filesystem(FilesystemDefaults), +} + +#[derive(Clone, Debug, serde::Serialize)] +#[serde(rename_all = "snake_case")] +struct FilesystemDefaults { + path: PathBuf, +} + +impl Default for ServerDefaults { + fn default() -> Self { + ServerDefaults { + address: "0.0.0.0:8080".parse().expect("Valid address string"), + worker_id: String::from("pict-rs-1"), + } + } +} + +impl Default for LoggingDefaults { + fn default() -> Self { + 
LoggingDefaults { + format: LogFormat::Normal, + targets: "warn,tracing_actix_web=info,actix_web=info,actix_server=info" + .parse() + .expect("Valid targets string"), + } + } +} + +impl Default for ConsoleDefaults { + fn default() -> Self { + ConsoleDefaults { + buffer_capacity: 1024 * 100, + } + } +} + +impl Default for OpenTelemetryDefaults { + fn default() -> Self { + OpenTelemetryDefaults { + service_name: String::from("pict-rs"), + targets: "info".parse().expect("Valid targets string"), + } + } +} + +impl Default for OldDbDefaults { + fn default() -> Self { + OldDbDefaults { + path: PathBuf::from(String::from("/mnt")), + } + } +} + +impl Default for MediaDefaults { + fn default() -> Self { + MediaDefaults { + max_width: 10_000, + max_height: 10_000, + max_area: 40_000_000, + max_file_size: 40, + enable_silent_video: true, + filters: vec![ + "identity".into(), + "thumbnail".into(), + "resize".into(), + "crop".into(), + "blur".into(), + ], + skip_validate_imports: false, + } + } +} + +impl Default for RepoDefaults { + fn default() -> Self { + Self::Sled(SledDefaults::default()) + } +} + +impl Default for SledDefaults { + fn default() -> Self { + SledDefaults { + path: PathBuf::from(String::from("/mnt/sled-repo")), + cache_capacity: 1024 * 1024 * 64, + } + } +} + +impl Default for StoreDefaults { + fn default() -> Self { + Self::Filesystem(FilesystemDefaults::default()) + } +} + +impl Default for FilesystemDefaults { + fn default() -> Self { + Self { + path: PathBuf::from(String::from("/mnt/files")), + } + } +} diff --git a/src/config/file.rs b/src/config/file.rs new file mode 100644 index 0000000..d49edc2 --- /dev/null +++ b/src/config/file.rs @@ -0,0 +1,113 @@ +use crate::{ + config::primitives::{ImageFormat, LogFormat, Store, Targets}, + serde_str::Serde, +}; +use std::{collections::BTreeSet, net::SocketAddr, path::PathBuf}; +use url::Url; + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct ConfigFile { + pub(crate) server: Server, + + pub(crate) tracing: Tracing, + + pub(crate) old_db: OldDb, + + pub(crate) media: Media, + + pub(crate) repo: Repo, + + pub(crate) store: Store, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type")] +pub(crate) enum Repo { + Sled(Sled), +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct Server { + pub(crate) address: SocketAddr, + + pub(crate) worker_id: String, + + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) api_key: Option, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct Tracing { + pub(crate) logging: Logging, + + pub(crate) console: Console, + + pub(crate) opentelemetry: OpenTelemetry, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct Logging { + pub(crate) format: LogFormat, + + pub(crate) targets: Serde, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct OpenTelemetry { + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) url: Option, + + pub(crate) service_name: String, + + pub(crate) targets: Serde, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct Console { + #[serde(skip_serializing_if = "Option::is_none")] + 
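// Illustrative sketch, not part of this patch: the Defaults tree above derives
// serde::Serialize, so the compiled-in defaults can be rendered as TOML (for
// documentation, or to seed a config file). Assumes the `toml` crate; the helper
// name is hypothetical.
fn render_default_config() -> Result<String, toml::ser::Error> {
    // Produces e.g. server.address = "0.0.0.0:8080" and repo.type = "sled".
    toml::to_string(&Defaults::default())
}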
pub(crate) address: Option, + + pub(crate) buffer_capacity: usize, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct OldDb { + pub(crate) path: PathBuf, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct Media { + pub(crate) max_width: usize, + + pub(crate) max_height: usize, + + pub(crate) max_area: usize, + + pub(crate) max_file_size: usize, + + pub(crate) enable_silent_video: bool, + + pub(crate) filters: BTreeSet, + + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) format: Option, + + pub(crate) skip_validate_imports: bool, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) struct Sled { + pub(crate) path: PathBuf, + + pub(crate) cache_capacity: u64, +} diff --git a/src/config/primitives.rs b/src/config/primitives.rs new file mode 100644 index 0000000..f48027d --- /dev/null +++ b/src/config/primitives.rs @@ -0,0 +1,251 @@ +use crate::magick::ValidInputType; +use crate::serde_str::Serde; +use clap::ArgEnum; +use std::{fmt::Display, path::PathBuf, str::FromStr}; +use tracing::Level; + +#[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + serde::Deserialize, + serde::Serialize, + ArgEnum, +)] +#[serde(rename_all = "snake_case")] +pub(crate) enum LogFormat { + Compact, + Json, + Normal, + Pretty, +} + +#[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + serde::Deserialize, + serde::Serialize, + ArgEnum, +)] +#[serde(rename_all = "snake_case")] +pub(crate) enum ImageFormat { + Jpeg, + Webp, + Png, +} + +#[derive(Clone, Debug)] +pub(crate) struct Targets { + pub(crate) targets: tracing_subscriber::filter::Targets, +} + +/// Configuration for filesystem media storage +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, clap::Parser)] +#[serde(rename_all = "snake_case")] +pub(crate) struct Filesystem { + /// Path to store media + #[clap(short, long)] + pub(crate) path: PathBuf, +} + +/// Configuration for object media storage +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, clap::Parser)] +#[serde(rename_all = "snake_case")] +pub(crate) struct ObjectStorage { + /// The bucket in which to store media + #[clap(short, long)] + pub(crate) bucket_name: String, + + /// The region the bucket is located in + #[clap(short, long)] + pub(crate) region: Serde, + + /// The Access Key for the user accessing the bucket + #[clap(short, long)] + pub(crate) access_key: String, + + /// The secret key for the user accessing the bucket + #[clap(short, long)] + pub(crate) secret_key: String, + + /// The security token for accessing the bucket + #[clap(long)] + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) security_token: Option, + + /// The session token for accessing the bucket + #[clap(long)] + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) session_token: Option, +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type")] +pub(crate) enum Store { + Filesystem(Filesystem), + + ObjectStorage(ObjectStorage), +} + +impl ImageFormat { + pub(crate) fn as_hint(self) -> Option { + Some(ValidInputType::from_format(self)) + } + + pub(crate) fn as_magick_format(self) -> &'static str { + match self { + Self::Jpeg => "JPEG", + Self::Png => "PNG", + Self::Webp => "WEBP", + } + } +} + +impl From for Store { + fn 
from(f: Filesystem) -> Self { + Self::Filesystem(f) + } +} + +impl From for Store { + fn from(o: ObjectStorage) -> Self { + Self::ObjectStorage(o) + } +} + +impl FromStr for Targets { + type Err = ::Err; + + fn from_str(s: &str) -> Result { + Ok(Targets { + targets: s.parse()?, + }) + } +} + +impl Display for Targets { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let targets = self + .targets + .iter() + .map(|(path, level)| format!("{}={}", path, level)) + .collect::>() + .join(","); + + let max_level = [ + Level::TRACE, + Level::DEBUG, + Level::INFO, + Level::WARN, + Level::ERROR, + ] + .iter() + .fold(None, |found, level| { + if found.is_none() + && self + .targets + .would_enable("not_a_real_target_so_nothing_can_conflict", level) + { + Some(level.to_string().to_lowercase()) + } else { + found + } + }); + + if let Some(level) = max_level { + if !targets.is_empty() { + write!(f, "{},{}", level, targets) + } else { + write!(f, "{}", level) + } + } else if !targets.is_empty() { + write!(f, "{}", targets) + } else { + Ok(()) + } + } +} + +impl FromStr for ImageFormat { + type Err = String; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "jpeg" | "jpg" => Ok(Self::Jpeg), + "png" => Ok(Self::Png), + "webp" => Ok(Self::Webp), + other => Err(format!("Invalid variant: {}", other)), + } + } +} + +impl FromStr for LogFormat { + type Err = String; + + fn from_str(s: &str) -> Result { + for variant in Self::value_variants() { + if variant.to_possible_value().unwrap().matches(s, false) { + return Ok(*variant); + } + } + Err(format!("Invalid variant: {}", s)) + } +} + +impl Display for ImageFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.to_possible_value() + .expect("no values are skipped") + .get_name() + .fmt(f) + } +} + +impl Display for LogFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.to_possible_value() + .expect("no values are skipped") + .get_name() + .fmt(f) + } +} + +#[cfg(test)] +mod tests { + use super::{Serde, Targets}; + + #[test] + fn builds_info_targets() { + let t: Serde = "info".parse().unwrap(); + + println!("{:?}", t); + + assert_eq!(t.to_string(), "info"); + } + + #[test] + fn builds_specific_targets() { + let t: Serde = "pict_rs=info".parse().unwrap(); + + assert_eq!(t.to_string(), "pict_rs=info"); + } + + #[test] + fn builds_warn_and_specific_targets() { + let t: Serde = "warn,pict_rs=info".parse().unwrap(); + + assert_eq!(t.to_string(), "warn,pict_rs=info"); + } +} diff --git a/src/details.rs b/src/details.rs new file mode 100644 index 0000000..37ddc50 --- /dev/null +++ b/src/details.rs @@ -0,0 +1,63 @@ +use crate::{error::Error, magick::ValidInputType, serde_str::Serde, store::Store}; +use actix_web::web; + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +pub(crate) struct Details { + width: usize, + height: usize, + content_type: Serde, + created_at: time::OffsetDateTime, +} + +impl Details { + pub(crate) fn is_motion(&self) -> bool { + self.content_type.type_() == "video" + || self.content_type.type_() == "image" && self.content_type.subtype() == "gif" + } + + #[tracing::instrument("Details from bytes", skip(input))] + pub(crate) async fn from_bytes( + input: web::Bytes, + hint: Option, + ) -> Result { + let details = crate::magick::details_bytes(input, hint).await?; + + Ok(Details::now( + details.width, + details.height, + details.mime_type, + )) + } + + #[tracing::instrument("Details from store")] + pub(crate) async fn 
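// Illustrative sketch, not part of this patch: the FromStr/Display impls for
// LogFormat above are derived from clap's ArgEnum possible values, so the CLI and
// the config file share one lowercase spelling per variant. Hypothetical round-trip
// check:
#[test]
fn log_format_round_trips() {
    let fmt: LogFormat = "pretty".parse().expect("known variant");
    assert_eq!(fmt.to_string(), "pretty");
    assert!("verbose".parse::<LogFormat>().is_err());
}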
from_store( + store: S, + identifier: S::Identifier, + expected_format: Option, + ) -> Result { + let details = crate::magick::details_store(store, identifier, expected_format).await?; + + Ok(Details::now( + details.width, + details.height, + details.mime_type, + )) + } + + pub(crate) fn now(width: usize, height: usize, content_type: mime::Mime) -> Self { + Details { + width, + height, + content_type: Serde::new(content_type), + created_at: time::OffsetDateTime::now_utc(), + } + } + + pub(crate) fn content_type(&self) -> mime::Mime { + (*self.content_type).clone() + } + + pub(crate) fn system_time(&self) -> std::time::SystemTime { + self.created_at.into() + } +} diff --git a/src/error.rs b/src/error.rs index deec103..52a1c70 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,27 +1,31 @@ use actix_web::{http::StatusCode, HttpResponse, ResponseError}; -use tracing_error::SpanTrace; +use color_eyre::Report; pub(crate) struct Error { - context: SpanTrace, - kind: UploadError, + inner: color_eyre::Report, +} + +impl Error { + fn kind(&self) -> Option<&UploadError> { + self.inner.downcast_ref() + } } impl std::fmt::Debug for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!(f, "{}", self.kind) + std::fmt::Debug::fmt(&self.inner, f) } } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!(f, "{}", self.kind)?; - std::fmt::Display::fmt(&self.context, f) + std::fmt::Display::fmt(&self.inner, f) } } impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - self.kind.source() + self.inner.source() } } @@ -31,46 +35,38 @@ where { fn from(error: T) -> Self { Error { - kind: UploadError::from(error), - context: SpanTrace::capture(), - } - } -} - -impl From> for Error { - fn from(e: sled::transaction::TransactionError) -> Self { - match e { - sled::transaction::TransactionError::Abort(t) => t, - sled::transaction::TransactionError::Storage(e) => e.into(), + inner: Report::from(UploadError::from(error)), } } } #[derive(Debug, thiserror::Error)] pub(crate) enum UploadError { - #[error("Couln't upload file, {0}")] + #[error("Couln't upload file")] Upload(#[from] actix_form_data::Error), - #[error("Error in DB, {0}")] - Db(#[from] sled::Error), + #[error("Error in DB")] + Sled(#[from] crate::repo::sled::SledError), - #[error("Error parsing string, {0}")] + #[error("Error in old sled DB")] + OldSled(#[from] ::sled::Error), + + #[error("Error parsing string")] ParseString(#[from] std::string::FromUtf8Error), - #[error("Error interacting with filesystem, {0}")] + #[error("Error interacting with filesystem")] Io(#[from] std::io::Error), - #[error(transparent)] + #[error("Error generating path")] PathGenerator(#[from] storage_path_generator::PathError), - #[error(transparent)] + #[error("Error stripping prefix")] StripPrefix(#[from] std::path::StripPrefixError), - #[error(transparent)] + #[error("Error storing file")] FileStore(#[from] crate::store::file_store::FileError), - #[cfg(feature = "object-storage")] - #[error(transparent)] + #[error("Error storing object")] ObjectStore(#[from] crate::store::object_store::ObjectError), #[error("Provided process path is invalid")] @@ -88,44 +84,44 @@ pub(crate) enum UploadError { #[error("Requested a file that doesn't exist")] MissingAlias, - #[error("Alias directed to missing file")] - MissingFile, - #[error("Provided token did not match expected token")] InvalidToken, #[error("Unsupported image format")] UnsupportedFormat, + 
#[error("Gif uploads are not enabled")] + SilentVideoDisabled, + #[error("Invalid media dimensions")] Dimensions, #[error("Unable to download image, bad response {0}")] Download(actix_web::http::StatusCode), - #[error("Unable to download image, {0}")] + #[error("Unable to download image")] Payload(#[from] awc::error::PayloadError), #[error("Unable to send request, {0}")] SendRequest(String), - #[error("No filename provided in request")] - MissingFilename, - #[error("Error converting Path to String")] Path, #[error("Tried to save an image with an already-taken name")] DuplicateAlias, - #[error("{0}")] + #[error("Error in json")] Json(#[from] serde_json::Error), #[error("Range header not satisfiable")] Range, - #[error(transparent)] - Limit(#[from] super::LimitError), + #[error("Hit limit")] + Limit(#[from] crate::stream::LimitError), + + #[error("Response timeout")] + Timeout(#[from] crate::stream::TimeoutError), } impl From for UploadError { @@ -148,24 +144,40 @@ impl From for UploadError { impl ResponseError for Error { fn status_code(&self) -> StatusCode { - match self.kind { - UploadError::DuplicateAlias - | UploadError::Limit(_) - | UploadError::NoFiles - | UploadError::Upload(_) => StatusCode::BAD_REQUEST, - UploadError::MissingAlias | UploadError::MissingFilename => StatusCode::NOT_FOUND, - UploadError::InvalidToken => StatusCode::FORBIDDEN, - UploadError::Range => StatusCode::RANGE_NOT_SATISFIABLE, + match self.kind() { + Some( + UploadError::DuplicateAlias + | UploadError::Limit(_) + | UploadError::NoFiles + | UploadError::Upload(_) + | UploadError::UnsupportedFormat + | UploadError::SilentVideoDisabled, + ) => StatusCode::BAD_REQUEST, + Some( + UploadError::Sled(crate::repo::sled::SledError::Missing) + | UploadError::MissingAlias, + ) => StatusCode::NOT_FOUND, + Some(UploadError::InvalidToken) => StatusCode::FORBIDDEN, + Some(UploadError::Range) => StatusCode::RANGE_NOT_SATISFIABLE, _ => StatusCode::INTERNAL_SERVER_ERROR, } } fn error_response(&self) -> HttpResponse { - HttpResponse::build(self.status_code()) - .content_type("application/json") - .body( - serde_json::to_string(&serde_json::json!({ "msg": self.kind.to_string() })) - .unwrap_or_else(|_| r#"{"msg":"Request failed"}"#.to_string()), - ) + if let Some(kind) = self.kind() { + HttpResponse::build(self.status_code()) + .content_type("application/json") + .body( + serde_json::to_string(&serde_json::json!({ "msg": kind.to_string() })) + .unwrap_or_else(|_| r#"{"msg":"Request failed"}"#.to_string()), + ) + } else { + HttpResponse::build(self.status_code()) + .content_type("application/json") + .body( + serde_json::to_string(&serde_json::json!({ "msg": "Unknown error" })) + .unwrap_or_else(|_| r#"{"msg":"Request failed"}"#.to_string()), + ) + } } } diff --git a/src/ffmpeg.rs b/src/ffmpeg.rs index 2206a8c..3dd9124 100644 --- a/src/ffmpeg.rs +++ b/src/ffmpeg.rs @@ -101,10 +101,7 @@ pub(crate) async fn thumbnail( from: S::Identifier, input_format: InputFormat, format: ThumbnailFormat, -) -> Result -where - Error: From, -{ +) -> Result { let input_file = crate::tmp_file::tmp_file(Some(input_format.to_ext())); let input_file_str = input_file.to_str().ok_or(UploadError::Path)?; crate::store::file_store::safe_create_parent(&input_file).await?; diff --git a/src/file.rs b/src/file.rs index 6f011e7..34348a5 100644 --- a/src/file.rs +++ b/src/file.rs @@ -8,7 +8,7 @@ pub(crate) use tokio_file::File; mod tokio_file { use crate::{store::file_store::FileError, Either}; use actix_web::web::{Bytes, BytesMut}; - use 
futures_util::stream::{Stream, StreamExt}; + use futures_util::{Stream, StreamExt, TryStreamExt}; use std::{io::SeekFrom, path::Path}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt}; use tokio_util::codec::{BytesCodec, FramedRead}; @@ -91,38 +91,7 @@ mod tokio_file { (None, None) => Either::right(self.inner), }; - Ok(BytesFreezer::new(FramedRead::new(obj, BytesCodec::new()))) - } - } - - pin_project_lite::pin_project! { - struct BytesFreezer { - #[pin] - inner: S, - } - } - - impl BytesFreezer { - fn new(inner: S) -> Self { - BytesFreezer { inner } - } - } - - impl Stream for BytesFreezer - where - S: Stream> + Unpin, - { - type Item = Result; - - fn poll_next( - mut self: std::pin::Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { - let this = self.as_mut().project(); - - this.inner - .poll_next(cx) - .map(|opt| opt.map(|res| res.map(BytesMut::freeze))) + Ok(FramedRead::new(obj, BytesCodec::new()).map_ok(BytesMut::freeze)) } } } diff --git a/src/generate.rs b/src/generate.rs new file mode 100644 index 0000000..bffac6f --- /dev/null +++ b/src/generate.rs @@ -0,0 +1,93 @@ +use crate::{ + concurrent_processor::CancelSafeProcessor, + config::ImageFormat, + details::Details, + error::Error, + ffmpeg::{InputFormat, ThumbnailFormat}, + repo::{Alias, FullRepo}, + store::Store, +}; +use actix_web::web::Bytes; +use std::path::PathBuf; +use tokio::io::AsyncReadExt; + +pub(crate) async fn generate( + repo: &R, + store: &S, + format: ImageFormat, + alias: Alias, + thumbnail_path: PathBuf, + thumbnail_args: Vec, + hash: R::Bytes, +) -> Result<(Details, Bytes), Error> { + let process_fut = process( + repo, + store, + format, + alias, + thumbnail_path.clone(), + thumbnail_args, + hash.clone(), + ); + + let (details, bytes) = + CancelSafeProcessor::new(hash.as_ref(), thumbnail_path, process_fut).await?; + + Ok((details, bytes)) +} + +async fn process( + repo: &R, + store: &S, + format: ImageFormat, + alias: Alias, + thumbnail_path: PathBuf, + thumbnail_args: Vec, + hash: R::Bytes, +) -> Result<(Details, Bytes), Error> { + let permit = crate::PROCESS_SEMAPHORE.acquire().await?; + + let identifier = if let Some(identifier) = repo + .still_identifier_from_alias::(&alias) + .await? 
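// Illustrative sketch, not part of this patch: the hand-written BytesFreezer stream
// removed from src/file.rs above is now a one-liner, since TryStreamExt::map_ok turns
// every Ok(BytesMut) frame into frozen Bytes. A standalone equivalent, assuming
// tokio, tokio-util and futures-util:
use bytes::{Bytes, BytesMut};
use futures_util::{Stream, TryStreamExt};
use tokio_util::codec::{BytesCodec, FramedRead};

async fn file_byte_stream(
    path: &std::path::Path,
) -> std::io::Result<impl Stream<Item = std::io::Result<Bytes>>> {
    let file = tokio::fs::File::open(path).await?;
    // BytesCodec yields BytesMut chunks; freezing them gives cheaply cloneable Bytes.
    Ok(FramedRead::new(file, BytesCodec::new()).map_ok(BytesMut::freeze))
}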
+ { + identifier + } else { + let identifier = repo.identifier(hash.clone()).await?; + let mut reader = crate::ffmpeg::thumbnail( + store.clone(), + identifier, + InputFormat::Mp4, + ThumbnailFormat::Jpeg, + ) + .await?; + let motion_identifier = store.save_async_read(&mut reader).await?; + + repo.relate_motion_identifier(hash.clone(), &motion_identifier) + .await?; + + motion_identifier + }; + + let mut processed_reader = + crate::magick::process_image_store_read(store.clone(), identifier, thumbnail_args, format)?; + + let mut vec = Vec::new(); + processed_reader.read_to_end(&mut vec).await?; + let bytes = Bytes::from(vec); + + drop(permit); + + let details = Details::from_bytes(bytes.clone(), format.as_hint()).await?; + + let identifier = store.save_bytes(bytes.clone()).await?; + repo.relate_details(&identifier, &details).await?; + repo.relate_variant_identifier( + hash, + thumbnail_path.to_string_lossy().to_string(), + &identifier, + ) + .await?; + + Ok((details, bytes)) as Result<(Details, Bytes), Error> +} diff --git a/src/ingest.rs b/src/ingest.rs new file mode 100644 index 0000000..613bac3 --- /dev/null +++ b/src/ingest.rs @@ -0,0 +1,214 @@ +use crate::{ + error::{Error, UploadError}, + magick::ValidInputType, + repo::{Alias, AliasRepo, DeleteToken, FullRepo, HashRepo}, + store::Store, + CONFIG, +}; +use actix_web::web::{Bytes, BytesMut}; +use futures_util::{Stream, StreamExt}; +use sha2::{Digest, Sha256}; +use tracing::debug; + +mod hasher; +use hasher::Hasher; + +pub(crate) struct Session +where + R: FullRepo + 'static, + S: Store, +{ + repo: R, + hash: Option>, + alias: Option, + identifier: Option, +} + +pub(crate) async fn ingest( + repo: &R, + store: &S, + stream: impl Stream>, + declared_alias: Option, + should_validate: bool, +) -> Result, Error> +where + R: FullRepo + 'static, + S: Store, +{ + let permit = crate::PROCESS_SEMAPHORE.acquire().await; + + let mut bytes_mut = BytesMut::new(); + + futures_util::pin_mut!(stream); + + debug!("Reading stream to memory"); + while let Some(res) = stream.next().await { + let bytes = res?; + bytes_mut.extend_from_slice(&bytes); + } + + debug!("Validating bytes"); + let (input_type, validated_reader) = crate::validate::validate_image_bytes( + bytes_mut.freeze(), + CONFIG.media.format, + CONFIG.media.enable_silent_video, + should_validate, + ) + .await?; + + let mut hasher_reader = Hasher::new(validated_reader, Sha256::new()); + + let identifier = store.save_async_read(&mut hasher_reader).await?; + + drop(permit); + + let mut session = Session { + repo: repo.clone(), + hash: None, + alias: None, + identifier: Some(identifier.clone()), + }; + + let hash = hasher_reader.finalize_reset().await?; + + session.hash = Some(hash.clone()); + + debug!("Saving upload"); + + save_upload(repo, store, &hash, &identifier).await?; + + debug!("Adding alias"); + + if let Some(alias) = declared_alias { + session.add_existing_alias(&hash, alias).await? 
+ } else { + session.create_alias(&hash, input_type).await?; + } + + Ok(session) +} + +async fn save_upload( + repo: &R, + store: &S, + hash: &[u8], + identifier: &S::Identifier, +) -> Result<(), Error> +where + S: Store, + R: FullRepo, +{ + if HashRepo::create(repo, hash.to_vec().into()).await?.is_err() { + store.remove(identifier).await?; + return Ok(()); + } + + repo.relate_identifier(hash.to_vec().into(), identifier) + .await?; + + Ok(()) +} + +impl Session +where + R: FullRepo + 'static, + S: Store, +{ + pub(crate) fn disarm(&mut self) { + let _ = self.alias.take(); + let _ = self.identifier.take(); + } + + pub(crate) fn alias(&self) -> Option<&Alias> { + self.alias.as_ref() + } + + pub(crate) async fn delete_token(&self) -> Result { + let alias = self.alias.clone().ok_or(UploadError::MissingAlias)?; + + debug!("Generating delete token"); + let delete_token = DeleteToken::generate(); + + debug!("Saving delete token"); + let res = self.repo.relate_delete_token(&alias, &delete_token).await?; + + if res.is_err() { + let delete_token = self.repo.delete_token(&alias).await?; + debug!("Returning existing delete token, {:?}", delete_token); + return Ok(delete_token); + } + + debug!("Returning new delete token, {:?}", delete_token); + Ok(delete_token) + } + + async fn add_existing_alias(&mut self, hash: &[u8], alias: Alias) -> Result<(), Error> { + AliasRepo::create(&self.repo, &alias) + .await? + .map_err(|_| UploadError::DuplicateAlias)?; + + self.alias = Some(alias.clone()); + + self.repo.relate_hash(&alias, hash.to_vec().into()).await?; + self.repo.relate_alias(hash.to_vec().into(), &alias).await?; + + Ok(()) + } + + async fn create_alias(&mut self, hash: &[u8], input_type: ValidInputType) -> Result<(), Error> { + debug!("Alias gen loop"); + + loop { + let alias = Alias::generate(input_type.as_ext().to_string()); + + if AliasRepo::create(&self.repo, &alias).await?.is_ok() { + self.alias = Some(alias.clone()); + + self.repo.relate_hash(&alias, hash.to_vec().into()).await?; + self.repo.relate_alias(hash.to_vec().into(), &alias).await?; + + return Ok(()); + } + + debug!("Alias exists, regenerating"); + } + } +} + +impl Drop for Session +where + R: FullRepo + 'static, + S: Store, +{ + fn drop(&mut self) { + if let Some(hash) = self.hash.take() { + let repo = self.repo.clone(); + actix_rt::spawn(async move { + let _ = crate::queue::cleanup_hash(&repo, hash.into()).await; + }); + } + + if let Some(alias) = self.alias.take() { + let repo = self.repo.clone(); + + actix_rt::spawn(async move { + if let Ok(token) = repo.delete_token(&alias).await { + let _ = crate::queue::cleanup_alias(&repo, alias, token).await; + } else { + let token = DeleteToken::generate(); + if let Ok(Ok(())) = repo.relate_delete_token(&alias, &token).await { + let _ = crate::queue::cleanup_alias(&repo, alias, token).await; + } + } + }); + } + + if let Some(identifier) = self.identifier.take() { + let repo = self.repo.clone(); + + actix_rt::spawn(async move { + let _ = crate::queue::cleanup_identifier(&repo, identifier).await; + }); + } + } +} diff --git a/src/upload_manager/hasher.rs b/src/ingest/hasher.rs similarity index 79% rename from src/upload_manager/hasher.rs rename to src/ingest/hasher.rs index 0ae9a8b..e1a1551 100644 --- a/src/upload_manager/hasher.rs +++ b/src/ingest/hasher.rs @@ -16,10 +16,6 @@ pin_project_lite::pin_project! 
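// Illustrative sketch, not part of this patch: the Session returned by ingest() above
// acts as a drop guard. If the caller bails out early, Drop enqueues cleanup of the
// stored file, its aliases and delete token; calling disarm() keeps the upload.
// Hypothetical caller, reusing the names from src/ingest.rs:
async fn store_upload<R, S>(
    repo: &R,
    store: &S,
    stream: impl futures_util::Stream<Item = Result<actix_web::web::Bytes, Error>>,
) -> Result<Alias, Error>
where
    R: FullRepo + 'static,
    S: Store,
{
    let mut session = crate::ingest::ingest(repo, store, stream, None, true).await?;
    let alias = session.alias().expect("alias is set on success").to_owned();
    let _token = session.delete_token().await?;
    // Everything persisted; prevent Drop from scheduling cleanup.
    session.disarm();
    Ok(alias)
}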
{ } } -pub(super) struct Hash { - inner: Vec, -} - impl Hasher where D: Digest + FixedOutputReset + Send + 'static, @@ -31,27 +27,13 @@ where } } - pub(super) async fn finalize_reset(self) -> Result { + pub(super) async fn finalize_reset(self) -> Result, Error> { let mut hasher = self.hasher; - let hash = web::block(move || Hash::new(hasher.finalize_reset().to_vec())).await?; + let hash = web::block(move || hasher.finalize_reset().to_vec()).await?; Ok(hash) } } -impl Hash { - fn new(inner: Vec) -> Self { - Hash { inner } - } - - pub(super) fn as_slice(&self) -> &[u8] { - &self.inner - } - - pub(super) fn into_inner(self) -> Vec { - self.inner - } -} - impl AsyncRead for Hasher where I: AsyncRead, @@ -77,12 +59,6 @@ where } } -impl std::fmt::Debug for Hash { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", base64::encode(&self.inner)) - } -} - #[cfg(test)] mod test { use super::Hasher; @@ -127,6 +103,6 @@ mod test { hasher.update(vec); let correct_hash = hasher.finalize_reset().to_vec(); - assert_eq!(hash.inner, correct_hash); + assert_eq!(hash, correct_hash); } } diff --git a/src/init_tracing.rs b/src/init_tracing.rs index 65a5d51..c586b41 100644 --- a/src/init_tracing.rs +++ b/src/init_tracing.rs @@ -1,3 +1,4 @@ +use crate::config::{LogFormat, OpenTelemetry, Tracing}; use console_subscriber::ConsoleLayer; use opentelemetry::{ sdk::{propagation::TraceContextPropagator, Resource}, @@ -8,74 +9,75 @@ use tracing::subscriber::set_global_default; use tracing_error::ErrorLayer; use tracing_log::LogTracer; use tracing_subscriber::{ - filter::Targets, fmt::format::FmtSpan, layer::SubscriberExt, registry::LookupSpan, Layer, - Registry, + fmt::format::FmtSpan, layer::SubscriberExt, registry::LookupSpan, Layer, Registry, }; -use url::Url; -pub(super) fn init_tracing( - servic_name: &'static str, - opentelemetry_url: Option<&Url>, - buffer_capacity: Option, -) -> anyhow::Result<()> { +pub(super) fn init_tracing(tracing: &Tracing) -> color_eyre::Result<()> { + color_eyre::install()?; + LogTracer::init()?; opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new()); - let targets = std::env::var("RUST_LOG") - .unwrap_or_else(|_| "info".into()) - .parse::()?; + let format_layer = + tracing_subscriber::fmt::layer().with_span_events(FmtSpan::NEW | FmtSpan::CLOSE); - let format_layer = tracing_subscriber::fmt::layer() - .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) - .with_filter(targets.clone()); + match tracing.logging.format { + LogFormat::Compact => with_format(format_layer.compact(), tracing), + LogFormat::Json => with_format(format_layer.json(), tracing), + LogFormat::Normal => with_format(format_layer, tracing), + LogFormat::Pretty => with_format(format_layer.pretty(), tracing), + } +} + +fn with_format(format_layer: F, tracing: &Tracing) -> color_eyre::Result<()> +where + F: Layer + Send + Sync, +{ + let format_layer = format_layer.with_filter(tracing.logging.targets.targets.clone()); let subscriber = Registry::default() .with(format_layer) .with(ErrorLayer::default()); - if let Some(buffer_capacity) = buffer_capacity { + if let Some(address) = tracing.console.address { let console_layer = ConsoleLayer::builder() .with_default_env() - .event_buffer_capacity(buffer_capacity) - .server_addr(([0, 0, 0, 0], 6669)) + .event_buffer_capacity(tracing.console.buffer_capacity) + .server_addr(address) .spawn(); let subscriber = subscriber.with(console_layer); - with_otel(subscriber, targets, servic_name, opentelemetry_url) + with_subscriber(subscriber, 
&tracing.opentelemetry) } else { - with_otel(subscriber, targets, servic_name, opentelemetry_url) + with_subscriber(subscriber, &tracing.opentelemetry) } } -fn with_otel( - subscriber: S, - targets: Targets, - servic_name: &'static str, - opentelemetry_url: Option<&Url>, -) -> anyhow::Result<()> +fn with_subscriber(subscriber: S, otel: &OpenTelemetry) -> color_eyre::Result<()> where S: SubscriberExt + Send + Sync, for<'a> S: LookupSpan<'a>, { - if let Some(url) = opentelemetry_url { - let tracer = - opentelemetry_otlp::new_pipeline() - .tracing() - .with_trace_config(opentelemetry::sdk::trace::config().with_resource( - Resource::new(vec![KeyValue::new("service.name", servic_name)]), - )) - .with_exporter( - opentelemetry_otlp::new_exporter() - .tonic() - .with_endpoint(url.as_str()), - ) - .install_batch(opentelemetry::runtime::Tokio)?; + if let Some(url) = otel.url.as_ref() { + let tracer = opentelemetry_otlp::new_pipeline() + .tracing() + .with_trace_config( + opentelemetry::sdk::trace::config().with_resource(Resource::new(vec![ + KeyValue::new("service.name", otel.service_name.clone()), + ])), + ) + .with_exporter( + opentelemetry_otlp::new_exporter() + .tonic() + .with_endpoint(url.as_str()), + ) + .install_batch(opentelemetry::runtime::Tokio)?; let otel_layer = tracing_opentelemetry::layer() .with_tracer(tracer) - .with_filter(targets); + .with_filter(otel.targets.as_ref().targets.clone()); let subscriber = subscriber.with(otel_layer); diff --git a/src/magick.rs b/src/magick.rs index 327052e..e0c1c0d 100644 --- a/src/magick.rs +++ b/src/magick.rs @@ -1,7 +1,8 @@ use crate::{ - config::Format, + config::ImageFormat, error::{Error, UploadError}, process::Process, + repo::Alias, store::Store, }; use actix_web::web::Bytes; @@ -11,8 +12,9 @@ use tokio::{ }; use tracing::instrument; -pub(crate) fn details_hint(filename: &str) -> Option { - if filename.ends_with(".mp4") { +pub(crate) fn details_hint(alias: &Alias) -> Option { + let ext = alias.extension()?; + if ext.ends_with(".mp4") { Some(ValidInputType::Mp4) } else { None @@ -61,11 +63,11 @@ impl ValidInputType { matches!(self, Self::Mp4) } - pub(crate) fn from_format(format: Format) -> Self { + pub(crate) fn from_format(format: ImageFormat) -> Self { match format { - Format::Jpeg => ValidInputType::Jpeg, - Format::Png => ValidInputType::Png, - Format::Webp => ValidInputType::Webp, + ImageFormat::Jpeg => ValidInputType::Jpeg, + ImageFormat::Png => ValidInputType::Png, + ImageFormat::Webp => ValidInputType::Webp, } } } @@ -85,7 +87,7 @@ pub(crate) fn clear_metadata_bytes_read(input: Bytes) -> std::io::Result std::io::Result { let process = Process::run( "magick", @@ -137,14 +139,12 @@ pub(crate) async fn details_bytes( parse_details(s) } -pub(crate) async fn details_store( +#[tracing::instrument(skip(store))] +pub(crate) async fn details_store( store: S, identifier: S::Identifier, hint: Option, -) -> Result -where - Error: From, -{ +) -> Result { if hint.as_ref().map(|h| h.is_mp4()).unwrap_or(false) { let input_file = crate::tmp_file::tmp_file(Some(".mp4")); let input_file_str = input_file.to_str().ok_or(UploadError::Path)?; @@ -180,6 +180,7 @@ where parse_details(s) } +#[tracing::instrument] pub(crate) async fn details_file(path_str: &str) -> Result { let process = Process::run( "magick", @@ -254,11 +255,11 @@ pub(crate) async fn input_type_bytes(input: Bytes) -> Result( +pub(crate) fn process_image_store_read( store: S, identifier: S::Identifier, args: Vec, - format: Format, + format: ImageFormat, ) -> std::io::Result { let command = 
"magick"; let convert_args = ["convert", "-"]; @@ -277,9 +278,9 @@ pub(crate) fn process_image_store_read( impl Details { #[instrument(name = "Validating input type")] fn validate_input(&self) -> Result { - if self.width > crate::CONFIG.max_width() - || self.height > crate::CONFIG.max_height() - || self.width * self.height > crate::CONFIG.max_area() + if self.width > crate::CONFIG.media.max_width + || self.height > crate::CONFIG.media.max_height + || self.width * self.height > crate::CONFIG.media.max_area { return Err(UploadError::Dimensions.into()); } diff --git a/src/main.rs b/src/main.rs index 4b2a059..6c2d6a5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,56 +7,63 @@ use actix_web::{ use awc::Client; use futures_util::{ stream::{empty, once}, - Stream, + Stream, StreamExt, TryStreamExt, }; -use once_cell::sync::{Lazy, OnceCell}; +use once_cell::sync::Lazy; use std::{ - collections::HashSet, future::ready, path::PathBuf, - pin::Pin, - task::{Context, Poll}, - time::SystemTime, + sync::atomic::{AtomicU64, Ordering}, + time::{Duration, SystemTime}, }; -use tokio::{io::AsyncReadExt, sync::Semaphore}; -use tracing::{debug, error, info, instrument, Span}; +use tokio::sync::Semaphore; +use tracing::{debug, info, instrument}; use tracing_actix_web::TracingLogger; use tracing_awc::Tracing; use tracing_futures::Instrument; +mod backgrounded; mod concurrent_processor; mod config; +mod details; mod either; mod error; mod exiftool; mod ffmpeg; mod file; +mod generate; +mod ingest; mod init_tracing; mod magick; -mod map_error; mod middleware; -mod migrate; mod process; mod processor; +mod queue; mod range; +mod repo; mod serde_str; mod store; +mod stream; mod tmp_file; -mod upload_manager; mod validate; -use crate::{magick::details_hint, store::file_store::FileStore}; +use crate::repo::UploadResult; use self::{ - concurrent_processor::CancelSafeProcessor, - config::{Config, Format, Migrate}, + backgrounded::Backgrounded, + config::{Configuration, ImageFormat, Operation}, + details::Details, either::Either, error::{Error, UploadError}, + ingest::Session, init_tracing::init_tracing, + magick::details_hint, middleware::{Deadline, Internal}, - migrate::LatestDb, - store::Store, - upload_manager::{Details, UploadManager, UploadManagerSession}, + queue::queue_generate, + repo::{Alias, DeleteToken, FullRepo, HashRepo, IdentifierRepo, Repo, SettingsRepo, UploadId}, + serde_str::Serde, + store::{file_store::FileStore, object_store::ObjectStore, Identifier, Store}, + stream::{StreamLimit, StreamTimeout}, }; const MEGABYTES: usize = 1024 * 1024; @@ -64,21 +71,20 @@ const MINUTES: u32 = 60; const HOURS: u32 = 60 * MINUTES; const DAYS: u32 = 24 * HOURS; -static MIGRATE: OnceCell = OnceCell::new(); -static CONFIG: Lazy = Lazy::new(|| Config::build().unwrap()); +static DO_CONFIG: Lazy<(Configuration, Operation)> = + Lazy::new(|| config::configure().expect("Failed to configure")); +static CONFIG: Lazy = Lazy::new(|| DO_CONFIG.0.clone()); +static OPERATION: Lazy = Lazy::new(|| DO_CONFIG.1.clone()); static PROCESS_SEMAPHORE: Lazy = Lazy::new(|| Semaphore::new(num_cpus::get().saturating_sub(1).max(1))); /// Handle responding to succesful uploads -#[instrument(name = "Uploaded files", skip(value, manager))] -async fn upload( - value: Value>, - manager: web::Data, +#[instrument(name = "Uploaded files", skip(value))] +async fn upload( + value: Value>, + repo: web::Data, store: web::Data, -) -> Result -where - Error: From, -{ +) -> Result { let images = value .map() .and_then(|mut m| m.remove("images")) @@ -90,189 
+96,202 @@ where .into_iter() .filter_map(|i| i.file()) .collect::>(); + for image in &images { if let Some(alias) = image.result.alias() { info!("Uploaded {} as {:?}", image.filename, alias); let delete_token = image.result.delete_token().await?; - let name = manager.from_alias(alias.to_owned()).await?; - let identifier = manager.identifier_from_filename::(name.clone()).await?; - - let details = manager.variant_details(&identifier, name.clone()).await?; + let identifier = repo.identifier_from_alias::(alias).await?; + let details = repo.details(&identifier).await?; let details = if let Some(details) = details { debug!("details exist"); details } else { debug!("generating new details from {:?}", identifier); - let hint = details_hint(&name); + let hint = details_hint(alias); let new_details = Details::from_store((**store).clone(), identifier.clone(), hint).await?; - debug!("storing details for {:?} {}", identifier, name); - manager - .store_variant_details(&identifier, name, &new_details) - .await?; + debug!("storing details for {:?}", identifier); + repo.relate_details(&identifier, &new_details).await?; debug!("stored"); new_details }; files.push(serde_json::json!({ - "file": alias, - "delete_token": delete_token, + "file": alias.to_string(), + "delete_token": delete_token.to_string(), "details": details, })); } } - for image in images { - image.result.succeed(); + for mut image in images { + image.result.disarm(); } + Ok(HttpResponse::Created().json(&serde_json::json!({ "msg": "ok", "files": files }))) } +#[instrument(name = "Uploaded files", skip(value))] +async fn upload_backgrounded( + value: Value>, + repo: web::Data, +) -> Result { + let images = value + .map() + .and_then(|mut m| m.remove("images")) + .and_then(|images| images.array()) + .ok_or(UploadError::NoFiles)?; + + let mut files = Vec::new(); + let images = images + .into_iter() + .filter_map(|i| i.file()) + .collect::>(); + + for image in &images { + let upload_id = image.result.upload_id().expect("Upload ID exists"); + let identifier = image + .result + .identifier() + .expect("Identifier exists") + .to_bytes()?; + + queue::queue_ingest(&**repo, identifier, upload_id, None, true).await?; + + files.push(serde_json::json!({ + "upload_id": upload_id.to_string(), + })); + } + + for image in images { + image.result.disarm(); + } + + Ok(HttpResponse::Accepted().json(&serde_json::json!({ + "msg": "ok", + "uploads": files + }))) +} + +#[derive(Debug, serde::Deserialize)] +struct ClaimQuery { + upload_id: Serde, +} + +/// Claim a backgrounded upload +#[instrument(name = "Waiting on upload", skip(repo))] +async fn claim_upload( + repo: web::Data, + query: web::Query, +) -> Result { + let upload_id = Serde::into_inner(query.into_inner().upload_id); + + match actix_rt::time::timeout(Duration::from_secs(10), repo.wait(upload_id)).await { + Ok(wait_res) => { + let upload_result = wait_res?; + repo.claim(upload_id).await?; + + match upload_result { + UploadResult::Success { alias, token } => { + Ok(HttpResponse::Ok().json(&serde_json::json!({ + "msg": "ok", + "files": [{ + "file": alias.to_string(), + "delete_token": token.to_string(), + }] + }))) + } + UploadResult::Failure { message } => Ok(HttpResponse::UnprocessableEntity().json( + &serde_json::json!({ + "msg": message, + }), + )), + } + } + Err(_) => Ok(HttpResponse::NoContent().finish()), + } +} + #[derive(Debug, serde::Deserialize)] struct UrlQuery { url: String, } -pin_project_lite::pin_project! 
{ - struct Limit { - #[pin] - inner: S, - - count: u64, - limit: u64, - } -} - -impl Limit { - fn new(inner: S, limit: u64) -> Self { - Limit { - inner, - count: 0, - limit, - } - } -} - -#[derive(Debug, thiserror::Error)] -#[error("Resonse body larger than size limit")] -struct LimitError; - -impl Stream for Limit -where - S: Stream>, - E: From, -{ - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this = self.as_mut().project(); - - let limit = this.limit; - let count = this.count; - let inner = this.inner; - - inner.poll_next(cx).map(|opt| { - opt.map(|res| match res { - Ok(bytes) => { - *count += bytes.len() as u64; - if *count > *limit { - return Err(LimitError.into()); - } - Ok(bytes) - } - Err(e) => Err(e), - }) - }) - } -} - /// download an image from a URL -#[instrument(name = "Downloading file", skip(client, manager))] -async fn download( +#[instrument(name = "Downloading file", skip(client, repo))] +async fn download( client: web::Data, - manager: web::Data, + repo: web::Data, store: web::Data, query: web::Query, -) -> Result -where - Error: From, -{ +) -> Result { let res = client.get(&query.url).send().await?; if !res.status().is_success() { return Err(UploadError::Download(res.status()).into()); } - let stream = Limit::new( - map_error::map_crate_error(res), - (CONFIG.max_file_size() * MEGABYTES) as u64, - ); + let stream = res + .map_err(Error::from) + .limit((CONFIG.media.max_file_size * MEGABYTES) as u64); - futures_util::pin_mut!(stream); + let mut session = ingest::ingest(&**repo, &**store, stream, None, true).await?; - let permit = PROCESS_SEMAPHORE.acquire().await?; - let session = manager.session((**store).clone()).upload(stream).await?; - let alias = session.alias().unwrap().to_owned(); - drop(permit); + let alias = session.alias().expect("alias should exist").to_owned(); let delete_token = session.delete_token().await?; - let name = manager.from_alias(alias.to_owned()).await?; - let identifier = manager.identifier_from_filename::(name.clone()).await?; + let identifier = repo.identifier_from_alias::(&alias).await?; - let details = manager.variant_details(&identifier, name.clone()).await?; + let details = repo.details(&identifier).await?; let details = if let Some(details) = details { details } else { - let hint = details_hint(&name); + let hint = details_hint(&alias); let new_details = Details::from_store((**store).clone(), identifier.clone(), hint).await?; - manager - .store_variant_details(&identifier, name, &new_details) - .await?; + repo.relate_details(&identifier, &new_details).await?; new_details }; - session.succeed(); + session.disarm(); Ok(HttpResponse::Created().json(&serde_json::json!({ "msg": "ok", "files": [{ - "file": alias, - "delete_token": delete_token, + "file": alias.to_string(), + "delete_token": delete_token.to_string(), "details": details, }] }))) } /// Delete aliases and files -#[instrument(name = "Deleting file", skip(manager))] -async fn delete( - manager: web::Data, - store: web::Data, +#[instrument(name = "Deleting file", skip(repo))] +async fn delete( + repo: web::Data, path_entries: web::Path<(String, String)>, -) -> Result -where - Error: From, -{ - let (alias, token) = path_entries.into_inner(); +) -> Result { + let (token, alias) = path_entries.into_inner(); - manager.delete((**store).clone(), token, alias).await?; + let token = DeleteToken::from_existing(&token); + let alias = Alias::from_existing(&alias); + + queue::cleanup_alias(&**repo, alias, token).await?; 
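// Illustrative sketch, not part of this patch: the pin-projected Limit stream deleted
// above is superseded by the StreamLimit combinator from src/stream.rs (not shown in
// this diff). A minimal, hand-rolled equivalent of the same size cap:
use futures_util::StreamExt;

async fn collect_capped<S, E>(stream: S, limit: u64) -> Result<Vec<u8>, Error>
where
    S: futures_util::Stream<Item = Result<actix_web::web::Bytes, E>>,
    Error: From<E>,
{
    futures_util::pin_mut!(stream);
    let mut buf = Vec::new();
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        if (buf.len() + chunk.len()) as u64 > limit {
            // The real code surfaces UploadError::Limit; an io::Error stands in here.
            return Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                "response body larger than size limit",
            )
            .into());
        }
        buf.extend_from_slice(&chunk);
    }
    Ok(buf)
}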
Ok(HttpResponse::NoContent().finish()) } type ProcessQuery = Vec<(String, String)>; -async fn prepare_process( +fn prepare_process( query: web::Query, ext: &str, - manager: &UploadManager, - filters: &Option>, -) -> Result<(Format, String, PathBuf, Vec), Error> { +) -> Result<(ImageFormat, Alias, PathBuf, Vec), Error> { let (alias, operations) = query .into_inner() @@ -287,51 +306,42 @@ async fn prepare_process( }); if alias.is_empty() { - return Err(UploadError::MissingFilename.into()); + return Err(UploadError::MissingAlias.into()); } - let name = manager.from_alias(alias).await?; + let alias = Alias::from_existing(&alias); - let operations = if let Some(filters) = filters.as_ref() { - operations - .into_iter() - .filter(|(k, _)| filters.contains(&k.to_lowercase())) - .collect() - } else { - operations - }; + let operations = operations + .into_iter() + .filter(|(k, _)| CONFIG.media.filters.contains(&k.to_lowercase())) + .collect::>(); let format = ext - .parse::() + .parse::() .map_err(|_| UploadError::UnsupportedFormat)?; - let processed_name = format!("{}.{}", name, ext); - let (thumbnail_path, thumbnail_args) = - self::processor::build_chain(&operations, processed_name)?; + let ext = format.to_string(); - Ok((format, name, thumbnail_path, thumbnail_args)) + let (thumbnail_path, thumbnail_args) = self::processor::build_chain(&operations, &ext)?; + + Ok((format, alias, thumbnail_path, thumbnail_args)) } -#[instrument(name = "Fetching derived details", skip(manager, filters))] -async fn process_details( +#[instrument(name = "Fetching derived details", skip(repo))] +async fn process_details( query: web::Query, ext: web::Path, - manager: web::Data, - store: web::Data, - filters: web::Data>>, -) -> Result -where - Error: From, -{ - let (_, name, thumbnail_path, _) = - prepare_process(query, ext.as_str(), &manager, &filters).await?; + repo: web::Data, +) -> Result { + let (_, alias, thumbnail_path, _) = prepare_process(query, ext.as_str())?; - let identifier = manager - .variant_identifier::(&thumbnail_path, &name) + let hash = repo.hash(&alias).await?; + let identifier = repo + .variant_identifier::(hash, thumbnail_path.to_string_lossy().to_string()) .await? 
.ok_or(UploadError::MissingAlias)?; - let details = manager.variant_details(&identifier, name).await?; + let details = repo.details(&identifier).await?; let details = details.ok_or(UploadError::NoFiles)?; @@ -339,106 +349,47 @@ where } /// Process files -#[instrument(name = "Serving processed image", skip(manager, filters))] -async fn process( +#[instrument(name = "Serving processed image", skip(repo))] +async fn process( range: Option>, query: web::Query, ext: web::Path, - manager: web::Data, + repo: web::Data, store: web::Data, - filters: web::Data>>, -) -> Result -where - Error: From, -{ - let (format, name, thumbnail_path, thumbnail_args) = - prepare_process(query, ext.as_str(), &manager, &filters).await?; +) -> Result { + let (format, alias, thumbnail_path, thumbnail_args) = prepare_process(query, ext.as_str())?; - let identifier_opt = manager - .variant_identifier::(&thumbnail_path, &name) + let path_string = thumbnail_path.to_string_lossy().to_string(); + let hash = repo.hash(&alias).await?; + let identifier_opt = repo + .variant_identifier::(hash.clone(), path_string) .await?; if let Some(identifier) = identifier_opt { - let details_opt = manager.variant_details(&identifier, name.clone()).await?; + let details_opt = repo.details(&identifier).await?; let details = if let Some(details) = details_opt { details } else { - let hint = details_hint(&name); + let hint = details_hint(&alias); let details = Details::from_store((**store).clone(), identifier.clone(), hint).await?; - manager - .store_variant_details(&identifier, name, &details) - .await?; + repo.relate_details(&identifier, &details).await?; details }; return ranged_file_resp(&**store, identifier, range, details).await; } - let identifier = manager - .still_identifier_from_filename((**store).clone(), name.clone()) - .await?; - - let thumbnail_path2 = thumbnail_path.clone(); - let process_fut = async { - let thumbnail_path = thumbnail_path2; - - let permit = PROCESS_SEMAPHORE.acquire().await?; - - let mut processed_reader = crate::magick::process_image_store_read( - (**store).clone(), - identifier, - thumbnail_args, - format, - )?; - - let mut vec = Vec::new(); - processed_reader.read_to_end(&mut vec).await?; - let bytes = web::Bytes::from(vec); - - drop(permit); - - let details = Details::from_bytes(bytes.clone(), format.as_hint()).await?; - - let save_span = tracing::info_span!( - parent: None, - "Saving variant information", - path = tracing::field::debug(&thumbnail_path), - name = tracing::field::display(&name), - ); - save_span.follows_from(Span::current()); - let details2 = details.clone(); - let bytes2 = bytes.clone(); - actix_rt::spawn( - async move { - let identifier = match store.save_bytes(bytes2, &name).await { - Ok(identifier) => identifier, - Err(e) => { - tracing::warn!("Failed to generate directory path: {}", e); - return; - } - }; - if let Err(e) = manager - .store_variant_details(&identifier, name.clone(), &details2) - .await - { - tracing::warn!("Error saving variant details: {}", e); - return; - } - if let Err(e) = manager - .store_variant(Some(&thumbnail_path), &identifier, &name) - .await - { - tracing::warn!("Error saving variant info: {}", e); - } - } - .instrument(save_span), - ); - - Ok((details, bytes)) as Result<(Details, web::Bytes), Error> - }; - - let (details, bytes) = CancelSafeProcessor::new(thumbnail_path.clone(), process_fut).await?; + let (details, bytes) = generate::generate( + &**repo, + &**store, + format, + alias, + thumbnail_path, + thumbnail_args, + hash, + ) + .await?; let (builder, 
stream) = if let Some(web::Header(range_header)) = range { if let Some(range) = range::single_bytes_range(&range_header) { @@ -472,29 +423,50 @@ where )) } -/// Fetch file details -#[instrument(name = "Fetching details", skip(manager))] -async fn details( - alias: web::Path, - manager: web::Data, - store: web::Data, -) -> Result -where - Error: From, -{ - let name = manager.from_alias(alias.into_inner()).await?; - let identifier = manager.identifier_from_filename::(name.clone()).await?; +/// Process files +#[instrument(name = "Spawning image process", skip(repo))] +async fn process_backgrounded( + query: web::Query, + ext: web::Path, + repo: web::Data, +) -> Result { + let (target_format, source, process_path, process_args) = prepare_process(query, ext.as_str())?; - let details = manager.variant_details(&identifier, name.clone()).await?; + let path_string = process_path.to_string_lossy().to_string(); + let hash = repo.hash(&source).await?; + let identifier_opt = repo + .variant_identifier::(hash.clone(), path_string) + .await?; + + if identifier_opt.is_some() { + return Ok(HttpResponse::Accepted().finish()); + } + + queue_generate(&**repo, target_format, source, process_path, process_args).await?; + + Ok(HttpResponse::Accepted().finish()) +} + +/// Fetch file details +#[instrument(name = "Fetching details", skip(repo))] +async fn details( + alias: web::Path, + repo: web::Data, + store: web::Data, +) -> Result { + let alias = alias.into_inner(); + let alias = Alias::from_existing(&alias); + + let identifier = repo.identifier_from_alias::(&alias).await?; + + let details = repo.details(&identifier).await?; let details = if let Some(details) = details { details } else { - let hint = details_hint(&name); + let hint = details_hint(&alias); let new_details = Details::from_store((**store).clone(), identifier.clone(), hint).await?; - manager - .store_variant_details(&identifier, name, &new_details) - .await?; + repo.relate_details(&identifier, &new_details).await?; new_details }; @@ -502,44 +474,37 @@ where } /// Serve files -#[instrument(name = "Serving file", skip(manager))] -async fn serve( +#[instrument(name = "Serving file", skip(repo))] +async fn serve( range: Option>, alias: web::Path, - manager: web::Data, + repo: web::Data, store: web::Data, -) -> Result -where - Error: From, -{ - let name = manager.from_alias(alias.into_inner()).await?; - let identifier = manager.identifier_from_filename::(name.clone()).await?; +) -> Result { + let alias = alias.into_inner(); + let alias = Alias::from_existing(&alias); + let identifier = repo.identifier_from_alias::(&alias).await?; - let details = manager.variant_details(&identifier, name.clone()).await?; + let details = repo.details(&identifier).await?; let details = if let Some(details) = details { details } else { - let hint = details_hint(&name); + let hint = details_hint(&alias); let details = Details::from_store((**store).clone(), identifier.clone(), hint).await?; - manager - .store_variant_details(&identifier, name, &details) - .await?; + repo.relate_details(&identifier, &details).await?; details }; ranged_file_resp(&**store, identifier, range, details).await } -async fn ranged_file_resp( +async fn ranged_file_resp( store: &S, identifier: S::Identifier, range: Option>, details: Details, -) -> Result -where - Error: From, -{ +) -> Result { let (builder, stream) = if let Some(web::Header(range_header)) = range { //Range header exists - return as ranged if let Some(range) = range::single_bytes_range(&range_header) { @@ -550,9 +515,11 @@ where 
builder.insert_header(content_range); ( builder, - Either::left(Either::left(map_error::map_crate_error( - range::chop_store(range, store, &identifier, len).await?, - ))), + Either::left(Either::left( + range::chop_store(range, store, &identifier, len) + .await? + .map_err(Error::from), + )), ) } else { ( @@ -565,7 +532,10 @@ where } } else { //No Range header in the request - return the entire document - let stream = map_error::map_crate_error(store.to_stream(&identifier, None, None).await?); + let stream = store + .to_stream(&identifier, None, None) + .await? + .map_err(Error::from); (HttpResponse::Ok(), Either::right(stream)) }; @@ -591,6 +561,12 @@ where E: std::error::Error + 'static, actix_web::Error: From, { + let stream = stream.timeout(Duration::from_secs(5)).map(|res| match res { + Ok(Ok(item)) => Ok(item), + Ok(Err(e)) => Err(actix_web::Error::from(e)), + Err(e) => Err(Error::from(e).into()), + }); + builder .insert_header(LastModified(modified.into())) .insert_header(CacheControl(vec![ @@ -604,77 +580,40 @@ where } #[derive(Debug, serde::Deserialize)] -#[serde(untagged)] -enum FileOrAlias { - File { file: String }, - Alias { alias: String }, +struct AliasQuery { + alias: String, } -#[instrument(name = "Purging file", skip(upload_manager))] -async fn purge( - query: web::Query, - upload_manager: web::Data, - store: web::Data, -) -> Result -where - Error: From, -{ - let aliases = match query.into_inner() { - FileOrAlias::File { file } => upload_manager.aliases_by_filename(file).await?, - FileOrAlias::Alias { alias } => upload_manager.aliases_by_alias(alias).await?, - }; +#[instrument(name = "Purging file", skip(repo))] +async fn purge( + query: web::Query, + repo: web::Data, +) -> Result { + let alias = Alias::from_existing(&query.alias); + let aliases = repo.aliases_from_alias(&alias).await?; for alias in aliases.iter() { - upload_manager - .delete_without_token((**store).clone(), alias.to_owned()) - .await?; + let token = repo.delete_token(alias).await?; + queue::cleanup_alias(&**repo, alias.clone(), token).await?; } Ok(HttpResponse::Ok().json(&serde_json::json!({ "msg": "ok", - "aliases": aliases + "aliases": aliases.iter().map(|a| a.to_string()).collect::>() }))) } -#[instrument(name = "Fetching aliases", skip(upload_manager))] -async fn aliases( - query: web::Query, - upload_manager: web::Data, - store: web::Data, -) -> Result -where - Error: From, -{ - let aliases = match query.into_inner() { - FileOrAlias::File { file } => upload_manager.aliases_by_filename(file).await?, - FileOrAlias::Alias { alias } => upload_manager.aliases_by_alias(alias).await?, - }; +#[instrument(name = "Fetching aliases", skip(repo))] +async fn aliases( + query: web::Query, + repo: web::Data, +) -> Result { + let alias = Alias::from_existing(&query.alias); + let aliases = repo.aliases_from_alias(&alias).await?; Ok(HttpResponse::Ok().json(&serde_json::json!({ "msg": "ok", - "aliases": aliases, - }))) -} - -#[derive(Debug, serde::Deserialize)] -struct ByAlias { - alias: String, -} - -#[instrument(name = "Fetching filename", skip(upload_manager))] -async fn filename_by_alias( - query: web::Query, - upload_manager: web::Data, - store: web::Data, -) -> Result -where - Error: From, -{ - let filename = upload_manager.from_alias(query.into_inner().alias).await?; - - Ok(HttpResponse::Ok().json(&serde_json::json!({ - "msg": "ok", - "filename": filename, + "aliases": aliases.iter().map(|a| a.to_string()).collect::>() }))) } @@ -691,139 +630,194 @@ fn build_client() -> awc::Client { .finish() } -#[cfg(feature = 
"object-storage")] fn build_reqwest_client() -> reqwest::Result { reqwest::Client::builder() .user_agent("pict-rs v0.3.0-main") .build() } -async fn launch(manager: UploadManager, store: S) -> anyhow::Result<()> -where - S::Error: Unpin, - Error: From, -{ +fn next_worker_id() -> String { + static WORKER_ID: AtomicU64 = AtomicU64::new(0); + + let next_id = WORKER_ID.fetch_add(1, Ordering::Relaxed); + + format!("{}-{}", CONFIG.server.worker_id, next_id) +} + +async fn launch( + repo: R, + store: S, +) -> color_eyre::Result<()> { + repo.requeue_in_progress(CONFIG.server.worker_id.as_bytes().to_vec()) + .await?; // Create a new Multipart Form validator // // This form is expecting a single array field, 'images' with at most 10 files in it - let manager2 = manager.clone(); + let repo2 = repo.clone(); let store2 = store.clone(); let form = Form::new() .max_files(10) - .max_file_size(CONFIG.max_file_size() * MEGABYTES) + .max_file_size(CONFIG.media.max_file_size * MEGABYTES) .transform_error(transform_error) .field( "images", Field::array(Field::file(move |filename, _, stream| { + let repo = repo2.clone(); let store = store2.clone(); - let manager = manager2.clone(); let span = tracing::info_span!("file-upload", ?filename); - async move { - let permit = PROCESS_SEMAPHORE.acquire().await?; + let stream = stream.map_err(Error::from); - let res = manager - .session(store) - .upload(map_error::map_crate_error(stream)) - .await; - - drop(permit); - res - } - .instrument(span) + Box::pin( + async move { ingest::ingest(&repo, &store, stream, None, true).await } + .instrument(span), + ) })), ); // Create a new Multipart Form validator for internal imports // // This form is expecting a single array field, 'images' with at most 10 files in it - let validate_imports = CONFIG.validate_imports(); - let manager2 = manager.clone(); + let repo2 = repo.clone(); let store2 = store.clone(); let import_form = Form::new() .max_files(10) - .max_file_size(CONFIG.max_file_size() * MEGABYTES) + .max_file_size(CONFIG.media.max_file_size * MEGABYTES) .transform_error(transform_error) .field( "images", Field::array(Field::file(move |filename, _, stream| { + let repo = repo2.clone(); let store = store2.clone(); - let manager = manager2.clone(); let span = tracing::info_span!("file-import", ?filename); - async move { - let permit = PROCESS_SEMAPHORE.acquire().await?; + let stream = stream.map_err(Error::from); - let res = manager - .session(store) - .import( - filename, - validate_imports, - map_error::map_crate_error(stream), + Box::pin( + async move { + ingest::ingest( + &repo, + &store, + stream, + Some(Alias::from_existing(&filename)), + !CONFIG.media.skip_validate_imports, ) - .await; + .await + } + .instrument(span), + ) + })), + ); - drop(permit); - res - } - .instrument(span) + // Create a new Multipart Form validator for backgrounded uploads + // + // This form is expecting a single array field, 'images' with at most 10 files in it + let repo2 = repo.clone(); + let store2 = store.clone(); + let backgrounded_form = Form::new() + .max_files(10) + .max_file_size(CONFIG.media.max_file_size * MEGABYTES) + .transform_error(transform_error) + .field( + "images", + Field::array(Field::file(move |filename, _, stream| { + let repo = repo2.clone(); + let store = store2.clone(); + + let span = tracing::info_span!("file-proxy", ?filename); + + let stream = stream.map_err(Error::from); + + Box::pin( + async move { Backgrounded::proxy(repo, store, stream).await }.instrument(span), + ) })), ); HttpServer::new(move || { + let store 
= store.clone(); + let repo = repo.clone(); + + actix_rt::spawn(queue::process_cleanup( + repo.clone(), + store.clone(), + next_worker_id(), + )); + actix_rt::spawn(queue::process_images( + repo.clone(), + store.clone(), + next_worker_id(), + )); + App::new() .wrap(TracingLogger::default()) .wrap(Deadline) - .app_data(web::Data::new(store.clone())) - .app_data(web::Data::new(manager.clone())) + .app_data(web::Data::new(repo)) + .app_data(web::Data::new(store)) .app_data(web::Data::new(build_client())) - .app_data(web::Data::new(CONFIG.allowed_filters())) .service( web::scope("/image") .service( web::resource("") .guard(guard::Post()) .wrap(form.clone()) - .route(web::post().to(upload::)), + .route(web::post().to(upload::)), ) - .service(web::resource("/download").route(web::get().to(download::))) + .service( + web::scope("/backgrounded") + .service( + web::resource("") + .guard(guard::Post()) + .wrap(backgrounded_form.clone()) + .route(web::post().to(upload_backgrounded::)), + ) + .service( + web::resource("/claim").route(web::get().to(claim_upload::)), + ), + ) + .service(web::resource("/download").route(web::get().to(download::))) .service( web::resource("/delete/{delete_token}/{filename}") - .route(web::delete().to(delete::)) - .route(web::get().to(delete::)), + .route(web::delete().to(delete::)) + .route(web::get().to(delete::)), + ) + .service( + web::resource("/original/{filename}").route(web::get().to(serve::)), + ) + .service(web::resource("/process.{ext}").route(web::get().to(process::))) + .service( + web::resource("/process_backgrounded.{ext}") + .route(web::get().to(process_backgrounded::)), ) - .service(web::resource("/original/{filename}").route(web::get().to(serve::))) - .service(web::resource("/process.{ext}").route(web::get().to(process::))) .service( web::scope("/details") .service( web::resource("/original/{filename}") - .route(web::get().to(details::)), + .route(web::get().to(details::)), ) .service( web::resource("/process.{ext}") - .route(web::get().to(process_details::)), + .route(web::get().to(process_details::)), ), ), ) .service( web::scope("/internal") - .wrap(Internal(CONFIG.api_key().map(|s| s.to_owned()))) + .wrap(Internal( + CONFIG.server.api_key.as_ref().map(|s| s.to_owned()), + )) .service( web::resource("/import") .wrap(import_form.clone()) - .route(web::post().to(upload::)), + .route(web::post().to(upload::)), ) - .service(web::resource("/purge").route(web::post().to(purge::))) - .service(web::resource("/aliases").route(web::get().to(aliases::))) - .service( - web::resource("/filename").route(web::get().to(filename_by_alias::)), - ), + .service(web::resource("/purge").route(web::post().to(purge::))) + .service(web::resource("/aliases").route(web::get().to(aliases::))), ) }) - .bind(CONFIG.bind_address())? + .bind(CONFIG.server.address)? 
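The handlers registered above (upload, serve, process, and so on) are generic over the repo and store types, so each route has to pin the concrete types when it is wired up. A small, self-contained actix-web sketch of that pattern with a toy Store trait standing in for pict-rs's; it is not the actual pict-rs handler code.

use actix_web::{web, App, HttpServer, Responder};

trait Store: Clone + Send + Sync + 'static {
    fn kind(&self) -> &'static str;
}

#[derive(Clone)]
struct FileStore;

impl Store for FileStore {
    fn kind(&self) -> &'static str {
        "filesystem"
    }
}

// A handler generic over the store; the concrete type is chosen at route-registration time.
async fn which_store<S: Store>(store: web::Data<S>) -> impl Responder {
    format!("serving from the {} store", store.kind())
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    HttpServer::new(|| {
        App::new()
            .app_data(web::Data::new(FileStore))
            // the turbofish pins the generic handler to the store placed in app data
            .route("/which", web::get().to(which_store::<FileStore>))
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}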
.run() .await?; @@ -832,48 +826,40 @@ where Ok(()) } -async fn migrate_inner( - manager: &UploadManager, - db: &sled::Db, - from: S1, - to: &config::Store, -) -> anyhow::Result<()> +async fn migrate_inner(repo: &Repo, from: S1, to: &config::Store) -> color_eyre::Result<()> where S1: Store, - Error: From, { match to { - config::Store::FileStore { path } => { - let path = path.to_owned().unwrap_or_else(|| CONFIG.data_dir()); - - let to = FileStore::build(path, db)?; - manager.restructure(&to).await?; - - manager.migrate_store::(from, to).await?; + config::Store::Filesystem(config::Filesystem { path }) => { + let to = FileStore::build(path.clone(), repo.clone()).await?; + match repo { + Repo::Sled(repo) => migrate_store(repo, from, to).await?, + } } - #[cfg(feature = "object-storage")] - config::Store::S3Store { + config::Store::ObjectStorage(config::ObjectStorage { bucket_name, region, access_key, secret_key, security_token, session_token, - } => { - use store::object_store::ObjectStore; - + }) => { let to = ObjectStore::build( bucket_name, - (**region).clone(), - access_key.clone(), - secret_key.clone(), + region.as_ref().clone(), + Some(access_key.clone()), + Some(secret_key.clone()), security_token.clone(), session_token.clone(), - db, + repo.clone(), build_reqwest_client()?, - )?; + ) + .await?; - manager.migrate_store::(from, to).await?; + match repo { + Repo::Sled(repo) => migrate_store(repo, from, to).await?, + } } } @@ -881,87 +867,156 @@ where } #[actix_rt::main] -async fn main() -> anyhow::Result<()> { - init_tracing( - "pict-rs", - CONFIG.opentelemetry_url(), - CONFIG.console_buffer_capacity(), - )?; +async fn main() -> color_eyre::Result<()> { + init_tracing(&CONFIG.tracing)?; - let db = LatestDb::exists(CONFIG.data_dir(), CONFIG.sled_cache_capacity()).migrate()?; + let repo = Repo::open(CONFIG.repo.clone())?; + repo.from_db(CONFIG.old_db.path.clone()).await?; - let manager = UploadManager::new(db.clone(), CONFIG.format()).await?; - - if let Some(m) = MIGRATE.get() { - let from = m.from(); - let to = m.to(); - - match from { - config::Store::FileStore { path } => { - let path = path.to_owned().unwrap_or_else(|| CONFIG.data_dir()); - - let from = FileStore::build(path, &db)?; - manager.restructure(&from).await?; - - migrate_inner(&manager, &db, from, to).await?; - } - #[cfg(feature = "object-storage")] - config::Store::S3Store { - bucket_name, - region, - access_key, - secret_key, - security_token, - session_token, - } => { - let from = crate::store::object_store::ObjectStore::build( + match (*OPERATION).clone() { + Operation::Run => (), + Operation::MigrateStore { from, to } => { + match from { + config::Store::Filesystem(config::Filesystem { path }) => { + let from = FileStore::build(path.clone(), repo.clone()).await?; + migrate_inner(&repo, from, &to).await?; + } + config::Store::ObjectStorage(config::ObjectStorage { bucket_name, - (**region).clone(), - access_key.clone(), - secret_key.clone(), - security_token.clone(), - session_token.clone(), - &db, - build_reqwest_client()?, - )?; + region, + access_key, + secret_key, + security_token, + session_token, + }) => { + let from = ObjectStore::build( + &bucket_name, + Serde::into_inner(region), + Some(access_key), + Some(secret_key), + security_token, + session_token, + repo.clone(), + build_reqwest_client()?, + ) + .await?; - migrate_inner(&manager, &db, from, to).await?; + migrate_inner(&repo, from, &to).await?; + } } - } - return Ok(()); + return Ok(()); + } } - match CONFIG.store() { - config::Store::FileStore { path } => { - 
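The match on (*OPERATION) above chooses between running the server and migrating the store. The enum itself comes from the config/CLI layer, which is not part of this hunk; the following is only a hypothetical clap 3 sketch of that shape, with illustrative command names rather than pict-rs's actual CLI.

use clap::{Parser, Subcommand};

#[derive(Parser)]
struct Args {
    #[clap(subcommand)]
    command: Option<Command>,
}

#[derive(Subcommand, Clone)]
enum Command {
    /// Run the image server (the default when no subcommand is given)
    Run,
    /// Copy every file from the current store into another store
    MigrateStore,
}

fn main() {
    match Args::parse().command.unwrap_or(Command::Run) {
        Command::Run => println!("launching server"),
        Command::MigrateStore => println!("migrating store"),
    }
}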
let path = path.to_owned().unwrap_or_else(|| CONFIG.data_dir()); - - let store = FileStore::build(path.clone(), &db)?; - manager.restructure(&store).await?; - - launch(manager, store).await + match CONFIG.store.clone() { + config::Store::Filesystem(config::Filesystem { path }) => { + let store = FileStore::build(path, repo.clone()).await?; + match repo { + Repo::Sled(sled_repo) => launch(sled_repo, store).await, + } } - #[cfg(feature = "object-storage")] - config::Store::S3Store { + config::Store::ObjectStorage(config::ObjectStorage { bucket_name, region, access_key, secret_key, security_token, session_token, - } => { - let store = crate::store::object_store::ObjectStore::build( - bucket_name, - (**region).clone(), - access_key.clone(), - secret_key.clone(), - security_token.clone(), - session_token.clone(), - &db, + }) => { + let store = ObjectStore::build( + &bucket_name, + Serde::into_inner(region), + Some(access_key), + Some(secret_key), + security_token, + session_token, + repo.clone(), build_reqwest_client()?, - )?; + ) + .await?; - launch(manager, store).await + match repo { + Repo::Sled(sled_repo) => launch(sled_repo, store).await, + } } } } + +const STORE_MIGRATION_PROGRESS: &str = "store-migration-progress"; + +async fn migrate_store(repo: &R, from: S1, to: S2) -> Result<(), Error> +where + S1: Store, + S2: Store, + R: IdentifierRepo + HashRepo + SettingsRepo, +{ + let stream = repo.hashes().await; + let mut stream = Box::pin(stream); + + while let Some(hash) = stream.next().await { + let hash = hash?; + if let Some(identifier) = repo + .motion_identifier(hash.as_ref().to_vec().into()) + .await? + { + let new_identifier = migrate_file(&from, &to, &identifier).await?; + migrate_details(repo, identifier, &new_identifier).await?; + repo.relate_motion_identifier(hash.as_ref().to_vec().into(), &new_identifier) + .await?; + } + + for (variant, identifier) in repo.variants(hash.as_ref().to_vec().into()).await? { + let new_identifier = migrate_file(&from, &to, &identifier).await?; + migrate_details(repo, identifier, &new_identifier).await?; + repo.relate_variant_identifier(hash.as_ref().to_vec().into(), variant, &new_identifier) + .await?; + } + + let identifier = repo.identifier(hash.as_ref().to_vec().into()).await?; + let new_identifier = migrate_file(&from, &to, &identifier).await?; + migrate_details(repo, identifier, &new_identifier).await?; + repo.relate_identifier(hash.as_ref().to_vec().into(), &new_identifier) + .await?; + + repo.set(STORE_MIGRATION_PROGRESS, hash.as_ref().to_vec().into()) + .await?; + } + + // clean up the migration key to avoid interfering with future migrations + repo.remove(STORE_MIGRATION_PROGRESS).await?; + + Ok(()) +} + +async fn migrate_file( + from: &S1, + to: &S2, + identifier: &S1::Identifier, +) -> Result +where + S1: Store, + S2: Store, +{ + let stream = from.to_stream(identifier, None, None).await?; + futures_util::pin_mut!(stream); + let mut reader = tokio_util::io::StreamReader::new(stream); + + let new_identifier = to.save_async_read(&mut reader).await?; + + Ok(new_identifier) +} + +async fn migrate_details(repo: &R, from: I1, to: &I2) -> Result<(), Error> +where + R: IdentifierRepo, + I1: Identifier, + I2: Identifier, +{ + if let Some(details) = repo.details(&from).await? 
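migrate_file above bridges the two store APIs by turning the source store's byte stream into an AsyncRead and handing it to the destination's save_async_read. A self-contained sketch of that bridge using tokio_util::io::StreamReader; the in-memory stream below stands in for from.to_stream, and collecting into a Vec stands in for the destination store.

use bytes::Bytes;
use futures_util::stream;
use tokio::io::AsyncReadExt;
use tokio_util::io::StreamReader;

#[tokio::main]
async fn main() -> std::io::Result<()> {
    // Stand-in for `from.to_stream(identifier, None, None)`: a stream of Bytes chunks.
    let byte_stream = stream::iter(vec![
        Ok::<_, std::io::Error>(Bytes::from_static(b"hello ")),
        Ok(Bytes::from_static(b"world")),
    ]);

    // StreamReader adapts a Stream of Result<Bytes, io::Error> into an AsyncRead,
    // which is the shape a `save_async_read`-style sink consumes.
    let mut reader = StreamReader::new(byte_stream);

    // Stand-in for `to.save_async_read(&mut reader)`: just collect the bytes.
    let mut copied = Vec::new();
    reader.read_to_end(&mut copied).await?;
    assert_eq!(copied, b"hello world");

    Ok(())
}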
{ + repo.relate_details(to, &details).await?; + repo.cleanup(&from).await?; + } + + Ok(()) +} diff --git a/src/map_error.rs b/src/map_error.rs deleted file mode 100644 index b63b9a8..0000000 --- a/src/map_error.rs +++ /dev/null @@ -1,43 +0,0 @@ -use crate::error::Error; -use futures_util::stream::Stream; -use std::{ - marker::PhantomData, - pin::Pin, - task::{Context, Poll}, -}; - -pin_project_lite::pin_project! { - pub(super) struct MapError { - #[pin] - inner: S, - - _error: PhantomData, - } -} - -pub(super) fn map_crate_error(inner: S) -> MapError { - map_error(inner) -} - -pub(super) fn map_error(inner: S) -> MapError { - MapError { - inner, - _error: PhantomData, - } -} - -impl Stream for MapError -where - S: Stream>, - E: From, -{ - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this = self.as_mut().project(); - - this.inner - .poll_next(cx) - .map(|opt| opt.map(|res| res.map_err(Into::into))) - } -} diff --git a/src/migrate.rs b/src/migrate.rs deleted file mode 100644 index 90a391d..0000000 --- a/src/migrate.rs +++ /dev/null @@ -1,129 +0,0 @@ -use crate::UploadError; -use std::path::PathBuf; - -mod s034; - -type SledIter = Box, Vec), UploadError>>>; - -trait SledDb { - type SledTree: SledTree; - - fn open_tree(&self, name: &str) -> Result; - - fn self_tree(&self) -> &Self::SledTree; -} - -impl SledDb for &T -where - T: SledDb, -{ - type SledTree = T::SledTree; - - fn open_tree(&self, name: &str) -> Result { - (*self).open_tree(name) - } - - fn self_tree(&self) -> &Self::SledTree { - (*self).self_tree() - } -} - -trait SledTree { - fn get(&self, key: K) -> Result>, UploadError> - where - K: AsRef<[u8]>; - - fn insert(&self, key: K, value: V) -> Result<(), UploadError> - where - K: AsRef<[u8]>, - V: AsRef<[u8]>; - - fn iter(&self) -> SledIter; - - fn range(&self, range: R) -> SledIter - where - K: AsRef<[u8]>, - R: std::ops::RangeBounds; - - fn flush(&self) -> Result<(), UploadError>; -} - -pub(crate) struct LatestDb { - root_dir: PathBuf, - version: DbVersion, - cache_capacity: u64, -} - -impl LatestDb { - pub(crate) fn exists(root_dir: PathBuf, cache_capacity: u64) -> Self { - let version = DbVersion::exists(root_dir.clone(), cache_capacity); - - LatestDb { - root_dir, - version, - cache_capacity, - } - } - - pub(crate) fn migrate(self) -> Result { - let LatestDb { - root_dir, - version, - cache_capacity, - } = self; - - loop { - let root_dir2 = root_dir.clone(); - let res = std::panic::catch_unwind(move || version.migrate(root_dir2, cache_capacity)); - - if let Ok(res) = res { - return res; - } - } - } -} - -#[derive(Clone, Copy)] -enum DbVersion { - Sled034, - Fresh, -} - -impl DbVersion { - fn exists(root: PathBuf, cache_capacity: u64) -> Self { - if s034::exists(root.clone()) && !s034::migrating(root, cache_capacity) { - return DbVersion::Sled034; - } - - DbVersion::Fresh - } - - fn migrate(self, root: PathBuf, cache_capacity: u64) -> Result { - match self { - DbVersion::Sled034 | DbVersion::Fresh => s034::open(root, cache_capacity), - } - } -} - -pub(crate) fn alias_key_bounds(hash: &[u8]) -> (Vec, Vec) { - let mut start = hash.to_vec(); - start.extend(&[0]); - - let mut end = hash.to_vec(); - end.extend(&[1]); - - (start, end) -} - -pub(crate) fn alias_id_key(alias: &str) -> String { - format!("{}/id", alias) -} - -pub(crate) fn alias_key(hash: &[u8], id: &str) -> Vec { - let mut key = hash.to_vec(); - // add a separator to the key between the hash and the ID - key.extend(&[0]); - key.extend(id.as_bytes()); - - key -} diff 
--git a/src/process.rs b/src/process.rs index 057d809..4d5b628 100644 --- a/src/process.rs +++ b/src/process.rs @@ -144,7 +144,7 @@ impl Process { }) } - pub(crate) fn store_read( + pub(crate) fn store_read( mut self, store: S, identifier: S::Identifier, diff --git a/src/processor.rs b/src/processor.rs index feb3c09..9e264ec 100644 --- a/src/processor.rs +++ b/src/processor.rs @@ -22,9 +22,9 @@ pub(crate) struct Blur(f64); #[instrument] pub(crate) fn build_chain( args: &[(String, String)], - filename: String, + ext: &str, ) -> Result<(PathBuf, Vec), Error> { - fn parse(key: &str, value: &str) -> Result, UploadError> { + fn parse(key: &str, value: &str) -> Result, Error> { if key == P::NAME { return Ok(Some(P::parse(key, value).ok_or(UploadError::ParsePath)?)); } @@ -40,7 +40,7 @@ pub(crate) fn build_chain( }}; } - let (path, args) = + let (mut path, args) = args.iter() .fold(Ok((PathBuf::default(), vec![])), |inner, (name, value)| { if let Ok(inner) = inner { @@ -56,7 +56,9 @@ pub(crate) fn build_chain( } })?; - Ok((path.join(filename), args)) + path.push(ext); + + Ok((path, args)) } impl Processor for Identity { diff --git a/src/queue.rs b/src/queue.rs new file mode 100644 index 0000000..22e095f --- /dev/null +++ b/src/queue.rs @@ -0,0 +1,176 @@ +use crate::{ + config::ImageFormat, + error::Error, + repo::{ + Alias, AliasRepo, DeleteToken, FullRepo, HashRepo, IdentifierRepo, QueueRepo, UploadId, + }, + serde_str::Serde, + store::{Identifier, Store}, +}; +use std::{future::Future, path::PathBuf, pin::Pin}; +use tracing::Instrument; + +mod cleanup; +mod process; + +const CLEANUP_QUEUE: &str = "cleanup"; +const PROCESS_QUEUE: &str = "process"; + +#[derive(Debug, serde::Deserialize, serde::Serialize)] +enum Cleanup { + Hash { + hash: Vec, + }, + Identifier { + identifier: Vec, + }, + Alias { + alias: Serde, + token: Serde, + }, +} + +#[derive(Debug, serde::Deserialize, serde::Serialize)] +enum Process { + Ingest { + identifier: Vec, + upload_id: Serde, + declared_alias: Option>, + should_validate: bool, + }, + Generate { + target_format: ImageFormat, + source: Serde, + process_path: PathBuf, + process_args: Vec, + }, +} + +pub(crate) async fn cleanup_alias( + repo: &R, + alias: Alias, + token: DeleteToken, +) -> Result<(), Error> { + let job = serde_json::to_vec(&Cleanup::Alias { + alias: Serde::new(alias), + token: Serde::new(token), + })?; + repo.push(CLEANUP_QUEUE, job.into()).await?; + Ok(()) +} + +pub(crate) async fn cleanup_hash(repo: &R, hash: R::Bytes) -> Result<(), Error> { + let job = serde_json::to_vec(&Cleanup::Hash { + hash: hash.as_ref().to_vec(), + })?; + repo.push(CLEANUP_QUEUE, job.into()).await?; + Ok(()) +} + +pub(crate) async fn cleanup_identifier( + repo: &R, + identifier: I, +) -> Result<(), Error> { + let job = serde_json::to_vec(&Cleanup::Identifier { + identifier: identifier.to_bytes()?, + })?; + repo.push(CLEANUP_QUEUE, job.into()).await?; + Ok(()) +} + +pub(crate) async fn queue_ingest( + repo: &R, + identifier: Vec, + upload_id: UploadId, + declared_alias: Option, + should_validate: bool, +) -> Result<(), Error> { + let job = serde_json::to_vec(&Process::Ingest { + identifier, + declared_alias: declared_alias.map(Serde::new), + upload_id: Serde::new(upload_id), + should_validate, + })?; + repo.push(PROCESS_QUEUE, job.into()).await?; + Ok(()) +} + +pub(crate) async fn queue_generate( + repo: &R, + target_format: ImageFormat, + source: Alias, + process_path: PathBuf, + process_args: Vec, +) -> Result<(), Error> { + let job = serde_json::to_vec(&Process::Generate { + 
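The queue functions above all follow the same shape: serialize a job enum to JSON bytes, push the bytes onto a named queue, and let the worker deserialize them again in perform. A minimal round-trip sketch of that encoding, using a trimmed-down job enum; the real Cleanup and Process variants also carry Serde-wrapped aliases, tokens, and upload ids.

use serde::{Deserialize, Serialize};

// Trimmed-down stand-in for the Cleanup job enum.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
enum Cleanup {
    Hash { hash: Vec<u8> },
    Identifier { identifier: Vec<u8> },
}

fn main() -> serde_json::Result<()> {
    // push side: the job becomes opaque bytes handed to QueueRepo::push
    let job = serde_json::to_vec(&Cleanup::Hash { hash: vec![1, 2, 3] })?;

    // pop side: the worker turns the bytes from QueueRepo::pop back into a job
    let parsed: Cleanup = serde_json::from_slice(&job)?;
    assert_eq!(parsed, Cleanup::Hash { hash: vec![1, 2, 3] });

    Ok(())
}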
target_format, + source: Serde::new(source), + process_path, + process_args, + })?; + repo.push(PROCESS_QUEUE, job.into()).await?; + Ok(()) +} + +pub(crate) async fn process_cleanup(repo: R, store: S, worker_id: String) { + process_jobs(&repo, &store, worker_id, CLEANUP_QUEUE, cleanup::perform).await +} + +pub(crate) async fn process_images( + repo: R, + store: S, + worker_id: String, +) { + process_jobs(&repo, &store, worker_id, PROCESS_QUEUE, process::perform).await +} + +type LocalBoxFuture<'a, T> = Pin + 'a>>; + +async fn process_jobs( + repo: &R, + store: &S, + worker_id: String, + queue: &'static str, + callback: F, +) where + R: QueueRepo + HashRepo + IdentifierRepo + AliasRepo, + R::Bytes: Clone, + S: Store, + for<'a> F: Fn(&'a R, &'a S, &'a [u8]) -> LocalBoxFuture<'a, Result<(), Error>> + Copy, +{ + loop { + let res = job_loop(repo, store, worker_id.clone(), queue, callback).await; + + if let Err(e) = res { + tracing::warn!("Error processing jobs: {}", e); + tracing::warn!("{:?}", e); + continue; + } + + break; + } +} + +async fn job_loop( + repo: &R, + store: &S, + worker_id: String, + queue: &'static str, + callback: F, +) -> Result<(), Error> +where + R: QueueRepo + HashRepo + IdentifierRepo + AliasRepo, + R::Bytes: Clone, + S: Store, + for<'a> F: Fn(&'a R, &'a S, &'a [u8]) -> LocalBoxFuture<'a, Result<(), Error>> + Copy, +{ + loop { + let bytes = repo.pop(queue, worker_id.as_bytes().to_vec()).await?; + + let span = tracing::info_span!("Running Job", worker_id = ?worker_id); + + span.in_scope(|| (callback)(repo, store, bytes.as_ref())) + .instrument(span) + .await?; + } +} diff --git a/src/queue/cleanup.rs b/src/queue/cleanup.rs new file mode 100644 index 0000000..0496695 --- /dev/null +++ b/src/queue/cleanup.rs @@ -0,0 +1,128 @@ +use crate::{ + error::{Error, UploadError}, + queue::{Cleanup, LocalBoxFuture}, + repo::{Alias, AliasRepo, DeleteToken, FullRepo, HashRepo, IdentifierRepo}, + serde_str::Serde, + store::{Identifier, Store}, +}; +use tracing::error; + +pub(super) fn perform<'a, R, S>( + repo: &'a R, + store: &'a S, + job: &'a [u8], +) -> LocalBoxFuture<'a, Result<(), Error>> +where + R: FullRepo, + S: Store, +{ + Box::pin(async move { + match serde_json::from_slice(job) { + Ok(job) => match job { + Cleanup::Hash { hash: in_hash } => hash::(repo, in_hash).await?, + Cleanup::Identifier { + identifier: in_identifier, + } => identifier(repo, &store, in_identifier).await?, + Cleanup::Alias { + alias: stored_alias, + token, + } => { + alias( + repo, + Serde::into_inner(stored_alias), + Serde::into_inner(token), + ) + .await? 
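process_jobs and job_loop above take the per-queue callback as a plain function whose returned future borrows its arguments, which is why the bound is written with a higher-ranked for<'a> over LocalBoxFuture. A self-contained sketch of that pattern, with two arguments instead of three and futures::executor::block_on standing in for the actix runtime.

use std::{future::Future, pin::Pin};

type LocalBoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + 'a>>;

// A callback in the same shape as cleanup::perform / process::perform: it borrows its
// arguments for 'a and returns a boxed future tied to that same lifetime.
fn perform<'a>(queue: &'a str, job: &'a [u8]) -> LocalBoxFuture<'a, Result<(), String>> {
    Box::pin(async move {
        println!("{queue}: handled a {} byte job", job.len());
        Ok(())
    })
}

async fn run_jobs<F>(queue: &str, jobs: &[Vec<u8>], callback: F) -> Result<(), String>
where
    for<'a> F: Fn(&'a str, &'a [u8]) -> LocalBoxFuture<'a, Result<(), String>> + Copy,
{
    for job in jobs {
        (callback)(queue, job.as_slice()).await?;
    }
    Ok(())
}

fn main() {
    let jobs = vec![br#"{"Hash":{"hash":[1,2,3]}}"#.to_vec()];
    futures::executor::block_on(run_jobs("cleanup", &jobs, perform)).unwrap();
}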
+ } + }, + Err(e) => { + tracing::warn!("Invalid job: {}", e); + } + } + + Ok(()) + }) +} + +#[tracing::instrument(skip(repo, store))] +async fn identifier(repo: &R, store: &S, identifier: Vec) -> Result<(), Error> +where + R: FullRepo, + S: Store, +{ + let identifier = S::Identifier::from_bytes(identifier)?; + + let mut errors = Vec::new(); + + if let Err(e) = store.remove(&identifier).await { + errors.push(e); + } + + if let Err(e) = IdentifierRepo::cleanup(repo, &identifier).await { + errors.push(e); + } + + if !errors.is_empty() { + let span = tracing::error_span!("Error deleting files"); + span.in_scope(|| { + for error in errors { + error!("{}", error); + } + }); + } + + Ok(()) +} + +#[tracing::instrument(skip(repo))] +async fn hash(repo: &R, hash: Vec) -> Result<(), Error> +where + R: FullRepo, + S: Store, +{ + let hash: R::Bytes = hash.into(); + + let aliases = repo.aliases(hash.clone()).await?; + + if !aliases.is_empty() { + return Ok(()); + } + + let mut idents = repo + .variants::(hash.clone()) + .await? + .into_iter() + .map(|(_, v)| v) + .collect::>(); + idents.push(repo.identifier(hash.clone()).await?); + idents.extend(repo.motion_identifier(hash.clone()).await?); + + for identifier in idents { + let _ = crate::queue::cleanup_identifier(repo, identifier).await; + } + + HashRepo::cleanup(repo, hash).await?; + + Ok(()) +} + +async fn alias(repo: &R, alias: Alias, token: DeleteToken) -> Result<(), Error> +where + R: FullRepo, +{ + let saved_delete_token = repo.delete_token(&alias).await?; + if saved_delete_token != token { + return Err(UploadError::InvalidToken.into()); + } + + let hash = repo.hash(&alias).await?; + + AliasRepo::cleanup(repo, &alias).await?; + repo.remove_alias(hash.clone(), &alias).await?; + + if repo.aliases(hash.clone()).await?.is_empty() { + crate::queue::cleanup_hash(repo, hash).await?; + } + + Ok(()) +} diff --git a/src/queue/process.rs b/src/queue/process.rs new file mode 100644 index 0000000..6b31a78 --- /dev/null +++ b/src/queue/process.rs @@ -0,0 +1,150 @@ +use crate::{ + config::ImageFormat, + error::Error, + ingest::Session, + queue::{LocalBoxFuture, Process}, + repo::{Alias, DeleteToken, FullRepo, UploadId, UploadResult}, + serde_str::Serde, + store::{Identifier, Store}, +}; +use futures_util::TryStreamExt; +use std::path::PathBuf; + +pub(super) fn perform<'a, R, S>( + repo: &'a R, + store: &'a S, + job: &'a [u8], +) -> LocalBoxFuture<'a, Result<(), Error>> +where + R: FullRepo + 'static, + S: Store + 'static, +{ + Box::pin(async move { + match serde_json::from_slice(job) { + Ok(job) => match job { + Process::Ingest { + identifier, + upload_id, + declared_alias, + should_validate, + } => { + process_ingest( + repo, + store, + identifier, + Serde::into_inner(upload_id), + declared_alias.map(Serde::into_inner), + should_validate, + ) + .await? + } + Process::Generate { + target_format, + source, + process_path, + process_args, + } => { + generate( + repo, + store, + target_format, + Serde::into_inner(source), + process_path, + process_args, + ) + .await? 
+ } + }, + Err(e) => { + tracing::warn!("Invalid job: {}", e); + } + } + + Ok(()) + }) +} + +#[tracing::instrument(skip(repo, store))] +async fn process_ingest( + repo: &R, + store: &S, + unprocessed_identifier: Vec, + upload_id: UploadId, + declared_alias: Option, + should_validate: bool, +) -> Result<(), Error> +where + R: FullRepo + 'static, + S: Store, +{ + let fut = async { + let unprocessed_identifier = S::Identifier::from_bytes(unprocessed_identifier)?; + + let stream = store + .to_stream(&unprocessed_identifier, None, None) + .await? + .map_err(Error::from); + + let session = + crate::ingest::ingest(repo, store, stream, declared_alias, should_validate).await?; + + let token = session.delete_token().await?; + + store.remove(&unprocessed_identifier).await?; + + Ok((session, token)) as Result<(Session, DeleteToken), Error> + }; + + let result = match fut.await { + Ok((mut session, token)) => { + let alias = session.alias().take().expect("Alias should exist").clone(); + let result = UploadResult::Success { alias, token }; + session.disarm(); + result + } + Err(e) => { + tracing::warn!("Failed to ingest {}, {:?}", e, e); + + UploadResult::Failure { + message: e.to_string(), + } + } + }; + + repo.complete(upload_id, result).await?; + + Ok(()) +} + +async fn generate( + repo: &R, + store: &S, + target_format: ImageFormat, + source: Alias, + process_path: PathBuf, + process_args: Vec, +) -> Result<(), Error> { + let hash = repo.hash(&source).await?; + + let path_string = process_path.to_string_lossy().to_string(); + let identifier_opt = repo + .variant_identifier::(hash.clone(), path_string) + .await?; + + if identifier_opt.is_some() { + return Ok(()); + } + + crate::generate::generate( + repo, + store, + target_format, + source, + process_path, + process_args, + hash, + ) + .await?; + + Ok(()) +} diff --git a/src/range.rs b/src/range.rs index 3b36bda..51066e9 100644 --- a/src/range.rs +++ b/src/range.rs @@ -17,7 +17,7 @@ pub(crate) fn chop_bytes( if let Some((start, end)) = byte_range.to_satisfiable_range(length) { // END IS INCLUSIVE let end = end as usize + 1; - return Ok(once(ready(Ok(bytes.slice(start as usize..end as usize))))); + return Ok(once(ready(Ok(bytes.slice(start as usize..end))))); } Err(UploadError::Range.into()) @@ -28,16 +28,13 @@ pub(crate) async fn chop_store( store: &S, identifier: &S::Identifier, length: u64, -) -> Result>, Error> -where - Error: From, -{ +) -> Result>, Error> { if let Some((start, end)) = byte_range.to_satisfiable_range(length) { // END IS INCLUSIVE let end = end + 1; - return Ok(store + return store .to_stream(identifier, Some(start), Some(end.saturating_sub(start))) - .await?); + .await; } Err(UploadError::Range.into()) diff --git a/src/repo.rs b/src/repo.rs new file mode 100644 index 0000000..f356a97 --- /dev/null +++ b/src/repo.rs @@ -0,0 +1,723 @@ +use crate::{config, details::Details, error::Error, store::Identifier}; +use futures_util::Stream; +use std::fmt::Debug; +use std::path::PathBuf; +use tracing::debug; +use uuid::Uuid; + +mod old; +pub(crate) mod sled; + +#[derive(Clone, Debug)] +pub(crate) enum Repo { + Sled(self::sled::SledRepo), +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum MaybeUuid { + Uuid(Uuid), + Name(String), +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct Alias { + id: MaybeUuid, + extension: Option, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct DeleteToken { + id: MaybeUuid, +} + +pub(crate) struct AlreadyExists; + 
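process_ingest above only calls session.disarm() after the upload result has been recorded, so a failed or interrupted ingest is still cleaned up when the session is dropped. A small sketch of that disarm-on-success guard pattern; the guard below is a toy, not the actual Session type.

// A guard that performs cleanup on drop unless it has been explicitly disarmed.
struct CleanupGuard {
    armed: bool,
    name: String,
}

impl CleanupGuard {
    fn new(name: &str) -> Self {
        CleanupGuard { armed: true, name: name.to_string() }
    }

    fn disarm(&mut self) {
        self.armed = false;
    }
}

impl Drop for CleanupGuard {
    fn drop(&mut self) {
        if self.armed {
            // e.g. queue cleanup jobs for a partially completed upload
            println!("cleaning up {}", self.name);
        }
    }
}

fn main() {
    let mut ok = CleanupGuard::new("successful-upload");
    ok.disarm(); // success path: nothing happens on drop

    let _failed = CleanupGuard::new("failed-upload"); // still armed: cleanup runs on drop
}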
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct UploadId { + id: Uuid, +} + +pub(crate) enum UploadResult { + Success { alias: Alias, token: DeleteToken }, + Failure { message: String }, +} + +#[async_trait::async_trait(?Send)] +pub(crate) trait FullRepo: + UploadRepo + + SettingsRepo + + IdentifierRepo + + AliasRepo + + QueueRepo + + HashRepo + + Send + + Sync + + Clone + + Debug +{ + async fn identifier_from_alias( + &self, + alias: &Alias, + ) -> Result { + let hash = self.hash(alias).await?; + self.identifier(hash).await + } + + async fn aliases_from_alias(&self, alias: &Alias) -> Result, Error> { + let hash = self.hash(alias).await?; + self.aliases(hash).await + } + + async fn still_identifier_from_alias( + &self, + alias: &Alias, + ) -> Result, Error> { + let hash = self.hash(alias).await?; + let identifier = self.identifier::(hash.clone()).await?; + + match self.details(&identifier).await? { + Some(details) if details.is_motion() => self.motion_identifier::(hash).await, + Some(_) => Ok(Some(identifier)), + None => Ok(None), + } + } +} + +pub(crate) trait BaseRepo { + type Bytes: AsRef<[u8]> + From> + Clone; +} + +#[async_trait::async_trait(?Send)] +pub(crate) trait UploadRepo: BaseRepo { + async fn create(&self, upload_id: UploadId) -> Result<(), Error>; + + async fn wait(&self, upload_id: UploadId) -> Result; + + async fn claim(&self, upload_id: UploadId) -> Result<(), Error>; + + async fn complete(&self, upload_id: UploadId, result: UploadResult) -> Result<(), Error>; +} + +#[async_trait::async_trait(?Send)] +pub(crate) trait QueueRepo: BaseRepo { + async fn requeue_in_progress(&self, worker_prefix: Vec) -> Result<(), Error>; + + async fn push(&self, queue: &'static str, job: Self::Bytes) -> Result<(), Error>; + + async fn pop(&self, queue: &'static str, worker_id: Vec) -> Result; +} + +#[async_trait::async_trait(?Send)] +pub(crate) trait SettingsRepo: BaseRepo { + async fn set(&self, key: &'static str, value: Self::Bytes) -> Result<(), Error>; + async fn get(&self, key: &'static str) -> Result, Error>; + async fn remove(&self, key: &'static str) -> Result<(), Error>; +} + +#[async_trait::async_trait(?Send)] +pub(crate) trait IdentifierRepo: BaseRepo { + async fn relate_details( + &self, + identifier: &I, + details: &Details, + ) -> Result<(), Error>; + async fn details(&self, identifier: &I) -> Result, Error>; + + async fn cleanup(&self, identifier: &I) -> Result<(), Error>; +} + +#[async_trait::async_trait(?Send)] +pub(crate) trait HashRepo: BaseRepo { + type Stream: Stream>; + + async fn hashes(&self) -> Self::Stream; + + async fn create(&self, hash: Self::Bytes) -> Result, Error>; + + async fn relate_alias(&self, hash: Self::Bytes, alias: &Alias) -> Result<(), Error>; + async fn remove_alias(&self, hash: Self::Bytes, alias: &Alias) -> Result<(), Error>; + async fn aliases(&self, hash: Self::Bytes) -> Result, Error>; + + async fn relate_identifier( + &self, + hash: Self::Bytes, + identifier: &I, + ) -> Result<(), Error>; + async fn identifier(&self, hash: Self::Bytes) -> Result; + + async fn relate_variant_identifier( + &self, + hash: Self::Bytes, + variant: String, + identifier: &I, + ) -> Result<(), Error>; + async fn variant_identifier( + &self, + hash: Self::Bytes, + variant: String, + ) -> Result, Error>; + async fn variants( + &self, + hash: Self::Bytes, + ) -> Result, Error>; + + async fn relate_motion_identifier( + &self, + hash: Self::Bytes, + identifier: &I, + ) -> Result<(), Error>; + async fn motion_identifier( + &self, + 
hash: Self::Bytes, + ) -> Result, Error>; + + async fn cleanup(&self, hash: Self::Bytes) -> Result<(), Error>; +} + +#[async_trait::async_trait(?Send)] +pub(crate) trait AliasRepo: BaseRepo { + async fn create(&self, alias: &Alias) -> Result, Error>; + + async fn relate_delete_token( + &self, + alias: &Alias, + delete_token: &DeleteToken, + ) -> Result, Error>; + async fn delete_token(&self, alias: &Alias) -> Result; + + async fn relate_hash(&self, alias: &Alias, hash: Self::Bytes) -> Result<(), Error>; + async fn hash(&self, alias: &Alias) -> Result; + + async fn cleanup(&self, alias: &Alias) -> Result<(), Error>; +} + +impl Repo { + pub(crate) fn open(config: config::Repo) -> color_eyre::Result { + match config { + config::Repo::Sled(config::Sled { + mut path, + cache_capacity, + }) => { + path.push("v0.4.0-alpha.1"); + + let db = ::sled::Config::new() + .cache_capacity(cache_capacity) + .path(path) + .open()?; + + Ok(Self::Sled(self::sled::SledRepo::new(db)?)) + } + } + } + + #[tracing::instrument(skip_all)] + pub(crate) async fn from_db(&self, path: PathBuf) -> color_eyre::Result<()> { + if self.has_migrated().await? { + return Ok(()); + } + + let old = self::old::Old::open(path)?; + + for hash in old.hashes() { + match self { + Self::Sled(repo) => { + if let Err(e) = migrate_hash(repo, &old, hash).await { + tracing::error!("Failed to migrate hash: {}", e); + } + } + } + } + + self.mark_migrated().await?; + + Ok(()) + } + + async fn has_migrated(&self) -> color_eyre::Result { + match self { + Self::Sled(repo) => Ok(repo.get(REPO_MIGRATION_O1).await?.is_some()), + } + } + + async fn mark_migrated(&self) -> color_eyre::Result<()> { + match self { + Self::Sled(repo) => { + repo.set(REPO_MIGRATION_O1, b"1".to_vec().into()).await?; + } + } + + Ok(()) + } +} + +const REPO_MIGRATION_O1: &str = "repo-migration-01"; +const STORE_MIGRATION_PROGRESS: &str = "store-migration-progress"; +const GENERATOR_KEY: &str = "last-path"; + +async fn migrate_hash(repo: &T, old: &old::Old, hash: ::sled::IVec) -> color_eyre::Result<()> +where + T: IdentifierRepo + HashRepo + AliasRepo + SettingsRepo, +{ + if HashRepo::create(repo, hash.to_vec().into()).await?.is_err() { + debug!("Duplicate hash detected"); + return Ok(()); + } + + let main_ident = old.main_identifier(&hash)?.to_vec(); + + repo.relate_identifier(hash.to_vec().into(), &main_ident) + .await?; + + for alias in old.aliases(&hash) { + if let Ok(Ok(())) = AliasRepo::create(repo, &alias).await { + let _ = repo.relate_alias(hash.to_vec().into(), &alias).await; + let _ = repo.relate_hash(&alias, hash.to_vec().into()).await; + + if let Ok(Some(delete_token)) = old.delete_token(&alias) { + let _ = repo.relate_delete_token(&alias, &delete_token).await; + } + } + } + + if let Ok(Some(identifier)) = old.motion_identifier(&hash) { + let _ = repo + .relate_motion_identifier(hash.to_vec().into(), &identifier.to_vec()) + .await; + } + + for (variant_path, identifier) in old.variants(&hash)? { + let variant = variant_path.to_string_lossy().to_string(); + + let _ = repo + .relate_variant_identifier(hash.to_vec().into(), variant, &identifier.to_vec()) + .await; + } + + for (identifier, details) in old.details(&hash)? 
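The repo traits above split storage concerns into narrow capabilities (settings, identifiers, hashes, aliases, queue, uploads) and compose them behind FullRepo, which layers convenience methods such as identifier_from_alias on top. A compact synchronous sketch of that layering with an in-memory repo; the real traits are async and return Results.

trait BaseRepo {
    // Each repo picks its own byte container; the sled repo uses IVec.
    type Bytes: AsRef<[u8]> + From<Vec<u8>> + Clone;
}

trait AliasRepo: BaseRepo {
    fn hash(&self, alias: &str) -> Self::Bytes;
}

trait HashRepo: BaseRepo {
    fn identifier(&self, hash: Self::Bytes) -> Vec<u8>;
}

// FullRepo composes the narrow traits and adds convenience methods on top.
trait FullRepo: AliasRepo + HashRepo {
    fn identifier_from_alias(&self, alias: &str) -> Vec<u8> {
        let hash = self.hash(alias);
        self.identifier(hash)
    }
}

#[derive(Clone)]
struct MemRepo;

impl BaseRepo for MemRepo {
    type Bytes = Vec<u8>;
}

impl AliasRepo for MemRepo {
    fn hash(&self, alias: &str) -> Vec<u8> {
        alias.as_bytes().to_vec() // toy: the "hash" is just the alias bytes
    }
}

impl HashRepo for MemRepo {
    fn identifier(&self, hash: Vec<u8>) -> Vec<u8> {
        hash // toy: the identifier is the hash itself
    }
}

impl FullRepo for MemRepo {}

fn main() {
    assert_eq!(MemRepo.identifier_from_alias("cat.png"), b"cat.png".to_vec());
}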
{ + let _ = repo.relate_details(&identifier.to_vec(), &details).await; + } + + if let Ok(Some(value)) = old.setting(STORE_MIGRATION_PROGRESS.as_bytes()) { + repo.set(STORE_MIGRATION_PROGRESS, value.to_vec().into()) + .await?; + } + + if let Ok(Some(value)) = old.setting(GENERATOR_KEY.as_bytes()) { + repo.set(GENERATOR_KEY, value.to_vec().into()).await?; + } + + Ok(()) +} + +impl MaybeUuid { + fn from_str(s: &str) -> Self { + if let Ok(uuid) = Uuid::parse_str(s) { + MaybeUuid::Uuid(uuid) + } else { + MaybeUuid::Name(s.into()) + } + } + + fn as_bytes(&self) -> &[u8] { + match self { + Self::Uuid(uuid) => &uuid.as_bytes()[..], + Self::Name(name) => name.as_bytes(), + } + } +} + +fn split_at_dot(s: &str) -> Option<(&str, &str)> { + let index = s.find('.')?; + + Some(s.split_at(index)) +} + +impl Alias { + pub(crate) fn generate(extension: String) -> Self { + Alias { + id: MaybeUuid::Uuid(Uuid::new_v4()), + extension: Some(extension), + } + } + + pub(crate) fn from_existing(alias: &str) -> Self { + if let Some((start, end)) = split_at_dot(alias) { + Alias { + id: MaybeUuid::from_str(start), + extension: Some(end.into()), + } + } else { + Alias { + id: MaybeUuid::from_str(alias), + extension: None, + } + } + } + + pub(crate) fn extension(&self) -> Option<&str> { + self.extension.as_deref() + } + + fn to_bytes(&self) -> Vec { + let mut v = self.id.as_bytes().to_vec(); + + if let Some(ext) = self.extension() { + v.extend_from_slice(ext.as_bytes()); + } + + v + } + + fn from_slice(bytes: &[u8]) -> Option { + if let Ok(s) = std::str::from_utf8(bytes) { + Some(Self::from_existing(s)) + } else if bytes.len() >= 16 { + let id = Uuid::from_slice(&bytes[0..16]).expect("Already checked length"); + + let extension = if bytes.len() > 16 { + Some(String::from_utf8_lossy(&bytes[16..]).to_string()) + } else { + None + }; + + Some(Self { + id: MaybeUuid::Uuid(id), + extension, + }) + } else { + None + } + } +} + +impl DeleteToken { + pub(crate) fn from_existing(existing: &str) -> Self { + if let Ok(uuid) = Uuid::parse_str(existing) { + DeleteToken { + id: MaybeUuid::Uuid(uuid), + } + } else { + DeleteToken { + id: MaybeUuid::Name(existing.into()), + } + } + } + + pub(crate) fn generate() -> Self { + Self { + id: MaybeUuid::Uuid(Uuid::new_v4()), + } + } + + fn to_bytes(&self) -> Vec { + self.id.as_bytes().to_vec() + } + + fn from_slice(bytes: &[u8]) -> Option { + if let Ok(s) = std::str::from_utf8(bytes) { + Some(DeleteToken::from_existing(s)) + } else if bytes.len() == 16 { + Some(DeleteToken { + id: MaybeUuid::Uuid(Uuid::from_slice(bytes).ok()?), + }) + } else { + None + } + } +} + +impl UploadId { + pub(crate) fn generate() -> Self { + Self { id: Uuid::new_v4() } + } + + pub(crate) fn as_bytes(&self) -> &[u8] { + &self.id.as_bytes()[..] + } +} + +impl std::str::FromStr for UploadId { + type Err = ::Err; + + fn from_str(s: &str) -> Result { + Ok(UploadId { id: s.parse()? 
}) + } +} + +impl std::fmt::Display for UploadId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.id, f) + } +} + +impl std::fmt::Display for MaybeUuid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Uuid(id) => write!(f, "{}", id), + Self::Name(name) => write!(f, "{}", name), + } + } +} + +impl std::str::FromStr for DeleteToken { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(DeleteToken::from_existing(s)) + } +} + +impl std::fmt::Display for DeleteToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.id) + } +} + +impl std::str::FromStr for Alias { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Alias::from_existing(s)) + } +} + +impl std::fmt::Display for Alias { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(ext) = self.extension() { + write!(f, "{}{}", self.id, ext) + } else { + write!(f, "{}", self.id) + } + } +} + +impl Identifier for Vec { + fn from_bytes(bytes: Vec) -> Result + where + Self: Sized, + { + Ok(bytes) + } + + fn to_bytes(&self) -> Result, Error> { + Ok(self.clone()) + } +} + +#[cfg(test)] +mod tests { + use super::{Alias, DeleteToken, MaybeUuid, Uuid}; + + #[test] + fn string_delete_token() { + let delete_token = DeleteToken::from_existing("blah"); + + assert_eq!( + delete_token, + DeleteToken { + id: MaybeUuid::Name(String::from("blah")) + } + ) + } + + #[test] + fn uuid_string_delete_token() { + let uuid = Uuid::new_v4(); + + let delete_token = DeleteToken::from_existing(&uuid.to_string()); + + assert_eq!( + delete_token, + DeleteToken { + id: MaybeUuid::Uuid(uuid), + } + ) + } + + #[test] + fn bytes_delete_token() { + let delete_token = DeleteToken::from_slice(b"blah").unwrap(); + + assert_eq!( + delete_token, + DeleteToken { + id: MaybeUuid::Name(String::from("blah")) + } + ) + } + + #[test] + fn uuid_bytes_delete_token() { + let uuid = Uuid::new_v4(); + + let delete_token = DeleteToken::from_slice(&uuid.as_bytes()[..]).unwrap(); + + assert_eq!( + delete_token, + DeleteToken { + id: MaybeUuid::Uuid(uuid), + } + ) + } + + #[test] + fn uuid_bytes_string_delete_token() { + let uuid = Uuid::new_v4(); + + let delete_token = DeleteToken::from_slice(uuid.to_string().as_bytes()).unwrap(); + + assert_eq!( + delete_token, + DeleteToken { + id: MaybeUuid::Uuid(uuid), + } + ) + } + + #[test] + fn string_alias() { + let alias = Alias::from_existing("blah"); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Name(String::from("blah")), + extension: None + } + ); + } + + #[test] + fn string_alias_ext() { + let alias = Alias::from_existing("blah.mp4"); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Name(String::from("blah")), + extension: Some(String::from(".mp4")), + } + ); + } + + #[test] + fn uuid_string_alias() { + let uuid = Uuid::new_v4(); + + let alias = Alias::from_existing(&uuid.to_string()); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Uuid(uuid), + extension: None, + } + ) + } + + #[test] + fn uuid_string_alias_ext() { + let uuid = Uuid::new_v4(); + + let alias_str = format!("{}.mp4", uuid); + let alias = Alias::from_existing(&alias_str); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Uuid(uuid), + extension: Some(String::from(".mp4")), + } + ) + } + + #[test] + fn bytes_alias() { + let alias = Alias::from_slice(b"blah").unwrap(); + + assert_eq!( + alias, + Alias { + id: 
MaybeUuid::Name(String::from("blah")), + extension: None + } + ); + } + + #[test] + fn bytes_alias_ext() { + let alias = Alias::from_slice(b"blah.mp4").unwrap(); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Name(String::from("blah")), + extension: Some(String::from(".mp4")), + } + ); + } + + #[test] + fn uuid_bytes_alias() { + let uuid = Uuid::new_v4(); + + let alias = Alias::from_slice(&uuid.as_bytes()[..]).unwrap(); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Uuid(uuid), + extension: None, + } + ) + } + + #[test] + fn uuid_bytes_string_alias() { + let uuid = Uuid::new_v4(); + + let alias = Alias::from_slice(uuid.to_string().as_bytes()).unwrap(); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Uuid(uuid), + extension: None, + } + ) + } + + #[test] + fn uuid_bytes_alias_ext() { + let uuid = Uuid::new_v4(); + + let mut alias_bytes = uuid.as_bytes().to_vec(); + alias_bytes.extend_from_slice(b".mp4"); + + let alias = Alias::from_slice(&alias_bytes).unwrap(); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Uuid(uuid), + extension: Some(String::from(".mp4")), + } + ) + } + + #[test] + fn uuid_bytes_string_alias_ext() { + let uuid = Uuid::new_v4(); + + let alias_str = format!("{}.mp4", uuid); + let alias = Alias::from_slice(alias_str.as_bytes()).unwrap(); + + assert_eq!( + alias, + Alias { + id: MaybeUuid::Uuid(uuid), + extension: Some(String::from(".mp4")), + } + ) + } +} diff --git a/src/repo/old.rs b/src/repo/old.rs new file mode 100644 index 0000000..c555a17 --- /dev/null +++ b/src/repo/old.rs @@ -0,0 +1,184 @@ +// TREE STRUCTURE +// - Alias Tree +// - alias -> hash +// - alias / id -> u64(id) +// - alias / delete -> delete token +// - Main Tree +// - hash -> filename +// - hash 0 u64(id) -> alias +// - Filename Tree +// - filename -> hash +// - Details Tree +// - filename / S::Identifier -> details +// - Identifier Tree +// - filename -> S::Identifier +// - filename / variant path -> S::Identifier +// - filename / motion -> S::Identifier +// - Settings Tree +// - store-migration-progress -> Path Tree Key + +use super::{Alias, DeleteToken, Details}; +use std::path::PathBuf; + +mod migrate; + +#[derive(Debug)] +struct OldDbError(&'static str); + +impl std::fmt::Display for OldDbError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl std::error::Error for OldDbError {} + +pub(super) struct Old { + alias_tree: ::sled::Tree, + filename_tree: ::sled::Tree, + main_tree: ::sled::Tree, + details_tree: ::sled::Tree, + settings_tree: ::sled::Tree, + identifier_tree: ::sled::Tree, + _db: ::sled::Db, +} + +impl Old { + pub(super) fn open(path: PathBuf) -> color_eyre::Result { + let db = migrate::LatestDb::exists(path).migrate()?; + + Ok(Self { + alias_tree: db.open_tree("alias")?, + filename_tree: db.open_tree("filename")?, + main_tree: db.open_tree("main")?, + details_tree: db.open_tree("details")?, + settings_tree: db.open_tree("settings")?, + identifier_tree: db.open_tree("path")?, + _db: db, + }) + } + + pub(super) fn setting(&self, key: &[u8]) -> color_eyre::Result> { + Ok(self.settings_tree.get(key)?) + } + + pub(super) fn hashes(&self) -> impl std::iter::Iterator { + self.filename_tree + .iter() + .values() + .filter_map(|res| res.ok()) + } + + pub(super) fn details( + &self, + hash: &sled::IVec, + ) -> color_eyre::Result> { + let filename = self + .main_tree + .get(hash)? 
+ .ok_or(OldDbError("Missing filename"))?; + + let filename = String::from_utf8_lossy(&filename); + + Ok(self + .identifier_tree + .scan_prefix(filename.as_bytes()) + .values() + .filter_map(Result::ok) + .filter_map(|identifier| { + let mut key = filename.as_bytes().to_vec(); + key.push(b'/'); + key.extend_from_slice(&identifier); + + let details = self.details_tree.get(key).ok()??; + let details = serde_json::from_slice(&details).ok()?; + + Some((identifier, details)) + }) + .collect()) + } + + pub(super) fn main_identifier(&self, hash: &sled::IVec) -> color_eyre::Result { + let filename = self + .main_tree + .get(hash)? + .ok_or(OldDbError("Missing filename"))?; + + Ok(self + .identifier_tree + .get(filename)? + .ok_or(OldDbError("Missing identifier"))?) + } + + pub(super) fn variants( + &self, + hash: &sled::IVec, + ) -> color_eyre::Result> { + let filename = self + .main_tree + .get(hash)? + .ok_or(OldDbError("Missing filename"))?; + + let filename_string = String::from_utf8_lossy(&filename); + + let variant_prefix = format!("{}/", filename_string); + + Ok(self + .identifier_tree + .scan_prefix(&variant_prefix) + .filter_map(|res| res.ok()) + .filter_map(|(key, value)| { + let variant_path_bytes = &key[variant_prefix.as_bytes().len()..]; + if variant_path_bytes == b"motion" { + return None; + } + + let path = String::from_utf8(variant_path_bytes.to_vec()).ok()?; + let mut path = PathBuf::from(path); + let extension = path.extension()?.to_str()?.to_string(); + path.pop(); + path.push(extension); + + Some((path, value)) + }) + .collect()) + } + + pub(super) fn motion_identifier( + &self, + hash: &sled::IVec, + ) -> color_eyre::Result> { + let filename = self + .main_tree + .get(hash)? + .ok_or(OldDbError("Missing filename"))?; + + let filename_string = String::from_utf8_lossy(&filename); + + let motion_key = format!("{}/motion", filename_string); + + Ok(self.filename_tree.get(motion_key)?) + } + + pub(super) fn aliases(&self, hash: &sled::IVec) -> Vec { + let mut key = hash.to_vec(); + key.push(0); + + self.main_tree + .scan_prefix(key) + .values() + .filter_map(|res| res.ok()) + .filter_map(|alias| Alias::from_slice(&alias)) + .collect() + } + + pub(super) fn delete_token(&self, alias: &Alias) -> color_eyre::Result> { + let key = format!("{}/delete", alias); + + if let Some(ivec) = self.alias_tree.get(key)? 
{ + return Ok(DeleteToken::from_slice(&ivec)); + } + + Ok(None) + } +} diff --git a/src/repo/old/migrate.rs b/src/repo/old/migrate.rs new file mode 100644 index 0000000..5548340 --- /dev/null +++ b/src/repo/old/migrate.rs @@ -0,0 +1,97 @@ +use crate::Error; +use std::path::PathBuf; + +mod s034; + +type SledIter = Box, Vec), Error>>>; + +trait SledDb { + type SledTree: SledTree; + + fn open_tree(&self, name: &str) -> Result; + + fn self_tree(&self) -> &Self::SledTree; +} + +impl SledDb for &T +where + T: SledDb, +{ + type SledTree = T::SledTree; + + fn open_tree(&self, name: &str) -> Result { + (*self).open_tree(name) + } + + fn self_tree(&self) -> &Self::SledTree { + (*self).self_tree() + } +} + +trait SledTree { + fn get(&self, key: K) -> Result>, Error> + where + K: AsRef<[u8]>; + + fn insert(&self, key: K, value: V) -> Result<(), Error> + where + K: AsRef<[u8]>, + V: AsRef<[u8]>; + + fn iter(&self) -> SledIter; + + fn range(&self, range: R) -> SledIter + where + K: AsRef<[u8]>, + R: std::ops::RangeBounds; + + fn flush(&self) -> Result<(), Error>; +} + +pub(crate) struct LatestDb { + root_dir: PathBuf, + version: DbVersion, +} + +impl LatestDb { + pub(crate) fn exists(root_dir: PathBuf) -> Self { + let version = DbVersion::exists(root_dir.clone()); + + LatestDb { root_dir, version } + } + + pub(crate) fn migrate(self) -> Result { + let LatestDb { root_dir, version } = self; + + loop { + let root_dir2 = root_dir.clone(); + let res = std::panic::catch_unwind(move || version.migrate(root_dir2)); + + if let Ok(res) = res { + return res; + } + } + } +} + +#[derive(Clone, Copy)] +enum DbVersion { + Sled034, + Fresh, +} + +impl DbVersion { + fn exists(root: PathBuf) -> Self { + if s034::exists(root.clone()) && !s034::migrating(root) { + return DbVersion::Sled034; + } + + DbVersion::Fresh + } + + fn migrate(self, root: PathBuf) -> Result { + match self { + DbVersion::Sled034 | DbVersion::Fresh => s034::open(root), + } + } +} diff --git a/src/migrate/s034.rs b/src/repo/old/migrate/s034.rs similarity index 66% rename from src/migrate/s034.rs rename to src/repo/old/migrate/s034.rs index 5638b46..16340ed 100644 --- a/src/migrate/s034.rs +++ b/src/repo/old/migrate/s034.rs @@ -1,6 +1,6 @@ use crate::{ - migrate::{SledDb, SledIter, SledTree}, - UploadError, + error::Error, + repo::old::migrate::{SledDb, SledIter, SledTree}, }; use sled as sled034; use std::path::PathBuf; @@ -14,8 +14,8 @@ pub(crate) fn exists(mut base: PathBuf) -> bool { std::fs::metadata(base).is_ok() } -pub(crate) fn migrating(base: PathBuf, cache_capacity: u64) -> bool { - if let Ok(db) = open(base, cache_capacity) { +pub(crate) fn migrating(base: PathBuf) -> bool { + if let Ok(db) = open(base) { if let Ok(tree) = db.open_tree("migrate") { if let Ok(Some(_)) = tree.get("done") { return false; @@ -26,12 +26,12 @@ pub(crate) fn migrating(base: PathBuf, cache_capacity: u64) -> bool { true } -pub(crate) fn open(mut base: PathBuf, cache_capacity: u64) -> Result { +pub(crate) fn open(mut base: PathBuf) -> Result { base.push("sled"); base.push(SLED_034); let db = sled034::Config::default() - .cache_capacity(cache_capacity) + .cache_capacity(1024 * 1024 * 64) .path(base) .open()?; @@ -41,7 +41,7 @@ pub(crate) fn open(mut base: PathBuf, cache_capacity: u64) -> Result Result { + fn open_tree(&self, name: &str) -> Result { Ok(sled034::Db::open_tree(self, name)?) 
} @@ -51,14 +51,14 @@ impl SledDb for sled034::Db { } impl SledTree for sled034::Tree { - fn get(&self, key: K) -> Result>, UploadError> + fn get(&self, key: K) -> Result>, Error> where K: AsRef<[u8]>, { Ok(sled034::Tree::get(self, key)?.map(|v| Vec::from(v.as_ref()))) } - fn insert(&self, key: K, value: V) -> Result<(), UploadError> + fn insert(&self, key: K, value: V) -> Result<(), Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, @@ -69,7 +69,7 @@ impl SledTree for sled034::Tree { fn iter(&self) -> SledIter { Box::new(sled034::Tree::iter(self).map(|res| { res.map(|(k, v)| (k.as_ref().to_vec(), v.as_ref().to_vec())) - .map_err(UploadError::from) + .map_err(Error::from) })) } @@ -80,13 +80,11 @@ impl SledTree for sled034::Tree { { Box::new(sled034::Tree::range(self, range).map(|res| { res.map(|(k, v)| (k.as_ref().to_vec(), v.as_ref().to_vec())) - .map_err(UploadError::from) + .map_err(Error::from) })) } - fn flush(&self) -> Result<(), UploadError> { - sled034::Tree::flush(self) - .map(|_| ()) - .map_err(UploadError::from) + fn flush(&self) -> Result<(), Error> { + sled034::Tree::flush(self).map(|_| ()).map_err(Error::from) } } diff --git a/src/repo/sled.rs b/src/repo/sled.rs new file mode 100644 index 0000000..7955309 --- /dev/null +++ b/src/repo/sled.rs @@ -0,0 +1,701 @@ +use crate::{ + error::{Error, UploadError}, + repo::{ + Alias, AliasRepo, AlreadyExists, BaseRepo, DeleteToken, Details, FullRepo, HashRepo, + Identifier, IdentifierRepo, QueueRepo, SettingsRepo, UploadId, UploadRepo, UploadResult, + }, + serde_str::Serde, + stream::from_iterator, +}; +use futures_util::Stream; +use sled::{Db, IVec, Tree}; +use std::{ + collections::HashMap, + pin::Pin, + sync::{Arc, RwLock}, +}; +use tokio::sync::Notify; + +macro_rules! b { + ($self:ident.$ident:ident, $expr:expr) => {{ + let $ident = $self.$ident.clone(); + + actix_rt::task::spawn_blocking(move || $expr) + .await + .map_err(SledError::from)?? 
+ }}; +} + +#[derive(Debug, thiserror::Error)] +pub(crate) enum SledError { + #[error("Error in database")] + Sled(#[from] sled::Error), + + #[error("Invalid details json")] + Details(#[from] serde_json::Error), + + #[error("Required field was not present")] + Missing, + + #[error("Operation panicked")] + Panic, +} + +#[derive(Clone)] +pub(crate) struct SledRepo { + settings: Tree, + identifier_details: Tree, + hashes: Tree, + hash_aliases: Tree, + hash_identifiers: Tree, + hash_variant_identifiers: Tree, + hash_motion_identifiers: Tree, + aliases: Tree, + alias_hashes: Tree, + alias_delete_tokens: Tree, + queue: Tree, + in_progress_queue: Tree, + queue_notifier: Arc>>>, + uploads: Tree, + db: Db, +} + +impl SledRepo { + pub(crate) fn new(db: Db) -> Result { + Ok(SledRepo { + settings: db.open_tree("pict-rs-settings-tree")?, + identifier_details: db.open_tree("pict-rs-identifier-details-tree")?, + hashes: db.open_tree("pict-rs-hashes-tree")?, + hash_aliases: db.open_tree("pict-rs-hash-aliases-tree")?, + hash_identifiers: db.open_tree("pict-rs-hash-identifiers-tree")?, + hash_variant_identifiers: db.open_tree("pict-rs-hash-variant-identifiers-tree")?, + hash_motion_identifiers: db.open_tree("pict-rs-hash-motion-identifiers-tree")?, + aliases: db.open_tree("pict-rs-aliases-tree")?, + alias_hashes: db.open_tree("pict-rs-alias-hashes-tree")?, + alias_delete_tokens: db.open_tree("pict-rs-alias-delete-tokens-tree")?, + queue: db.open_tree("pict-rs-queue-tree")?, + in_progress_queue: db.open_tree("pict-rs-in-progress-queue-tree")?, + queue_notifier: Arc::new(RwLock::new(HashMap::new())), + uploads: db.open_tree("pict-rs-uploads-tree")?, + db, + }) + } +} + +impl BaseRepo for SledRepo { + type Bytes = IVec; +} + +impl FullRepo for SledRepo {} + +#[derive(serde::Deserialize, serde::Serialize)] +enum InnerUploadResult { + Success { + alias: Serde, + token: Serde, + }, + Failure { + message: String, + }, +} + +impl From for InnerUploadResult { + fn from(u: UploadResult) -> Self { + match u { + UploadResult::Success { alias, token } => InnerUploadResult::Success { + alias: Serde::new(alias), + token: Serde::new(token), + }, + UploadResult::Failure { message } => InnerUploadResult::Failure { message }, + } + } +} + +impl From for UploadResult { + fn from(i: InnerUploadResult) -> Self { + match i { + InnerUploadResult::Success { alias, token } => UploadResult::Success { + alias: Serde::into_inner(alias), + token: Serde::into_inner(token), + }, + InnerUploadResult::Failure { message } => UploadResult::Failure { message }, + } + } +} + +#[async_trait::async_trait(?Send)] +impl UploadRepo for SledRepo { + async fn create(&self, upload_id: UploadId) -> Result<(), Error> { + b!(self.uploads, uploads.insert(upload_id.as_bytes(), b"1")); + Ok(()) + } + + async fn wait(&self, upload_id: UploadId) -> Result { + let mut subscriber = self.uploads.watch_prefix(upload_id.as_bytes()); + + let bytes = upload_id.as_bytes().to_vec(); + let opt = b!(self.uploads, uploads.get(bytes)); + + if let Some(bytes) = opt { + if bytes != b"1" { + let result: InnerUploadResult = serde_json::from_slice(&bytes)?; + return Ok(result.into()); + } + } else { + return Err(UploadError::NoFiles.into()); + } + + while let Some(event) = (&mut subscriber).await { + match event { + sled::Event::Remove { .. } => { + return Err(UploadError::NoFiles.into()); + } + sled::Event::Insert { value, .. 
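The b! macro above exists because sled's API is blocking: it clones the cheap Tree handle and moves the operation onto a blocking thread before awaiting the result. A self-contained sketch of that pattern using tokio's spawn_blocking; pict-rs goes through actix_rt, which wraps the same primitive.

use sled::{IVec, Tree};

// The same shape as SettingsRepo::get for SledRepo: clone the Tree handle,
// run the blocking sled call off the async executor, then await the result.
async fn get_setting(settings: &Tree, key: &'static str) -> Result<Option<IVec>, sled::Error> {
    let settings = settings.clone();
    tokio::task::spawn_blocking(move || settings.get(key))
        .await
        .expect("blocking task panicked")
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let db = sled::Config::new().temporary(true).open()?;
    let settings = db.open_tree("pict-rs-settings-tree")?;

    settings.insert("last-path", b"0/1/2".to_vec())?;
    let value = get_setting(&settings, "last-path").await?;
    assert_eq!(value.as_deref(), Some(&b"0/1/2"[..]));

    Ok(())
}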
} => { + if value != b"1" { + let result: InnerUploadResult = serde_json::from_slice(&value)?; + return Ok(result.into()); + } + } + } + } + + Err(UploadError::Canceled.into()) + } + + async fn claim(&self, upload_id: UploadId) -> Result<(), Error> { + b!(self.uploads, uploads.remove(upload_id.as_bytes())); + Ok(()) + } + + async fn complete(&self, upload_id: UploadId, result: UploadResult) -> Result<(), Error> { + let result: InnerUploadResult = result.into(); + let result = serde_json::to_vec(&result)?; + + b!(self.uploads, uploads.insert(upload_id.as_bytes(), result)); + + Ok(()) + } +} + +#[async_trait::async_trait(?Send)] +impl QueueRepo for SledRepo { + #[tracing::instrument(skip_all, fields(worker_id = %String::from_utf8_lossy(&worker_prefix)))] + async fn requeue_in_progress(&self, worker_prefix: Vec) -> Result<(), Error> { + let vec: Vec<(String, IVec)> = b!(self.in_progress_queue, { + let vec = in_progress_queue + .scan_prefix(worker_prefix) + .values() + .filter_map(Result::ok) + .filter_map(|ivec| { + let index = ivec.as_ref().iter().enumerate().find_map(|(index, byte)| { + if *byte == 0 { + Some(index) + } else { + None + } + })?; + + let (queue, job) = ivec.split_at(index); + if queue.is_empty() || job.len() <= 1 { + return None; + } + let job = &job[1..]; + + Some((String::from_utf8_lossy(queue).to_string(), IVec::from(job))) + }) + .collect::>(); + + Ok(vec) as Result<_, Error> + }); + + let db = self.db.clone(); + b!(self.queue, { + for (queue_name, job) in vec { + let id = db.generate_id()?; + let mut key = queue_name.as_bytes().to_vec(); + key.extend(id.to_be_bytes()); + + queue.insert(key, job)?; + } + + Ok(()) as Result<(), Error> + }); + + Ok(()) + } + + #[tracing::instrument(skip(self, job), fields(worker_id = %String::from_utf8_lossy(&job)))] + async fn push(&self, queue_name: &'static str, job: Self::Bytes) -> Result<(), Error> { + let id = self.db.generate_id()?; + let mut key = queue_name.as_bytes().to_vec(); + key.extend(id.to_be_bytes()); + + b!(self.queue, queue.insert(key, job)); + + if let Some(notifier) = self.queue_notifier.read().unwrap().get(&queue_name) { + notifier.notify_one(); + return Ok(()); + } + + self.queue_notifier + .write() + .unwrap() + .entry(queue_name) + .or_insert_with(|| Arc::new(Notify::new())) + .notify_one(); + + Ok(()) + } + + #[tracing::instrument(skip(self, worker_id), fields(worker_id = %String::from_utf8_lossy(&worker_id)))] + async fn pop( + &self, + queue_name: &'static str, + worker_id: Vec, + ) -> Result { + loop { + let in_progress_queue = self.in_progress_queue.clone(); + + let worker_id = worker_id.clone(); + let job = b!(self.queue, { + in_progress_queue.remove(&worker_id)?; + + while let Some((key, job)) = queue + .scan_prefix(queue_name.as_bytes()) + .find_map(Result::ok) + { + let mut in_progress_value = queue_name.as_bytes().to_vec(); + in_progress_value.push(0); + in_progress_value.extend_from_slice(&job); + + in_progress_queue.insert(&worker_id, in_progress_value)?; + + if queue.remove(key)?.is_some() { + return Ok(Some(job)); + } + + in_progress_queue.remove(&worker_id)?; + } + + Ok(None) as Result<_, SledError> + }); + + if let Some(job) = job { + return Ok(job); + } + + let opt = self + .queue_notifier + .read() + .unwrap() + .get(&queue_name) + .map(Arc::clone); + + let notify = if let Some(notify) = opt { + notify + } else { + let mut guard = self.queue_notifier.write().unwrap(); + let entry = guard + .entry(queue_name) + .or_insert_with(|| Arc::new(Notify::new())); + Arc::clone(entry) + }; + + 
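push above builds each queue key from the queue name followed by a db-generated id in big-endian form, so pop's scan_prefix over the queue name walks jobs in insertion order. A tiny sketch of why the big-endian encoding matters for that ordering:

fn queue_key(queue_name: &str, id: u64) -> Vec<u8> {
    let mut key = queue_name.as_bytes().to_vec();
    // big-endian keeps lexicographic byte order in line with numeric id order
    key.extend(id.to_be_bytes());
    key
}

fn main() {
    let first = queue_key("cleanup", 1);
    let second = queue_key("cleanup", 2);
    let much_later = queue_key("cleanup", 300);

    assert!(first.starts_with(b"cleanup"));
    assert!(first < second && second < much_later);

    // with little-endian the order would break: 256 encodes as [0, 1, ...],
    // which sorts before 1 encoded as [1, 0, ...]
    let le = |id: u64| {
        let mut k = b"cleanup".to_vec();
        k.extend(id.to_le_bytes());
        k
    };
    assert!(le(256) < le(1));
}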
notify.notified().await + } + } +} + +#[async_trait::async_trait(?Send)] +impl SettingsRepo for SledRepo { + #[tracing::instrument(skip(value))] + async fn set(&self, key: &'static str, value: Self::Bytes) -> Result<(), Error> { + b!(self.settings, settings.insert(key, value)); + + Ok(()) + } + + #[tracing::instrument] + async fn get(&self, key: &'static str) -> Result, Error> { + let opt = b!(self.settings, settings.get(key)); + + Ok(opt) + } + + #[tracing::instrument] + async fn remove(&self, key: &'static str) -> Result<(), Error> { + b!(self.settings, settings.remove(key)); + + Ok(()) + } +} + +fn variant_key(hash: &[u8], variant: &str) -> Vec { + let mut bytes = hash.to_vec(); + bytes.push(b'/'); + bytes.extend_from_slice(variant.as_bytes()); + bytes +} + +fn variant_from_key(hash: &[u8], key: &[u8]) -> Option { + let prefix_len = hash.len() + 1; + let variant_bytes = key.get(prefix_len..)?.to_vec(); + String::from_utf8(variant_bytes).ok() +} + +#[async_trait::async_trait(?Send)] +impl IdentifierRepo for SledRepo { + #[tracing::instrument] + async fn relate_details( + &self, + identifier: &I, + details: &Details, + ) -> Result<(), Error> { + let key = identifier.to_bytes()?; + let details = serde_json::to_vec(&details)?; + + b!( + self.identifier_details, + identifier_details.insert(key, details) + ); + + Ok(()) + } + + #[tracing::instrument] + async fn details(&self, identifier: &I) -> Result, Error> { + let key = identifier.to_bytes()?; + + let opt = b!(self.identifier_details, identifier_details.get(key)); + + if let Some(ivec) = opt { + Ok(Some(serde_json::from_slice(&ivec)?)) + } else { + Ok(None) + } + } + + #[tracing::instrument] + async fn cleanup(&self, identifier: &I) -> Result<(), Error> { + let key = identifier.to_bytes()?; + + b!(self.identifier_details, identifier_details.remove(key)); + + Ok(()) + } +} + +type StreamItem = Result; +type LocalBoxStream<'a, T> = Pin + 'a>>; + +fn hash_alias_key(hash: &IVec, alias: &Alias) -> Vec { + let mut v = hash.to_vec(); + v.append(&mut alias.to_bytes()); + v +} + +#[async_trait::async_trait(?Send)] +impl HashRepo for SledRepo { + type Stream = LocalBoxStream<'static, StreamItem>; + + async fn hashes(&self) -> Self::Stream { + let iter = self + .hashes + .iter() + .keys() + .map(|res| res.map_err(Error::from)); + + Box::pin(from_iterator(iter, 8)) + } + + #[tracing::instrument] + async fn create(&self, hash: Self::Bytes) -> Result, Error> { + let res = b!(self.hashes, { + let hash2 = hash.clone(); + hashes.compare_and_swap(hash, None as Option, Some(hash2)) + }); + + Ok(res.map_err(|_| AlreadyExists)) + } + + #[tracing::instrument] + async fn relate_alias(&self, hash: Self::Bytes, alias: &Alias) -> Result<(), Error> { + let key = hash_alias_key(&hash, alias); + let value = alias.to_bytes(); + + b!(self.hash_aliases, hash_aliases.insert(key, value)); + + Ok(()) + } + + #[tracing::instrument] + async fn remove_alias(&self, hash: Self::Bytes, alias: &Alias) -> Result<(), Error> { + let key = hash_alias_key(&hash, alias); + + b!(self.hash_aliases, hash_aliases.remove(key)); + + Ok(()) + } + + #[tracing::instrument] + async fn aliases(&self, hash: Self::Bytes) -> Result, Error> { + let v = b!(self.hash_aliases, { + Ok(hash_aliases + .scan_prefix(hash) + .values() + .filter_map(Result::ok) + .filter_map(|ivec| Alias::from_slice(&ivec)) + .collect::>()) as Result<_, sled::Error> + }); + + Ok(v) + } + + #[tracing::instrument] + async fn relate_identifier( + &self, + hash: Self::Bytes, + identifier: &I, + ) -> Result<(), Error> { + let bytes 
= identifier.to_bytes()?; + + b!(self.hash_identifiers, hash_identifiers.insert(hash, bytes)); + + Ok(()) + } + + #[tracing::instrument] + async fn identifier(&self, hash: Self::Bytes) -> Result { + let opt = b!(self.hash_identifiers, hash_identifiers.get(hash)); + + opt.ok_or(SledError::Missing) + .map_err(Error::from) + .and_then(|ivec| I::from_bytes(ivec.to_vec())) + } + + #[tracing::instrument] + async fn relate_variant_identifier( + &self, + hash: Self::Bytes, + variant: String, + identifier: &I, + ) -> Result<(), Error> { + let key = variant_key(&hash, &variant); + let value = identifier.to_bytes()?; + + b!( + self.hash_variant_identifiers, + hash_variant_identifiers.insert(key, value) + ); + + Ok(()) + } + + #[tracing::instrument] + async fn variant_identifier( + &self, + hash: Self::Bytes, + variant: String, + ) -> Result, Error> { + let key = variant_key(&hash, &variant); + + let opt = b!( + self.hash_variant_identifiers, + hash_variant_identifiers.get(key) + ); + + if let Some(ivec) = opt { + Ok(Some(I::from_bytes(ivec.to_vec())?)) + } else { + Ok(None) + } + } + + #[tracing::instrument] + async fn variants( + &self, + hash: Self::Bytes, + ) -> Result, Error> { + let vec = b!( + self.hash_variant_identifiers, + Ok(hash_variant_identifiers + .scan_prefix(&hash) + .filter_map(|res| res.ok()) + .filter_map(|(key, ivec)| { + let identifier = I::from_bytes(ivec.to_vec()).ok()?; + let variant = variant_from_key(&hash, &key)?; + + Some((variant, identifier)) + }) + .collect::>()) as Result, sled::Error> + ); + + Ok(vec) + } + + #[tracing::instrument] + async fn relate_motion_identifier( + &self, + hash: Self::Bytes, + identifier: &I, + ) -> Result<(), Error> { + let bytes = identifier.to_bytes()?; + + b!( + self.hash_motion_identifiers, + hash_motion_identifiers.insert(hash, bytes) + ); + + Ok(()) + } + + #[tracing::instrument] + async fn motion_identifier( + &self, + hash: Self::Bytes, + ) -> Result, Error> { + let opt = b!( + self.hash_motion_identifiers, + hash_motion_identifiers.get(hash) + ); + + if let Some(ivec) = opt { + Ok(Some(I::from_bytes(ivec.to_vec())?)) + } else { + Ok(None) + } + } + + #[tracing::instrument] + async fn cleanup(&self, hash: Self::Bytes) -> Result<(), Error> { + let hash2 = hash.clone(); + b!(self.hashes, hashes.remove(hash2)); + + let hash2 = hash.clone(); + b!(self.hash_identifiers, hash_identifiers.remove(hash2)); + + let hash2 = hash.clone(); + b!( + self.hash_motion_identifiers, + hash_motion_identifiers.remove(hash2) + ); + + let aliases = self.aliases(hash.clone()).await?; + let hash2 = hash.clone(); + b!(self.hash_aliases, { + for alias in aliases { + let key = hash_alias_key(&hash2, &alias); + + let _ = hash_aliases.remove(key); + } + Ok(()) as Result<(), sled::Error> + }); + + let variant_keys = b!(self.hash_variant_identifiers, { + let v = hash_variant_identifiers + .scan_prefix(hash) + .keys() + .filter_map(Result::ok) + .collect::>(); + + Ok(v) as Result, sled::Error> + }); + b!(self.hash_variant_identifiers, { + for key in variant_keys { + let _ = hash_variant_identifiers.remove(key); + } + Ok(()) as Result<(), sled::Error> + }); + + Ok(()) + } +} + +#[async_trait::async_trait(?Send)] +impl AliasRepo for SledRepo { + #[tracing::instrument] + async fn create(&self, alias: &Alias) -> Result, Error> { + let bytes = alias.to_bytes(); + let bytes2 = bytes.clone(); + + let res = b!( + self.aliases, + aliases.compare_and_swap(bytes, None as Option, Some(bytes2)) + ); + + Ok(res.map_err(|_| AlreadyExists)) + } + + #[tracing::instrument] + async fn 
relate_delete_token( + &self, + alias: &Alias, + delete_token: &DeleteToken, + ) -> Result, Error> { + let key = alias.to_bytes(); + let token = delete_token.to_bytes(); + + let res = b!( + self.alias_delete_tokens, + alias_delete_tokens.compare_and_swap(key, None as Option, Some(token)) + ); + + Ok(res.map_err(|_| AlreadyExists)) + } + + #[tracing::instrument] + async fn delete_token(&self, alias: &Alias) -> Result { + let key = alias.to_bytes(); + + let opt = b!(self.alias_delete_tokens, alias_delete_tokens.get(key)); + + opt.and_then(|ivec| DeleteToken::from_slice(&ivec)) + .ok_or(SledError::Missing) + .map_err(Error::from) + } + + #[tracing::instrument] + async fn relate_hash(&self, alias: &Alias, hash: Self::Bytes) -> Result<(), Error> { + let key = alias.to_bytes(); + + b!(self.alias_hashes, alias_hashes.insert(key, hash)); + + Ok(()) + } + + #[tracing::instrument] + async fn hash(&self, alias: &Alias) -> Result { + let key = alias.to_bytes(); + + let opt = b!(self.alias_hashes, alias_hashes.get(key)); + + opt.ok_or(SledError::Missing).map_err(Error::from) + } + + #[tracing::instrument] + async fn cleanup(&self, alias: &Alias) -> Result<(), Error> { + let key = alias.to_bytes(); + + let key2 = key.clone(); + b!(self.aliases, aliases.remove(key2)); + + let key2 = key.clone(); + b!(self.alias_delete_tokens, alias_delete_tokens.remove(key2)); + + b!(self.alias_hashes, alias_hashes.remove(key)); + + Ok(()) + } +} + +impl std::fmt::Debug for SledRepo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SledRepo").finish() + } +} + +impl From for SledError { + fn from(_: actix_rt::task::JoinError) -> Self { + SledError::Panic + } +} diff --git a/src/serde_str.rs b/src/serde_str.rs index 53c6d1b..be311e7 100644 --- a/src/serde_str.rs +++ b/src/serde_str.rs @@ -12,6 +12,22 @@ impl Serde { pub(crate) fn new(inner: T) -> Self { Serde { inner } } + + pub(crate) fn into_inner(this: Self) -> T { + this.inner + } +} + +impl AsRef for Serde { + fn as_ref(&self) -> &T { + &self.inner + } +} + +impl AsMut for Serde { + fn as_mut(&mut self) -> &mut T { + &mut self.inner + } } impl Deref for Serde { diff --git a/src/store.rs b/src/store.rs index d9af237..4f8f0b0 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1,49 +1,37 @@ -use std::fmt::Debug; - +use crate::error::Error; use actix_web::web::Bytes; use futures_util::stream::Stream; +use std::fmt::Debug; use tokio::io::{AsyncRead, AsyncWrite}; pub(crate) mod file_store; -#[cfg(feature = "object-storage")] pub(crate) mod object_store; pub(crate) trait Identifier: Send + Sync + Clone + Debug { - type Error: std::error::Error; + fn to_bytes(&self) -> Result, Error>; - fn to_bytes(&self) -> Result, Self::Error>; - - fn from_bytes(bytes: Vec) -> Result + fn from_bytes(bytes: Vec) -> Result where Self: Sized; } #[async_trait::async_trait(?Send)] -pub(crate) trait Store: Send + Sync + Clone + Debug + 'static { - type Error: std::error::Error; - type Identifier: Identifier; - type Stream: Stream>; +pub(crate) trait Store: Send + Sync + Clone + Debug { + type Identifier: Identifier + 'static; + type Stream: Stream> + 'static; - async fn save_async_read( - &self, - reader: &mut Reader, - filename: &str, - ) -> Result + async fn save_async_read(&self, reader: &mut Reader) -> Result where Reader: AsyncRead + Unpin; - async fn save_bytes( - &self, - bytes: Bytes, - filename: &str, - ) -> Result; + async fn save_bytes(&self, bytes: Bytes) -> Result; async fn to_stream( &self, identifier: &Self::Identifier, from_start: Option, 
len: Option, - ) -> Result; + ) -> Result; async fn read_into( &self, @@ -53,7 +41,55 @@ pub(crate) trait Store: Send + Sync + Clone + Debug + 'static { where Writer: AsyncWrite + Send + Unpin; - async fn len(&self, identifier: &Self::Identifier) -> Result; + async fn len(&self, identifier: &Self::Identifier) -> Result; - async fn remove(&self, identifier: &Self::Identifier) -> Result<(), Self::Error>; + async fn remove(&self, identifier: &Self::Identifier) -> Result<(), Error>; +} + +#[async_trait::async_trait(?Send)] +impl<'a, T> Store for &'a T +where + T: Store, +{ + type Identifier = T::Identifier; + type Stream = T::Stream; + + async fn save_async_read(&self, reader: &mut Reader) -> Result + where + Reader: AsyncRead + Unpin, + { + T::save_async_read(self, reader).await + } + + async fn save_bytes(&self, bytes: Bytes) -> Result { + T::save_bytes(self, bytes).await + } + + async fn to_stream( + &self, + identifier: &Self::Identifier, + from_start: Option, + len: Option, + ) -> Result { + T::to_stream(self, identifier, from_start, len).await + } + + async fn read_into( + &self, + identifier: &Self::Identifier, + writer: &mut Writer, + ) -> Result<(), std::io::Error> + where + Writer: AsyncWrite + Send + Unpin, + { + T::read_into(self, identifier, writer).await + } + + async fn len(&self, identifier: &Self::Identifier) -> Result { + T::len(self, identifier).await + } + + async fn remove(&self, identifier: &Self::Identifier) -> Result<(), Error> { + T::remove(self, identifier).await + } } diff --git a/src/store/file_store.rs b/src/store/file_store.rs index 1b885df..dede6f3 100644 --- a/src/store/file_store.rs +++ b/src/store/file_store.rs @@ -1,4 +1,9 @@ -use crate::{file::File, store::Store}; +use crate::{ + error::Error, + file::File, + repo::{Repo, SettingsRepo}, + store::Store, +}; use actix_web::web::Bytes; use futures_util::stream::Stream; use std::{ @@ -10,24 +15,19 @@ use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{debug, error, instrument}; mod file_id; -mod restructure; pub(crate) use file_id::FileId; // - Settings Tree // - last-path -> last generated path -// - fs-restructure-01-complete -> bool -const GENERATOR_KEY: &[u8] = b"last-path"; +const GENERATOR_KEY: &str = "last-path"; #[derive(Debug, thiserror::Error)] pub(crate) enum FileError { - #[error(transparent)] - Sled(#[from] sled::Error), - - #[error(transparent)] + #[error("Failed to read or write file")] Io(#[from] std::io::Error), - #[error(transparent)] + #[error("Failed to generate path")] PathGenerator(#[from] storage_path_generator::PathError), #[error("Error formatting file store identifier")] @@ -44,48 +44,39 @@ pub(crate) enum FileError { pub(crate) struct FileStore { path_gen: Generator, root_dir: PathBuf, - settings_tree: sled::Tree, + repo: Repo, } #[async_trait::async_trait(?Send)] impl Store for FileStore { - type Error = FileError; type Identifier = FileId; type Stream = Pin>>>; #[tracing::instrument(skip(reader))] - async fn save_async_read( - &self, - reader: &mut Reader, - filename: &str, - ) -> Result + async fn save_async_read(&self, reader: &mut Reader) -> Result where Reader: AsyncRead + Unpin, { - let path = self.next_file(filename)?; + let path = self.next_file().await?; if let Err(e) = self.safe_save_reader(&path, reader).await { self.safe_remove_file(&path).await?; - return Err(e); + return Err(e.into()); } - self.file_id_from_path(path) + Ok(self.file_id_from_path(path)?) 
} #[tracing::instrument(skip(bytes))] - async fn save_bytes( - &self, - bytes: Bytes, - filename: &str, - ) -> Result { - let path = self.next_file(filename)?; + async fn save_bytes(&self, bytes: Bytes) -> Result { + let path = self.next_file().await?; if let Err(e) = self.safe_save_bytes(&path, bytes).await { self.safe_remove_file(&path).await?; - return Err(e); + return Err(e.into()); } - self.file_id_from_path(path) + Ok(self.file_id_from_path(path)?) } #[tracing::instrument] @@ -94,7 +85,7 @@ impl Store for FileStore { identifier: &Self::Identifier, from_start: Option, len: Option, - ) -> Result { + ) -> Result { let path = self.path_from_file_id(identifier); let stream = File::open(path) @@ -122,7 +113,7 @@ impl Store for FileStore { } #[tracing::instrument] - async fn len(&self, identifier: &Self::Identifier) -> Result { + async fn len(&self, identifier: &Self::Identifier) -> Result { let path = self.path_from_file_id(identifier); let len = tokio::fs::metadata(path).await?.len(); @@ -131,7 +122,7 @@ impl Store for FileStore { } #[tracing::instrument] - async fn remove(&self, identifier: &Self::Identifier) -> Result<(), Self::Error> { + async fn remove(&self, identifier: &Self::Identifier) -> Result<(), Error> { let path = self.path_from_file_id(identifier); self.safe_remove_file(path).await?; @@ -141,25 +132,28 @@ impl Store for FileStore { } impl FileStore { - pub fn build(root_dir: PathBuf, db: &sled::Db) -> Result { - let settings_tree = db.open_tree("settings")?; - - let path_gen = init_generator(&settings_tree)?; + pub(crate) async fn build(root_dir: PathBuf, repo: Repo) -> Result { + let path_gen = init_generator(&repo).await?; Ok(FileStore { root_dir, path_gen, - settings_tree, + repo, }) } - fn next_directory(&self) -> Result { + async fn next_directory(&self) -> Result { let path = self.path_gen.next(); - self.settings_tree - .insert(GENERATOR_KEY, path.to_be_bytes())?; + match self.repo { + Repo::Sled(ref sled_repo) => { + sled_repo + .set(GENERATOR_KEY, path.to_be_bytes().into()) + .await?; + } + } - let mut target_path = self.root_dir.join("files"); + let mut target_path = self.root_dir.clone(); for dir in path.to_strings() { target_path.push(dir) } @@ -167,8 +161,9 @@ impl FileStore { Ok(target_path) } - fn next_file(&self, filename: &str) -> Result { - let target_path = self.next_directory()?; + async fn next_file(&self) -> Result { + let target_path = self.next_directory().await?; + let filename = uuid::Uuid::new_v4().to_string(); Ok(target_path.join(filename)) } @@ -289,13 +284,17 @@ pub(crate) async fn safe_create_parent>(path: P) -> Result<(), Fi Ok(()) } -fn init_generator(settings: &sled::Tree) -> Result { - if let Some(ivec) = settings.get(GENERATOR_KEY)? { - Ok(Generator::from_existing( - storage_path_generator::Path::from_be_bytes(ivec.to_vec())?, - )) - } else { - Ok(Generator::new()) +async fn init_generator(repo: &Repo) -> Result { + match repo { + Repo::Sled(sled_repo) => { + if let Some(ivec) = sled_repo.get(GENERATOR_KEY).await? 
{ + Ok(Generator::from_existing( + storage_path_generator::Path::from_be_bytes(ivec.to_vec())?, + )) + } else { + Ok(Generator::new()) + } + } } } diff --git a/src/store/file_store/file_id.rs b/src/store/file_store/file_id.rs index e811466..bb7a3ec 100644 --- a/src/store/file_store/file_id.rs +++ b/src/store/file_store/file_id.rs @@ -1,6 +1,9 @@ -use crate::store::{ - file_store::{FileError, FileStore}, - Identifier, +use crate::{ + error::Error, + store::{ + file_store::{FileError, FileStore}, + Identifier, + }, }; use std::path::PathBuf; @@ -8,9 +11,7 @@ use std::path::PathBuf; pub(crate) struct FileId(PathBuf); impl Identifier for FileId { - type Error = FileError; - - fn to_bytes(&self) -> Result, Self::Error> { + fn to_bytes(&self) -> Result, Error> { let vec = self .0 .to_str() @@ -21,7 +22,7 @@ impl Identifier for FileId { Ok(vec) } - fn from_bytes(bytes: Vec) -> Result + fn from_bytes(bytes: Vec) -> Result where Self: Sized, { diff --git a/src/store/file_store/restructure.rs b/src/store/file_store/restructure.rs deleted file mode 100644 index 81e990c..0000000 --- a/src/store/file_store/restructure.rs +++ /dev/null @@ -1,118 +0,0 @@ -use crate::{ - error::{Error, UploadError}, - store::file_store::FileStore, - upload_manager::UploadManager, -}; -use std::path::{Path, PathBuf}; - -const RESTRUCTURE_COMPLETE: &[u8] = b"fs-restructure-01-complete"; -const DETAILS: &[u8] = b"details"; - -impl UploadManager { - #[tracing::instrument(skip(self))] - pub(crate) async fn restructure(&self, store: &FileStore) -> Result<(), Error> { - if self.restructure_complete(store)? { - return Ok(()); - } - - for res in self.inner().filename_tree.iter() { - let (filename, hash) = res?; - let filename = String::from_utf8(filename.to_vec())?; - tracing::info!("Migrating {}", filename); - - let file_path = store.root_dir.join("files").join(&filename); - - if tokio::fs::metadata(&file_path).await.is_ok() { - let target_path = store.next_directory()?.join(&filename); - - let target_path_bytes = self - .generalize_path(store, &target_path)? - .to_str() - .ok_or(UploadError::Path)? - .as_bytes() - .to_vec(); - - self.inner() - .identifier_tree - .insert(filename.as_bytes(), target_path_bytes)?; - - store.safe_move_file(file_path, target_path).await?; - } - - let (start, end) = variant_key_bounds(&hash); - - for res in self.inner().main_tree.range(start..end) { - let (hash_variant_key, variant_path_or_details) = res?; - - if !hash_variant_key.ends_with(DETAILS) { - let variant_path = - PathBuf::from(String::from_utf8(variant_path_or_details.to_vec())?); - if tokio::fs::metadata(&variant_path).await.is_ok() { - let target_path = store.next_directory()?.join(&filename); - - let relative_target_path_bytes = self - .generalize_path(store, &target_path)? - .to_str() - .ok_or(UploadError::Path)? 
- .as_bytes() - .to_vec(); - - let variant_key = - self.migrate_variant_key(store, &variant_path, &filename)?; - - self.inner() - .identifier_tree - .insert(variant_key, relative_target_path_bytes)?; - - store - .safe_move_file(variant_path.clone(), target_path) - .await?; - store.try_remove_parents(&variant_path).await; - } - } - - self.inner().main_tree.remove(hash_variant_key)?; - } - } - - self.mark_restructure_complete(store)?; - Ok(()) - } - - fn restructure_complete(&self, store: &FileStore) -> Result { - Ok(store.settings_tree.get(RESTRUCTURE_COMPLETE)?.is_some()) - } - - fn mark_restructure_complete(&self, store: &FileStore) -> Result<(), Error> { - store.settings_tree.insert(RESTRUCTURE_COMPLETE, b"true")?; - - Ok(()) - } - - fn generalize_path<'a>(&self, store: &FileStore, path: &'a Path) -> Result<&'a Path, Error> { - Ok(path.strip_prefix(&store.root_dir)?) - } - - fn migrate_variant_key( - &self, - store: &FileStore, - variant_process_path: &Path, - filename: &str, - ) -> Result, Error> { - let path = self - .generalize_path(store, variant_process_path)? - .strip_prefix("files")?; - - self.variant_key(path, filename) - } -} - -pub(crate) fn variant_key_bounds(hash: &[u8]) -> (Vec, Vec) { - let mut start = hash.to_vec(); - start.extend(&[2]); - - let mut end = hash.to_vec(); - end.extend(&[3]); - - (start, end) -} diff --git a/src/store/object_store.rs b/src/store/object_store.rs index d07f29a..41bf188 100644 --- a/src/store/object_store.rs +++ b/src/store/object_store.rs @@ -1,16 +1,17 @@ -use crate::store::Store; +use crate::{ + error::Error, + repo::{Repo, SettingsRepo}, + store::Store, +}; use actix_web::web::Bytes; -use futures_util::stream::Stream; +use futures_util::{Stream, TryStreamExt}; use s3::{ client::Client, command::Command, creds::Credentials, request_trait::Request, Bucket, Region, }; -use std::{ - pin::Pin, - string::FromUtf8Error, - task::{Context, Poll}, -}; +use std::{pin::Pin, string::FromUtf8Error}; use storage_path_generator::{Generator, Path}; use tokio::io::{AsyncRead, AsyncWrite}; +use tracing::Instrument; mod object_id; pub(crate) use object_id::ObjectId; @@ -18,74 +19,59 @@ pub(crate) use object_id::ObjectId; // - Settings Tree // - last-path -> last generated path -const GENERATOR_KEY: &[u8] = b"last-path"; +const GENERATOR_KEY: &str = "last-path"; #[derive(Debug, thiserror::Error)] pub(crate) enum ObjectError { - #[error(transparent)] + #[error("Failed to generate path")] PathGenerator(#[from] storage_path_generator::PathError), - #[error(transparent)] - Sled(#[from] sled::Error), - - #[error(transparent)] + #[error("Failed to parse string")] Utf8(#[from] FromUtf8Error), #[error("Invalid length")] Length, - #[error("Storage error: {0}")] + #[error("Storage error")] Anyhow(#[from] anyhow::Error), } #[derive(Clone)] pub(crate) struct ObjectStore { path_gen: Generator, - settings_tree: sled::Tree, + repo: Repo, bucket: Bucket, client: reqwest::Client, } -pin_project_lite::pin_project! 
{ - struct IoError { - #[pin] - inner: S, - } -} - #[async_trait::async_trait(?Send)] impl Store for ObjectStore { - type Error = ObjectError; type Identifier = ObjectId; type Stream = Pin>>>; #[tracing::instrument(skip(reader))] - async fn save_async_read( - &self, - reader: &mut Reader, - filename: &str, - ) -> Result + async fn save_async_read(&self, reader: &mut Reader) -> Result where Reader: AsyncRead + Unpin, { - let path = self.next_file(filename)?; + let path = self.next_file().await?; self.bucket .put_object_stream(&self.client, reader, &path) - .await?; + .await + .map_err(ObjectError::from)?; Ok(ObjectId::from_string(path)) } #[tracing::instrument(skip(bytes))] - async fn save_bytes( - &self, - bytes: Bytes, - filename: &str, - ) -> Result { - let path = self.next_file(filename)?; + async fn save_bytes(&self, bytes: Bytes) -> Result { + let path = self.next_file().await?; - self.bucket.put_object(&self.client, &path, &bytes).await?; + self.bucket + .put_object(&self.client, &path, &bytes) + .await + .map_err(ObjectError::from)?; Ok(ObjectId::from_string(path)) } @@ -96,22 +82,39 @@ impl Store for ObjectStore { identifier: &Self::Identifier, from_start: Option, len: Option, - ) -> Result { + ) -> Result { let path = identifier.as_str(); let start = from_start.unwrap_or(0); - let end = len.map(|len| start + len); + let end = len.map(|len| start + len - 1); - let request = Client::request( - &self.client, - &self.bucket, - path, - Command::GetObjectRange { start, end }, - ); + let request_span = tracing::info_span!(parent: None, "Get Object"); - let response = request.response().await?; + // NOTE: isolating reqwest in it's own span is to prevent the request's span from getting + // smuggled into a long-lived task. Unfortunately, I am unable to create a minimal + // reproduction of this problem so I can't open a bug about it. 
+ let request = request_span.in_scope(|| { + Client::request( + &self.client, + &self.bucket, + path, + Command::GetObjectRange { start, end }, + ) + }); - Ok(Box::pin(io_error(response.bytes_stream()))) + let response = request_span + .in_scope(|| request.response()) + .instrument(request_span.clone()) + .await + .map_err(ObjectError::from)?; + + let stream = request_span.in_scope(|| { + response + .bytes_stream() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) + }); + + Ok(Box::pin(stream)) } #[tracing::instrument(skip(writer))] @@ -128,49 +131,55 @@ impl Store for ObjectStore { self.bucket .get_object_stream(&self.client, path, writer) .await - .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, Self::Error::from(e)))?; + .map_err(ObjectError::from) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, Error::from(e)))?; Ok(()) } #[tracing::instrument] - async fn len(&self, identifier: &Self::Identifier) -> Result { + async fn len(&self, identifier: &Self::Identifier) -> Result { let path = identifier.as_str(); - let (head, _) = self.bucket.head_object(&self.client, path).await?; + let (head, _) = self + .bucket + .head_object(&self.client, path) + .await + .map_err(ObjectError::from)?; let length = head.content_length.ok_or(ObjectError::Length)?; Ok(length as u64) } #[tracing::instrument] - async fn remove(&self, identifier: &Self::Identifier) -> Result<(), Self::Error> { + async fn remove(&self, identifier: &Self::Identifier) -> Result<(), Error> { let path = identifier.as_str(); - self.bucket.delete_object(&self.client, path).await?; + self.bucket + .delete_object(&self.client, path) + .await + .map_err(ObjectError::from)?; Ok(()) } } impl ObjectStore { #[allow(clippy::too_many_arguments)] - pub(crate) fn build( + pub(crate) async fn build( bucket_name: &str, region: Region, access_key: Option, secret_key: Option, security_token: Option, session_token: Option, - db: &sled::Db, + repo: Repo, client: reqwest::Client, - ) -> Result { - let settings_tree = db.open_tree("settings")?; - - let path_gen = init_generator(&settings_tree)?; + ) -> Result { + let path_gen = init_generator(&repo).await?; Ok(ObjectStore { path_gen, - settings_tree, + repo, bucket: Bucket::new_with_path_style( bucket_name, match region { @@ -186,65 +195,52 @@ impl ObjectStore { security_token, session_token, }, - )?, + ) + .map_err(ObjectError::from)?, client, }) } - fn next_directory(&self) -> Result { + async fn next_directory(&self) -> Result { let path = self.path_gen.next(); - self.settings_tree - .insert(GENERATOR_KEY, path.to_be_bytes())?; + match self.repo { + Repo::Sled(ref sled_repo) => { + sled_repo + .set(GENERATOR_KEY, path.to_be_bytes().into()) + .await?; + } + } Ok(path) } - fn next_file(&self, filename: &str) -> Result { - let path = self.next_directory()?.to_strings().join("/"); + async fn next_file(&self) -> Result { + let path = self.next_directory().await?.to_strings().join("/"); + let filename = uuid::Uuid::new_v4().to_string(); Ok(format!("{}/{}", path, filename)) } } -fn init_generator(settings: &sled::Tree) -> Result { - if let Some(ivec) = settings.get(GENERATOR_KEY)? 
{ - Ok(Generator::from_existing( - storage_path_generator::Path::from_be_bytes(ivec.to_vec())?, - )) - } else { - Ok(Generator::new()) - } -} - -fn io_error(stream: S) -> impl Stream> -where - S: Stream>, - E: Into>, -{ - IoError { inner: stream } -} - -impl Stream for IoError -where - S: Stream>, - E: Into>, -{ - type Item = std::io::Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this = self.as_mut().project(); - - this.inner.poll_next(cx).map(|opt| { - opt.map(|res| res.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))) - }) +async fn init_generator(repo: &Repo) -> Result { + match repo { + Repo::Sled(sled_repo) => { + if let Some(ivec) = sled_repo.get(GENERATOR_KEY).await? { + Ok(Generator::from_existing( + storage_path_generator::Path::from_be_bytes(ivec.to_vec())?, + )) + } else { + Ok(Generator::new()) + } + } } } impl std::fmt::Debug for ObjectStore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ObjectStore") - .field("path_gen", &self.path_gen) + .field("path_gen", &"generator") .field("bucket", &self.bucket.name) .field("region", &self.bucket.region) .finish() diff --git a/src/store/object_store/object_id.rs b/src/store/object_store/object_id.rs index 6b3bb32..d9c8a4e 100644 --- a/src/store/object_store/object_id.rs +++ b/src/store/object_store/object_id.rs @@ -1,17 +1,20 @@ -use crate::store::{object_store::ObjectError, Identifier}; +use crate::{ + error::Error, + store::{object_store::ObjectError, Identifier}, +}; #[derive(Debug, Clone)] pub(crate) struct ObjectId(String); impl Identifier for ObjectId { - type Error = ObjectError; - - fn to_bytes(&self) -> Result, Self::Error> { + fn to_bytes(&self) -> Result, Error> { Ok(self.0.as_bytes().to_vec()) } - fn from_bytes(bytes: Vec) -> Result { - Ok(ObjectId(String::from_utf8(bytes)?)) + fn from_bytes(bytes: Vec) -> Result { + Ok(ObjectId( + String::from_utf8(bytes).map_err(ObjectError::from)?, + )) } } diff --git a/src/stream.rs b/src/stream.rs new file mode 100644 index 0000000..bd14aef --- /dev/null +++ b/src/stream.rs @@ -0,0 +1,233 @@ +use actix_rt::{task::JoinHandle, time::Sleep}; +use actix_web::web::Bytes; +use futures_util::Stream; +use std::{ + future::Future, + pin::Pin, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + task::{Context, Poll, Wake, Waker}, + time::Duration, +}; + +pub(crate) trait StreamLimit { + fn limit(self, limit: u64) -> Limit + where + Self: Sized, + { + Limit { + inner: self, + count: 0, + limit, + } + } +} + +pub(crate) trait StreamTimeout { + fn timeout(self, duration: Duration) -> Timeout + where + Self: Sized, + { + Timeout { + sleep: actix_rt::time::sleep(duration), + inner: self, + expired: false, + woken: Arc::new(AtomicBool::new(true)), + } + } +} + +pub(crate) fn from_iterator( + iterator: I, + buffer: usize, +) -> IterStream { + IterStream { + state: IterStreamState::New { iterator, buffer }, + } +} + +impl StreamLimit for S where S: Stream> {} +impl StreamTimeout for S where S: Stream {} + +pin_project_lite::pin_project! { + pub(crate) struct Limit { + #[pin] + inner: S, + + count: u64, + limit: u64, + } +} + +pin_project_lite::pin_project! 
{
+    pub(crate) struct Timeout<S> {
+        #[pin]
+        sleep: Sleep,
+
+        #[pin]
+        inner: S,
+
+        expired: bool,
+        woken: Arc<AtomicBool>,
+    }
+}
+
+enum IterStreamState<I, T> {
+    New {
+        iterator: I,
+        buffer: usize,
+    },
+    Running {
+        handle: JoinHandle<()>,
+        receiver: tokio::sync::mpsc::Receiver<T>,
+    },
+    Pending,
+}
+
+pub(crate) struct IterStream<I, T> {
+    state: IterStreamState<I, T>,
+}
+
+struct TimeoutWaker {
+    woken: Arc<AtomicBool>,
+    inner: Waker,
+}
+
+#[derive(Debug, thiserror::Error)]
+#[error("Response body larger than size limit")]
+pub(crate) struct LimitError;
+
+#[derive(Debug, thiserror::Error)]
+#[error("Timeout in body")]
+pub(crate) struct TimeoutError;
+
+impl<S, E> Stream for Limit<S>
+where
+    S: Stream<Item = Result<Bytes, E>>,
+    E: From<LimitError>,
+{
+    type Item = Result<Bytes, E>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        let this = self.as_mut().project();
+
+        let limit = this.limit;
+        let count = this.count;
+        let inner = this.inner;
+
+        inner.poll_next(cx).map(|opt| {
+            opt.map(|res| match res {
+                Ok(bytes) => {
+                    *count += bytes.len() as u64;
+                    if *count > *limit {
+                        return Err(LimitError.into());
+                    }
+                    Ok(bytes)
+                }
+                Err(e) => Err(e),
+            })
+        })
+    }
+}
+
+impl Wake for TimeoutWaker {
+    fn wake(self: Arc<Self>) {
+        self.wake_by_ref()
+    }
+
+    fn wake_by_ref(self: &Arc<Self>) {
+        self.woken.store(true, Ordering::Release);
+        self.inner.wake_by_ref();
+    }
+}
+
+impl<S> Stream for Timeout<S>
+where
+    S: Stream,
+{
+    type Item = Result<S::Item, TimeoutError>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        let this = self.as_mut().project();
+
+        if *this.expired {
+            return Poll::Ready(None);
+        }
+
+        if this.woken.swap(false, Ordering::Acquire) {
+            let timeout_waker = Arc::new(TimeoutWaker {
+                woken: Arc::clone(this.woken),
+                inner: cx.waker().clone(),
+            })
+            .into();
+
+            let mut timeout_cx = Context::from_waker(&timeout_waker);
+
+            if this.sleep.poll(&mut timeout_cx).is_ready() {
+                *this.expired = true;
+                return Poll::Ready(Some(Err(TimeoutError)));
+            }
+        }
+
+        this.inner.poll_next(cx).map(|opt| opt.map(Ok))
+    }
+}
+
+impl<I, T> Stream for IterStream<I, T>
+where
+    I: IntoIterator<Item = T> + Send + Unpin + 'static,
+    T: Send + 'static,
+{
+    type Item = T;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        let this = self.as_mut().get_mut();
+
+        match std::mem::replace(&mut this.state, IterStreamState::Pending) {
+            IterStreamState::New { iterator, buffer } => {
+                let (sender, receiver) = tokio::sync::mpsc::channel(buffer);
+
+                let mut handle = actix_rt::task::spawn_blocking(move || {
+                    let iterator = iterator.into_iter();
+
+                    for item in iterator {
+                        if sender.blocking_send(item).is_err() {
+                            break;
+                        }
+                    }
+                });
+
+                if Pin::new(&mut handle).poll(cx).is_ready() {
+                    return Poll::Ready(None);
+                }
+
+                this.state = IterStreamState::Running { handle, receiver };
+
+                self.poll_next(cx)
+            }
+            IterStreamState::Running {
+                mut handle,
+                mut receiver,
+            } => match Pin::new(&mut receiver).poll_recv(cx) {
+                Poll::Ready(Some(item)) => {
+                    this.state = IterStreamState::Running { handle, receiver };
+
+                    Poll::Ready(Some(item))
+                }
+                Poll::Ready(None) => Poll::Ready(None),
+                Poll::Pending => {
+                    if Pin::new(&mut handle).poll(cx).is_ready() {
+                        return Poll::Ready(None);
+                    }
+
+                    this.state = IterStreamState::Running { handle, receiver };
+
+                    Poll::Pending
+                }
+            },
+            IterStreamState::Pending => panic!("Polled after completion"),
+        }
+    }
+}
diff --git a/src/upload_manager.rs b/src/upload_manager.rs
deleted file mode 100644
index e934d31..0000000
--- a/src/upload_manager.rs
+++ /dev/null
@@ -1,765 +0,0 @@
-use crate::{
-    config::Format,
-    error::{Error,
UploadError}, - ffmpeg::{InputFormat, ThumbnailFormat}, - magick::{details_hint, ValidInputType}, - migrate::{alias_id_key, alias_key, alias_key_bounds}, - serde_str::Serde, - store::{Identifier, Store}, -}; -use actix_web::web; -use sha2::Digest; -use std::{string::FromUtf8Error, sync::Arc}; -use tracing::{debug, error, info, instrument, warn, Span}; -use tracing_futures::Instrument; - -mod hasher; -mod session; - -pub(super) use session::UploadManagerSession; - -// TREE STRUCTURE -// - Alias Tree -// - alias -> hash -// - alias / id -> u64(id) -// - alias / delete -> delete token -// - Main Tree -// - hash -> filename -// - hash 0 u64(id) -> alias -// - DEPRECATED: -// - hash 2 variant path -> variant path -// - hash 2 vairant path details -> details -// - Filename Tree -// - filename -> hash -// - Details Tree -// - filename / S::Identifier -> details -// - Identifier Tree -// - filename -> S::Identifier -// - filename / variant path -> S::Identifier -// - filename / motion -> S::Identifier -// - Settings Tree -// - store-migration-progress -> Path Tree Key - -const STORE_MIGRATION_PROGRESS: &[u8] = b"store-migration-progress"; - -#[derive(Clone)] -pub(crate) struct UploadManager { - inner: Arc, -} - -pub(crate) struct UploadManagerInner { - format: Option, - hasher: sha2::Sha256, - pub(crate) alias_tree: sled::Tree, - pub(crate) filename_tree: sled::Tree, - pub(crate) main_tree: sled::Tree, - details_tree: sled::Tree, - settings_tree: sled::Tree, - pub(crate) identifier_tree: sled::Tree, - db: sled::Db, -} - -#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] -pub(crate) struct Details { - width: usize, - height: usize, - content_type: Serde, - created_at: time::OffsetDateTime, -} - -struct FilenameIVec { - inner: sled::IVec, -} - -impl UploadManager { - /// Create a new UploadManager - pub(crate) async fn new(db: sled::Db, format: Option) -> Result { - let manager = UploadManager { - inner: Arc::new(UploadManagerInner { - format, - hasher: sha2::Sha256::new(), - alias_tree: db.open_tree("alias")?, - filename_tree: db.open_tree("filename")?, - main_tree: db.open_tree("main")?, - details_tree: db.open_tree("details")?, - settings_tree: db.open_tree("settings")?, - identifier_tree: db.open_tree("path")?, - db, - }), - }; - - Ok(manager) - } - - pub(crate) async fn migrate_store(&self, from: S1, to: S2) -> Result<(), Error> - where - S1: Store, - S2: Store, - Error: From + From, - { - let iter = - if let Some(starting_line) = self.inner.settings_tree.get(STORE_MIGRATION_PROGRESS)? { - self.inner.identifier_tree.range(starting_line..) - } else { - self.inner.identifier_tree.iter() - }; - - for res in iter { - let (key, identifier) = res?; - - let identifier = S1::Identifier::from_bytes(identifier.to_vec())?; - - let filename = - if let Some((filename, _)) = String::from_utf8_lossy(&key).split_once('/') { - filename.to_string() - } else { - String::from_utf8_lossy(&key).to_string() - }; - - let stream = from.to_stream(&identifier, None, None).await?; - futures_util::pin_mut!(stream); - let mut reader = tokio_util::io::StreamReader::new(stream); - - let new_identifier = to.save_async_read(&mut reader, &filename).await?; - - let details_key = self.details_key(&identifier, &filename)?; - - if let Some(details) = self.inner.details_tree.get(details_key.clone())? 
{ - let new_details_key = self.details_key(&new_identifier, &filename)?; - - self.inner.details_tree.insert(new_details_key, details)?; - } - - self.inner - .identifier_tree - .insert(key.clone(), new_identifier.to_bytes()?)?; - self.inner.details_tree.remove(details_key)?; - self.inner - .settings_tree - .insert(STORE_MIGRATION_PROGRESS, key)?; - - let (ident, detail, settings) = futures_util::future::join3( - self.inner.identifier_tree.flush_async(), - self.inner.details_tree.flush_async(), - self.inner.settings_tree.flush_async(), - ) - .await; - - ident?; - detail?; - settings?; - } - - // clean up the migration key to avoid interfering with future migrations - self.inner.settings_tree.remove(STORE_MIGRATION_PROGRESS)?; - self.inner.settings_tree.flush_async().await?; - - Ok(()) - } - - pub(crate) fn inner(&self) -> &UploadManagerInner { - &self.inner - } - - pub(crate) async fn still_identifier_from_filename( - &self, - store: S, - filename: String, - ) -> Result - where - Error: From, - { - let identifier = self.identifier_from_filename::(filename.clone()).await?; - let details = - if let Some(details) = self.variant_details(&identifier, filename.clone()).await? { - details - } else { - let hint = details_hint(&filename); - Details::from_store(store.clone(), identifier.clone(), hint).await? - }; - - if !details.is_motion() { - return Ok(identifier); - } - - if let Some(motion_identifier) = self.motion_identifier::(&filename).await? { - return Ok(motion_identifier); - } - - let permit = crate::PROCESS_SEMAPHORE.acquire().await; - let mut reader = crate::ffmpeg::thumbnail( - store.clone(), - identifier, - InputFormat::Mp4, - ThumbnailFormat::Jpeg, - ) - .await?; - let motion_identifier = store.save_async_read(&mut reader, &filename).await?; - drop(permit); - - self.store_motion_path(&filename, &motion_identifier) - .await?; - Ok(motion_identifier) - } - - async fn motion_identifier( - &self, - filename: &str, - ) -> Result, Error> - where - Error: From, - { - let identifier_tree = self.inner.identifier_tree.clone(); - let motion_key = format!("{}/motion", filename); - - let opt = web::block(move || identifier_tree.get(motion_key.as_bytes())).await??; - - if let Some(ivec) = opt { - return Ok(Some(S::Identifier::from_bytes(ivec.to_vec())?)); - } - - Ok(None) - } - - async fn store_motion_path( - &self, - filename: &str, - identifier: &I, - ) -> Result<(), Error> - where - Error: From, - { - let identifier_bytes = identifier.to_bytes()?; - let motion_key = format!("{}/motion", filename); - let identifier_tree = self.inner.identifier_tree.clone(); - - web::block(move || identifier_tree.insert(motion_key.as_bytes(), identifier_bytes)) - .await??; - Ok(()) - } - - #[instrument(skip(self))] - pub(crate) async fn identifier_from_filename( - &self, - filename: String, - ) -> Result - where - Error: From, - { - let identifier_tree = self.inner.identifier_tree.clone(); - let path_ivec = web::block(move || identifier_tree.get(filename.as_bytes())) - .await?? 
- .ok_or(UploadError::MissingFile)?; - - let identifier = S::Identifier::from_bytes(path_ivec.to_vec())?; - - Ok(identifier) - } - - #[instrument(skip(self))] - async fn store_identifier( - &self, - filename: String, - identifier: &I, - ) -> Result<(), Error> - where - Error: From, - { - let identifier_bytes = identifier.to_bytes()?; - let identifier_tree = self.inner.identifier_tree.clone(); - web::block(move || identifier_tree.insert(filename.as_bytes(), identifier_bytes)).await??; - Ok(()) - } - - #[instrument(skip(self))] - pub(crate) async fn variant_identifier( - &self, - process_path: &std::path::Path, - filename: &str, - ) -> Result, Error> - where - Error: From, - { - let key = self.variant_key(process_path, filename)?; - let identifier_tree = self.inner.identifier_tree.clone(); - let path_opt = web::block(move || identifier_tree.get(key)).await??; - - if let Some(ivec) = path_opt { - let identifier = S::Identifier::from_bytes(ivec.to_vec())?; - Ok(Some(identifier)) - } else { - Ok(None) - } - } - - /// Store the path to a generated image variant so we can easily clean it up later - #[instrument(skip(self))] - pub(crate) async fn store_variant( - &self, - variant_process_path: Option<&std::path::Path>, - identifier: &I, - filename: &str, - ) -> Result<(), Error> - where - Error: From, - { - let key = if let Some(path) = variant_process_path { - self.variant_key(path, filename)? - } else { - let mut vec = filename.as_bytes().to_vec(); - vec.extend(b"/"); - vec.extend(&identifier.to_bytes()?); - vec - }; - let identifier_tree = self.inner.identifier_tree.clone(); - let identifier_bytes = identifier.to_bytes()?; - - debug!("Storing variant"); - web::block(move || identifier_tree.insert(key, identifier_bytes)).await??; - debug!("Stored variant"); - - Ok(()) - } - - /// Get the image details for a given variant - #[instrument(skip(self))] - pub(crate) async fn variant_details( - &self, - identifier: &I, - filename: String, - ) -> Result, Error> - where - Error: From, - { - let key = self.details_key(identifier, &filename)?; - let details_tree = self.inner.details_tree.clone(); - - debug!("Getting details"); - let opt = match web::block(move || details_tree.get(key)).await?? { - Some(ivec) => match serde_json::from_slice(&ivec) { - Ok(details) => Some(details), - Err(_) => None, - }, - None => None, - }; - debug!("Got details"); - - Ok(opt) - } - - #[instrument(skip(self))] - pub(crate) async fn store_variant_details( - &self, - identifier: &I, - filename: String, - details: &Details, - ) -> Result<(), Error> - where - Error: From, - { - let key = self.details_key(identifier, &filename)?; - let details_tree = self.inner.details_tree.clone(); - let details_value = serde_json::to_vec(details)?; - - debug!("Storing details"); - web::block(move || details_tree.insert(key, details_value)).await??; - debug!("Stored details"); - - Ok(()) - } - - /// Get a list of aliases for a given file - pub(crate) async fn aliases_by_filename(&self, filename: String) -> Result, Error> { - let fname_tree = self.inner.filename_tree.clone(); - let hash = web::block(move || fname_tree.get(filename.as_bytes())) - .await?? - .ok_or(UploadError::MissingAlias)?; - - self.aliases_by_hash(&hash).await - } - - /// Get a list of aliases for a given alias - pub(crate) async fn aliases_by_alias(&self, alias: String) -> Result, Error> { - let alias_tree = self.inner.alias_tree.clone(); - let hash = web::block(move || alias_tree.get(alias.as_bytes())) - .await?? 
- .ok_or(UploadError::MissingFilename)?; - - self.aliases_by_hash(&hash).await - } - - async fn aliases_by_hash(&self, hash: &sled::IVec) -> Result, Error> { - let (start, end) = alias_key_bounds(hash); - let main_tree = self.inner.main_tree.clone(); - let aliases = web::block(move || { - main_tree - .range(start..end) - .values() - .collect::, _>>() - }) - .await??; - - debug!("Got {} aliases for hash", aliases.len()); - let aliases = aliases - .into_iter() - .filter_map(|s| String::from_utf8(s.to_vec()).ok()) - .collect::>(); - - for alias in aliases.iter() { - debug!("{}", alias); - } - - Ok(aliases) - } - - /// Delete an alias without a delete token - pub(crate) async fn delete_without_token( - &self, - store: S, - alias: String, - ) -> Result<(), Error> - where - Error: From, - { - let token_key = delete_key(&alias); - let alias_tree = self.inner.alias_tree.clone(); - let token = web::block(move || alias_tree.get(token_key.as_bytes())) - .await?? - .ok_or(UploadError::MissingAlias)?; - - self.delete(store, alias, String::from_utf8(token.to_vec())?) - .await - } - - /// Delete the alias, and the file & variants if no more aliases exist - #[instrument(skip(self, alias, token))] - pub(crate) async fn delete( - &self, - store: S, - alias: String, - token: String, - ) -> Result<(), Error> - where - Error: From, - { - use sled::Transactional; - let main_tree = self.inner.main_tree.clone(); - let alias_tree = self.inner.alias_tree.clone(); - - let span = Span::current(); - let alias2 = alias.clone(); - let hash = web::block(move || { - [&main_tree, &alias_tree].transaction(|v| { - let entered = span.enter(); - let main_tree = &v[0]; - let alias_tree = &v[1]; - - // -- GET TOKEN -- - debug!("Deleting alias -> delete-token mapping"); - let existing_token = alias_tree - .remove(delete_key(&alias2).as_bytes())? - .ok_or_else(|| trans_upload_error(UploadError::MissingAlias))?; - - // Bail if invalid token - if existing_token != token { - warn!("Invalid delete token"); - return Err(trans_upload_error(UploadError::InvalidToken)); - } - - // -- GET ID FOR HASH TREE CLEANUP -- - debug!("Deleting alias -> id mapping"); - let id = alias_tree - .remove(alias_id_key(&alias2).as_bytes())? - .ok_or_else(|| trans_upload_error(UploadError::MissingAlias))?; - let id = String::from_utf8(id.to_vec()).map_err(trans_utf8_error)?; - - // -- GET HASH FOR HASH TREE CLEANUP -- - debug!("Deleting alias -> hash mapping"); - let hash = alias_tree - .remove(alias2.as_bytes())? 
- .ok_or_else(|| trans_upload_error(UploadError::MissingAlias))?; - - // -- REMOVE HASH TREE ELEMENT -- - debug!("Deleting hash -> alias mapping"); - main_tree.remove(alias_key(&hash, &id))?; - drop(entered); - Ok(hash) - }) - }) - .await??; - - self.check_delete_files(store, hash).await - } - - async fn check_delete_files( - &self, - store: S, - hash: sled::IVec, - ) -> Result<(), Error> - where - Error: From, - { - // -- CHECK IF ANY OTHER ALIASES EXIST -- - let main_tree = self.inner.main_tree.clone(); - let (start, end) = alias_key_bounds(&hash); - debug!("Checking for additional aliases referencing hash"); - let any_aliases = web::block(move || { - Ok(main_tree.range(start..end).next().is_some()) as Result - }) - .await??; - - // Bail if there are existing aliases - if any_aliases { - debug!("Other aliases reference file, not removing from disk"); - return Ok(()); - } - - // -- DELETE HASH ENTRY -- - let main_tree = self.inner.main_tree.clone(); - let hash2 = hash.clone(); - debug!("Deleting hash -> filename mapping"); - let filename = web::block(move || main_tree.remove(&hash2)) - .await?? - .ok_or(UploadError::MissingFile)?; - - // -- DELETE FILES -- - let this = self.clone(); - let cleanup_span = tracing::info_span!( - parent: None, - "Cleanup", - filename = &tracing::field::display(String::from_utf8_lossy(&filename)), - ); - cleanup_span.follows_from(Span::current()); - debug!("Spawning cleanup task"); - actix_rt::spawn( - async move { - if let Err(e) = this - .cleanup_files(store, FilenameIVec::new(filename.clone())) - .await - { - error!("Error removing files from fs, {}", e); - } - info!( - "Files deleted for {:?}", - String::from_utf8(filename.to_vec()) - ); - } - .instrument(cleanup_span), - ); - - Ok(()) - } - - /// Fetch the real on-disk filename given an alias - #[instrument(skip(self))] - pub(crate) async fn from_alias(&self, alias: String) -> Result { - let tree = self.inner.alias_tree.clone(); - debug!("Getting hash from alias"); - let hash = web::block(move || tree.get(alias.as_bytes())) - .await?? - .ok_or(UploadError::MissingAlias)?; - - let main_tree = self.inner.main_tree.clone(); - debug!("Getting filename from hash"); - let filename = web::block(move || main_tree.get(hash)) - .await?? 
- .ok_or(UploadError::MissingFile)?; - - let filename = String::from_utf8(filename.to_vec())?; - - Ok(filename) - } - - pub(crate) fn session(&self, store: S) -> UploadManagerSession - where - Error: From, - { - UploadManagerSession::new(self.clone(), store) - } - - // Find image variants and remove them from the DB and the disk - #[instrument(skip(self))] - async fn cleanup_files(&self, store: S, filename: FilenameIVec) -> Result<(), Error> - where - Error: From, - { - let filename = filename.inner; - - let filename2 = filename.clone(); - let identifier_tree = self.inner.identifier_tree.clone(); - let identifier = web::block(move || identifier_tree.remove(filename2)).await??; - - let mut errors = Vec::new(); - if let Some(identifier) = identifier { - let identifier = S::Identifier::from_bytes(identifier.to_vec())?; - debug!("Deleting {:?}", identifier); - if let Err(e) = store.remove(&identifier).await { - errors.push(e); - } - } - - let filename2 = filename.clone(); - let fname_tree = self.inner.filename_tree.clone(); - debug!("Deleting filename -> hash mapping"); - web::block(move || fname_tree.remove(filename2)).await??; - - let path_prefix = filename.clone(); - let identifier_tree = self.inner.identifier_tree.clone(); - debug!("Fetching file variants"); - let identifiers = web::block(move || { - identifier_tree - .scan_prefix(path_prefix) - .values() - .collect::, sled::Error>>() - }) - .await??; - - debug!("{} files prepared for deletion", identifiers.len()); - - for id in identifiers { - let identifier = S::Identifier::from_bytes(id.to_vec())?; - - debug!("Deleting {:?}", identifier); - if let Err(e) = store.remove(&identifier).await { - errors.push(e); - } - } - - let path_prefix = filename.clone(); - let identifier_tree = self.inner.identifier_tree.clone(); - debug!("Deleting path info"); - web::block(move || { - for res in identifier_tree.scan_prefix(path_prefix).keys() { - let key = res?; - identifier_tree.remove(key)?; - } - Ok(()) as Result<(), Error> - }) - .await??; - - for error in errors { - error!("Error deleting files, {}", error); - } - Ok(()) - } - - pub(crate) fn variant_key( - &self, - variant_process_path: &std::path::Path, - filename: &str, - ) -> Result, Error> { - let path_string = variant_process_path - .to_str() - .ok_or(UploadError::Path)? 
- .to_string(); - - let vec = format!("{}/{}", filename, path_string).as_bytes().to_vec(); - Ok(vec) - } - - fn details_key(&self, identifier: &I, filename: &str) -> Result, Error> - where - Error: From, - { - let mut vec = filename.as_bytes().to_vec(); - vec.extend(b"/"); - vec.extend(&identifier.to_bytes()?); - - Ok(vec) - } -} - -impl Details { - fn is_motion(&self) -> bool { - self.content_type.type_() == "video" - || self.content_type.type_() == "image" && self.content_type.subtype() == "gif" - } - - #[tracing::instrument("Details from bytes", skip(input))] - pub(crate) async fn from_bytes( - input: web::Bytes, - hint: Option, - ) -> Result { - let details = crate::magick::details_bytes(input, hint).await?; - - Ok(Details::now( - details.width, - details.height, - details.mime_type, - )) - } - - #[tracing::instrument("Details from store")] - pub(crate) async fn from_store( - store: S, - identifier: S::Identifier, - expected_format: Option, - ) -> Result - where - Error: From, - { - let details = crate::magick::details_store(store, identifier, expected_format).await?; - - Ok(Details::now( - details.width, - details.height, - details.mime_type, - )) - } - - fn now(width: usize, height: usize, content_type: mime::Mime) -> Self { - Details { - width, - height, - content_type: Serde::new(content_type), - created_at: time::OffsetDateTime::now_utc(), - } - } - - pub(crate) fn content_type(&self) -> mime::Mime { - (*self.content_type).clone() - } - - pub(crate) fn system_time(&self) -> std::time::SystemTime { - self.created_at.into() - } -} - -impl FilenameIVec { - fn new(inner: sled::IVec) -> Self { - FilenameIVec { inner } - } -} - -fn trans_upload_error( - upload_error: UploadError, -) -> sled::transaction::ConflictableTransactionError { - trans_err(upload_error) -} - -fn trans_utf8_error(e: FromUtf8Error) -> sled::transaction::ConflictableTransactionError { - trans_err(e) -} - -fn trans_err(e: E) -> sled::transaction::ConflictableTransactionError -where - Error: From, -{ - sled::transaction::ConflictableTransactionError::Abort(e.into()) -} - -fn delete_key(alias: &str) -> String { - format!("{}/delete", alias) -} - -impl std::fmt::Debug for UploadManager { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - f.debug_struct("UploadManager").finish() - } -} - -impl std::fmt::Debug for FilenameIVec { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{:?}", String::from_utf8(self.inner.to_vec())) - } -} diff --git a/src/upload_manager/session.rs b/src/upload_manager/session.rs deleted file mode 100644 index 9274697..0000000 --- a/src/upload_manager/session.rs +++ /dev/null @@ -1,414 +0,0 @@ -use crate::{ - error::{Error, UploadError}, - magick::ValidInputType, - migrate::{alias_id_key, alias_key}, - store::Store, - upload_manager::{ - delete_key, - hasher::{Hash, Hasher}, - UploadManager, - }, -}; -use actix_web::web; -use futures_util::stream::{Stream, StreamExt}; -use tracing::{debug, instrument, warn, Span}; -use tracing_futures::Instrument; -use uuid::Uuid; - -pub(crate) struct UploadManagerSession -where - Error: From, -{ - store: S, - manager: UploadManager, - alias: Option, - finished: bool, -} - -impl UploadManagerSession -where - Error: From, -{ - pub(super) fn new(manager: UploadManager, store: S) -> Self { - UploadManagerSession { - store, - manager, - alias: None, - finished: false, - } - } - - pub(crate) fn succeed(mut self) { - self.finished = true; - } - - pub(crate) fn alias(&self) -> Option<&str> { - self.alias.as_deref() - } -} - 
-enum Dup { - Exists, - New, -} - -impl Dup { - fn exists(&self) -> bool { - matches!(self, Dup::Exists) - } -} - -impl Drop for UploadManagerSession -where - Error: From, -{ - fn drop(&mut self) { - if self.finished { - return; - } - - if let Some(alias) = self.alias.take() { - let store = self.store.clone(); - let manager = self.manager.clone(); - let cleanup_span = tracing::info_span!( - parent: None, - "Upload cleanup", - alias = &tracing::field::display(&alias), - ); - cleanup_span.follows_from(Span::current()); - actix_rt::spawn( - async move { - // undo alias -> hash mapping - debug!("Remove alias -> hash mapping"); - if let Ok(Some(hash)) = manager.inner.alias_tree.remove(&alias) { - // undo alias -> id mapping - debug!("Remove alias -> id mapping"); - let key = alias_id_key(&alias); - if let Ok(Some(id)) = manager.inner.alias_tree.remove(&key) { - // undo hash/id -> alias mapping - debug!("Remove hash/id -> alias mapping"); - let id = String::from_utf8_lossy(&id); - let key = alias_key(&hash, &id); - let _ = manager.inner.main_tree.remove(&key); - } - - let _ = manager.check_delete_files(store, hash).await; - } - } - .instrument(cleanup_span), - ); - } - } -} - -impl UploadManagerSession -where - Error: From, -{ - /// Generate a delete token for an alias - #[instrument(skip(self))] - pub(crate) async fn delete_token(&self) -> Result { - let alias = self.alias.clone().ok_or(UploadError::MissingAlias)?; - - debug!("Generating delete token"); - let s: String = Uuid::new_v4().to_string(); - let delete_token = s.clone(); - - debug!("Saving delete token"); - let alias_tree = self.manager.inner.alias_tree.clone(); - let key = delete_key(&alias); - let res = web::block(move || { - alias_tree.compare_and_swap( - key.as_bytes(), - None as Option, - Some(s.as_bytes()), - ) - }) - .await??; - - if let Err(sled::CompareAndSwapError { - current: Some(ivec), - .. 
-        }) = res
-        {
-            let s = String::from_utf8(ivec.to_vec())?;
-
-            debug!("Returning existing delete token, {}", s);
-            return Ok(s);
-        }
-
-        debug!("Returning new delete token, {}", delete_token);
-        Ok(delete_token)
-    }
-
-    /// Upload the file while preserving the filename, optionally validating the uploaded image
-    #[instrument(skip(self, stream))]
-    pub(crate) async fn import(
-        mut self,
-        alias: String,
-        validate: bool,
-        mut stream: impl Stream<Item = Result<web::Bytes, Error>> + Unpin,
-    ) -> Result<Self, Error> {
-        let mut bytes_mut = actix_web::web::BytesMut::new();
-
-        debug!("Reading stream to memory");
-        while let Some(res) = stream.next().await {
-            let bytes = res?;
-            bytes_mut.extend_from_slice(&bytes);
-        }
-
-        debug!("Validating bytes");
-        let (content_type, validated_reader) = crate::validate::validate_image_bytes(
-            bytes_mut.freeze(),
-            self.manager.inner.format,
-            validate,
-        )
-        .await?;
-
-        let mut hasher_reader = Hasher::new(validated_reader, self.manager.inner.hasher.clone());
-
-        let filename = self.next_file(content_type).await?;
-
-        let identifier = self
-            .store
-            .save_async_read(&mut hasher_reader, &filename)
-            .await?;
-        let hash = hasher_reader.finalize_reset().await?;
-
-        debug!("Storing alias");
-        self.alias = Some(alias.clone());
-        self.add_existing_alias(&hash, &alias).await?;
-
-        debug!("Saving file");
-        self.save_upload(&identifier, hash, filename).await?;
-
-        // Return alias to file
-        Ok(self)
-    }
-
-    /// Upload the file, discarding bytes if it's already present, or saving if it's new
-    #[instrument(skip(self, stream))]
-    pub(crate) async fn upload(
-        mut self,
-        mut stream: impl Stream<Item = Result<web::Bytes, Error>> + Unpin,
-    ) -> Result<Self, Error> {
-        let mut bytes_mut = actix_web::web::BytesMut::new();
-
-        debug!("Reading stream to memory");
-        while let Some(res) = stream.next().await {
-            let bytes = res?;
-            bytes_mut.extend_from_slice(&bytes);
-        }
-
-        debug!("Validating bytes");
-        let (input_type, validated_reader) = crate::validate::validate_image_bytes(
-            bytes_mut.freeze(),
-            self.manager.inner.format,
-            true,
-        )
-        .await?;
-
-        let mut hasher_reader = Hasher::new(validated_reader, self.manager.inner.hasher.clone());
-
-        let filename = self.next_file(input_type).await?;
-
-        let identifier = self
-            .store
-            .save_async_read(&mut hasher_reader, &filename)
-            .await?;
-        let hash = hasher_reader.finalize_reset().await?;
-
-        debug!("Adding alias");
-        self.add_alias(&hash, input_type).await?;
-
-        debug!("Saving file");
-        self.save_upload(&identifier, hash, filename).await?;
-
-        // Return alias to file
-        Ok(self)
-    }
-
-    // check duplicates & store image if new
-    async fn save_upload(
-        &self,
-        identifier: &S::Identifier,
-        hash: Hash,
-        filename: String,
-    ) -> Result<(), Error> {
-        let dup = self.check_duplicate(hash, filename.clone()).await?;
-
-        // bail early with alias to existing file if this is a duplicate
-        if dup.exists() {
-            debug!("Duplicate exists, removing file");
-
-            self.store.remove(identifier).await?;
-            return Ok(());
-        }
-
-        self.manager.store_identifier(filename, identifier).await?;
-
-        Ok(())
-    }
-
-    // check for an already-uploaded image with this hash, returning the path to the target file
-    #[instrument(skip(self, hash))]
-    async fn check_duplicate(&self, hash: Hash, filename: String) -> Result<Dup, Error> {
-        let main_tree = self.manager.inner.main_tree.clone();
-
-        let filename2 = filename.clone();
-        let hash2 = hash.as_slice().to_vec();
-        debug!("Inserting filename for hash");
-        let res = web::block(move || {
-            main_tree.compare_and_swap(
-                hash2,
-                None as Option<sled::IVec>,
-                Some(filename2.as_bytes()),
-            )
-        })
-        .await??;
-
-        if let Err(sled::CompareAndSwapError {
-            current: Some(ivec),
-            ..
-        }) = res
-        {
-            let name = String::from_utf8(ivec.to_vec())?;
-            debug!("Filename exists for hash, {}", name);
-            return Ok(Dup::Exists);
-        }
-
-        let fname_tree = self.manager.inner.filename_tree.clone();
-        debug!("Saving filename -> hash relation");
-        web::block(move || fname_tree.insert(filename, hash.into_inner())).await??;
-
-        Ok(Dup::New)
-    }
-
-    // generate a short filename that isn't already in-use
-    #[instrument(skip(self, input_type))]
-    async fn next_file(&self, input_type: ValidInputType) -> Result<String, Error> {
-        loop {
-            debug!("Filename generation loop");
-            let filename = file_name(Uuid::new_v4(), input_type);
-
-            let identifier_tree = self.manager.inner.identifier_tree.clone();
-            let filename2 = filename.clone();
-            let filename_exists = web::block(move || identifier_tree.get(filename2.as_bytes()))
-                .await??
-                .is_some();
-
-            if !filename_exists {
-                return Ok(filename);
-            }
-
-            debug!("Filename exists, trying again");
-        }
-    }
-
-    #[instrument(skip(self, hash, alias))]
-    async fn add_existing_alias(&self, hash: &Hash, alias: &str) -> Result<(), Error> {
-        self.save_alias_hash_mapping(hash, alias).await??;
-
-        self.store_hash_id_alias_mapping(hash, alias).await?;
-
-        Ok(())
-    }
-
-    // Add an alias to an existing file
-    //
-    // This will help if multiple 'users' upload the same file, and one of them wants to delete it
-    #[instrument(skip(self, hash, input_type))]
-    async fn add_alias(&mut self, hash: &Hash, input_type: ValidInputType) -> Result<(), Error> {
-        let alias = self.next_alias(hash, input_type).await?;
-
-        self.store_hash_id_alias_mapping(hash, &alias).await?;
-
-        Ok(())
-    }
-
-    // Add a pre-defined alias to an existing file
-    //
-    // DANGER: this can cause BAD BAD BAD conflicts if the same alias is used for multiple files
-    #[instrument(skip(self, hash))]
-    async fn store_hash_id_alias_mapping(&self, hash: &Hash, alias: &str) -> Result<(), Error> {
-        let alias = alias.to_string();
-        loop {
-            debug!("hash -> alias save loop");
-            let db = self.manager.inner.db.clone();
-            let id = web::block(move || db.generate_id()).await??.to_string();
-
-            let alias_tree = self.manager.inner.alias_tree.clone();
-            let key = alias_id_key(&alias);
-            let id2 = id.clone();
-            debug!("Saving alias -> id mapping");
-            web::block(move || alias_tree.insert(key.as_bytes(), id2.as_bytes())).await??;
-
-            let key = alias_key(hash.as_slice(), &id);
-            let main_tree = self.manager.inner.main_tree.clone();
-            let alias2 = alias.clone();
-            debug!("Saving hash/id -> alias mapping");
-            let res = web::block(move || {
-                main_tree.compare_and_swap(key, None as Option<sled::IVec>, Some(alias2.as_bytes()))
-            })
-            .await??;
-
-            if res.is_ok() {
-                break;
-            }
-
-            debug!("Id exists, trying again");
-        }
-
-        Ok(())
-    }
-
-    // Generate an alias to the file
-    #[instrument(skip(self, hash, input_type))]
-    async fn next_alias(
-        &mut self,
-        hash: &Hash,
-        input_type: ValidInputType,
-    ) -> Result<String, Error> {
-        loop {
-            debug!("Alias gen loop");
-            let alias = file_name(Uuid::new_v4(), input_type);
-            self.alias = Some(alias.clone());
-
-            let res = self.save_alias_hash_mapping(hash, &alias).await?;
-
-            if res.is_ok() {
-                return Ok(alias);
-            }
-            debug!("Alias exists, regenning");
-        }
-    }
-
-    // Save an alias to the database
-    #[instrument(skip(self, hash))]
-    async fn save_alias_hash_mapping(
-        &self,
-        hash: &Hash,
-        alias: &str,
-    ) -> Result<Result<(), Error>, Error> {
-        let tree = self.manager.inner.alias_tree.clone();
-        let vec = hash.as_slice().to_vec();
-        let alias = alias.to_string();
-
-        debug!("Saving alias -> hash mapping");
-        let res = web::block(move || {
-            tree.compare_and_swap(alias.as_bytes(), None as Option<sled::IVec>, Some(vec))
-        })
-        .await??;
-
-        if res.is_err() {
-            warn!("Duplicate alias");
-            return Ok(Err(UploadError::DuplicateAlias.into()));
-        }
-
-        Ok(Ok(()))
-    }
-}
-
-fn file_name(name: Uuid, input_type: ValidInputType) -> String {
-    format!("{}{}", name, input_type.as_ext())
-}
diff --git a/src/validate.rs b/src/validate.rs
index f04c750..0a78159 100644
--- a/src/validate.rs
+++ b/src/validate.rs
@@ -1,5 +1,9 @@
 use crate::{
-    config::Format, either::Either, error::Error, ffmpeg::InputFormat, magick::ValidInputType,
+    config::ImageFormat,
+    either::Either,
+    error::{Error, UploadError},
+    ffmpeg::InputFormat,
+    magick::ValidInputType,
 };
 use actix_web::web::Bytes;
 use tokio::io::AsyncRead;
@@ -35,7 +39,8 @@
 #[instrument(name = "Validate image", skip(bytes))]
 pub(crate) async fn validate_image_bytes(
     bytes: Bytes,
-    prescribed_format: Option<Format>,
+    prescribed_format: Option<ImageFormat>,
+    enable_silent_video: bool,
     validate: bool,
 ) -> Result<(ValidInputType, impl AsyncRead + Unpin), Error> {
     let input_type = crate::magick::input_type_bytes(bytes.clone()).await?;
@@ -45,31 +50,41 @@
     match (prescribed_format, input_type) {
-        (_, ValidInputType::Gif) => Ok((
-            ValidInputType::Mp4,
-            Either::right(Either::left(
-                crate::ffmpeg::to_mp4_bytes(bytes, InputFormat::Gif).await?,
-            )),
-        )),
-        (_, ValidInputType::Mp4) => Ok((
-            ValidInputType::Mp4,
-            Either::right(Either::left(
-                crate::ffmpeg::to_mp4_bytes(bytes, InputFormat::Mp4).await?,
-            )),
-        )),
-        (Some(Format::Jpeg) | None, ValidInputType::Jpeg) => Ok((
+        (_, ValidInputType::Gif) => {
+            if !enable_silent_video {
+                return Err(UploadError::SilentVideoDisabled.into());
+            }
+            Ok((
+                ValidInputType::Mp4,
+                Either::right(Either::left(
+                    crate::ffmpeg::to_mp4_bytes(bytes, InputFormat::Gif).await?,
+                )),
+            ))
+        }
+        (_, ValidInputType::Mp4) => {
+            if !enable_silent_video {
+                return Err(UploadError::SilentVideoDisabled.into());
+            }
+            Ok((
+                ValidInputType::Mp4,
+                Either::right(Either::left(
+                    crate::ffmpeg::to_mp4_bytes(bytes, InputFormat::Mp4).await?,
+                )),
+            ))
+        }
+        (Some(ImageFormat::Jpeg) | None, ValidInputType::Jpeg) => Ok((
             ValidInputType::Jpeg,
             Either::right(Either::right(Either::left(
                 crate::exiftool::clear_metadata_bytes_read(bytes)?,
             ))),
         )),
-        (Some(Format::Png) | None, ValidInputType::Png) => Ok((
+        (Some(ImageFormat::Png) | None, ValidInputType::Png) => Ok((
             ValidInputType::Png,
             Either::right(Either::right(Either::left(
                 crate::exiftool::clear_metadata_bytes_read(bytes)?,
             ))),
         )),
-        (Some(Format::Webp) | None, ValidInputType::Webp) => Ok((
+        (Some(ImageFormat::Webp) | None, ValidInputType::Webp) => Ok((
             ValidInputType::Webp,
             Either::right(Either::right(Either::right(Either::left(
                 crate::magick::clear_metadata_bytes_read(bytes)?,