From c0f6b1e988544f80901ceef404b9be59f615fd65 Mon Sep 17 00:00:00 2001
From: asonix
Date: Tue, 29 Aug 2023 13:27:18 -0500
Subject: [PATCH] Enable searching hashes by date

---
 README.md        | 37 ++++++++++++++++++++++++++++++++
 src/lib.rs       | 14 ++++++++++--
 src/repo.rs      | 14 ++++++++++++
 src/repo/sled.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index e8aed6e..4ed91d0 100644
--- a/README.md
+++ b/README.md
@@ -586,6 +586,43 @@ A secure API key can be generated by any password generator.
     $ cp -r exports/2023-07-08T22:26:21.194126713Z sled-repo
     ```
 4. Starting pict-rs
+- `GET /internal/hashes?{query}` Get a page of hashes ordered from newest to oldest based on the
+    provided query. On success, it returns the following json:
+    ```json
+    {
+        "msg": "ok",
+        "page": {
+            "limit": 20,
+            "current": "some-long-slug-string",
+            "next": "some-long-slug-string",
+            "prev": "some-long-slug-string",
+            "hashes": [{
+                "hex": "some-long-hex-encoded-hash",
+                "aliases": [
+                    "file-alias.png",
+                    "another-alias.png"
+                ],
+                "details": {
+                    "width": 1920,
+                    "height": 1080,
+                    "frames": 30,
+                    "content_type": "video/mp4",
+                    "created_at": "2022-04-08T18:33:42.957791698Z"
+                }
+            }]
+        }
+    }
+    ```
+    Note that some fields in this response are optional (including `next`, `prev`, `current`,
+    `details`, and `frames`).
+
+    Available query options:
+    - empty: fetches the first page of results (i.e. the newest media)
+    - `?slug={slug}` fetches a specific page of results. The `slug` value comes from the
+        `current`, `next`, or `prev` fields in the page json
+    - `?timestamp={timestamp}` fetches results older than the specified timestamp, for seeking
+        directly into older data. The `timestamp` should be formatted according to RFC 3339
+    - `?limit={limit}` specifies how many results to return per page
 
 Additionally, all endpoints support setting deadlines, after which the request will cease
 processing. To enable deadlines for your requests, you can set the `X-Request-Deadline` header to an
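For illustration, querying the endpoint documented above might look like the following. This is a hypothetical session rather than part of the patch: it assumes a pict-rs instance listening on `localhost:8080`, with the internal API key supplied via the `X-Api-Token` header.

```
$ # First page (the newest media)
$ curl -H "X-Api-Token: $API_KEY" 'http://localhost:8080/internal/hashes'
$ # Results at or older than an RFC 3339 timestamp, ten per page
$ curl -H "X-Api-Token: $API_KEY" \
      'http://localhost:8080/internal/hashes?timestamp=2023-08-01T00:00:00Z&limit=10'
$ # Jump to a page using a slug from a previous response's `next` or `prev` field
$ curl -H "X-Api-Token: $API_KEY" 'http://localhost:8080/internal/hashes?slug=some-long-slug-string'
```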
diff --git a/src/lib.rs b/src/lib.rs
index 2c5abec..1454e42 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,6 +34,7 @@ use actix_web::{
     http::header::{CacheControl, CacheDirective, LastModified, Range, ACCEPT_RANGES},
     web, App, HttpRequest, HttpResponse, HttpResponseBuilder, HttpServer,
 };
+use details::HumanDate;
 use futures_core::Stream;
 use metrics_exporter_prometheus::PrometheusBuilder;
 use middleware::Metrics;
@@ -578,6 +579,7 @@ async fn do_download_backgrounded(
 #[derive(Debug, serde::Deserialize)]
 struct PageQuery {
     slug: Option<String>,
+    timestamp: Option<HumanDate>,
     limit: Option<usize>,
 }
 
@@ -608,11 +610,19 @@ struct HashJson {
 #[tracing::instrument(name = "Hash Page", skip(repo))]
 async fn page(
     repo: web::Data<ArcRepo>,
-    web::Query(PageQuery { slug, limit }): web::Query<PageQuery>,
+    web::Query(PageQuery {
+        slug,
+        timestamp,
+        limit,
+    }): web::Query<PageQuery>,
 ) -> Result<HttpResponse, Error> {
     let limit = limit.unwrap_or(20);
 
-    let page = repo.hash_page(slug, limit).await?;
+    let page = if let Some(timestamp) = timestamp {
+        repo.hash_page_by_date(timestamp.timestamp, limit).await?
+    } else {
+        repo.hash_page(slug, limit).await?
+    };
 
     let mut hashes = Vec::with_capacity(page.hashes.len());
 
diff --git a/src/repo.rs b/src/repo.rs
index 3bbb085..a025c04 100644
--- a/src/repo.rs
+++ b/src/repo.rs
@@ -556,6 +556,12 @@ pub(crate) trait HashRepo: BaseRepo {
         self.hashes_ordered(bound, limit).await
     }
 
+    async fn hash_page_by_date(
+        &self,
+        date: time::OffsetDateTime,
+        limit: usize,
+    ) -> Result<HashPage, RepoError>;
+
     async fn bound(&self, hash: Hash) -> Result<Option<OrderedHash>, RepoError>;
 
     async fn hashes_ordered(
@@ -637,6 +643,14 @@ where
         T::hashes_ordered(self, bound, limit).await
     }
 
+    async fn hash_page_by_date(
+        &self,
+        date: time::OffsetDateTime,
+        limit: usize,
+    ) -> Result<HashPage, RepoError> {
+        T::hash_page_by_date(self, date, limit).await
+    }
+
     async fn create_hash_with_timestamp(
         &self,
         hash: Hash,
diff --git a/src/repo/sled.rs b/src/repo/sled.rs
index 10d488b..1bad18b 100644
--- a/src/repo/sled.rs
+++ b/src/repo/sled.rs
@@ -1113,6 +1113,62 @@ impl HashRepo for SledRepo {
         .map_err(RepoError::from)
     }
 
+    async fn hash_page_by_date(
+        &self,
+        date: time::OffsetDateTime,
+        limit: usize,
+    ) -> Result<HashPage, RepoError> {
+        let date_nanos = date.unix_timestamp_nanos().to_be_bytes();
+
+        let page_iter = self.hashes_inverse.range(..=date_nanos.clone());
+        let prev_iter = Some(self.hashes_inverse.range(date_nanos..));
+
+        actix_rt::task::spawn_blocking(move || {
+            let page_iter = page_iter
+                .keys()
+                .rev()
+                .filter_map(|res| res.map(parse_ordered_hash).transpose())
+                .take(limit + 1);
+
+            let prev = prev_iter
+                .and_then(|prev_iter| {
+                    prev_iter
+                        .keys()
+                        .filter_map(|res| res.map(parse_ordered_hash).transpose())
+                        .take(limit + 1)
+                        .last()
+                })
+                .transpose()?;
+
+            let mut hashes = page_iter.collect::<Result<Vec<_>, _>>()?;
+
+            let next = if hashes.len() > limit {
+                hashes.pop()
+            } else {
+                None
+            };
+
+            let prev = if prev.as_ref() == hashes.get(0) {
+                None
+            } else {
+                prev
+            };
+
+            Ok(HashPage {
+                limit,
+                prev: prev.map(|OrderedHash { hash, .. }| hash),
+                next: next.map(|OrderedHash { hash, .. }| hash),
+                hashes: hashes
+                    .into_iter()
+                    .map(|OrderedHash { hash, .. }| hash)
+                    .collect(),
+            }) as Result<HashPage, SledError>
+        })
+        .await
+        .map_err(|_| RepoError::Canceled)?
+        .map_err(RepoError::from)
+    }
+
     #[tracing::instrument(level = "trace", skip(self))]
     async fn create_hash_with_timestamp(
         &self,
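The sled implementation above is keyset pagination over the `hashes_inverse` tree: it fetches `limit + 1` entries at or below the requested timestamp (the extra entry, if present, becomes the `next` cursor) and probes `limit + 1` entries in the newer direction to find the `prev` cursor. The sketch below models the same scheme with a plain `BTreeMap` standing in for the sled tree; `page_by_date` and the numeric keys are illustrative only, not pict-rs APIs.

```rust
// Minimal, self-contained sketch of the cursor scheme used by
// `hash_page_by_date`, with a BTreeMap<i128, &str> standing in for the sled
// `hashes_inverse` tree (keys: creation timestamps in nanoseconds, values:
// hashes). All names here are hypothetical.
use std::collections::BTreeMap;

fn page_by_date<'a>(
    tree: &BTreeMap<i128, &'a str>,
    date_nanos: i128,
    limit: usize,
) -> (Vec<&'a str>, Option<i128>, Option<i128>) {
    // Walk newest-to-oldest from the requested timestamp, taking one extra
    // entry so we can tell whether an older page exists.
    let mut hashes: Vec<(i128, &str)> = tree
        .range(..=date_nanos)
        .rev()
        .take(limit + 1)
        .map(|(k, v)| (*k, *v))
        .collect();

    // The extra entry, if any, becomes the `next` (older) cursor.
    let next = if hashes.len() > limit {
        hashes.pop().map(|(k, _)| k)
    } else {
        None
    };

    // Probe up to limit + 1 entries in the newer direction to find the start
    // of the previous page; drop it if it is just the start of this page.
    let prev = tree
        .range(date_nanos..)
        .take(limit + 1)
        .last()
        .map(|(k, _)| *k)
        .filter(|k| hashes.first().map(|(h, _)| h) != Some(k));

    (hashes.into_iter().map(|(_, v)| v).collect(), prev, next)
}

fn main() {
    // Five entries created at t = 0, 100, ..., 400 nanoseconds.
    let tree: BTreeMap<i128, &str> =
        [(0, "h0"), (100, "h1"), (200, "h2"), (300, "h3"), (400, "h4")].into();

    // Two entries per page, starting at or below t = 350.
    let (page, prev, next) = page_by_date(&tree, 350, 2);
    assert_eq!(page, vec!["h3", "h2"]); // newest to oldest
    assert_eq!(next, Some(100)); // cursor for the older page
    assert_eq!(prev, Some(400)); // cursor back toward newer entries
}
```

Returning cursors rather than page offsets keeps pagination stable while new uploads arrive: a cursor names a fixed position in the key ordering, so later pages do not shift when newer entries are inserted ahead of them.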