2023-09-21 00:29:19 +00:00
|
|
|
use std::{sync::Arc, time::Duration};
|
2023-09-02 23:30:45 +00:00
|
|
|
|
2022-04-02 21:44:03 +00:00
|
|
|
use crate::{
|
2022-10-01 17:06:33 +00:00
|
|
|
bytes_stream::BytesStream,
|
2022-09-25 20:17:33 +00:00
|
|
|
either::Either,
|
2022-04-02 21:44:03 +00:00
|
|
|
error::{Error, UploadError},
|
2023-07-13 22:42:21 +00:00
|
|
|
formats::{InternalFormat, Validations},
|
2023-09-21 00:29:19 +00:00
|
|
|
future::WithMetrics,
|
2023-08-16 21:09:40 +00:00
|
|
|
repo::{Alias, ArcRepo, DeleteToken, Hash},
|
2022-04-02 21:44:03 +00:00
|
|
|
store::Store,
|
|
|
|
};
|
2022-10-01 17:06:33 +00:00
|
|
|
use actix_web::web::Bytes;
|
2023-08-23 16:59:42 +00:00
|
|
|
use futures_core::Stream;
|
2023-09-06 01:45:07 +00:00
|
|
|
use reqwest::Body;
|
|
|
|
use reqwest_middleware::ClientWithMiddleware;
|
2023-09-10 22:55:13 +00:00
|
|
|
use streem::IntoStreamer;
|
2022-04-08 17:51:33 +00:00
|
|
|
use tracing::{Instrument, Span};
|
2022-04-02 21:44:03 +00:00
|
|
|
|
|
|
|
mod hasher;
|
|
|
|
use hasher::Hasher;
|
|
|
|
|
2022-04-07 02:40:49 +00:00
|
|
|
#[derive(Debug)]
|
2023-09-02 23:30:45 +00:00
|
|
|
pub(crate) struct Session {
|
2023-08-16 00:19:03 +00:00
|
|
|
repo: ArcRepo,
|
2023-07-26 01:08:18 +00:00
|
|
|
delete_token: DeleteToken,
|
2023-08-14 19:25:19 +00:00
|
|
|
hash: Option<Hash>,
|
2022-04-02 21:44:03 +00:00
|
|
|
alias: Option<Alias>,
|
2023-09-02 23:30:45 +00:00
|
|
|
identifier: Option<Arc<str>>,
|
2022-04-02 21:44:03 +00:00
|
|
|
}
|
|
|
|
|
2022-10-02 02:17:18 +00:00
|
|
|
#[tracing::instrument(skip(stream))]
|
2023-08-23 16:59:42 +00:00
|
|
|
async fn aggregate<S>(stream: S) -> Result<Bytes, Error>
|
2022-04-07 02:40:49 +00:00
|
|
|
where
|
2023-09-10 22:55:13 +00:00
|
|
|
S: Stream<Item = Result<Bytes, Error>>,
|
2022-04-07 02:40:49 +00:00
|
|
|
{
|
2022-10-01 17:06:33 +00:00
|
|
|
let mut buf = BytesStream::new();
|
2022-04-07 02:40:49 +00:00
|
|
|
|
2023-09-10 22:55:13 +00:00
|
|
|
let stream = std::pin::pin!(stream);
|
2023-08-23 16:59:42 +00:00
|
|
|
let mut stream = stream.into_streamer();
|
|
|
|
|
2022-04-07 02:40:49 +00:00
|
|
|
while let Some(res) = stream.next().await {
|
2022-10-01 17:06:33 +00:00
|
|
|
buf.add_bytes(res?);
|
2022-04-07 02:40:49 +00:00
|
|
|
}
|
|
|
|
|
2022-10-01 17:06:33 +00:00
|
|
|
Ok(buf.into_bytes())
|
2022-04-07 02:40:49 +00:00
|
|
|
}
|
|
|
|
|
2023-09-06 01:45:07 +00:00
|
|
|
#[tracing::instrument(skip(repo, store, client, stream, media))]
|
2023-08-16 00:19:03 +00:00
|
|
|
pub(crate) async fn ingest<S>(
|
|
|
|
repo: &ArcRepo,
|
2022-04-02 21:44:03 +00:00
|
|
|
store: &S,
|
2023-09-06 01:45:07 +00:00
|
|
|
client: &ClientWithMiddleware,
|
2023-09-10 22:55:13 +00:00
|
|
|
stream: impl Stream<Item = Result<Bytes, Error>> + 'static,
|
2022-04-02 21:44:03 +00:00
|
|
|
declared_alias: Option<Alias>,
|
2023-07-22 17:31:01 +00:00
|
|
|
media: &crate::config::Media,
|
2023-09-02 23:30:45 +00:00
|
|
|
) -> Result<Session, Error>
|
2022-04-02 21:44:03 +00:00
|
|
|
where
|
|
|
|
S: Store,
|
|
|
|
{
|
2022-04-07 17:56:40 +00:00
|
|
|
let permit = crate::PROCESS_SEMAPHORE.acquire().await;
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2022-04-07 02:40:49 +00:00
|
|
|
let bytes = aggregate(stream).await?;
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2023-07-13 22:42:21 +00:00
|
|
|
let prescribed = Validations {
|
|
|
|
image: &media.image,
|
|
|
|
animation: &media.animation,
|
|
|
|
video: &media.video,
|
2023-07-13 03:12:21 +00:00
|
|
|
};
|
|
|
|
|
2022-10-02 03:47:52 +00:00
|
|
|
tracing::trace!("Validating bytes");
|
2023-08-05 17:41:06 +00:00
|
|
|
let (input_type, validated_reader) =
|
|
|
|
crate::validate::validate_bytes(bytes, prescribed, media.process_timeout).await?;
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2023-07-13 22:42:21 +00:00
|
|
|
let processed_reader = if let Some(operations) = media.preprocess_steps() {
|
2023-07-13 03:12:21 +00:00
|
|
|
if let Some(format) = input_type.processable_format() {
|
|
|
|
let (_, magick_args) =
|
|
|
|
crate::processor::build_chain(operations, format.file_extension())?;
|
2022-09-25 20:17:33 +00:00
|
|
|
|
2023-07-18 21:18:01 +00:00
|
|
|
let quality = match format {
|
|
|
|
crate::formats::ProcessableFormat::Image(format) => media.image.quality_for(format),
|
|
|
|
crate::formats::ProcessableFormat::Animation(format) => {
|
|
|
|
media.animation.quality_for(format)
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2023-07-14 00:21:57 +00:00
|
|
|
let processed_reader = crate::magick::process_image_async_read(
|
|
|
|
validated_reader,
|
|
|
|
magick_args,
|
|
|
|
format,
|
|
|
|
format,
|
2023-07-18 21:18:01 +00:00
|
|
|
quality,
|
2023-08-05 17:41:06 +00:00
|
|
|
media.process_timeout,
|
2023-07-14 00:21:57 +00:00
|
|
|
)
|
|
|
|
.await?;
|
2022-09-25 20:17:33 +00:00
|
|
|
|
|
|
|
Either::left(processed_reader)
|
|
|
|
} else {
|
|
|
|
Either::right(validated_reader)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Either::right(validated_reader)
|
|
|
|
};
|
|
|
|
|
2023-08-14 19:25:19 +00:00
|
|
|
let hasher_reader = Hasher::new(processed_reader);
|
|
|
|
let state = hasher_reader.state();
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2023-07-14 19:53:37 +00:00
|
|
|
let identifier = store
|
|
|
|
.save_async_read(hasher_reader, input_type.media_type())
|
|
|
|
.await?;
|
2022-04-02 21:44:03 +00:00
|
|
|
|
|
|
|
drop(permit);
|
|
|
|
|
|
|
|
let mut session = Session {
|
|
|
|
repo: repo.clone(),
|
2023-07-26 01:08:18 +00:00
|
|
|
delete_token: DeleteToken::generate(),
|
2022-04-02 21:44:03 +00:00
|
|
|
hash: None,
|
|
|
|
alias: None,
|
|
|
|
identifier: Some(identifier.clone()),
|
|
|
|
};
|
|
|
|
|
2023-09-06 01:45:07 +00:00
|
|
|
if let Some(endpoint) = &media.external_validation {
|
2023-09-10 22:55:13 +00:00
|
|
|
let stream = store.to_stream(&identifier, None, None).await?;
|
2023-09-06 01:45:07 +00:00
|
|
|
|
|
|
|
let response = client
|
|
|
|
.post(endpoint.as_str())
|
2023-09-21 00:29:19 +00:00
|
|
|
.timeout(Duration::from_secs(media.external_validation_timeout))
|
2023-09-06 01:45:07 +00:00
|
|
|
.header("Content-Type", input_type.media_type().as_ref())
|
2023-09-10 22:55:13 +00:00
|
|
|
.body(Body::wrap_stream(crate::stream::make_send(stream)))
|
2023-09-06 01:45:07 +00:00
|
|
|
.send()
|
|
|
|
.instrument(tracing::info_span!("external-validation"))
|
2023-09-21 00:29:19 +00:00
|
|
|
.with_metrics("pict-rs.ingest.external-validation")
|
2023-09-06 01:45:07 +00:00
|
|
|
.await?;
|
|
|
|
|
|
|
|
if !response.status().is_success() {
|
|
|
|
return Err(UploadError::FailedExternalValidation.into());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-14 19:25:19 +00:00
|
|
|
let (hash, size) = state.borrow_mut().finalize_reset();
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2023-08-14 19:25:19 +00:00
|
|
|
let hash = Hash::new(hash, size, input_type);
|
|
|
|
|
|
|
|
save_upload(&mut session, repo, store, hash.clone(), &identifier).await?;
|
2022-04-02 21:44:03 +00:00
|
|
|
|
|
|
|
if let Some(alias) = declared_alias {
|
2023-08-14 19:25:19 +00:00
|
|
|
session.add_existing_alias(hash, alias).await?
|
2022-04-02 21:44:03 +00:00
|
|
|
} else {
|
2023-08-14 19:25:19 +00:00
|
|
|
session.create_alias(hash, input_type).await?
|
2023-07-26 01:08:18 +00:00
|
|
|
};
|
2022-04-02 21:44:03 +00:00
|
|
|
|
|
|
|
Ok(session)
|
|
|
|
}
|
|
|
|
|
2022-10-02 03:47:52 +00:00
|
|
|
#[tracing::instrument(level = "trace", skip_all)]
|
2023-08-16 00:19:03 +00:00
|
|
|
async fn save_upload<S>(
|
2023-09-02 23:30:45 +00:00
|
|
|
session: &mut Session,
|
2023-08-16 00:19:03 +00:00
|
|
|
repo: &ArcRepo,
|
2022-04-02 21:44:03 +00:00
|
|
|
store: &S,
|
2023-08-14 19:25:19 +00:00
|
|
|
hash: Hash,
|
2023-09-02 23:30:45 +00:00
|
|
|
identifier: &Arc<str>,
|
2022-04-02 21:44:03 +00:00
|
|
|
) -> Result<(), Error>
|
|
|
|
where
|
|
|
|
S: Store,
|
|
|
|
{
|
2023-08-16 21:09:40 +00:00
|
|
|
if repo.create_hash(hash.clone(), identifier).await?.is_err() {
|
2023-06-23 16:39:43 +00:00
|
|
|
// duplicate upload
|
2022-04-02 21:44:03 +00:00
|
|
|
store.remove(identifier).await?;
|
2023-06-23 16:43:15 +00:00
|
|
|
session.identifier.take();
|
2022-04-02 21:44:03 +00:00
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
|
2023-06-23 16:39:43 +00:00
|
|
|
// Set hash after upload uniquness check so we don't clean existing files on failure
|
2023-08-14 19:25:19 +00:00
|
|
|
session.hash = Some(hash);
|
2023-06-23 16:39:43 +00:00
|
|
|
|
2022-04-02 21:44:03 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2023-09-02 23:30:45 +00:00
|
|
|
impl Session {
|
2023-07-26 01:08:18 +00:00
|
|
|
pub(crate) fn disarm(mut self) -> DeleteToken {
|
2022-04-06 02:47:35 +00:00
|
|
|
let _ = self.hash.take();
|
2022-04-02 21:44:03 +00:00
|
|
|
let _ = self.alias.take();
|
|
|
|
let _ = self.identifier.take();
|
2023-07-26 01:08:18 +00:00
|
|
|
|
|
|
|
self.delete_token.clone()
|
2022-04-02 21:44:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn alias(&self) -> Option<&Alias> {
|
|
|
|
self.alias.as_ref()
|
|
|
|
}
|
|
|
|
|
2023-07-26 01:08:18 +00:00
|
|
|
pub(crate) fn delete_token(&self) -> &DeleteToken {
|
|
|
|
&self.delete_token
|
2022-04-02 21:44:03 +00:00
|
|
|
}
|
|
|
|
|
2022-10-02 03:47:52 +00:00
|
|
|
#[tracing::instrument(skip(self, hash))]
|
2023-08-14 19:25:19 +00:00
|
|
|
async fn add_existing_alias(&mut self, hash: Hash, alias: Alias) -> Result<(), Error> {
|
2023-08-16 21:09:40 +00:00
|
|
|
self.repo
|
|
|
|
.create_alias(&alias, &self.delete_token, hash)
|
2022-04-02 21:44:03 +00:00
|
|
|
.await?
|
|
|
|
.map_err(|_| UploadError::DuplicateAlias)?;
|
|
|
|
|
|
|
|
self.alias = Some(alias.clone());
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2022-10-02 03:47:52 +00:00
|
|
|
#[tracing::instrument(level = "debug", skip(self, hash))]
|
2023-08-14 19:25:19 +00:00
|
|
|
async fn create_alias(&mut self, hash: Hash, input_type: InternalFormat) -> Result<(), Error> {
|
2022-04-02 21:44:03 +00:00
|
|
|
loop {
|
2023-07-13 03:12:21 +00:00
|
|
|
let alias = Alias::generate(input_type.file_extension().to_string());
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2023-08-16 21:09:40 +00:00
|
|
|
if self
|
|
|
|
.repo
|
|
|
|
.create_alias(&alias, &self.delete_token, hash.clone())
|
2023-07-26 01:08:18 +00:00
|
|
|
.await?
|
|
|
|
.is_ok()
|
|
|
|
{
|
2022-04-02 21:44:03 +00:00
|
|
|
self.alias = Some(alias.clone());
|
|
|
|
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
|
2022-10-02 03:47:52 +00:00
|
|
|
tracing::trace!("Alias exists, regenerating");
|
2022-04-02 21:44:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-09-02 23:30:45 +00:00
|
|
|
impl Drop for Session {
|
2022-04-02 21:44:03 +00:00
|
|
|
fn drop(&mut self) {
|
2023-07-23 02:11:28 +00:00
|
|
|
let any_items = self.hash.is_some() || self.alias.is_some() || self.identifier.is_some();
|
|
|
|
|
|
|
|
metrics::increment_counter!("pict-rs.ingest.end", "completed" => (!any_items).to_string());
|
2023-07-22 21:47:59 +00:00
|
|
|
|
2023-07-23 02:11:28 +00:00
|
|
|
if self.hash.is_some() || self.alias.is_some() | self.identifier.is_some() {
|
2022-10-02 03:47:52 +00:00
|
|
|
let cleanup_parent_span = tracing::info_span!(parent: None, "Dropped session cleanup");
|
|
|
|
cleanup_parent_span.follows_from(Span::current());
|
|
|
|
|
|
|
|
if let Some(hash) = self.hash.take() {
|
|
|
|
let repo = self.repo.clone();
|
|
|
|
|
2022-12-08 04:43:12 +00:00
|
|
|
let cleanup_span = tracing::info_span!(parent: &cleanup_parent_span, "Session cleanup hash", hash = ?hash);
|
2022-10-02 03:47:52 +00:00
|
|
|
|
2023-09-04 02:30:47 +00:00
|
|
|
crate::sync::spawn(
|
|
|
|
async move {
|
|
|
|
let _ = crate::queue::cleanup_hash(&repo, hash).await;
|
|
|
|
}
|
|
|
|
.instrument(cleanup_span),
|
|
|
|
);
|
2022-10-02 03:47:52 +00:00
|
|
|
}
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2022-10-02 03:47:52 +00:00
|
|
|
if let Some(alias) = self.alias.take() {
|
|
|
|
let repo = self.repo.clone();
|
2023-07-26 01:08:18 +00:00
|
|
|
let token = self.delete_token.clone();
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2022-12-08 04:43:12 +00:00
|
|
|
let cleanup_span = tracing::info_span!(parent: &cleanup_parent_span, "Session cleanup alias", alias = ?alias);
|
2022-04-08 17:51:33 +00:00
|
|
|
|
2023-09-04 02:30:47 +00:00
|
|
|
crate::sync::spawn(
|
|
|
|
async move {
|
|
|
|
let _ = crate::queue::cleanup_alias(&repo, alias, token).await;
|
|
|
|
}
|
|
|
|
.instrument(cleanup_span),
|
|
|
|
);
|
2022-10-02 03:47:52 +00:00
|
|
|
}
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2022-10-02 03:47:52 +00:00
|
|
|
if let Some(identifier) = self.identifier.take() {
|
|
|
|
let repo = self.repo.clone();
|
2022-04-02 21:44:03 +00:00
|
|
|
|
2022-12-08 04:43:12 +00:00
|
|
|
let cleanup_span = tracing::info_span!(parent: &cleanup_parent_span, "Session cleanup identifier", identifier = ?identifier);
|
2022-04-08 17:51:33 +00:00
|
|
|
|
2023-09-04 02:30:47 +00:00
|
|
|
crate::sync::spawn(
|
|
|
|
async move {
|
|
|
|
let _ = crate::queue::cleanup_identifier(&repo, &identifier).await;
|
|
|
|
}
|
|
|
|
.instrument(cleanup_span),
|
|
|
|
);
|
2022-10-02 03:47:52 +00:00
|
|
|
}
|
2022-04-02 21:44:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|