2
0
Fork 0
mirror of https://git.asonix.dog/asonix/pict-rs synced 2025-01-05 09:11:24 +00:00
pict-rs/src/ingest.rs

366 lines
10 KiB
Rust
Raw Normal View History

2023-11-11 20:22:12 +00:00
use std::{cell::RefCell, rc::Rc, sync::Arc, time::Duration};
use crate::{
bytes_stream::BytesStream,
2023-09-24 17:58:16 +00:00
details::Details,
error::{Error, UploadError},
2023-07-13 22:42:21 +00:00
formats::{InternalFormat, Validations},
future::WithMetrics,
magick::PolicyDir,
2023-08-16 21:09:40 +00:00
repo::{Alias, ArcRepo, DeleteToken, Hash},
state::State,
store::Store,
tmp_file::TmpDir,
};
use actix_web::web::Bytes;
2023-08-23 16:59:42 +00:00
use futures_core::Stream;
2023-09-06 01:45:07 +00:00
use reqwest::Body;
use reqwest_middleware::ClientWithMiddleware;
use streem::IntoStreamer;
use tracing::{Instrument, Span};
mod hasher;
use hasher::Hasher;
2022-04-07 02:40:49 +00:00
#[derive(Debug)]
pub(crate) struct Session {
repo: ArcRepo,
delete_token: DeleteToken,
2023-08-14 19:25:19 +00:00
hash: Option<Hash>,
alias: Option<Alias>,
identifier: Option<Arc<str>>,
}
#[tracing::instrument(skip(stream))]
2023-08-23 16:59:42 +00:00
async fn aggregate<S>(stream: S) -> Result<Bytes, Error>
2022-04-07 02:40:49 +00:00
where
S: Stream<Item = Result<Bytes, Error>>,
2022-04-07 02:40:49 +00:00
{
let mut buf = BytesStream::new();
2022-04-07 02:40:49 +00:00
let stream = std::pin::pin!(stream);
2023-08-23 16:59:42 +00:00
let mut stream = stream.into_streamer();
2022-04-07 02:40:49 +00:00
while let Some(res) = stream.next().await {
tracing::trace!("aggregate: looping");
buf.add_bytes(res?);
2022-04-07 02:40:49 +00:00
}
Ok(buf.into_bytes())
2022-04-07 02:40:49 +00:00
}
2023-11-11 20:22:12 +00:00
async fn process_ingest<S>(
state: &State<S>,
stream: impl Stream<Item = Result<Bytes, Error>> + 'static,
) -> Result<
(
InternalFormat,
Arc<str>,
Details,
Rc<RefCell<hasher::State>>,
),
Error,
>
where
S: Store,
{
let bytes = tokio::time::timeout(Duration::from_secs(60), aggregate(stream))
.await
.map_err(|_| UploadError::AggregateTimeout)??;
let permit = crate::process_semaphore().acquire().await?;
tracing::trace!("Validating bytes");
let (input_type, process_read) = crate::validate::validate_bytes(state, bytes).await?;
let process_read = if let Some(operations) = state.config.media.preprocess_steps() {
if let Some(format) = input_type.processable_format() {
let (_, magick_args) =
crate::processor::build_chain(operations, format.file_extension())?;
let quality = match format {
crate::formats::ProcessableFormat::Image(format) => {
state.config.media.image.quality_for(format)
}
crate::formats::ProcessableFormat::Animation(format) => {
state.config.media.animation.quality_for(format)
}
};
crate::magick::process_image_process_read(
state,
process_read,
magick_args,
format,
format,
quality,
)
2023-09-24 17:42:23 +00:00
.await?
} else {
process_read
}
} else {
process_read
};
let (hash_state, identifier) = process_read
.with_stdout(|stdout| async move {
let hasher_reader = Hasher::new(stdout);
let state = hasher_reader.state();
store
.save_async_read(hasher_reader, input_type.media_type())
.await
.map(move |identifier| (hash_state, identifier))
})
.await??;
let bytes_stream = state.store.to_bytes(&identifier, None, None).await?;
let details = Details::from_bytes(
tmp_dir,
policy_dir,
media.process_timeout,
bytes_stream.into_bytes(),
)
.await?;
2023-09-24 17:58:16 +00:00
drop(permit);
Ok((input_type, identifier, details, hash_state))
2023-11-11 20:22:12 +00:00
}
async fn dummy_ingest<S>(
state: &State<S>,
2023-11-11 20:22:12 +00:00
stream: impl Stream<Item = Result<Bytes, Error>> + 'static,
) -> Result<
(
InternalFormat,
Arc<str>,
Details,
Rc<RefCell<hasher::State>>,
),
Error,
>
2023-11-11 20:22:12 +00:00
where
S: Store,
{
let stream = crate::stream::map(stream, |res| match res {
Ok(bytes) => Ok(bytes),
Err(e) => Err(std::io::Error::new(std::io::ErrorKind::Other, e)),
});
let reader = Box::pin(tokio_util::io::StreamReader::new(stream));
let hasher_reader = Hasher::new(reader);
let state = hasher_reader.state();
let input_type = InternalFormat::Image(crate::formats::ImageFormat::Png);
let identifier = state
.store
2023-11-11 20:22:12 +00:00
.save_async_read(hasher_reader, input_type.media_type())
.await?;
let details = Details::danger_dummy(input_type);
Ok((input_type, identifier, details, state))
}
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(skip(state, stream))]
2023-11-11 20:22:12 +00:00
pub(crate) async fn ingest<S>(
state: &State<S>,
2023-11-11 20:22:12 +00:00
stream: impl Stream<Item = Result<Bytes, Error>> + 'static,
declared_alias: Option<Alias>,
) -> Result<Session, Error>
where
S: Store,
{
let (input_type, identifier, details, hash_state) = if state.config.server.danger_dummy_mode {
dummy_ingest(state, stream).await?
2023-11-11 20:22:12 +00:00
} else {
process_ingest(state, stream).await?
2023-11-11 20:22:12 +00:00
};
let mut session = Session {
repo: state.repo.clone(),
delete_token: DeleteToken::generate(),
hash: None,
alias: None,
identifier: Some(identifier.clone()),
};
if let Some(endpoint) = &state.config.media.external_validation {
let stream = store.to_stream(&identifier, None, None).await?;
2023-09-06 01:45:07 +00:00
let response = state
.client
2023-09-06 01:45:07 +00:00
.post(endpoint.as_str())
2023-11-11 20:22:12 +00:00
.timeout(Duration::from_secs(
state.config.media.external_validation_timeout,
2023-11-11 20:22:12 +00:00
))
2023-09-06 01:45:07 +00:00
.header("Content-Type", input_type.media_type().as_ref())
.body(Body::wrap_stream(crate::stream::make_send(stream)))
2023-09-06 01:45:07 +00:00
.send()
.instrument(tracing::info_span!("external-validation"))
.with_metrics("pict-rs.ingest.external-validation")
2023-09-06 01:45:07 +00:00
.await?;
if !response.status().is_success() {
return Err(UploadError::FailedExternalValidation.into());
}
}
let (hash, size) = hash_state.borrow_mut().finalize_reset();
2023-08-14 19:25:19 +00:00
let hash = Hash::new(hash, size, input_type);
save_upload(&mut session, state, hash.clone(), &identifier).await?;
state.repo.relate_details(&identifier, &details).await?;
2023-09-24 17:58:16 +00:00
if let Some(alias) = declared_alias {
2023-08-14 19:25:19 +00:00
session.add_existing_alias(hash, alias).await?
} else {
2023-08-14 19:25:19 +00:00
session.create_alias(hash, input_type).await?
};
Ok(session)
}
#[tracing::instrument(level = "trace", skip_all)]
async fn save_upload<S>(
session: &mut Session,
state: &State<S>,
2023-08-14 19:25:19 +00:00
hash: Hash,
identifier: &Arc<str>,
) -> Result<(), Error>
where
S: Store,
{
if state
.repo
.create_hash(hash.clone(), identifier)
.await?
.is_err()
{
// duplicate upload
state.store.remove(identifier).await?;
session.identifier.take();
return Ok(());
}
// Set hash after upload uniquness check so we don't clean existing files on failure
2023-08-14 19:25:19 +00:00
session.hash = Some(hash);
Ok(())
}
impl Session {
pub(crate) fn disarm(mut self) -> DeleteToken {
let _ = self.hash.take();
let _ = self.alias.take();
let _ = self.identifier.take();
self.delete_token.clone()
}
pub(crate) fn alias(&self) -> Option<&Alias> {
self.alias.as_ref()
}
pub(crate) fn delete_token(&self) -> &DeleteToken {
&self.delete_token
}
#[tracing::instrument(skip(self, hash))]
2023-08-14 19:25:19 +00:00
async fn add_existing_alias(&mut self, hash: Hash, alias: Alias) -> Result<(), Error> {
2023-08-16 21:09:40 +00:00
self.repo
.create_alias(&alias, &self.delete_token, hash)
.await?
.map_err(|_| UploadError::DuplicateAlias)?;
self.alias = Some(alias.clone());
Ok(())
}
#[tracing::instrument(level = "debug", skip(self, hash))]
2023-08-14 19:25:19 +00:00
async fn create_alias(&mut self, hash: Hash, input_type: InternalFormat) -> Result<(), Error> {
loop {
tracing::trace!("create_alias: looping");
let alias = Alias::generate(input_type.file_extension().to_string());
2023-08-16 21:09:40 +00:00
if self
.repo
.create_alias(&alias, &self.delete_token, hash.clone())
.await?
.is_ok()
{
self.alias = Some(alias.clone());
return Ok(());
}
}
}
}
impl Drop for Session {
fn drop(&mut self) {
2023-07-23 02:11:28 +00:00
let any_items = self.hash.is_some() || self.alias.is_some() || self.identifier.is_some();
metrics::counter!("pict-rs.ingest.end", "completed" => (!any_items).to_string())
.increment(1);
2023-07-22 21:47:59 +00:00
2023-07-23 02:11:28 +00:00
if self.hash.is_some() || self.alias.is_some() | self.identifier.is_some() {
let cleanup_parent_span = tracing::info_span!(parent: None, "Dropped session cleanup");
cleanup_parent_span.follows_from(Span::current());
if let Some(hash) = self.hash.take() {
let repo = self.repo.clone();
let cleanup_span = tracing::info_span!(parent: &cleanup_parent_span, "Session cleanup hash", hash = ?hash);
crate::sync::spawn(
"session-cleanup-hash",
async move {
let _ = crate::queue::cleanup_hash(&repo, hash).await;
}
.instrument(cleanup_span),
);
}
if let Some(alias) = self.alias.take() {
let repo = self.repo.clone();
let token = self.delete_token.clone();
let cleanup_span = tracing::info_span!(parent: &cleanup_parent_span, "Session cleanup alias", alias = ?alias);
crate::sync::spawn(
"session-cleanup-alias",
async move {
let _ = crate::queue::cleanup_alias(&repo, alias, token).await;
}
.instrument(cleanup_span),
);
}
if let Some(identifier) = self.identifier.take() {
let repo = self.repo.clone();
let cleanup_span = tracing::info_span!(parent: &cleanup_parent_span, "Session cleanup identifier", identifier = ?identifier);
crate::sync::spawn(
"session-cleanup-identifier",
async move {
let _ = crate::queue::cleanup_identifier(&repo, &identifier).await;
}
.instrument(cleanup_span),
);
}
}
}
}