
Upgrade Concurrency: Update docs, configs, limit maximum details operations

asonix 2023-10-03 17:04:40 -05:00
parent f223d477c7
commit d28c1ac628
10 changed files with 107 additions and 41 deletions

View file

@@ -17,11 +17,12 @@ _a simple image hosting service_
 1. [Backups](#backups)
 2. [0.4 to 0.5 Migration Guide](#0-4-to-0-5-migration-guide)
     1. [Overview](#overview)
-    2. [Configuration Updates](#configuration-updates)
+    2. [Upgrade Configuration](#upgrade-configuration)
+    3. [Configuration Updates](#configuration-updates)
         1. [Image Changes](#image-changes)
         2. [Animation Changes](#animation-changes)
         3. [Video Changes](#video-changes)
-    3. [Upgrading Directly to Postgres](#upgrading-directly-to-postgres)
+    4. [Upgrading Directly to Postgres](#upgrading-directly-to-postgres)
 3. [Filesystem to Object Storage Migration](#filesystem-to-object-storage-migration)
     1. [Troubleshooting](#migration-troubleshooting)
 4. [Sled to Postgres Migration](#sled-to-postgres-migration)
@@ -645,6 +646,30 @@ uploaded before this was standard may not have ever had their details records generated
 _This upgrade must be performed while pict-rs is offline._
 
+#### Upgrade Configuration
+
+Because upgrades may take so long, there is a new configuration option introduced to attempt to
+improve its speed.
+```toml
+[upgrade]
+concurrency = 32
+```
+or
+```
+PICTRS__UPGRADE__CONCURRENCY=32
+```
+or
+```bash
+$ pict-rs run --upgrade-concurrency 32
+```
+
+This value dictates how many hashes pict-rs will attempt to migrate at the same time. Since this
+value will increase the number of concurrent connections to the Repo and the Store, as well as
+potentially increase CPU and memory use, it should be considered carefully before increasing.
+
+For large-scale deployments, it is likely this value should be bumped to 128, 256, or even 512. The
+default value is 32.
+
 #### Configuration Updates
 
 Previously, pict-rs only had two categories for files: images and videos. pict-rs 0.5 adds a third
 category: animation. With the new explicit support for animated file types, some configuration

View file

@@ -6,6 +6,9 @@ max_file_count = 1
 [client]
 timeout = 30
 
+[upgrade]
+concurrency = 32
+
 [tracing.logging]
 format = "normal"
 targets = "info"

View file

@@ -27,6 +27,15 @@ max_file_count = 1
 # distinct from the object storage client timeout, which can be configured separately
 timeout = 30
 
+[upgrade]
+## Optional: How many hashes will be migrated from the previous version of pict-rs at the same time
+# environment variable: PICTRS__UPGRADE__CONCURRENCY
+# default: 32
+#
+# Increasing this value will increase the number of concurrent connections to the Repo, and to the
+# Store, so make sure your deployment can handle more throughput before tweaking this.
+concurrency = 32
+
 ## Logging configuration
 [tracing.logging]
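
For reference, a hypothetical sketch of how an `[upgrade]` table like this could be modeled with serde; the real pict-rs config types are not shown in this commit and may differ:

```rust
use serde::Deserialize;

// Hypothetical config struct; names are illustrative only.
#[derive(Debug, Deserialize)]
struct Upgrade {
    // Falls back to the documented default when the key is absent.
    #[serde(default = "default_concurrency")]
    concurrency: usize,
}

fn default_concurrency() -> usize {
    32 // matches the documented default
}
```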

View file

@ -1,15 +1,10 @@
use std::sync::Arc;
use crate::{ use crate::{
bytes_stream::BytesStream,
discover::Discovery, discover::Discovery,
error::Error, error::Error,
formats::{InternalFormat, InternalVideoFormat}, formats::{InternalFormat, InternalVideoFormat},
serde_str::Serde, serde_str::Serde,
store::Store,
}; };
use actix_web::web; use actix_web::web;
use streem::IntoStreamer;
use time::{format_description::well_known::Rfc3339, OffsetDateTime}; use time::{format_description::well_known::Rfc3339, OffsetDateTime};
#[derive(Copy, Clone, Debug, serde::Deserialize, serde::Serialize)] #[derive(Copy, Clone, Debug, serde::Deserialize, serde::Serialize)]
@ -83,6 +78,7 @@ impl Details {
self.inner.created_at.timestamp self.inner.created_at.timestamp
} }
#[tracing::instrument(level = "DEBUG")]
pub(crate) async fn from_bytes(timeout: u64, input: web::Bytes) -> Result<Self, Error> { pub(crate) async fn from_bytes(timeout: u64, input: web::Bytes) -> Result<Self, Error> {
let Discovery { let Discovery {
input, input,
@ -99,28 +95,6 @@ impl Details {
)) ))
} }
#[tracing::instrument(level = "DEBUG")]
pub(crate) async fn from_store<S: Store>(
store: &S,
identifier: &Arc<str>,
timeout: u64,
) -> Result<Self, Error> {
let mut buf = BytesStream::new();
let mut stream = store
.to_stream(identifier, None, None)
.await?
.into_streamer();
while let Some(res) = stream.next().await {
buf.add_bytes(res?);
}
let bytes = buf.into_bytes();
Self::from_bytes(timeout, bytes).await
}
pub(crate) fn internal_format(&self) -> InternalFormat { pub(crate) fn internal_format(&self) -> InternalFormat {
self.inner.format self.inner.format
} }

View file

@@ -99,7 +99,9 @@ async fn process<S: Store + 'static>(
     let input_details = if let Some(details) = repo.details(&identifier).await? {
         details
     } else {
-        let details = Details::from_store(store, &identifier, media.process_timeout).await?;
+        let bytes_stream = store.to_bytes(&identifier, None, None).await?;
+
+        let details = Details::from_bytes(media.process_timeout, bytes_stream.into_bytes()).await?;
 
         repo.relate_details(&identifier, &details).await?;

View file

@@ -106,7 +106,8 @@ where
         .save_async_read(hasher_reader, input_type.media_type())
         .await?;
 
-    let details = Details::from_store(store, &identifier, media.process_timeout).await?;
+    let bytes_stream = store.to_bytes(&identifier, None, None).await?;
+    let details = Details::from_bytes(media.process_timeout, bytes_stream.into_bytes()).await?;
 
     drop(permit);

View file

@@ -122,8 +122,9 @@ async fn ensure_details<S: Store + 'static>(
     }
 
     tracing::debug!("generating new details from {:?}", identifier);
+    let bytes_stream = store.to_bytes(&identifier, None, None).await?;
     let new_details =
-        Details::from_store(store, &identifier, config.media.process_timeout).await?;
+        Details::from_bytes(config.media.process_timeout, bytes_stream.into_bytes()).await?;
     tracing::debug!("storing details for {:?}", identifier);
     repo.relate_details(&identifier, &new_details).await?;
     tracing::debug!("stored");
@@ -897,8 +898,10 @@ async fn process<S: Store + 'static>(
     }
 
     tracing::debug!("generating new details from {:?}", identifier);
+    let bytes_stream = store.to_bytes(&identifier, None, None).await?;
     let new_details =
-        Details::from_store(&store, &identifier, config.media.process_timeout).await?;
+        Details::from_bytes(config.media.process_timeout, bytes_stream.into_bytes())
+            .await?;
     tracing::debug!("storing details for {:?}", identifier);
     repo.relate_details(&identifier, &new_details).await?;
     tracing::debug!("stored");
@@ -1015,8 +1018,10 @@ async fn process_head<S: Store + 'static>(
     }
 
     tracing::debug!("generating new details from {:?}", identifier);
+    let bytes_stream = store.to_bytes(&identifier, None, None).await?;
     let new_details =
-        Details::from_store(&store, &identifier, config.media.process_timeout).await?;
+        Details::from_bytes(config.media.process_timeout, bytes_stream.into_bytes())
+            .await?;
     tracing::debug!("storing details for {:?}", identifier);
     repo.relate_details(&identifier, &new_details).await?;
     tracing::debug!("stored");

View file

@@ -407,7 +407,12 @@ where
     let details = if let Some(details) = details_opt {
         details
     } else {
-        let new_details = Details::from_store(from, identifier, timeout)
+        let bytes_stream = from
+            .to_bytes(&identifier, None, None)
+            .await
+            .map_err(From::from)
+            .map_err(MigrateError::Details)?;
+        let new_details = Details::from_bytes(timeout, bytes_stream.into_bytes())
             .await
             .map_err(MigrateError::Details)?;
 
         repo.relate_details(identifier, &new_details)

View file

@ -1,7 +1,7 @@
use std::sync::Arc; use std::sync::{Arc, OnceLock};
use streem::IntoStreamer; use streem::IntoStreamer;
use tokio::task::JoinSet; use tokio::{sync::Semaphore, task::JoinSet};
use crate::{ use crate::{
config::Configuration, config::Configuration,
@ -341,6 +341,18 @@ async fn set_details<S: Store>(
} }
} }
static DETAILS_SEMAPHORE: OnceLock<Semaphore> = OnceLock::new();
fn details_semaphore() -> &'static Semaphore {
DETAILS_SEMAPHORE.get_or_init(|| {
let parallelism = std::thread::available_parallelism()
.map(usize::from)
.unwrap_or(1);
crate::sync::bare_semaphore(parallelism * 2)
})
}
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn fetch_or_generate_details<S: Store>( async fn fetch_or_generate_details<S: Store>(
old_repo: &OldSledRepo, old_repo: &OldSledRepo,
@ -353,8 +365,13 @@ async fn fetch_or_generate_details<S: Store>(
if let Some(details) = details_opt { if let Some(details) = details_opt {
Ok(details) Ok(details)
} else { } else {
Details::from_store(store, identifier, config.media.process_timeout) let bytes_stream = store.to_bytes(identifier, None, None).await?;
.await let bytes = bytes_stream.into_bytes();
.map_err(From::from)
let guard = details_semaphore().acquire().await?;
let details = Details::from_bytes(config.media.process_timeout, bytes).await?;
drop(guard);
Ok(details)
} }
} }
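
The `details_semaphore` above is what actually limits the maximum number of concurrent details operations: no matter how high `upgrade.concurrency` is set, at most `available_parallelism * 2` details generations run at once. A self-contained sketch of the same pattern, assuming `crate::sync::bare_semaphore` is essentially `Semaphore::new`:

```rust
use std::sync::OnceLock;
use tokio::sync::Semaphore;

static DETAILS_SEMAPHORE: OnceLock<Semaphore> = OnceLock::new();

fn details_semaphore() -> &'static Semaphore {
    // Lazily initialized once per process; every caller shares the permits.
    DETAILS_SEMAPHORE.get_or_init(|| {
        let parallelism = std::thread::available_parallelism()
            .map(usize::from)
            .unwrap_or(1);
        Semaphore::new(parallelism * 2)
    })
}

#[tokio::main]
async fn main() {
    // Only `parallelism * 2` tasks can hold a permit at once; the rest
    // wait here instead of piling more work onto the machine.
    let _permit = details_semaphore().acquire().await.unwrap();
    // ...CPU-heavy details generation would run while the permit is held...
}
```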

View file

@ -1,9 +1,10 @@
use actix_web::web::Bytes; use actix_web::web::Bytes;
use futures_core::Stream; use futures_core::Stream;
use std::{fmt::Debug, sync::Arc}; use std::{fmt::Debug, sync::Arc};
use streem::IntoStreamer;
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use crate::{error_code::ErrorCode, stream::LocalBoxStream}; use crate::{bytes_stream::BytesStream, error_code::ErrorCode, stream::LocalBoxStream};
pub(crate) mod file_store; pub(crate) mod file_store;
pub(crate) mod object_store; pub(crate) mod object_store;
@ -22,6 +23,9 @@ pub(crate) enum StoreError {
#[error("Error in 0.4 DB")] #[error("Error in 0.4 DB")]
Repo04(#[from] crate::repo_04::RepoError), Repo04(#[from] crate::repo_04::RepoError),
#[error("Error reading bytes stream")]
ReadStream(#[source] std::io::Error),
#[error("Requested file is not found")] #[error("Requested file is not found")]
FileNotFound(#[source] std::io::Error), FileNotFound(#[source] std::io::Error),
@ -35,6 +39,7 @@ impl StoreError {
Self::FileStore(e) => e.error_code(), Self::FileStore(e) => e.error_code(),
Self::ObjectStore(e) => e.error_code(), Self::ObjectStore(e) => e.error_code(),
Self::Repo(e) => e.error_code(), Self::Repo(e) => e.error_code(),
Self::ReadStream(_) => ErrorCode::IO_ERROR,
Self::Repo04(_) => ErrorCode::OLD_REPO_ERROR, Self::Repo04(_) => ErrorCode::OLD_REPO_ERROR,
Self::FileNotFound(_) | Self::ObjectNotFound(_) => ErrorCode::NOT_FOUND, Self::FileNotFound(_) | Self::ObjectNotFound(_) => ErrorCode::NOT_FOUND,
} }
@ -112,6 +117,26 @@ pub(crate) trait Store: Clone + Debug {
len: Option<u64>, len: Option<u64>,
) -> Result<LocalBoxStream<'static, std::io::Result<Bytes>>, StoreError>; ) -> Result<LocalBoxStream<'static, std::io::Result<Bytes>>, StoreError>;
async fn to_bytes(
&self,
identifier: &Arc<str>,
from_start: Option<u64>,
len: Option<u64>,
) -> Result<BytesStream, StoreError> {
let mut buf = BytesStream::new();
let mut streamer = self
.to_stream(identifier, from_start, len)
.await?
.into_streamer();
while let Some(bytes) = streamer.try_next().await.map_err(StoreError::ReadStream)? {
buf.add_bytes(bytes);
}
Ok(buf)
}
async fn read_into<Writer>( async fn read_into<Writer>(
&self, &self,
identifier: &Arc<str>, identifier: &Arc<str>,
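
Taken together, `to_bytes` plus `Details::from_bytes` is the two-step pattern that replaces the removed `Details::from_store` at every call site in this commit. A sketch, assuming `store`, `identifier`, and `timeout` are already in scope:

```rust
// Buffer the stored file fully into memory via the new default method...
let bytes_stream = store.to_bytes(&identifier, None, None).await?;
// ...then derive details from the buffered bytes.
let details = Details::from_bytes(timeout, bytes_stream.into_bytes()).await?;
```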