Mirror of https://git.asonix.dog/asonix/pict-rs (synced 2024-11-20 11:21:14 +00:00)
Upgrade Concurrency: Update docs, configs, limit maximum details operations

parent f223d477c7, commit d28c1ac628

10 changed files with 107 additions and 41 deletions
README.md (29 lines changed)

@@ -17,11 +17,12 @@ _a simple image hosting service_
 1. [Backups](#backups)
 2. [0.4 to 0.5 Migration Guide](#0-4-to-0-5-migration-guide)
     1. [Overview](#overview)
-    2. [Configuration Updates](#configuration-updates)
+    2. [Upgrade Configuration](#upgrade-configuration)
+    3. [Configuration Updates](#configuration-updates)
         1. [Image Changes](#image-changes)
         2. [Animation Changes](#animation-changes)
         3. [Video Changes](#video-changes)
-    3. [Upgrading Directly to Postgres](#upgrading-directly-to-postgres)
+    4. [Upgrading Directly to Postgres](#upgrading-directly-to-postgres)
 3. [Filesystem to Object Storage Migration](#filesystem-to-object-storage-migration)
     1. [Troubleshooting](#migration-troubleshooting)
 4. [Sled to Postgres Migration](#sled-to-postgres-migration)

@@ -645,6 +646,30 @@ uploaded before this was standard may not have ever had their details records ge
 _This upgrade must be performed while pict-rs is offline._
 
+#### Upgrade Configuration
+
+Because upgrades may take so long, there is a new configuration option introduced to attempt to
+improve its speed.
+
+```toml
+[upgrade]
+concurrency = 32
+```
+
+or
+
+```
+PICTRS__UPGRADE__CONCURRENCY=32
+```
+
+or
+
+```bash
+$ pict-rs run --upgrade-concurrency 32
+```
+
+This value dictates how many hashes pict-rs will attempt to migrate at the same time. Since this
+value will increase the number of concurrent connections to the Repo and the Store, as well as
+potentially increase CPU and memory use, it should be considered carefully before increasing.
+
+For large-scale deployments, it is likely this value should be bumped to 128, 256, or even 512. The
+default value is 32.
+
 #### Configuration Updates
 Previously, pict-rs only had two categories for files: images and videos. pict-rs 0.5 adds a third
 category: animation. With the new explicit support for animated file types, some configuration
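The upgrade `concurrency` value caps how many hashes are migrated in flight at once. As a rough sketch of the mechanism only (the `migrate_hash` function and hash type here are hypothetical, not pict-rs code), a bounded-concurrency loop in tokio pairs a `Semaphore` with a `JoinSet` — the same combination the migration module adopts later in this commit:

```rust
use std::sync::Arc;
use tokio::{sync::Semaphore, task::JoinSet};

// Hypothetical stand-in for migrating one hash. In pict-rs the real work
// talks to the Repo and the Store, which is why raising `concurrency`
// raises the number of concurrent connections to both.
async fn migrate_hash(hash: u64) {
    println!("migrated hash {hash:x}");
}

#[tokio::main]
async fn main() {
    let concurrency = 32; // mirrors the default [upgrade] concurrency = 32
    let semaphore = Arc::new(Semaphore::new(concurrency));
    let mut set = JoinSet::new();

    for hash in 0u64..1000 {
        // Each task holds a permit for its whole lifetime, so at most
        // `concurrency` migrations run at the same time.
        let permit = semaphore.clone().acquire_owned().await.unwrap();
        set.spawn(async move {
            migrate_hash(hash).await;
            drop(permit);
        });
    }

    // Drain the set so main doesn't exit before the tasks finish.
    while set.join_next().await.is_some() {}
}
```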
@@ -6,6 +6,9 @@ max_file_count = 1
 [client]
 timeout = 30
 
+[upgrade]
+concurrency = 32
+
 [tracing.logging]
 format = "normal"
 targets = "info"
@@ -27,6 +27,15 @@ max_file_count = 1
 # distinct from the object storage client timeout, which can be configured separately
 timeout = 30
 
+[upgrade]
+## Optional: How many hashes will be migrated from the previous version of pict-rs at the same time
+# environment variable: PICTRS__UPGRADE__CONCURRENCY
+# default: 32
+#
+# Increasing this value will increase the number of concurrent connections to the Repo, and to the
+# Store, so make sure your deployment can handle more throughput before tweaking this.
+concurrency = 32
+
 ## Logging configuration
 [tracing.logging]
@@ -1,15 +1,10 @@
-use std::sync::Arc;
-
 use crate::{
-    bytes_stream::BytesStream,
     discover::Discovery,
     error::Error,
     formats::{InternalFormat, InternalVideoFormat},
     serde_str::Serde,
-    store::Store,
 };
 use actix_web::web;
-use streem::IntoStreamer;
 use time::{format_description::well_known::Rfc3339, OffsetDateTime};
 
 #[derive(Copy, Clone, Debug, serde::Deserialize, serde::Serialize)]

@@ -83,6 +78,7 @@ impl Details {
         self.inner.created_at.timestamp
     }
 
+    #[tracing::instrument(level = "DEBUG")]
     pub(crate) async fn from_bytes(timeout: u64, input: web::Bytes) -> Result<Self, Error> {
         let Discovery {
             input,

@@ -99,28 +95,6 @@ impl Details {
         ))
     }
 
-    #[tracing::instrument(level = "DEBUG")]
-    pub(crate) async fn from_store<S: Store>(
-        store: &S,
-        identifier: &Arc<str>,
-        timeout: u64,
-    ) -> Result<Self, Error> {
-        let mut buf = BytesStream::new();
-
-        let mut stream = store
-            .to_stream(identifier, None, None)
-            .await?
-            .into_streamer();
-
-        while let Some(res) = stream.next().await {
-            buf.add_bytes(res?);
-        }
-
-        let bytes = buf.into_bytes();
-
-        Self::from_bytes(timeout, bytes).await
-    }
-
     pub(crate) fn internal_format(&self) -> InternalFormat {
         self.inner.format
     }
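With `Details::from_store` removed, its buffering step moves behind the new `Store::to_bytes` helper (added to src/store.rs later in this diff), and call sites compose the two halves themselves. A minimal sketch of the replacement pattern, assuming the crate's `Store`, `Details`, and `Error` types; the wrapper name `details_for` is hypothetical:

```rust
use std::sync::Arc;

// Sketch only: buffer the stored file into memory, then run discovery on
// the bytes. This mirrors the call sites updated throughout this commit.
async fn details_for<S: Store>(
    store: &S,
    identifier: &Arc<str>,
    timeout: u64,
) -> Result<Details, Error> {
    let bytes_stream = store.to_bytes(identifier, None, None).await?;
    Details::from_bytes(timeout, bytes_stream.into_bytes()).await
}
```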
@@ -99,7 +99,9 @@ async fn process<S: Store + 'static>(
     let input_details = if let Some(details) = repo.details(&identifier).await? {
         details
     } else {
-        let details = Details::from_store(store, &identifier, media.process_timeout).await?;
+        let bytes_stream = store.to_bytes(&identifier, None, None).await?;
+
+        let details = Details::from_bytes(media.process_timeout, bytes_stream.into_bytes()).await?;
 
         repo.relate_details(&identifier, &details).await?;
@@ -106,7 +106,8 @@ where
         .save_async_read(hasher_reader, input_type.media_type())
         .await?;
 
-    let details = Details::from_store(store, &identifier, media.process_timeout).await?;
+    let bytes_stream = store.to_bytes(&identifier, None, None).await?;
+    let details = Details::from_bytes(media.process_timeout, bytes_stream.into_bytes()).await?;
 
     drop(permit);
src/lib.rs (11 lines changed)

@@ -122,8 +122,9 @@ async fn ensure_details<S: Store + 'static>(
     }
 
     tracing::debug!("generating new details from {:?}", identifier);
+    let bytes_stream = store.to_bytes(&identifier, None, None).await?;
     let new_details =
-        Details::from_store(store, &identifier, config.media.process_timeout).await?;
+        Details::from_bytes(config.media.process_timeout, bytes_stream.into_bytes()).await?;
     tracing::debug!("storing details for {:?}", identifier);
     repo.relate_details(&identifier, &new_details).await?;
     tracing::debug!("stored");

@@ -897,8 +898,10 @@ async fn process<S: Store + 'static>(
     }
 
     tracing::debug!("generating new details from {:?}", identifier);
+    let bytes_stream = store.to_bytes(&identifier, None, None).await?;
     let new_details =
-        Details::from_store(&store, &identifier, config.media.process_timeout).await?;
+        Details::from_bytes(config.media.process_timeout, bytes_stream.into_bytes())
+            .await?;
     tracing::debug!("storing details for {:?}", identifier);
     repo.relate_details(&identifier, &new_details).await?;
     tracing::debug!("stored");

@@ -1015,8 +1018,10 @@ async fn process_head<S: Store + 'static>(
     }
 
     tracing::debug!("generating new details from {:?}", identifier);
+    let bytes_stream = store.to_bytes(&identifier, None, None).await?;
     let new_details =
-        Details::from_store(&store, &identifier, config.media.process_timeout).await?;
+        Details::from_bytes(config.media.process_timeout, bytes_stream.into_bytes())
+            .await?;
     tracing::debug!("storing details for {:?}", identifier);
     repo.relate_details(&identifier, &new_details).await?;
     tracing::debug!("stored");
@@ -407,7 +407,12 @@ where
     let details = if let Some(details) = details_opt {
         details
     } else {
-        let new_details = Details::from_store(from, identifier, timeout)
+        let bytes_stream = from
+            .to_bytes(&identifier, None, None)
+            .await
+            .map_err(From::from)
+            .map_err(MigrateError::Details)?;
+        let new_details = Details::from_bytes(timeout, bytes_stream.into_bytes())
             .await
             .map_err(MigrateError::Details)?;
         repo.relate_details(identifier, &new_details)
@@ -1,7 +1,7 @@
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use streem::IntoStreamer;
-use tokio::task::JoinSet;
+use tokio::{sync::Semaphore, task::JoinSet};
 
 use crate::{
     config::Configuration,

@@ -341,6 +341,18 @@ async fn set_details<S: Store>(
     }
 }
 
+static DETAILS_SEMAPHORE: OnceLock<Semaphore> = OnceLock::new();
+
+fn details_semaphore() -> &'static Semaphore {
+    DETAILS_SEMAPHORE.get_or_init(|| {
+        let parallelism = std::thread::available_parallelism()
+            .map(usize::from)
+            .unwrap_or(1);
+
+        crate::sync::bare_semaphore(parallelism * 2)
+    })
+}
+
 #[tracing::instrument(skip_all)]
 async fn fetch_or_generate_details<S: Store>(
     old_repo: &OldSledRepo,

@@ -353,8 +365,13 @@ async fn fetch_or_generate_details<S: Store>(
     if let Some(details) = details_opt {
         Ok(details)
     } else {
-        Details::from_store(store, identifier, config.media.process_timeout)
-            .await
-            .map_err(From::from)
+        let bytes_stream = store.to_bytes(identifier, None, None).await?;
+        let bytes = bytes_stream.into_bytes();
+
+        let guard = details_semaphore().acquire().await?;
+        let details = Details::from_bytes(config.media.process_timeout, bytes).await?;
+        drop(guard);
+
+        Ok(details)
     }
 }
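Note the `details_semaphore` above: unlike the hash-level `concurrency` option, it bounds how many `Details::from_bytes` calls (the expensive media-discovery step) run at once, sized at twice the available parallelism. `crate::sync::bare_semaphore` is internal to pict-rs; presumably it is a thin constructor along these lines, though that is an assumption:

```rust
use tokio::sync::Semaphore;

// Assumed shape of crate::sync::bare_semaphore: a tokio Semaphore with the
// requested permit count (the real helper may do more, e.g. tracing setup).
fn bare_semaphore(permits: usize) -> Semaphore {
    Semaphore::new(permits)
}
```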
src/store.rs (27 lines changed)

@@ -1,9 +1,10 @@
 use actix_web::web::Bytes;
 use futures_core::Stream;
 use std::{fmt::Debug, sync::Arc};
+use streem::IntoStreamer;
 use tokio::io::{AsyncRead, AsyncWrite};
 
-use crate::{error_code::ErrorCode, stream::LocalBoxStream};
+use crate::{bytes_stream::BytesStream, error_code::ErrorCode, stream::LocalBoxStream};
 
 pub(crate) mod file_store;
 pub(crate) mod object_store;

@@ -22,6 +23,9 @@ pub(crate) enum StoreError {
     #[error("Error in 0.4 DB")]
     Repo04(#[from] crate::repo_04::RepoError),
 
+    #[error("Error reading bytes stream")]
+    ReadStream(#[source] std::io::Error),
+
     #[error("Requested file is not found")]
     FileNotFound(#[source] std::io::Error),

@@ -35,6 +39,7 @@ impl StoreError {
             Self::FileStore(e) => e.error_code(),
             Self::ObjectStore(e) => e.error_code(),
             Self::Repo(e) => e.error_code(),
+            Self::ReadStream(_) => ErrorCode::IO_ERROR,
             Self::Repo04(_) => ErrorCode::OLD_REPO_ERROR,
             Self::FileNotFound(_) | Self::ObjectNotFound(_) => ErrorCode::NOT_FOUND,
         }

@@ -112,6 +117,26 @@ pub(crate) trait Store: Clone + Debug {
         len: Option<u64>,
     ) -> Result<LocalBoxStream<'static, std::io::Result<Bytes>>, StoreError>;
 
+    async fn to_bytes(
+        &self,
+        identifier: &Arc<str>,
+        from_start: Option<u64>,
+        len: Option<u64>,
+    ) -> Result<BytesStream, StoreError> {
+        let mut buf = BytesStream::new();
+
+        let mut streamer = self
+            .to_stream(identifier, from_start, len)
+            .await?
+            .into_streamer();
+
+        while let Some(bytes) = streamer.try_next().await.map_err(StoreError::ReadStream)? {
+            buf.add_bytes(bytes);
+        }
+
+        Ok(buf)
+    }
+
     async fn read_into<Writer>(
         &self,
         identifier: &Arc<str>,
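`to_bytes` is a provided method on the `Store` trait, so `FileStore` and `ObjectStore` both inherit it without changes. A hedged usage sketch (the wrapper function is hypothetical; the signature is from the diff above), where passing `None, None` requests the whole object:

```rust
use std::sync::Arc;

// Sketch only: read a stored object fully into memory via the new default
// trait method. (None, None) means "from the start, for the entire length".
async fn load_all<S: Store>(
    store: &S,
    identifier: &Arc<str>,
) -> Result<BytesStream, StoreError> {
    store.to_bytes(identifier, None, None).await
}
```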