Adding an image_details table to store image dimensions. (#4704)

* Adding an image_details table to store image dimensions.

- Adds an image_details table, which stores the height,
  width, and content_type for local and remote images.
- For LocalImages, this information already comes back with
  the upload.
- For RemoteImages, it calls the pictrs details endpoint.
- Fixed some issues with proxying non-image urls.
- Fixes #3328
- Also fixes #4703

* Running sql format.

* Running fmt.

* Don't fetch metadata in background for local API requests.

* Dont export remote_image table to typescript.

* Cleaning up validate.

* Dont proxy url.

* Fixing tests, fixing issue with federated thumbnails.

* Fix tests.

* Updating corepack, fixing issue.

* Refactoring image inserts to use transactions.

* Use select exists again.

* Fixing imports.

* Fix test.

* Removing pointless backgrounded metadata generation version.

* Removing public pictrs details route.

* Fixing clippy.

* Running prettier.

* A few more fixes.

* Moving diesel schema check back down.

* Addressing PR comments.

* Changing back request head to get.

* Fixing lockfile.

---------

Co-authored-by: SleeplessOne1917 <28871516+SleeplessOne1917@users.noreply.github.com>
This commit is contained in:
Dessalines 2024-06-20 06:44:06 -04:00 committed by Felix Ableitner
parent 026d385e69
commit ec1c638ed6
17 changed files with 287 additions and 82 deletions

View file

@ -1,42 +0,0 @@
{
"root": true,
"env": {
"browser": true
},
"plugins": ["@typescript-eslint"],
"extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"],
"parser": "@typescript-eslint/parser",
"parserOptions": {
"project": "./tsconfig.json",
"warnOnUnsupportedTypeScriptVersion": false
},
"rules": {
"@typescript-eslint/ban-ts-comment": 0,
"@typescript-eslint/no-explicit-any": 0,
"@typescript-eslint/explicit-module-boundary-types": 0,
"@typescript-eslint/no-var-requires": 0,
"arrow-body-style": 0,
"curly": 0,
"eol-last": 0,
"eqeqeq": 0,
"func-style": 0,
"import/no-duplicates": 0,
"max-statements": 0,
"max-params": 0,
"new-cap": 0,
"no-console": 0,
"no-duplicate-imports": 0,
"no-extra-parens": 0,
"no-return-assign": 0,
"no-throw-literal": 0,
"no-trailing-spaces": 0,
"no-unused-expressions": 0,
"no-useless-constructor": 0,
"no-useless-escape": 0,
"no-var": 0,
"prefer-const": 0,
"prefer-rest-params": 0,
"quote-props": 0,
"unicorn/filename-case": 0
}
}

View file

@ -0,0 +1,56 @@
import pluginJs from "@eslint/js";
import tseslint from "typescript-eslint";
export default [
pluginJs.configs.recommended,
...tseslint.configs.recommended,
{
languageOptions: {
parser: tseslint.parser,
},
},
// For some reason this has to be in its own block
{
ignores: [
"putTypesInIndex.js",
"dist/*",
"docs/*",
".yalc",
"jest.config.js",
],
},
{
files: ["src/**/*"],
rules: {
"@typescript-eslint/no-empty-interface": 0,
"@typescript-eslint/no-empty-function": 0,
"@typescript-eslint/ban-ts-comment": 0,
"@typescript-eslint/no-explicit-any": 0,
"@typescript-eslint/explicit-module-boundary-types": 0,
"@typescript-eslint/no-var-requires": 0,
"arrow-body-style": 0,
curly: 0,
"eol-last": 0,
eqeqeq: 0,
"func-style": 0,
"import/no-duplicates": 0,
"max-statements": 0,
"max-params": 0,
"new-cap": 0,
"no-console": 0,
"no-duplicate-imports": 0,
"no-extra-parens": 0,
"no-return-assign": 0,
"no-throw-literal": 0,
"no-trailing-spaces": 0,
"no-unused-expressions": 0,
"no-useless-constructor": 0,
"no-useless-escape": 0,
"no-var": 0,
"prefer-const": 0,
"prefer-rest-params": 0,
"quote-props": 0,
"unicorn/filename-case": 0,
},
},
];

View file

@ -27,7 +27,7 @@
"eslint": "^9.8.0",
"eslint-plugin-prettier": "^5.1.3",
"jest": "^29.5.0",
"lemmy-js-client": "0.19.4",
"lemmy-js-client": "0.19.5-alpha.1",
"prettier": "^3.2.5",
"ts-jest": "^29.1.0",
"typescript": "^5.5.4",

View file

@ -30,8 +30,8 @@ importers:
specifier: ^29.5.0
version: 29.7.0(@types/node@22.5.1)
lemmy-js-client:
specifier: 0.19.4
version: 0.19.4
specifier: 0.19.5-alpha.1
version: 0.19.5-alpha.1
prettier:
specifier: ^3.2.5
version: 3.3.3
@ -1179,8 +1179,8 @@ packages:
resolution: {integrity: sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==}
engines: {node: '>=6'}
lemmy-js-client@0.19.4:
resolution: {integrity: sha512-k3d+YRDj3+JuuEP+nuEg27efR/e4m8oMk2BoC8jq9AnMrwSAKfsN2F2vG70Zke0amXtOclDZrCSHkIpNw99ikg==}
lemmy-js-client@0.19.5-alpha.1:
resolution: {integrity: sha512-GOhaiTQzrpwdmc3DFYemT2SmNmpuQJe2BWUms9QOzdYlkA1WZ0uu7axPE3s+T5OOxfy7K9Q2gsLe72dcVSlffw==}
leven@3.1.0:
resolution: {integrity: sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==}
@ -3108,7 +3108,7 @@ snapshots:
kleur@3.0.3: {}
lemmy-js-client@0.19.4: {}
lemmy-js-client@0.19.5-alpha.1: {}
leven@3.1.0: {}

View file

@ -15,7 +15,7 @@ export LEMMY_TEST_FAST_FEDERATION=1 # by default, the persistent federation queu
# pictrs setup
if [ ! -f "api_tests/pict-rs" ]; then
curl "https://git.asonix.dog/asonix/pict-rs/releases/download/v0.5.13/pict-rs-linux-amd64" -o api_tests/pict-rs
curl "https://git.asonix.dog/asonix/pict-rs/releases/download/v0.5.16/pict-rs-linux-amd64" -o api_tests/pict-rs
chmod +x api_tests/pict-rs
fi
./api_tests/pict-rs \

View file

@ -164,6 +164,7 @@ test("Purge post, linked image removed", async () => {
upload.url,
);
expect(post.post_view.post.url).toBe(upload.url);
expect(post.post_view.image_details).toBeDefined();
// purge post
const purgeForm: PurgePost = {
@ -189,6 +190,9 @@ test("Images in remote image post are proxied if setting enabled", async () => {
const post = postRes.post_view.post;
expect(post).toBeDefined();
// Make sure it fetched the image details
expect(postRes.post_view.image_details).toBeDefined();
// remote image gets proxied after upload
expect(
post.thumbnail_url?.startsWith(

View file

@ -512,7 +512,7 @@ test("Enforce site ban federation for local user", async () => {
}
let newAlphaUserJwt = await loginUser(alpha, alphaUserPerson.name);
alphaUserHttp.setHeaders({
Authorization: "Bearer " + newAlphaUserJwt.jwt ?? "",
Authorization: "Bearer " + newAlphaUserJwt.jwt,
});
// alpha makes new post in beta community, it federates
let postRes2 = await createPost(alphaUserHttp, betaCommunity!.community.id);

View file

@ -12,7 +12,7 @@ use futures::StreamExt;
use lemmy_db_schema::{
newtypes::DbUrl,
source::{
images::{LocalImage, LocalImageForm},
images::{ImageDetailsForm, LocalImage, LocalImageForm},
local_site::LocalSite,
post::{Post, PostUpdateForm},
},
@ -263,6 +263,19 @@ pub struct PictrsFileDetails {
pub created_at: DateTime<Utc>,
}
impl PictrsFileDetails {
/// Builds the image form. This should always use the thumbnail_url,
/// Because the post_view joins to it
pub fn build_image_details_form(&self, thumbnail_url: &Url) -> ImageDetailsForm {
ImageDetailsForm {
link: thumbnail_url.clone().into(),
width: self.width.into(),
height: self.height.into(),
content_type: self.content_type.clone(),
}
}
}
#[derive(Deserialize, Serialize, Debug)]
struct PictrsPurgeResponse {
msg: String,
@ -370,11 +383,52 @@ async fn generate_pictrs_thumbnail(image_url: &Url, context: &LemmyContext) -> L
let protocol_and_hostname = context.settings().get_protocol_and_hostname();
let thumbnail_url = image.thumbnail_url(&protocol_and_hostname)?;
LocalImage::create(&mut context.pool(), &form).await?;
// Also store the details for the image
let details_form = image.details.build_image_details_form(&thumbnail_url);
LocalImage::create(&mut context.pool(), &form, &details_form).await?;
Ok(thumbnail_url)
}
/// Fetches the image details for pictrs proxied images
///
/// We don't need to check for image mode, as that's already been done
#[tracing::instrument(skip_all)]
pub async fn fetch_pictrs_proxied_image_details(
image_url: &Url,
context: &LemmyContext,
) -> LemmyResult<PictrsFileDetails> {
let pictrs_url = context.settings().pictrs_config()?.url;
let encoded_image_url = encode(image_url.as_str());
// Pictrs needs you to fetch the proxied image before you can fetch the details
let proxy_url = format!("{pictrs_url}image/original?proxy={encoded_image_url}");
let res = context
.client()
.get(&proxy_url)
.timeout(REQWEST_TIMEOUT)
.send()
.await?
.status();
if !res.is_success() {
Err(LemmyErrorType::NotAnImageType)?
}
let details_url = format!("{pictrs_url}image/details/original?proxy={encoded_image_url}");
let res = context
.client()
.get(&details_url)
.timeout(REQWEST_TIMEOUT)
.send()
.await?
.json()
.await?;
Ok(res)
}
// TODO: get rid of this by reading content type from db
#[tracing::instrument(skip_all)]
async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> LemmyResult<()> {

View file

@ -1,6 +1,10 @@
use crate::{
context::LemmyContext,
request::{delete_image_from_pictrs, purge_image_from_pictrs},
request::{
delete_image_from_pictrs,
fetch_pictrs_proxied_image_details,
purge_image_from_pictrs,
},
site::{FederatedInstances, InstanceWithFederationState},
};
use chrono::{DateTime, Days, Local, TimeZone, Utc};
@ -950,7 +954,18 @@ pub async fn process_markdown(
if context.settings().pictrs_config()?.image_mode() == PictrsImageMode::ProxyAllImages {
let (text, links) = markdown_rewrite_image_links(text);
RemoteImage::create(&mut context.pool(), links).await?;
// Create images and image detail rows
for link in links {
// Insert image details for the remote image
let details_res = fetch_pictrs_proxied_image_details(&link, context).await;
if let Ok(details) = details_res {
let proxied =
build_proxied_image_url(&link, &context.settings().get_protocol_and_hostname())?;
let details_form = details.build_image_details_form(&proxied);
RemoteImage::create(&mut context.pool(), &details_form).await?;
}
}
Ok(text)
} else {
Ok(text)
@ -985,8 +1000,14 @@ async fn proxy_image_link_internal(
Ok(link.into())
} else if image_mode == PictrsImageMode::ProxyAllImages {
let proxied = build_proxied_image_url(&link, &context.settings().get_protocol_and_hostname())?;
// This should fail softly, since pictrs might not even be running
let details_res = fetch_pictrs_proxied_image_details(&link, context).await;
if let Ok(details) = details_res {
let details_form = details.build_image_details_form(&proxied);
RemoteImage::create(&mut context.pool(), &details_form).await?;
};
RemoteImage::create(&mut context.pool(), vec![link]).await?;
Ok(proxied.into())
} else {
Ok(link.into())
@ -1124,10 +1145,13 @@ mod tests {
"https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Flemmy-beta%2Fimage.png",
proxied.as_str()
);
// This fails, because the details can't be fetched without pictrs running,
// And a remote image won't be inserted.
assert!(
RemoteImage::validate(&mut context.pool(), remote_image.into())
.await
.is_ok()
.is_err()
);
}
}

View file

@ -1,7 +1,14 @@
use crate::{
newtypes::DbUrl,
schema::{local_image, remote_image},
source::images::{LocalImage, LocalImageForm, RemoteImage, RemoteImageForm},
schema::{image_details, local_image, remote_image},
source::images::{
ImageDetails,
ImageDetailsForm,
LocalImage,
LocalImageForm,
RemoteImage,
RemoteImageForm,
},
utils::{get_conn, DbPool},
};
use diesel::{
@ -13,15 +20,29 @@ use diesel::{
NotFound,
QueryDsl,
};
use diesel_async::RunQueryDsl;
use url::Url;
use diesel_async::{AsyncPgConnection, RunQueryDsl};
impl LocalImage {
pub async fn create(pool: &mut DbPool<'_>, form: &LocalImageForm) -> Result<Self, Error> {
pub async fn create(
pool: &mut DbPool<'_>,
form: &LocalImageForm,
image_details_form: &ImageDetailsForm,
) -> Result<Self, Error> {
let conn = &mut get_conn(pool).await?;
insert_into(local_image::table)
conn
.build_transaction()
.run(|conn| {
Box::pin(async move {
let local_insert = insert_into(local_image::table)
.values(form)
.get_result::<Self>(conn)
.await;
ImageDetails::create(conn, image_details_form).await?;
local_insert
}) as _
})
.await
}
@ -39,16 +60,26 @@ impl LocalImage {
}
impl RemoteImage {
pub async fn create(pool: &mut DbPool<'_>, links: Vec<Url>) -> Result<usize, Error> {
pub async fn create(pool: &mut DbPool<'_>, form: &ImageDetailsForm) -> Result<usize, Error> {
let conn = &mut get_conn(pool).await?;
let forms = links
.into_iter()
.map(|url| RemoteImageForm { link: url.into() })
.collect::<Vec<_>>();
insert_into(remote_image::table)
.values(forms)
conn
.build_transaction()
.run(|conn| {
Box::pin(async move {
let remote_image_form = RemoteImageForm {
link: form.link.clone(),
};
let remote_insert = insert_into(remote_image::table)
.values(remote_image_form)
.on_conflict_do_nothing()
.execute(conn)
.await;
ImageDetails::create(conn, form).await?;
remote_insert
}) as _
})
.await
}
@ -67,3 +98,16 @@ impl RemoteImage {
}
}
}
impl ImageDetails {
pub(crate) async fn create(
conn: &mut AsyncPgConnection,
form: &ImageDetailsForm,
) -> Result<usize, Error> {
insert_into(image_details::table)
.values(form)
.on_conflict_do_nothing()
.execute(conn)
.await
}
}

View file

@ -309,6 +309,15 @@ diesel::table! {
}
}
diesel::table! {
image_details (link) {
link -> Text,
width -> Int4,
height -> Int4,
content_type -> Text,
}
}
diesel::table! {
instance (id) {
id -> Int4,
@ -849,8 +858,7 @@ diesel::table! {
}
diesel::table! {
remote_image (id) {
id -> Int4,
remote_image (link) {
link -> Text,
published -> Timestamptz,
}
@ -1055,6 +1063,7 @@ diesel::allow_tables_to_appear_in_same_query!(
federation_allowlist,
federation_blocklist,
federation_queue_state,
image_details,
instance,
instance_block,
language,

View file

@ -1,13 +1,12 @@
use crate::newtypes::{DbUrl, LocalUserId};
#[cfg(feature = "full")]
use crate::schema::{local_image, remote_image};
use crate::schema::{image_details, local_image, remote_image};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_with::skip_serializing_none;
use std::fmt::Debug;
#[cfg(feature = "full")]
use ts_rs::TS;
use typed_builder::TypedBuilder;
#[skip_serializing_none]
#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
@ -30,7 +29,7 @@ pub struct LocalImage {
pub published: DateTime<Utc>,
}
#[derive(Debug, Clone, TypedBuilder)]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))]
#[cfg_attr(feature = "full", diesel(table_name = local_image))]
pub struct LocalImageForm {
@ -46,15 +45,39 @@ pub struct LocalImageForm {
#[cfg_attr(feature = "full", derive(Queryable, Selectable, Identifiable))]
#[cfg_attr(feature = "full", diesel(table_name = remote_image))]
#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))]
#[cfg_attr(feature = "full", diesel(primary_key(link)))]
pub struct RemoteImage {
pub id: i32,
pub link: DbUrl,
pub published: DateTime<Utc>,
}
#[derive(Debug, Clone, TypedBuilder)]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))]
#[cfg_attr(feature = "full", diesel(table_name = remote_image))]
pub struct RemoteImageForm {
pub link: DbUrl,
}
#[skip_serializing_none]
#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "full", derive(Queryable, Selectable, Identifiable, TS))]
#[cfg_attr(feature = "full", ts(export))]
#[cfg_attr(feature = "full", diesel(table_name = image_details))]
#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))]
#[cfg_attr(feature = "full", diesel(primary_key(link)))]
pub struct ImageDetails {
pub link: DbUrl,
pub width: i32,
pub height: i32,
pub content_type: String,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))]
#[cfg_attr(feature = "full", diesel(table_name = image_details))]
pub struct ImageDetailsForm {
pub link: DbUrl,
pub width: i32,
pub height: i32,
pub content_type: String,
}

View file

@ -28,6 +28,7 @@ use lemmy_db_schema::{
community_follower,
community_moderator,
community_person_ban,
image_details,
instance_block,
local_user,
local_user_language,
@ -218,6 +219,7 @@ fn queries<'a>() -> Queries<
.inner_join(person::table)
.inner_join(community::table)
.inner_join(post::table)
.left_join(image_details::table.on(post::thumbnail_url.eq(image_details::link.nullable())))
.left_join(
post_saved::table.on(
post_aggregates::post_id
@ -229,6 +231,7 @@ fn queries<'a>() -> Queries<
post::all_columns,
person::all_columns,
community::all_columns,
image_details::all_columns.nullable(),
is_creator_banned_from_community,
is_local_user_banned_from_community_selection,
creator_is_moderator,
@ -1709,6 +1712,7 @@ mod tests {
public_key: inserted_person.public_key.clone(),
last_refreshed_at: inserted_person.last_refreshed_at,
},
image_details: None,
creator_banned_from_community: false,
banned_from_community: false,
creator_is_moderator: false,

View file

@ -8,7 +8,7 @@ use lemmy_db_schema::{
community::Community,
custom_emoji::CustomEmoji,
custom_emoji_keyword::CustomEmojiKeyword,
images::LocalImage,
images::{ImageDetails, LocalImage},
local_site::LocalSite,
local_site_rate_limit::LocalSiteRateLimit,
local_user::LocalUser,
@ -131,6 +131,7 @@ pub struct PostView {
pub post: Post,
pub creator: Person,
pub community: Community,
pub image_details: Option<ImageDetails>,
pub creator_banned_from_community: bool,
pub banned_from_community: bool,
pub creator_is_moderator: bool,

View file

@ -156,7 +156,13 @@ async fn upload(
pictrs_alias: image.file.to_string(),
pictrs_delete_token: image.delete_token.to_string(),
};
LocalImage::create(&mut context.pool(), &form).await?;
let protocol_and_hostname = context.settings().get_protocol_and_hostname();
let thumbnail_url = image.thumbnail_url(&protocol_and_hostname)?;
// Also store the details for the image
let details_form = image.details.build_image_details_form(&thumbnail_url);
LocalImage::create(&mut context.pool(), &form, &details_form).await?;
}
}

View file

@ -0,0 +1,7 @@
ALTER TABLE remote_image
ADD UNIQUE (link),
DROP CONSTRAINT remote_image_pkey,
ADD COLUMN id serial PRIMARY KEY;
DROP TABLE image_details;

View file

@ -0,0 +1,15 @@
-- Drop the id column from the remote_image table, just use link
ALTER TABLE remote_image
DROP COLUMN id,
ADD PRIMARY KEY (link),
DROP CONSTRAINT remote_image_link_key;
-- No good way to do references here unfortunately, unless we combine the images tables
-- The link should be the URL, not the pictrs_alias, to allow joining from post.thumbnail_url
CREATE TABLE image_details (
link text PRIMARY KEY,
width integer NOT NULL,
height integer NOT NULL,
content_type text NOT NULL
);