Merge remote-tracking branch 'origin/main' into combined_tables_2

2025-02-02 15:21:40 +00:00 · 2024-12-19 16:30:15 -05:00 · 2024-12-19 16:30:15 -05:00 · 154cea9366
commit 154cea9366
parent ae9da4da69 8b78ddeb68
21 changed files with 661 additions and 247 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -2685,8 +2685,10 @@ dependencies = [
 "lemmy_utils",
 "pretty_assertions",
 "serde",
+ "serde_json",
 "serde_with",
 "serial_test",
+ "test-context",
 "tokio",
 "tracing",
 "ts-rs",
--- a/crates/api_common/src/post.rs
+++ b/crates/api_common/src/post.rs
@ -1,5 +1,5 @@
 use lemmy_db_schema::{
-  newtypes::{CommentId, CommunityId, DbUrl, LanguageId, PostId},
+  newtypes::{CommentId, CommunityId, DbUrl, LanguageId, PostId, TagId},
  ListingType,
  PostFeatureType,
  PostSortType,
@ -37,6 +37,8 @@ pub struct CreatePost {
  /// Instead of fetching a thumbnail, use a custom one.
  #[cfg_attr(feature = "full", ts(optional))]
  pub custom_thumbnail: Option<String>,
+  #[cfg_attr(feature = "full", ts(optional))]
+  pub tags: Option<Vec<TagId>>,
  /// Time when this post should be scheduled. Null means publish immediately.
  #[cfg_attr(feature = "full", ts(optional))]
  pub scheduled_publish_time: Option<i64>,
@ -166,6 +168,8 @@ pub struct EditPost {
  /// Instead of fetching a thumbnail, use a custom one.
  #[cfg_attr(feature = "full", ts(optional))]
  pub custom_thumbnail: Option<String>,
+  #[cfg_attr(feature = "full", ts(optional))]
+  pub tags: Option<Vec<TagId>>,
  /// Time when this post should be scheduled. Null means publish immediately.
  #[cfg_attr(feature = "full", ts(optional))]
  pub scheduled_publish_time: Option<i64>,
--- a/crates/api_common/src/request.rs
+++ b/crates/api_common/src/request.rs
@ -51,9 +51,11 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder {
 #[tracing::instrument(skip_all)]
 pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResult<LinkMetadata> {
  info!("Fetching site metadata for url: {}", url);
-  // We only fetch the first 64kB of data in order to not waste bandwidth especially for large
-  // binary files
-  let bytes_to_fetch = 64 * 1024;
+  // We only fetch the first MB of data in order to not waste bandwidth especially for large
+  // binary files. This high limit is particularly needed for youtube, which includes a lot of
+  // javascript code before the opengraph tags. Mastodon also uses a 1 MB limit:
+  // https://github.com/mastodon/mastodon/blob/295ad6f19a016b3f16e1201ffcbb1b3ad6b455a2/app/lib/request.rb#L213
+  let bytes_to_fetch = 1024 * 1024;
  let response = context
    .client()
    .get(url.as_str())
--- a/crates/apub/src/fetcher/markdown_links.rs
+++ b/crates/apub/src/fetcher/markdown_links.rs
@ -42,7 +42,8 @@ pub async fn markdown_rewrite_remote_links(
      let mut local_url = local_url.to_string();
      // restore title
      if let Some(extra) = extra {
-        local_url = format!("{local_url} {extra}");
+        local_url.push(' ');
+        local_url.push_str(extra);
      }
      src.replace_range(start..end, local_url.as_str());
    }
--- a/crates/db_schema/src/impls/mod.rs
+++ b/crates/db_schema/src/impls/mod.rs
@ -35,4 +35,5 @@ pub mod private_message_report;
 pub mod registration_application;
 pub mod secret;
 pub mod site;
+pub mod tag;
 pub mod tagline;
--- a/crates/db_schema/src/impls/tag.rs
+++ b/crates/db_schema/src/impls/tag.rs
@ -0,0 +1,53 @@
+use crate::{
+  newtypes::TagId,
+  schema::{post_tag, tag},
+  source::tag::{PostTagInsertForm, Tag, TagInsertForm},
+  traits::Crud,
+  utils::{get_conn, DbPool},
+};
+use diesel::{insert_into, result::Error, QueryDsl};
+use diesel_async::RunQueryDsl;
+use lemmy_utils::error::LemmyResult;
+
+#[async_trait]
+impl Crud for Tag {
+  type InsertForm = TagInsertForm;
+
+  type UpdateForm = TagInsertForm;
+
+  type IdType = TagId;
+
+  async fn create(pool: &mut DbPool<'_>, form: &Self::InsertForm) -> Result<Self, Error> {
+    let conn = &mut get_conn(pool).await?;
+    insert_into(tag::table)
+      .values(form)
+      .get_result::<Self>(conn)
+      .await
+  }
+
+  async fn update(
+    pool: &mut DbPool<'_>,
+    pid: TagId,
+    form: &Self::UpdateForm,
+  ) -> Result<Self, Error> {
+    let conn = &mut get_conn(pool).await?;
+    diesel::update(tag::table.find(pid))
+      .set(form)
+      .get_result::<Self>(conn)
+      .await
+  }
+}
+
+impl PostTagInsertForm {
+  pub async fn insert_tag_associations(
+    pool: &mut DbPool<'_>,
+    tags: &[PostTagInsertForm],
+  ) -> LemmyResult<()> {
+    let conn = &mut get_conn(pool).await?;
+    insert_into(post_tag::table)
+      .values(tags)
+      .execute(conn)
+      .await?;
+    Ok(())
+  }
+}
--- a/crates/db_schema/src/newtypes.rs
+++ b/crates/db_schema/src/newtypes.rs
@ -288,3 +288,9 @@ impl InstanceId {
    self.0
  }
 }
+
+#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq, Default, Serialize, Deserialize)]
+#[cfg_attr(feature = "full", derive(DieselNewType, TS))]
+#[cfg_attr(feature = "full", ts(export))]
+/// The internal tag id.
+pub struct TagId(pub i32);
--- a/crates/db_schema/src/schema.rs
+++ b/crates/db_schema/src/schema.rs
@ -826,6 +826,14 @@ diesel::table! {
    }
 }

+diesel::table! {
+    post_tag (post_id, tag_id) {
+        post_id -> Int4,
+        tag_id -> Int4,
+        published -> Timestamptz,
+    }
+}
+
 diesel::table! {
    private_message (id) {
        id -> Int4,
@ -961,6 +969,18 @@ diesel::table! {
    }
 }

+diesel::table! {
+    tag (id) {
+        id -> Int4,
+        ap_id -> Text,
+        name -> Text,
+        community_id -> Int4,
+        published -> Timestamptz,
+        updated -> Nullable<Timestamptz>,
+        deleted -> Bool,
+    }
+}
+
 diesel::table! {
    tagline (id) {
        id -> Int4,
@ -1042,6 +1062,8 @@ diesel::joinable!(post_aggregates -> instance (instance_id));
 diesel::joinable!(post_aggregates -> person (creator_id));
 diesel::joinable!(post_aggregates -> post (post_id));
 diesel::joinable!(post_report -> post (post_id));
+diesel::joinable!(post_tag -> post (post_id));
+diesel::joinable!(post_tag -> tag (tag_id));
 diesel::joinable!(private_message_report -> private_message (private_message_id));
 diesel::joinable!(registration_application -> local_user (local_user_id));
 diesel::joinable!(registration_application -> person (admin_id));
@ -1052,6 +1074,7 @@ diesel::joinable!(site -> instance (instance_id));
 diesel::joinable!(site_aggregates -> site (site_id));
 diesel::joinable!(site_language -> language (language_id));
 diesel::joinable!(site_language -> site (site_id));
+diesel::joinable!(tag -> community (community_id));

 diesel::allow_tables_to_appear_in_same_query!(
    admin_allow_instance,
@ -1111,6 +1134,7 @@ diesel::allow_tables_to_appear_in_same_query!(
    post_actions,
    post_aggregates,
    post_report,
+    post_tag,
    private_message,
    private_message_report,
    received_activity,
@ -1122,5 +1146,6 @@ diesel::allow_tables_to_appear_in_same_query!(
    site,
    site_aggregates,
    site_language,
+    tag,
    tagline,
 );
--- a/crates/db_schema/src/source/mod.rs
+++ b/crates/db_schema/src/source/mod.rs
@ -41,6 +41,7 @@ pub mod private_message_report;
 pub mod registration_application;
 pub mod secret;
 pub mod site;
+pub mod tag;
 pub mod tagline;

 /// Default value for columns like [community::Community.inbox_url] which are marked as serde(skip).
--- a/crates/db_schema/src/source/tag.rs
+++ b/crates/db_schema/src/source/tag.rs
@ -0,0 +1,57 @@
+use crate::newtypes::{CommunityId, DbUrl, PostId, TagId};
+#[cfg(feature = "full")]
+use crate::schema::{post_tag, tag};
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use serde_with::skip_serializing_none;
+#[cfg(feature = "full")]
+use ts_rs::TS;
+
+/// A tag that can be assigned to a post within a community.
+/// The tag object is created by the community moderators.
+/// The assignment happens by the post creator and can be updated by the community moderators.
+///
+/// A tag is a federatable object that gives additional context to another object, which can be
+/// displayed and filtered on. Currently, we only have community post tags, which are tags
+/// created by post authors as well as mods of a community to categorize a post. In the future we
+/// may add more tag types, depending on the requirements. This will lead to either expansion of
+/// this table (community_id optional, addition of tag_type enum) or a split of this table /
+/// creation of new tables.
+#[skip_serializing_none]
+#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
+#[cfg_attr(feature = "full", derive(TS, Queryable, Selectable, Identifiable))]
+#[cfg_attr(feature = "full", diesel(table_name = tag))]
+#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))]
+#[cfg_attr(feature = "full", ts(export))]
+pub struct Tag {
+  pub id: TagId,
+  pub ap_id: DbUrl,
+  pub name: String,
+  /// the community that owns this tag
+  pub community_id: CommunityId,
+  pub published: DateTime<Utc>,
+  #[cfg_attr(feature = "full", ts(optional))]
+  pub updated: Option<DateTime<Utc>>,
+  pub deleted: bool,
+}
+
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))]
+#[cfg_attr(feature = "full", diesel(table_name = tag))]
+pub struct TagInsertForm {
+  pub ap_id: DbUrl,
+  pub name: String,
+  pub community_id: CommunityId,
+  // default now
+  pub published: Option<DateTime<Utc>>,
+  pub updated: Option<DateTime<Utc>>,
+  pub deleted: bool,
+}
+
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))]
+#[cfg_attr(feature = "full", diesel(table_name = post_tag))]
+pub struct PostTagInsertForm {
+  pub post_id: PostId,
+  pub tag_id: TagId,
+}
--- a/crates/db_schema/src/utils.rs
+++ b/crates/db_schema/src/utils.rs
@ -547,6 +547,11 @@ pub mod functions {

  // really this function is variadic, this just adds the two-argument version
  define_sql_function!(fn coalesce<T: diesel::sql_types::SqlType + diesel::sql_types::SingleValue>(x: diesel::sql_types::Nullable<T>, y: T) -> T);
+
+  define_sql_function! {
+    #[aggregate]
+    fn json_agg<T: diesel::sql_types::SqlType + diesel::sql_types::SingleValue>(obj: T) -> Json
+  }
 }

 pub const DELETED_REPLACEMENT_TEXT: &str = "*Permanently Deleted*";
--- a/crates/db_views/Cargo.toml
+++ b/crates/db_views/Cargo.toml
@ -35,6 +35,7 @@ diesel-async = { workspace = true, optional = true }
 diesel_ltree = { workspace = true, optional = true }
 serde = { workspace = true }
 serde_with = { workspace = true }
+serde_json = { workspace = true }
 tracing = { workspace = true, optional = true }
 ts-rs = { workspace = true, optional = true }
 actix-web = { workspace = true, optional = true }
@ -46,3 +47,4 @@ serial_test = { workspace = true }
 tokio = { workspace = true }
 pretty_assertions = { workspace = true }
 url = { workspace = true }
+test-context = "0.3.0"
--- a/crates/db_views/src/lib.rs
+++ b/crates/db_views/src/lib.rs
@ -14,6 +14,8 @@ pub mod local_user_view;
 #[cfg(feature = "full")]
 pub mod post_report_view;
 #[cfg(feature = "full")]
+pub mod post_tags_view;
+#[cfg(feature = "full")]
 pub mod post_view;
 #[cfg(feature = "full")]
 pub mod private_message_report_view;
--- a/crates/db_views/src/post_tags_view.rs
+++ b/crates/db_views/src/post_tags_view.rs
@ -0,0 +1,30 @@
+//! see post_view.rs for the reason for this json decoding
+use crate::structs::PostTags;
+use diesel::{
+  deserialize::FromSql,
+  pg::{Pg, PgValue},
+  serialize::ToSql,
+  sql_types::{self, Nullable},
+};
+
+impl FromSql<Nullable<sql_types::Json>, Pg> for PostTags {
+  fn from_sql(bytes: PgValue) -> diesel::deserialize::Result<Self> {
+    let value = <serde_json::Value as FromSql<sql_types::Json, Pg>>::from_sql(bytes)?;
+    Ok(serde_json::from_value::<PostTags>(value)?)
+  }
+  fn from_nullable_sql(
+    bytes: Option<<Pg as diesel::backend::Backend>::RawValue<'_>>,
+  ) -> diesel::deserialize::Result<Self> {
+    match bytes {
+      Some(bytes) => Self::from_sql(bytes),
+      None => Ok(Self { tags: vec![] }),
+    }
+  }
+}
+
+impl ToSql<Nullable<sql_types::Json>, Pg> for PostTags {
+  fn to_sql(&self, out: &mut diesel::serialize::Output<Pg>) -> diesel::serialize::Result {
+    let value = serde_json::to_value(self)?;
+    <serde_json::Value as ToSql<sql_types::Json, Pg>>::to_sql(&value, &mut out.reborrow())
+  }
+}
--- a/crates/db_views/src/post_view.rs
+++ b/crates/db_views/src/post_view.rs
--- a/crates/db_views/src/structs.rs
+++ b/crates/db_views/src/structs.rs
@ -1,5 +1,7 @@
 #[cfg(feature = "full")]
 use diesel::Queryable;
+#[cfg(feature = "full")]
+use diesel::{deserialize::FromSqlRow, expression::AsExpression, sql_types};
 use lemmy_db_schema::{
  aggregates::structs::{CommentAggregates, PersonAggregates, PostAggregates, SiteAggregates},
  source::{
@ -20,6 +22,7 @@ use lemmy_db_schema::{
    private_message_report::PrivateMessageReport,
    registration_application::RegistrationApplication,
    site::Site,
+    tag::Tag,
  },
  SubscribedType,
 };
@ -157,6 +160,7 @@ pub struct PostView {
  #[cfg_attr(feature = "full", ts(optional))]
  pub my_vote: Option<i16>,
  pub unread_comments: i64,
+  pub tags: PostTags,
 }

 #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
@ -289,3 +293,12 @@ pub enum ReportCombinedView {
  Comment(CommentReportView),
  PrivateMessage(PrivateMessageReportView),
 }
+
+#[derive(Clone, serde::Serialize, serde::Deserialize, Debug, PartialEq, Default)]
+#[cfg_attr(feature = "full", derive(TS, FromSqlRow, AsExpression))]
+#[serde(transparent)]
+#[cfg_attr(feature = "full", diesel(sql_type = Nullable<sql_types::Json>))]
+/// We wrap this in a struct so we can derive FromSqlRow and implement FromSql<Nullable<Json>> for it.
+pub struct PostTags {
+  pub tags: Vec<Tag>,
+}
--- a/crates/routes/src/feeds.rs
+++ b/crates/routes/src/feeds.rs
@ -454,7 +454,6 @@ fn build_item(
  protocol_and_hostname: &str,
 ) -> LemmyResult<Item> {
  // TODO add images
-  let author_url = format!("{protocol_and_hostname}/u/{creator_name}");
  let guid = Some(Guid {
    permalink: true,
    value: url.to_owned(),
@ -464,7 +463,8 @@ fn build_item(
  Ok(Item {
    title: Some(format!("Reply from {creator_name}")),
    author: Some(format!(
-      "/u/{creator_name} <a href=\"{author_url}\">(link)</a>"
+      "/u/{creator_name} <a href=\"{}\">(link)</a>",
+      format_args!("{protocol_and_hostname}/u/{creator_name}")
    )),
    pub_date: Some(published.to_rfc2822()),
    comments: Some(url.to_owned()),
--- a/crates/utils/src/utils/markdown/image_links.rs
+++ b/crates/utils/src/utils/markdown/image_links.rs
@ -24,7 +24,8 @@ pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec<Url>) {
          );
          // restore custom emoji format
          if let Some(extra) = extra {
-            proxied = format!("{proxied} {extra}");
+            proxied.push(' ');
+            proxied.push_str(extra);
          }
          src.replace_range(start..end, &proxied);
        }
--- a/migrations/2024-12-17-144959_community-post-tags/down.sql
+++ b/migrations/2024-12-17-144959_community-post-tags/down.sql
@ -0,0 +1,4 @@
+DROP TABLE post_tag;
+
+DROP TABLE tag;
+
--- a/migrations/2024-12-17-144959_community-post-tags/up.sql
+++ b/migrations/2024-12-17-144959_community-post-tags/up.sql
@ -0,0 +1,23 @@
+-- A tag is a federatable object that gives additional context to another object, which can be displayed and filtered on.
+-- Currently, we only have community post tags, which are tags created by post authors as well as mods of a community
+-- to categorize a post. In the future we may add more tag types, depending on the requirements.
+-- This will lead to either expansion of this table (community_id optional, addition of tag_type enum)
+-- or a split of this table / creation of new tables.
+CREATE TABLE tag (
+    id serial PRIMARY KEY,
+    ap_id text NOT NULL UNIQUE,
+    name text NOT NULL,
+    community_id int NOT NULL REFERENCES community (id) ON UPDATE CASCADE ON DELETE CASCADE,
+    published timestamptz NOT NULL DEFAULT now(),
+    updated timestamptz,
+    deleted boolean NOT NULL DEFAULT FALSE
+);
+
+-- an association between a post and a tag. created/updated by the post author or mods of a community
+CREATE TABLE post_tag (
+    post_id int NOT NULL REFERENCES post (id) ON UPDATE CASCADE ON DELETE CASCADE,
+    tag_id int NOT NULL REFERENCES tag (id) ON UPDATE CASCADE ON DELETE CASCADE,
+    published timestamptz NOT NULL DEFAULT now(),
+    PRIMARY KEY (post_id, tag_id)
+);
+
--- a/src/scheduled_tasks.rs
+++ b/src/scheduled_tasks.rs
@ -190,10 +190,8 @@ async fn process_ranks_in_batches(
         UPDATE {aggregates_table} a {set_clause}
             FROM batch WHERE a.{id_column} = batch.{id_column} RETURNING a.published;
    "#,
-      id_column = format!("{table_name}_id"),
-      aggregates_table = format!("{table_name}_aggregates"),
-      set_clause = set_clause,
-      where_clause = where_clause
+      id_column = format_args!("{table_name}_id"),
+      aggregates_table = format_args!("{table_name}_aggregates"),
    ))
    .bind::<Timestamptz, _>(previous_batch_last_published)
    .bind::<Integer, _>(update_batch_size)