From bd020ebb0f572514cc49e8860d78bc0869331bdb Mon Sep 17 00:00:00 2001 From: Dull Bananas Date: Tue, 17 Dec 2024 16:25:45 -0700 Subject: [PATCH] get_random_community_id: add filters for nsfw and private, use algorithm that doesn't scan the entire table --- crates/api/src/community/random.rs | 2 +- crates/api_common/src/community.rs | 2 + crates/db_schema/src/impls/community.rs | 63 +++++++++++++++---- crates/db_schema/src/schema.rs | 1 + crates/db_schema/src/source/community.rs | 2 + crates/db_schema/src/utils.rs | 4 ++ crates/db_views/src/comment_report_view.rs | 1 + crates/db_views/src/comment_view.rs | 1 + crates/db_views/src/post_view.rs | 1 + .../down.sql | 5 ++ .../up.sql | 18 ++++++ 11 files changed, 86 insertions(+), 14 deletions(-) create mode 100644 migrations/2024-12-09-200602_optimize_get_random_community/down.sql create mode 100644 migrations/2024-12-09-200602_optimize_get_random_community/up.sql diff --git a/crates/api/src/community/random.rs b/crates/api/src/community/random.rs index 3cc04e126..55941229d 100644 --- a/crates/api/src/community/random.rs +++ b/crates/api/src/community/random.rs @@ -27,7 +27,7 @@ pub async fn get_random_community( let local_user = local_user_view.as_ref().map(|u| &u.local_user); let random_community_id = - Community::get_random_community_id(&mut context.pool(), &data.type_).await?; + Community::get_random_community_id(&mut context.pool(), &data.type_, data.show_nsfw).await?; let is_mod_or_admin = is_mod_or_admin_opt( &mut context.pool(), diff --git a/crates/api_common/src/community.rs b/crates/api_common/src/community.rs index 898767b34..52ba2025a 100644 --- a/crates/api_common/src/community.rs +++ b/crates/api_common/src/community.rs @@ -274,6 +274,8 @@ pub struct TransferCommunity { pub struct GetRandomCommunity { #[cfg_attr(feature = "full", ts(optional))] pub type_: Option, + #[cfg_attr(feature = "full", ts(optional))] + pub show_nsfw: Option, } #[skip_serializing_none] diff --git a/crates/db_schema/src/impls/community.rs b/crates/db_schema/src/impls/community.rs index 3a7ac7dc6..f0aa7ba9c 100644 --- a/crates/db_schema/src/impls/community.rs +++ b/crates/db_schema/src/impls/community.rs @@ -22,12 +22,13 @@ use crate::{ utils::{ action_query, find_action, - functions::{coalesce, lower, random}, + functions::{coalesce, coalesce_2_nullable, lower, random_smallint}, get_conn, now, uplete, DbPool, }, + CommunityVisibility, ListingType, SubscribedType, }; @@ -209,23 +210,58 @@ impl Community { pub async fn get_random_community_id( pool: &mut DbPool<'_>, type_: &Option, + show_nsfw: Option, ) -> Result { let conn = &mut get_conn(pool).await?; - let mut query = community::table - .filter(not(community::deleted)) - .filter(not(community::removed)) - .into_boxed(); + // This is based on the random page selection algorithm in MediaWiki. It assigns a random number + // X to each item. To pick a random one, it generates a random number Y and gets the item with + // the lowest X value where X >= Y. + // + // https://phabricator.wikimedia.org/source/mediawiki/browse/master/includes/specials/SpecialRandomPage.php;763c5f084101676ab1bc52862e1ffbd24585a365 + // + // The difference is we also regenerate the item's assigned number when the item is picked. + // Without this, items would have permanent variations in the probability of being picked. + // Additionally, in each group of multiple items that are assigned the same random number (a + // more likely occurence with `smallint`), there would be only one item that ever gets + // picked. - if let Some(ListingType::Local) = type_ { - query = query.filter(community::local); - } + let try_pick = || { + let mut query = community::table + .filter(not( + community::deleted + .or(community::removed) + .or(community::visibility.eq(CommunityVisibility::Private)), + )) + .order(community::random_number.asc()) + .select(community::id) + .into_boxed(); - query - .select(community::id) - .order(random()) - .limit(1) - .first::(conn) + if let Some(ListingType::Local) = type_ { + query = query.filter(community::local); + } + + if !show_nsfw.unwrap_or(false) { + query = query.filter(not(community::nsfw)); + } + + query + }; + + diesel::update(community::table) + .filter( + community::id.nullable().eq(coalesce_2_nullable( + try_pick() + .filter(community::random_number.ge(random_smallint())) + .single_value(), + // Wrap to the beginning if the generated number is higher than all + // `community::random_number` values, just like in the MediaWiki algorithm + try_pick().single_value(), + )), + ) + .set(community::random_number.eq(random_smallint())) + .returning(community::id) + .get_result::(conn) .await } } @@ -570,6 +606,7 @@ mod tests { posting_restricted_to_mods: false, instance_id: inserted_instance.id, visibility: CommunityVisibility::Public, + random_number: inserted_community.random_number, }; let community_follower_form = CommunityFollowerForm { diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index 66a65d143..91c91bef1 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -220,6 +220,7 @@ diesel::table! { visibility -> CommunityVisibility, #[max_length = 150] description -> Nullable, + random_number -> Int2, } } diff --git a/crates/db_schema/src/source/community.rs b/crates/db_schema/src/source/community.rs index f65ef06f9..92dc3c16a 100644 --- a/crates/db_schema/src/source/community.rs +++ b/crates/db_schema/src/source/community.rs @@ -76,6 +76,8 @@ pub struct Community { /// A shorter, one-line description of the site. #[cfg_attr(feature = "full", ts(optional))] pub description: Option, + #[serde(skip)] + pub random_number: i16, } #[derive(Debug, Clone, derive_new::new)] diff --git a/crates/db_schema/src/utils.rs b/crates/db_schema/src/utils.rs index aa213887e..fce2f7d45 100644 --- a/crates/db_schema/src/utils.rs +++ b/crates/db_schema/src/utils.rs @@ -545,8 +545,12 @@ pub mod functions { define_sql_function!(fn random() -> Text); + define_sql_function!(fn random_smallint() -> SmallInt); + // really this function is variadic, this just adds the two-argument version define_sql_function!(fn coalesce(x: diesel::sql_types::Nullable, y: T) -> T); + + define_sql_function!(#[sql_name = "coalesce"] fn coalesce_2_nullable(x: diesel::sql_types::Nullable, y: diesel::sql_types::Nullable) -> diesel::sql_types::Nullable); } pub const DELETED_REPLACEMENT_TEXT: &str = "*Permanently Deleted*"; diff --git a/crates/db_views/src/comment_report_view.rs b/crates/db_views/src/comment_report_view.rs index b4a23a0da..c536c1b76 100644 --- a/crates/db_views/src/comment_report_view.rs +++ b/crates/db_views/src/comment_report_view.rs @@ -389,6 +389,7 @@ mod tests { featured_url: inserted_community.featured_url, instance_id: inserted_instance.id, visibility: CommunityVisibility::Public, + random_number: inserted_community.random_number, }, creator: Person { id: inserted_jessica.id, diff --git a/crates/db_views/src/comment_view.rs b/crates/db_views/src/comment_view.rs index 2cf751f9f..cee2c5898 100644 --- a/crates/db_views/src/comment_view.rs +++ b/crates/db_views/src/comment_view.rs @@ -1058,6 +1058,7 @@ mod tests { moderators_url: data.inserted_community.moderators_url.clone(), featured_url: data.inserted_community.featured_url.clone(), visibility: CommunityVisibility::Public, + random_number: data.inserted_community.random_number, }, counts: CommentAggregates { comment_id: data.inserted_comment_0.id, diff --git a/crates/db_views/src/post_view.rs b/crates/db_views/src/post_view.rs index c6d1b036f..005fc1903 100644 --- a/crates/db_views/src/post_view.rs +++ b/crates/db_views/src/post_view.rs @@ -1719,6 +1719,7 @@ mod tests { moderators_url: inserted_community.moderators_url.clone(), featured_url: inserted_community.featured_url.clone(), visibility: CommunityVisibility::Public, + random_number: inserted_community.random_number, }, counts: PostAggregates { post_id: inserted_post.id, diff --git a/migrations/2024-12-09-200602_optimize_get_random_community/down.sql b/migrations/2024-12-09-200602_optimize_get_random_community/down.sql new file mode 100644 index 000000000..2793832cb --- /dev/null +++ b/migrations/2024-12-09-200602_optimize_get_random_community/down.sql @@ -0,0 +1,5 @@ +ALTER TABLE community + DROP COLUMN random_number; + +DROP FUNCTION random_smallint; + diff --git a/migrations/2024-12-09-200602_optimize_get_random_community/up.sql b/migrations/2024-12-09-200602_optimize_get_random_community/up.sql new file mode 100644 index 000000000..8157cc6cb --- /dev/null +++ b/migrations/2024-12-09-200602_optimize_get_random_community/up.sql @@ -0,0 +1,18 @@ +-- * `smallint` range from https://www.postgresql.org/docs/17/datatype-numeric.html +-- * built-in `random` function has `VOLATILE` and `PARALLEL RESTRICTED` according to: +-- * https://www.postgresql.org/docs/current/parallel-safety.html#PARALLEL-LABELING +-- * https://www.postgresql.org/docs/17/xfunc-volatility.html +CREATE FUNCTION random_smallint () + RETURNS smallint + LANGUAGE sql + VOLATILE PARALLEL RESTRICTED RETURN random ( + -32768, 32767 +); + +ALTER TABLE community + ADD COLUMN random_number smallint NOT NULL DEFAULT random_smallint (); + +CREATE INDEX idx_community_random_number ON community (random_number) INCLUDE (local, nsfw) +WHERE + NOT (deleted OR removed OR visibility = 'Private'); +