get_random_community_id: add filters for nsfw and private, use algorithm that doesn't scan the entire table

This commit is contained in:
Dull Bananas 2024-12-17 16:25:45 -07:00
parent 9e17c517fe
commit bd020ebb0f
11 changed files with 86 additions and 14 deletions

View file

@ -27,7 +27,7 @@ pub async fn get_random_community(
let local_user = local_user_view.as_ref().map(|u| &u.local_user);
let random_community_id =
Community::get_random_community_id(&mut context.pool(), &data.type_).await?;
Community::get_random_community_id(&mut context.pool(), &data.type_, data.show_nsfw).await?;
let is_mod_or_admin = is_mod_or_admin_opt(
&mut context.pool(),

View file

@ -274,6 +274,8 @@ pub struct TransferCommunity {
pub struct GetRandomCommunity {
#[cfg_attr(feature = "full", ts(optional))]
pub type_: Option<ListingType>,
#[cfg_attr(feature = "full", ts(optional))]
pub show_nsfw: Option<bool>,
}
#[skip_serializing_none]

View file

@ -22,12 +22,13 @@ use crate::{
utils::{
action_query,
find_action,
functions::{coalesce, lower, random},
functions::{coalesce, coalesce_2_nullable, lower, random_smallint},
get_conn,
now,
uplete,
DbPool,
},
CommunityVisibility,
ListingType,
SubscribedType,
};
@ -209,23 +210,58 @@ impl Community {
pub async fn get_random_community_id(
pool: &mut DbPool<'_>,
type_: &Option<ListingType>,
show_nsfw: Option<bool>,
) -> Result<CommunityId, Error> {
let conn = &mut get_conn(pool).await?;
let mut query = community::table
.filter(not(community::deleted))
.filter(not(community::removed))
.into_boxed();
// This is based on the random page selection algorithm in MediaWiki. It assigns a random number
// X to each item. To pick a random one, it generates a random number Y and gets the item with
// the lowest X value where X >= Y.
//
// https://phabricator.wikimedia.org/source/mediawiki/browse/master/includes/specials/SpecialRandomPage.php;763c5f084101676ab1bc52862e1ffbd24585a365
//
// The difference is we also regenerate the item's assigned number when the item is picked.
// Without this, items would have permanent variations in the probability of being picked.
// Additionally, in each group of multiple items that are assigned the same random number (a
// more likely occurence with `smallint`), there would be only one item that ever gets
// picked.
if let Some(ListingType::Local) = type_ {
query = query.filter(community::local);
}
let try_pick = || {
let mut query = community::table
.filter(not(
community::deleted
.or(community::removed)
.or(community::visibility.eq(CommunityVisibility::Private)),
))
.order(community::random_number.asc())
.select(community::id)
.into_boxed();
query
.select(community::id)
.order(random())
.limit(1)
.first::<CommunityId>(conn)
if let Some(ListingType::Local) = type_ {
query = query.filter(community::local);
}
if !show_nsfw.unwrap_or(false) {
query = query.filter(not(community::nsfw));
}
query
};
diesel::update(community::table)
.filter(
community::id.nullable().eq(coalesce_2_nullable(
try_pick()
.filter(community::random_number.ge(random_smallint()))
.single_value(),
// Wrap to the beginning if the generated number is higher than all
// `community::random_number` values, just like in the MediaWiki algorithm
try_pick().single_value(),
)),
)
.set(community::random_number.eq(random_smallint()))
.returning(community::id)
.get_result::<CommunityId>(conn)
.await
}
}
@ -570,6 +606,7 @@ mod tests {
posting_restricted_to_mods: false,
instance_id: inserted_instance.id,
visibility: CommunityVisibility::Public,
random_number: inserted_community.random_number,
};
let community_follower_form = CommunityFollowerForm {

View file

@ -220,6 +220,7 @@ diesel::table! {
visibility -> CommunityVisibility,
#[max_length = 150]
description -> Nullable<Varchar>,
random_number -> Int2,
}
}

View file

@ -76,6 +76,8 @@ pub struct Community {
/// A shorter, one-line description of the site.
#[cfg_attr(feature = "full", ts(optional))]
pub description: Option<String>,
#[serde(skip)]
pub random_number: i16,
}
#[derive(Debug, Clone, derive_new::new)]

View file

@ -545,8 +545,12 @@ pub mod functions {
define_sql_function!(fn random() -> Text);
define_sql_function!(fn random_smallint() -> SmallInt);
// really this function is variadic, this just adds the two-argument version
define_sql_function!(fn coalesce<T: diesel::sql_types::SqlType + diesel::sql_types::SingleValue>(x: diesel::sql_types::Nullable<T>, y: T) -> T);
define_sql_function!(#[sql_name = "coalesce"] fn coalesce_2_nullable<T: diesel::sql_types::SqlType + diesel::sql_types::SingleValue>(x: diesel::sql_types::Nullable<T>, y: diesel::sql_types::Nullable<T>) -> diesel::sql_types::Nullable<T>);
}
pub const DELETED_REPLACEMENT_TEXT: &str = "*Permanently Deleted*";

View file

@ -389,6 +389,7 @@ mod tests {
featured_url: inserted_community.featured_url,
instance_id: inserted_instance.id,
visibility: CommunityVisibility::Public,
random_number: inserted_community.random_number,
},
creator: Person {
id: inserted_jessica.id,

View file

@ -1058,6 +1058,7 @@ mod tests {
moderators_url: data.inserted_community.moderators_url.clone(),
featured_url: data.inserted_community.featured_url.clone(),
visibility: CommunityVisibility::Public,
random_number: data.inserted_community.random_number,
},
counts: CommentAggregates {
comment_id: data.inserted_comment_0.id,

View file

@ -1719,6 +1719,7 @@ mod tests {
moderators_url: inserted_community.moderators_url.clone(),
featured_url: inserted_community.featured_url.clone(),
visibility: CommunityVisibility::Public,
random_number: inserted_community.random_number,
},
counts: PostAggregates {
post_id: inserted_post.id,

View file

@ -0,0 +1,5 @@
ALTER TABLE community
DROP COLUMN random_number;
DROP FUNCTION random_smallint;

View file

@ -0,0 +1,18 @@
-- * `smallint` range from https://www.postgresql.org/docs/17/datatype-numeric.html
-- * built-in `random` function has `VOLATILE` and `PARALLEL RESTRICTED` according to:
-- * https://www.postgresql.org/docs/current/parallel-safety.html#PARALLEL-LABELING
-- * https://www.postgresql.org/docs/17/xfunc-volatility.html
CREATE FUNCTION random_smallint ()
RETURNS smallint
LANGUAGE sql
VOLATILE PARALLEL RESTRICTED RETURN random (
-32768, 32767
);
ALTER TABLE community
ADD COLUMN random_number smallint NOT NULL DEFAULT random_smallint ();
CREATE INDEX idx_community_random_number ON community (random_number) INCLUDE (local, nsfw)
WHERE
NOT (deleted OR removed OR visibility = 'Private');