Adding a scaled sort, to boost smaller communities. (#3907)
* Adding a scaled sort, to boost smaller communities. - Previously referred to as *best* . - Fixes #3622 * Fixing scheduled task update. * Converting hot_rank integers to floats. * Altering hot_rank psql function to default to zero after a week. * Setting scaled_rank to zero, where hot_rank is zero. * Adding image_upload table.
This commit is contained in:
parent
4121fc4d56
commit
9785b20843
13 changed files with 279 additions and 31 deletions
crates
apub/src/activities/create_or_update
db_schema/src
db_views/src
db_views_actor/src
migrations/2023-08-23-182533_scaled_rank
src
|
@ -150,7 +150,7 @@ impl ActivityHandler for CreateOrUpdatePage {
|
||||||
PostLike::like(&mut context.pool(), &like_form).await?;
|
PostLike::like(&mut context.pool(), &like_form).await?;
|
||||||
|
|
||||||
// Calculate initial hot_rank for post
|
// Calculate initial hot_rank for post
|
||||||
PostAggregates::update_hot_rank(&mut context.pool(), post.id).await?;
|
PostAggregates::update_ranks(&mut context.pool(), post.id).await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
use crate::{
|
use crate::{
|
||||||
aggregates::structs::PostAggregates,
|
aggregates::structs::PostAggregates,
|
||||||
newtypes::PostId,
|
newtypes::PostId,
|
||||||
schema::post_aggregates,
|
schema::{community_aggregates, post, post_aggregates},
|
||||||
utils::{functions::hot_rank, get_conn, DbPool},
|
utils::{
|
||||||
|
functions::{hot_rank, scaled_rank},
|
||||||
|
get_conn,
|
||||||
|
DbPool,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
use diesel::{result::Error, ExpressionMethods, QueryDsl};
|
use diesel::{result::Error, ExpressionMethods, JoinOnDsl, QueryDsl};
|
||||||
use diesel_async::RunQueryDsl;
|
use diesel_async::RunQueryDsl;
|
||||||
|
|
||||||
impl PostAggregates {
|
impl PostAggregates {
|
||||||
|
@ -16,9 +20,19 @@ impl PostAggregates {
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn update_hot_rank(pool: &mut DbPool<'_>, post_id: PostId) -> Result<Self, Error> {
|
pub async fn update_ranks(pool: &mut DbPool<'_>, post_id: PostId) -> Result<Self, Error> {
|
||||||
let conn = &mut get_conn(pool).await?;
|
let conn = &mut get_conn(pool).await?;
|
||||||
|
|
||||||
|
// Diesel can't update based on a join, which is necessary for the scaled_rank
|
||||||
|
// https://github.com/diesel-rs/diesel/issues/1478
|
||||||
|
// Just select the users_active_month manually for now, since it's a single post anyway
|
||||||
|
let users_active_month = community_aggregates::table
|
||||||
|
.select(community_aggregates::users_active_month)
|
||||||
|
.inner_join(post::table.on(community_aggregates::community_id.eq(post::community_id)))
|
||||||
|
.filter(post::id.eq(post_id))
|
||||||
|
.first::<i64>(conn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
diesel::update(post_aggregates::table)
|
diesel::update(post_aggregates::table)
|
||||||
.filter(post_aggregates::post_id.eq(post_id))
|
.filter(post_aggregates::post_id.eq(post_id))
|
||||||
.set((
|
.set((
|
||||||
|
@ -27,6 +41,11 @@ impl PostAggregates {
|
||||||
post_aggregates::score,
|
post_aggregates::score,
|
||||||
post_aggregates::newest_comment_time_necro,
|
post_aggregates::newest_comment_time_necro,
|
||||||
)),
|
)),
|
||||||
|
post_aggregates::scaled_rank.eq(scaled_rank(
|
||||||
|
post_aggregates::score,
|
||||||
|
post_aggregates::published,
|
||||||
|
users_active_month,
|
||||||
|
)),
|
||||||
))
|
))
|
||||||
.get_result::<Self>(conn)
|
.get_result::<Self>(conn)
|
||||||
.await
|
.await
|
||||||
|
|
|
@ -27,11 +27,11 @@ pub struct CommentAggregates {
|
||||||
pub published: DateTime<Utc>,
|
pub published: DateTime<Utc>,
|
||||||
/// The total number of children in this comment branch.
|
/// The total number of children in this comment branch.
|
||||||
pub child_count: i32,
|
pub child_count: i32,
|
||||||
pub hot_rank: i32,
|
pub hot_rank: f64,
|
||||||
pub controversy_rank: f64,
|
pub controversy_rank: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
|
#[derive(PartialEq, Debug, Serialize, Deserialize, Clone)]
|
||||||
#[cfg_attr(feature = "full", derive(Queryable, Associations, Identifiable, TS))]
|
#[cfg_attr(feature = "full", derive(Queryable, Associations, Identifiable, TS))]
|
||||||
#[cfg_attr(feature = "full", diesel(table_name = community_aggregates))]
|
#[cfg_attr(feature = "full", diesel(table_name = community_aggregates))]
|
||||||
#[cfg_attr(
|
#[cfg_attr(
|
||||||
|
@ -55,7 +55,7 @@ pub struct CommunityAggregates {
|
||||||
pub users_active_month: i64,
|
pub users_active_month: i64,
|
||||||
/// The number of users with any activity in the last year.
|
/// The number of users with any activity in the last year.
|
||||||
pub users_active_half_year: i64,
|
pub users_active_half_year: i64,
|
||||||
pub hot_rank: i32,
|
pub hot_rank: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone, Default)]
|
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone, Default)]
|
||||||
|
@ -95,11 +95,13 @@ pub struct PostAggregates {
|
||||||
pub featured_community: bool,
|
pub featured_community: bool,
|
||||||
/// If the post is featured on the site / to local.
|
/// If the post is featured on the site / to local.
|
||||||
pub featured_local: bool,
|
pub featured_local: bool,
|
||||||
pub hot_rank: i32,
|
pub hot_rank: f64,
|
||||||
pub hot_rank_active: i32,
|
pub hot_rank_active: f64,
|
||||||
pub community_id: CommunityId,
|
pub community_id: CommunityId,
|
||||||
pub creator_id: PersonId,
|
pub creator_id: PersonId,
|
||||||
pub controversy_rank: f64,
|
pub controversy_rank: f64,
|
||||||
|
/// A rank that amplifies smaller communities
|
||||||
|
pub scaled_rank: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
|
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)]
|
||||||
|
|
|
@ -54,6 +54,7 @@ use ts_rs::TS;
|
||||||
)]
|
)]
|
||||||
#[cfg_attr(feature = "full", DbValueStyle = "verbatim")]
|
#[cfg_attr(feature = "full", DbValueStyle = "verbatim")]
|
||||||
#[cfg_attr(feature = "full", ts(export))]
|
#[cfg_attr(feature = "full", ts(export))]
|
||||||
|
// TODO add the controversial and scaled rankings to the doc below
|
||||||
/// The post sort types. See here for descriptions: https://join-lemmy.org/docs/en/users/03-votes-and-ranking.html
|
/// The post sort types. See here for descriptions: https://join-lemmy.org/docs/en/users/03-votes-and-ranking.html
|
||||||
pub enum SortType {
|
pub enum SortType {
|
||||||
#[default]
|
#[default]
|
||||||
|
@ -75,6 +76,7 @@ pub enum SortType {
|
||||||
TopSixMonths,
|
TopSixMonths,
|
||||||
TopNineMonths,
|
TopNineMonths,
|
||||||
Controversial,
|
Controversial,
|
||||||
|
Scaled,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(EnumString, Display, Debug, Serialize, Deserialize, Clone, Copy)]
|
#[derive(EnumString, Display, Debug, Serialize, Deserialize, Clone, Copy)]
|
||||||
|
|
|
@ -100,7 +100,7 @@ diesel::table! {
|
||||||
downvotes -> Int8,
|
downvotes -> Int8,
|
||||||
published -> Timestamptz,
|
published -> Timestamptz,
|
||||||
child_count -> Int4,
|
child_count -> Int4,
|
||||||
hot_rank -> Int4,
|
hot_rank -> Float8,
|
||||||
controversy_rank -> Float8,
|
controversy_rank -> Float8,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -198,7 +198,7 @@ diesel::table! {
|
||||||
users_active_week -> Int8,
|
users_active_week -> Int8,
|
||||||
users_active_month -> Int8,
|
users_active_month -> Int8,
|
||||||
users_active_half_year -> Int8,
|
users_active_half_year -> Int8,
|
||||||
hot_rank -> Int4,
|
hot_rank -> Float8,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -299,6 +299,16 @@ diesel::table! {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
diesel::table! {
|
||||||
|
image_upload (id) {
|
||||||
|
id -> Int4,
|
||||||
|
local_user_id -> Int4,
|
||||||
|
pictrs_alias -> Text,
|
||||||
|
pictrs_delete_token -> Text,
|
||||||
|
published -> Timestamptz,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
diesel::table! {
|
diesel::table! {
|
||||||
instance (id) {
|
instance (id) {
|
||||||
id -> Int4,
|
id -> Int4,
|
||||||
|
@ -683,11 +693,12 @@ diesel::table! {
|
||||||
newest_comment_time -> Timestamptz,
|
newest_comment_time -> Timestamptz,
|
||||||
featured_community -> Bool,
|
featured_community -> Bool,
|
||||||
featured_local -> Bool,
|
featured_local -> Bool,
|
||||||
hot_rank -> Int4,
|
hot_rank -> Float8,
|
||||||
hot_rank_active -> Int4,
|
hot_rank_active -> Float8,
|
||||||
community_id -> Int4,
|
community_id -> Int4,
|
||||||
creator_id -> Int4,
|
creator_id -> Int4,
|
||||||
controversy_rank -> Float8,
|
controversy_rank -> Float8,
|
||||||
|
scaled_rank -> Float8,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -893,6 +904,7 @@ diesel::joinable!(custom_emoji_keyword -> custom_emoji (custom_emoji_id));
|
||||||
diesel::joinable!(email_verification -> local_user (local_user_id));
|
diesel::joinable!(email_verification -> local_user (local_user_id));
|
||||||
diesel::joinable!(federation_allowlist -> instance (instance_id));
|
diesel::joinable!(federation_allowlist -> instance (instance_id));
|
||||||
diesel::joinable!(federation_blocklist -> instance (instance_id));
|
diesel::joinable!(federation_blocklist -> instance (instance_id));
|
||||||
|
diesel::joinable!(image_upload -> local_user (local_user_id));
|
||||||
diesel::joinable!(local_site -> site (site_id));
|
diesel::joinable!(local_site -> site (site_id));
|
||||||
diesel::joinable!(local_site_rate_limit -> local_site (local_site_id));
|
diesel::joinable!(local_site_rate_limit -> local_site (local_site_id));
|
||||||
diesel::joinable!(local_user -> person (person_id));
|
diesel::joinable!(local_user -> person (person_id));
|
||||||
|
@ -967,6 +979,7 @@ diesel::allow_tables_to_appear_in_same_query!(
|
||||||
email_verification,
|
email_verification,
|
||||||
federation_allowlist,
|
federation_allowlist,
|
||||||
federation_blocklist,
|
federation_blocklist,
|
||||||
|
image_upload,
|
||||||
instance,
|
instance,
|
||||||
language,
|
language,
|
||||||
local_site,
|
local_site,
|
||||||
|
|
|
@ -347,7 +347,7 @@ pub fn naive_now() -> DateTime<Utc> {
|
||||||
|
|
||||||
pub fn post_to_comment_sort_type(sort: SortType) -> CommentSortType {
|
pub fn post_to_comment_sort_type(sort: SortType) -> CommentSortType {
|
||||||
match sort {
|
match sort {
|
||||||
SortType::Active | SortType::Hot => CommentSortType::Hot,
|
SortType::Active | SortType::Hot | SortType::Scaled => CommentSortType::Hot,
|
||||||
SortType::New | SortType::NewComments | SortType::MostComments => CommentSortType::New,
|
SortType::New | SortType::NewComments | SortType::MostComments => CommentSortType::New,
|
||||||
SortType::Old => CommentSortType::Old,
|
SortType::Old => CommentSortType::Old,
|
||||||
SortType::Controversial => CommentSortType::Controversial,
|
SortType::Controversial => CommentSortType::Controversial,
|
||||||
|
@ -384,7 +384,11 @@ pub mod functions {
|
||||||
use diesel::sql_types::{BigInt, Text, Timestamptz};
|
use diesel::sql_types::{BigInt, Text, Timestamptz};
|
||||||
|
|
||||||
sql_function! {
|
sql_function! {
|
||||||
fn hot_rank(score: BigInt, time: Timestamptz) -> Integer;
|
fn hot_rank(score: BigInt, time: Timestamptz) -> Double;
|
||||||
|
}
|
||||||
|
|
||||||
|
sql_function! {
|
||||||
|
fn scaled_rank(score: BigInt, time: Timestamptz, users_active_month: BigInt) -> Double;
|
||||||
}
|
}
|
||||||
|
|
||||||
sql_function! {
|
sql_function! {
|
||||||
|
|
|
@ -432,7 +432,7 @@ mod tests {
|
||||||
downvotes: 0,
|
downvotes: 0,
|
||||||
published: agg.published,
|
published: agg.published,
|
||||||
child_count: 0,
|
child_count: 0,
|
||||||
hot_rank: 1728,
|
hot_rank: 0.1728,
|
||||||
controversy_rank: 0.0,
|
controversy_rank: 0.0,
|
||||||
},
|
},
|
||||||
my_vote: None,
|
my_vote: None,
|
||||||
|
|
|
@ -886,7 +886,7 @@ mod tests {
|
||||||
downvotes: 0,
|
downvotes: 0,
|
||||||
published: agg.published,
|
published: agg.published,
|
||||||
child_count: 5,
|
child_count: 5,
|
||||||
hot_rank: 1728,
|
hot_rank: 0.1728,
|
||||||
controversy_rank: 0.0,
|
controversy_rank: 0.0,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -380,6 +380,9 @@ fn queries<'a>() -> Queries<
|
||||||
SortType::Hot => query
|
SortType::Hot => query
|
||||||
.then_order_by(post_aggregates::hot_rank.desc())
|
.then_order_by(post_aggregates::hot_rank.desc())
|
||||||
.then_order_by(post_aggregates::published.desc()),
|
.then_order_by(post_aggregates::published.desc()),
|
||||||
|
SortType::Scaled => query
|
||||||
|
.then_order_by(post_aggregates::scaled_rank.desc())
|
||||||
|
.then_order_by(post_aggregates::published.desc()),
|
||||||
SortType::Controversial => query.then_order_by(post_aggregates::controversy_rank.desc()),
|
SortType::Controversial => query.then_order_by(post_aggregates::controversy_rank.desc()),
|
||||||
SortType::New => query.then_order_by(post_aggregates::published.desc()),
|
SortType::New => query.then_order_by(post_aggregates::published.desc()),
|
||||||
SortType::Old => query.then_order_by(post_aggregates::published.asc()),
|
SortType::Old => query.then_order_by(post_aggregates::published.asc()),
|
||||||
|
@ -1154,9 +1157,10 @@ mod tests {
|
||||||
newest_comment_time: inserted_post.published,
|
newest_comment_time: inserted_post.published,
|
||||||
featured_community: false,
|
featured_community: false,
|
||||||
featured_local: false,
|
featured_local: false,
|
||||||
hot_rank: 1728,
|
hot_rank: 0.1728,
|
||||||
hot_rank_active: 1728,
|
hot_rank_active: 0.1728,
|
||||||
controversy_rank: 0.0,
|
controversy_rank: 0.0,
|
||||||
|
scaled_rank: 0.3621,
|
||||||
community_id: inserted_post.community_id,
|
community_id: inserted_post.community_id,
|
||||||
creator_id: inserted_post.creator_id,
|
creator_id: inserted_post.creator_id,
|
||||||
},
|
},
|
||||||
|
|
|
@ -105,7 +105,7 @@ fn queries<'a>() -> Queries<
|
||||||
}
|
}
|
||||||
|
|
||||||
match options.sort.unwrap_or(Hot) {
|
match options.sort.unwrap_or(Hot) {
|
||||||
Hot | Active => query = query.order_by(community_aggregates::hot_rank.desc()),
|
Hot | Active | Scaled => query = query.order_by(community_aggregates::hot_rank.desc()),
|
||||||
NewComments | TopDay | TopTwelveHour | TopSixHour | TopHour => {
|
NewComments | TopDay | TopTwelveHour | TopSixHour | TopHour => {
|
||||||
query = query.order_by(community_aggregates::users_active_day.desc())
|
query = query.order_by(community_aggregates::users_active_day.desc())
|
||||||
}
|
}
|
||||||
|
|
87
migrations/2023-08-23-182533_scaled_rank/down.sql
Normal file
87
migrations/2023-08-23-182533_scaled_rank/down.sql
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
DROP FUNCTION scaled_rank;
|
||||||
|
|
||||||
|
ALTER TABLE community_aggregates
|
||||||
|
ALTER COLUMN hot_rank TYPE integer,
|
||||||
|
ALTER COLUMN hot_rank SET DEFAULT 1728;
|
||||||
|
|
||||||
|
ALTER TABLE comment_aggregates
|
||||||
|
ALTER COLUMN hot_rank TYPE integer,
|
||||||
|
ALTER COLUMN hot_rank SET DEFAULT 1728;
|
||||||
|
|
||||||
|
ALTER TABLE post_aggregates
|
||||||
|
ALTER COLUMN hot_rank TYPE integer,
|
||||||
|
ALTER COLUMN hot_rank SET DEFAULT 1728,
|
||||||
|
ALTER COLUMN hot_rank_active TYPE integer,
|
||||||
|
ALTER COLUMN hot_rank_active SET DEFAULT 1728;
|
||||||
|
|
||||||
|
-- Change back to integer version
|
||||||
|
DROP FUNCTION hot_rank (numeric, published timestamp with time zone);
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION hot_rank (score numeric, published timestamp with time zone)
|
||||||
|
RETURNS integer
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
hours_diff numeric := EXTRACT(EPOCH FROM (now() - published)) / 3600;
|
||||||
|
BEGIN
|
||||||
|
IF (hours_diff > 0) THEN
|
||||||
|
RETURN floor(10000 * log(greatest (1, score + 3)) / power((hours_diff + 2), 1.8))::integer;
|
||||||
|
ELSE
|
||||||
|
-- if the post is from the future, set hot score to 0. otherwise you can game the post to
|
||||||
|
-- always be on top even with only 1 vote by setting it to the future
|
||||||
|
RETURN 0;
|
||||||
|
END IF;
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql
|
||||||
|
IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
ALTER TABLE post_aggregates
|
||||||
|
DROP COLUMN scaled_rank;
|
||||||
|
|
||||||
|
-- The following code is necessary because postgres can't remove
|
||||||
|
-- a single enum value.
|
||||||
|
ALTER TABLE local_user
|
||||||
|
ALTER default_sort_type DROP DEFAULT;
|
||||||
|
|
||||||
|
-- Both 'Scaled' and 'Controversial' are missing from the enum that is recreated below,
-- so every row still using either value must be reset first; otherwise the
-- `default_sort_type::text::sort_type_enum` cast fails and the rollback aborts.
UPDATE
    local_user
SET
    default_sort_type = 'Hot'
WHERE
    default_sort_type IN ('Scaled', 'Controversial');
|
||||||
|
|
||||||
|
-- rename the old enum
|
||||||
|
ALTER TYPE sort_type_enum RENAME TO sort_type_enum__;
|
||||||
|
|
||||||
|
-- create the new enum
|
||||||
|
CREATE TYPE sort_type_enum AS ENUM (
|
||||||
|
'Active',
|
||||||
|
'Hot',
|
||||||
|
'New',
|
||||||
|
'Old',
|
||||||
|
'TopDay',
|
||||||
|
'TopWeek',
|
||||||
|
'TopMonth',
|
||||||
|
'TopYear',
|
||||||
|
'TopAll',
|
||||||
|
'MostComments',
|
||||||
|
'NewComments',
|
||||||
|
'TopHour',
|
||||||
|
'TopSixHour',
|
||||||
|
'TopTwelveHour',
|
||||||
|
'TopThreeMonths',
|
||||||
|
'TopSixMonths',
|
||||||
|
'TopNineMonths'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- alter all your enum columns
|
||||||
|
ALTER TABLE local_user
|
||||||
|
ALTER COLUMN default_sort_type TYPE sort_type_enum
|
||||||
|
USING default_sort_type::text::sort_type_enum;
|
||||||
|
|
||||||
|
ALTER TABLE local_user
|
||||||
|
ALTER default_sort_type SET DEFAULT 'Active';
|
||||||
|
|
||||||
|
-- drop the old enum
|
||||||
|
DROP TYPE sort_type_enum__;
|
||||||
|
|
74
migrations/2023-08-23-182533_scaled_rank/up.sql
Normal file
74
migrations/2023-08-23-182533_scaled_rank/up.sql
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
-- Change hot ranks and functions from an int to a float
|
||||||
|
ALTER TABLE community_aggregates
|
||||||
|
ALTER COLUMN hot_rank TYPE float,
|
||||||
|
ALTER COLUMN hot_rank SET DEFAULT 0.1728;
|
||||||
|
|
||||||
|
ALTER TABLE comment_aggregates
|
||||||
|
ALTER COLUMN hot_rank TYPE float,
|
||||||
|
ALTER COLUMN hot_rank SET DEFAULT 0.1728;
|
||||||
|
|
||||||
|
ALTER TABLE post_aggregates
|
||||||
|
ALTER COLUMN hot_rank TYPE float,
|
||||||
|
ALTER COLUMN hot_rank SET DEFAULT 0.1728,
|
||||||
|
ALTER COLUMN hot_rank_active TYPE float,
|
||||||
|
ALTER COLUMN hot_rank_active SET DEFAULT 0.1728;
|
||||||
|
|
||||||
|
DROP FUNCTION hot_rank (numeric, published timestamp with time zone);
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION hot_rank (score numeric, published timestamp with time zone)
|
||||||
|
RETURNS float
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
hours_diff numeric := EXTRACT(EPOCH FROM (now() - published)) / 3600;
|
||||||
|
BEGIN
|
||||||
|
-- 24 * 7 = 168, so after a week, it will default to 0.
|
||||||
|
IF (hours_diff > 0 AND hours_diff < 168) THEN
|
||||||
|
RETURN log(greatest (1, score + 3)) / power((hours_diff + 2), 1.8);
|
||||||
|
ELSE
|
||||||
|
-- if the post is from the future, or is more than a week old, set hot score to 0. otherwise
|
||||||
|
-- a future-dated post could be gamed to always stay on top even with only 1 vote
|
||||||
|
RETURN 0.0;
|
||||||
|
END IF;
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql
|
||||||
|
IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
-- The new scaled rank function
|
||||||
|
CREATE OR REPLACE FUNCTION scaled_rank (score numeric, published timestamp with time zone, users_active_month numeric)
|
||||||
|
RETURNS float
|
||||||
|
AS $$
|
||||||
|
BEGIN
|
||||||
|
-- Add 2 to avoid divide by zero errors
|
||||||
|
-- Default for score = 1, active users = 1, and now, is (0.1728 / log(2 + 1)) = 0.3621
|
||||||
|
-- There may need to be a scale factor multiplied to users_active_month, to make
|
||||||
|
-- the log curve less pronounced. This can be tuned in the future.
|
||||||
|
RETURN (hot_rank (score, published) / log(2 + users_active_month));
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql
|
||||||
|
IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
ALTER TABLE post_aggregates
|
||||||
|
ADD COLUMN scaled_rank float NOT NULL DEFAULT 0.3621;
|
||||||
|
|
||||||
|
UPDATE
|
||||||
|
post_aggregates
|
||||||
|
SET
|
||||||
|
scaled_rank = 0
|
||||||
|
WHERE
|
||||||
|
hot_rank = 0
|
||||||
|
OR hot_rank_active = 0;
|
||||||
|
|
||||||
|
CREATE INDEX idx_post_aggregates_featured_community_scaled ON post_aggregates (featured_community DESC, scaled_rank DESC, published DESC);
|
||||||
|
|
||||||
|
CREATE INDEX idx_post_aggregates_featured_local_scaled ON post_aggregates (featured_local DESC, scaled_rank DESC, published DESC);
|
||||||
|
|
||||||
|
-- We forgot to add the controversial sort type
|
||||||
|
ALTER TYPE sort_type_enum
|
||||||
|
ADD VALUE 'Controversial';
|
||||||
|
|
||||||
|
-- Add the Scaled enum
|
||||||
|
ALTER TYPE sort_type_enum
|
||||||
|
ADD VALUE 'Scaled';
|
||||||
|
|
|
@ -154,22 +154,16 @@ fn startup_jobs(db_url: &str) {
|
||||||
fn update_hot_ranks(conn: &mut PgConnection) {
|
fn update_hot_ranks(conn: &mut PgConnection) {
|
||||||
info!("Updating hot ranks for all history...");
|
info!("Updating hot ranks for all history...");
|
||||||
|
|
||||||
process_hot_ranks_in_batches(
|
process_post_aggregates_ranks_in_batches(conn);
|
||||||
conn,
|
|
||||||
"post_aggregates",
|
|
||||||
"a.hot_rank != 0 OR a.hot_rank_active != 0",
|
|
||||||
"SET hot_rank = hot_rank(a.score, a.published),
|
|
||||||
hot_rank_active = hot_rank(a.score, a.newest_comment_time_necro)",
|
|
||||||
);
|
|
||||||
|
|
||||||
process_hot_ranks_in_batches(
|
process_ranks_in_batches(
|
||||||
conn,
|
conn,
|
||||||
"comment_aggregates",
|
"comment_aggregates",
|
||||||
"a.hot_rank != 0",
|
"a.hot_rank != 0",
|
||||||
"SET hot_rank = hot_rank(a.score, a.published)",
|
"SET hot_rank = hot_rank(a.score, a.published)",
|
||||||
);
|
);
|
||||||
|
|
||||||
process_hot_ranks_in_batches(
|
process_ranks_in_batches(
|
||||||
conn,
|
conn,
|
||||||
"community_aggregates",
|
"community_aggregates",
|
||||||
"a.hot_rank != 0",
|
"a.hot_rank != 0",
|
||||||
|
@ -189,7 +183,7 @@ struct HotRanksUpdateResult {
|
||||||
/// In `where_clause` and `set_clause`, "a" will refer to the current aggregates table.
|
/// In `where_clause` and `set_clause`, "a" will refer to the current aggregates table.
|
||||||
/// Locked rows are skipped in order to prevent deadlocks (they will likely get updated on the next
|
/// Locked rows are skipped in order to prevent deadlocks (they will likely get updated on the next
|
||||||
/// run)
|
/// run)
|
||||||
fn process_hot_ranks_in_batches(
|
fn process_ranks_in_batches(
|
||||||
conn: &mut PgConnection,
|
conn: &mut PgConnection,
|
||||||
table_name: &str,
|
table_name: &str,
|
||||||
where_clause: &str,
|
where_clause: &str,
|
||||||
|
@ -241,6 +235,55 @@ fn process_hot_ranks_in_batches(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Post aggregates is a special case, since it needs to join to the community_aggregates
|
||||||
|
/// table, to get the active monthly user counts.
|
||||||
|
fn process_post_aggregates_ranks_in_batches(conn: &mut PgConnection) {
|
||||||
|
let process_start_time: DateTime<Utc> = Utc
|
||||||
|
.timestamp_opt(0, 0)
|
||||||
|
.single()
|
||||||
|
.expect("0 timestamp creation");
|
||||||
|
|
||||||
|
let update_batch_size = 1000; // Bigger batches than this tend to cause seq scans
|
||||||
|
let mut processed_rows_count = 0;
|
||||||
|
let mut previous_batch_result = Some(process_start_time);
|
||||||
|
while let Some(previous_batch_last_published) = previous_batch_result {
|
||||||
|
let result = sql_query(
|
||||||
|
r#"WITH batch AS (SELECT pa.id
|
||||||
|
FROM post_aggregates pa
|
||||||
|
WHERE pa.published > $1
|
||||||
|
AND (pa.hot_rank != 0 OR pa.hot_rank_active != 0)
|
||||||
|
ORDER BY pa.published
|
||||||
|
LIMIT $2
|
||||||
|
FOR UPDATE SKIP LOCKED)
|
||||||
|
UPDATE post_aggregates pa
|
||||||
|
SET hot_rank = hot_rank(pa.score, pa.published),
|
||||||
|
hot_rank_active = hot_rank(pa.score, pa.newest_comment_time_necro),
|
||||||
|
scaled_rank = scaled_rank(pa.score, pa.published, ca.users_active_month)
|
||||||
|
FROM batch, community_aggregates ca
|
||||||
|
WHERE pa.id = batch.id and pa.community_id = ca.community_id RETURNING pa.published;
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.bind::<Timestamptz, _>(previous_batch_last_published)
|
||||||
|
.bind::<Integer, _>(update_batch_size)
|
||||||
|
.get_results::<HotRanksUpdateResult>(conn);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(updated_rows) => {
|
||||||
|
processed_rows_count += updated_rows.len();
|
||||||
|
previous_batch_result = updated_rows.last().map(|row| row.published);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Failed to update {} hot_ranks: {}", "post_aggregates", e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
info!(
|
||||||
|
"Finished process_hot_ranks_in_batches execution for {} (processed {} rows)",
|
||||||
|
"post_aggregates", processed_rows_count
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn delete_expired_captcha_answers(conn: &mut PgConnection) {
|
fn delete_expired_captcha_answers(conn: &mut PgConnection) {
|
||||||
diesel::delete(
|
diesel::delete(
|
||||||
captcha_answer::table.filter(captcha_answer::published.lt(now() - IntervalDsl::minutes(10))),
|
captcha_answer::table.filter(captcha_answer::published.lt(now() - IntervalDsl::minutes(10))),
|
||||||
|
|
Loading…
Reference in a new issue