Change 'Scaled' sort to use total interactions instead of monthly users

This commit is contained in:
Andrew Moise 2024-12-18 15:46:24 +00:00
parent a2a5cb091a
commit c5dfc80535
3 changed files with 25 additions and 17 deletions

View file

@ -33,16 +33,16 @@ now() - published) < '7 days' THEN
0.0 0.0
END; END;
CREATE FUNCTION r.scaled_rank (score numeric, published timestamp with time zone, users_active_month numeric) CREATE FUNCTION r.scaled_rank (score numeric, published timestamp with time zone, interactions_month numeric)
RETURNS double precision RETURNS double precision
LANGUAGE sql LANGUAGE sql
IMMUTABLE PARALLEL SAFE IMMUTABLE PARALLEL SAFE
-- Add 2 to avoid divide by zero errors -- Add 2 to avoid divide by zero errors
-- Default for score = 1, active users = 1, and now, is (0.1728 / log(2 + 1)) = 0.3621 -- Default for score = 1, interactions = 1, and now, is (0.1728 / log(2 + 1)) = 0.3621
-- There may need to be a scale factor multiplied to users_active_month, to make -- There may need to be a scale factor multiplied to interactions_month, to make
-- the log curve less pronounced. This can be tuned in the future. -- the log curve less pronounced. This can be tuned in the future.
RETURN ( RETURN (
r.hot_rank (score, published) / log(2 + users_active_month) r.hot_rank (score, published) / log(2 + interactions_month)
); );
-- For tables with `deleted` and `removed` columns, this function determines which rows to include in a count. -- For tables with `deleted` and `removed` columns, this function determines which rows to include in a count.

View file

@ -531,7 +531,7 @@ pub mod functions {
define_sql_function! { define_sql_function! {
#[sql_name = "r.scaled_rank"] #[sql_name = "r.scaled_rank"]
fn scaled_rank(score: BigInt, time: Timestamptz, users_active_month: BigInt) -> Double; fn scaled_rank(score: BigInt, time: Timestamptz, interactions_month: BigInt) -> Double;
} }
define_sql_function! { define_sql_function! {

View file

@ -228,19 +228,27 @@ async fn process_post_aggregates_ranks_in_batches(conn: &mut AsyncPgConnection)
while let Some(previous_batch_last_published) = previous_batch_result { while let Some(previous_batch_last_published) = previous_batch_result {
let result = sql_query( let result = sql_query(
r#"WITH batch AS (SELECT pa.post_id r#"WITH batch AS (SELECT pa.post_id
FROM post_aggregates pa FROM post_aggregates pa
WHERE pa.published > $1 WHERE pa.published > $1
AND (pa.hot_rank != 0 OR pa.hot_rank_active != 0) AND (pa.hot_rank != 0 OR pa.hot_rank_active != 0)
ORDER BY pa.published ORDER BY pa.published
LIMIT $2 LIMIT $2
FOR UPDATE SKIP LOCKED) FOR UPDATE SKIP LOCKED),
UPDATE post_aggregates pa community_interactions AS (
SET hot_rank = r.hot_rank(pa.score, pa.published), SELECT community_id,
hot_rank_active = r.hot_rank(pa.score, pa.newest_comment_time_necro), SUM(comments + upvotes + downvotes) as total_interactions
scaled_rank = r.scaled_rank(pa.score, pa.published, ca.users_active_month) FROM post_aggregates
FROM batch, community_aggregates ca WHERE published >= date_trunc('month', CURRENT_TIMESTAMP - interval '1 month')
WHERE pa.post_id = batch.post_id and pa.community_id = ca.community_id RETURNING pa.published; GROUP BY community_id)
"#, UPDATE post_aggregates pa
SET hot_rank = r.hot_rank(pa.score, pa.published),
hot_rank_active = r.hot_rank(pa.score, pa.newest_comment_time_necro),
scaled_rank = r.scaled_rank(pa.score, pa.published, ci.total_interactions)
FROM batch, community_interactions ci
WHERE pa.post_id = batch.post_id
AND pa.community_id = ci.community_id
RETURNING pa.published;
"#,
) )
.bind::<Timestamptz, _>(previous_batch_last_published) .bind::<Timestamptz, _>(previous_batch_last_published)
.bind::<Integer, _>(update_batch_size) .bind::<Integer, _>(update_batch_size)