Change 'Scaled' sort to use total interactions instead of monthly users

2024-12-22 19:01:32 +00:00 · 2024-12-18 15:46:24 +00:00 · 2024-12-18 15:46:24 +00:00 · c5dfc80535
commit c5dfc80535
parent a2a5cb091a
3 changed files with 25 additions and 17 deletions
--- a/crates/db_schema/replaceable_schema/utils.sql
+++ b/crates/db_schema/replaceable_schema/utils.sql
@ -33,16 +33,16 @@ now() - published) < '7 days' THEN
        0.0
    END;
-CREATE FUNCTION r.scaled_rank (score numeric, published timestamp with time zone, users_active_month numeric)
+CREATE FUNCTION r.scaled_rank (score numeric, published timestamp with time zone, interactions_month numeric)
    RETURNS double precision
    LANGUAGE sql
    IMMUTABLE PARALLEL SAFE
    -- Add 2 to avoid divide by zero errors: adding only 1 would give log(1) = 0 when the count is 0
    -- Default for score = 1, interactions = 1, and now, is (0.1728 / log(2 + 1)) = 0.3621
-    -- There may need to be a scale factor multiplied to users_active_month, to make
+    -- There may need to be a scale factor multiplied to interactions_month, to make
    -- the log curve less pronounced. This can be tuned in the future.
    RETURN (
-        r.hot_rank (score, published) / log(2 + users_active_month)
+        r.hot_rank (score, published) / log(2 + interactions_month)
 );
 -- For tables with `deleted` and `removed` columns, this function determines which rows to include in a count.
--- a/crates/db_schema/src/utils.rs
+++ b/crates/db_schema/src/utils.rs
@ -531,7 +531,7 @@ pub mod functions {
  // Rust-side declaration of the SQL function `r.scaled_rank` (see `sql_name` below).
  // NOTE(review): the SQL definition declares `score` and the third argument as `numeric`,
  // while this binding uses `BigInt` — presumably relies on Postgres' implicit
  // bigint -> numeric cast; confirm.
  define_sql_function! {
    #[sql_name = "r.scaled_rank"]
-    fn scaled_rank(score: BigInt, time: Timestamptz, users_active_month: BigInt) -> Double;
+    fn scaled_rank(score: BigInt, time: Timestamptz, interactions_month: BigInt) -> Double;
  }
  define_sql_function! {
--- a/src/scheduled_tasks.rs
+++ b/src/scheduled_tasks.rs
@ -228,19 +228,27 @@ async fn process_post_aggregates_ranks_in_batches(conn: &mut AsyncPgConnection)
  while let Some(previous_batch_last_published) = previous_batch_result {
    let result = sql_query(
      r#"WITH batch AS (SELECT pa.post_id
-               FROM post_aggregates pa
+           FROM post_aggregates pa
-               WHERE pa.published > $1
+           WHERE pa.published > $1
-               AND (pa.hot_rank != 0 OR pa.hot_rank_active != 0)
+           AND (pa.hot_rank != 0 OR pa.hot_rank_active != 0)
-               ORDER BY pa.published
+           ORDER BY pa.published
-               LIMIT $2
+           LIMIT $2
-               FOR UPDATE SKIP LOCKED)
+          FOR UPDATE SKIP LOCKED),
-         UPDATE post_aggregates pa
+      community_interactions AS (
-           SET hot_rank = r.hot_rank(pa.score, pa.published),
+          SELECT community_id,
-           hot_rank_active = r.hot_rank(pa.score, pa.newest_comment_time_necro),
+                 SUM(comments + upvotes + downvotes) as total_interactions
-           scaled_rank = r.scaled_rank(pa.score, pa.published, ca.users_active_month)
+          FROM post_aggregates
-         FROM batch, community_aggregates ca
+          WHERE published >= date_trunc('month', CURRENT_TIMESTAMP - interval '1 month')
-         WHERE pa.post_id = batch.post_id and pa.community_id = ca.community_id RETURNING pa.published;
+          GROUP BY community_id)
-    "#,
+      UPDATE post_aggregates pa
      SET hot_rank = r.hot_rank(pa.score, pa.published),
          hot_rank_active = r.hot_rank(pa.score, pa.newest_comment_time_necro),
          scaled_rank = r.scaled_rank(pa.score, pa.published, ci.total_interactions)
      FROM batch, community_interactions ci
      WHERE pa.post_id = batch.post_id
      AND pa.community_id = ci.community_id
      RETURNING pa.published;
 "#,
    )
    .bind::<Timestamptz, _>(previous_batch_last_published)
    .bind::<Integer, _>(update_batch_size)