diff --git a/crates/db_schema/replaceable_schema/utils.sql b/crates/db_schema/replaceable_schema/utils.sql index 2573aeac1..8c3c96ce1 100644 --- a/crates/db_schema/replaceable_schema/utils.sql +++ b/crates/db_schema/replaceable_schema/utils.sql @@ -33,16 +33,16 @@ now() - published) < '7 days' THEN 0.0 END; -CREATE FUNCTION r.scaled_rank (score numeric, published timestamp with time zone, users_active_month numeric) +CREATE FUNCTION r.scaled_rank (score numeric, published timestamp with time zone, interactions_month numeric) RETURNS double precision LANGUAGE sql IMMUTABLE PARALLEL SAFE -- Add 2 to avoid divide by zero errors -- Default for score = 1, active users = 1, and now, is (0.1728 / log(2 + 1)) = 0.3621 - -- There may need to be a scale factor multiplied to users_active_month, to make + -- There may need to be a scale factor multiplied to interactions_month, to make -- the log curve less pronounced. This can be tuned in the future. RETURN ( - r.hot_rank (score, published) / log(2 + users_active_month) + r.hot_rank (score, published) / log(2 + interactions_month) ); -- For tables with `deleted` and `removed` columns, this function determines which rows to include in a count. @@ -212,6 +212,27 @@ GROUP BY END; $$; +-- Community aggregate function for adding up total number of interactions +CREATE OR REPLACE FUNCTION r.community_aggregates_interactions (i text) + RETURNS TABLE ( + count_ bigint, + community_id_ integer) + LANGUAGE plpgsql + AS $$ +BEGIN + RETURN query + SELECT + COALESCE(sum(comments + upvotes + downvotes)::bigint, 0) AS count_, + community_id AS community_id_ + FROM + post_aggregates + WHERE + published >= (CURRENT_TIMESTAMP - i::interval) + GROUP BY + community_id; +END; +$$; + -- Edit site aggregates to include voters and people who have read posts as active users CREATE OR REPLACE FUNCTION r.site_aggregates_activity (i text) RETURNS integer diff --git a/crates/db_schema/src/aggregates/post_aggregates.rs b/crates/db_schema/src/aggregates/post_aggregates.rs index c11ea6e05..af59fe761 100644 --- a/crates/db_schema/src/aggregates/post_aggregates.rs +++ b/crates/db_schema/src/aggregates/post_aggregates.rs @@ -22,9 +22,10 @@ impl PostAggregates { // Diesel can't update based on a join, which is necessary for the scaled_rank // https://github.com/diesel-rs/diesel/issues/1478 - // Just select the users_active_month manually for now, since its a single post anyway - let users_active_month = community_aggregates::table - .select(community_aggregates::users_active_month) + // Just select the metrics we need manually, for now, since its a single post anyway + + let interactions_month = community_aggregates::table + .select(community_aggregates::interactions_month) .inner_join(post::table.on(community_aggregates::community_id.eq(post::community_id))) .filter(post::id.eq(post_id)) .first::(conn) @@ -40,7 +41,7 @@ impl PostAggregates { post_aggregates::scaled_rank.eq(scaled_rank( post_aggregates::score, post_aggregates::published, - users_active_month, + interactions_month, )), )) .get_result::(conn) diff --git a/crates/db_schema/src/aggregates/structs.rs b/crates/db_schema/src/aggregates/structs.rs index a254b0a63..86176c14c 100644 --- a/crates/db_schema/src/aggregates/structs.rs +++ b/crates/db_schema/src/aggregates/structs.rs @@ -70,6 +70,9 @@ pub struct CommunityAggregates { pub users_active_month: i64, /// The number of users with any activity in the last year. pub users_active_half_year: i64, + /// Number of any interactions over the last month. + #[serde(skip)] + pub interactions_month: i64, #[serde(skip)] pub hot_rank: f64, pub subscribers_local: i64, diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index 84a14bf22..dea61bf4d 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -252,6 +252,7 @@ diesel::table! { users_active_week -> Int8, users_active_month -> Int8, users_active_half_year -> Int8, + interactions_month -> Int8, hot_rank -> Float8, subscribers_local -> Int8, report_count -> Int2, diff --git a/crates/db_schema/src/utils.rs b/crates/db_schema/src/utils.rs index 870c02fdd..312f219e1 100644 --- a/crates/db_schema/src/utils.rs +++ b/crates/db_schema/src/utils.rs @@ -531,7 +531,7 @@ pub mod functions { define_sql_function! { #[sql_name = "r.scaled_rank"] - fn scaled_rank(score: BigInt, time: Timestamptz, users_active_month: BigInt) -> Double; + fn scaled_rank(score: BigInt, time: Timestamptz, interactions_month: BigInt) -> Double; } define_sql_function! { diff --git a/crates/routes/src/utils/scheduled_tasks.rs b/crates/routes/src/utils/scheduled_tasks.rs index 4e7cd1c62..83463c344 100644 --- a/crates/routes/src/utils/scheduled_tasks.rs +++ b/crates/routes/src/utils/scheduled_tasks.rs @@ -256,24 +256,29 @@ async fn process_post_aggregates_ranks_in_batches(conn: &mut AsyncPgConnection) while let Some(previous_batch_last_published) = previous_batch_result { let updated_rows = sql_query( r#"WITH batch AS (SELECT pa.post_id - FROM post_aggregates pa - WHERE pa.published > $1 - AND (pa.hot_rank != 0 OR pa.hot_rank_active != 0) - ORDER BY pa.published - LIMIT $2 - FOR UPDATE SKIP LOCKED) - UPDATE post_aggregates pa - SET hot_rank = r.hot_rank(pa.score, pa.published), - hot_rank_active = r.hot_rank(pa.score, pa.newest_comment_time_necro), - scaled_rank = r.scaled_rank(pa.score, pa.published, ca.users_active_month) - FROM batch, community_aggregates ca - WHERE pa.post_id = batch.post_id and pa.community_id = ca.community_id RETURNING pa.published; - "#, + FROM post_aggregates pa + WHERE pa.published > $1 + AND (pa.hot_rank != 0 OR pa.hot_rank_active != 0) + ORDER BY pa.published + LIMIT $2 + FOR UPDATE SKIP LOCKED) + UPDATE post_aggregates pa + SET hot_rank = r.hot_rank(pa.score, pa.published), + hot_rank_active = r.hot_rank(pa.score, pa.newest_comment_time_necro), + scaled_rank = r.scaled_rank(pa.score, pa.published, ca.interactions_month) + FROM batch, community_aggregates ca + WHERE pa.post_id = batch.post_id + AND pa.community_id = ca.community_id + RETURNING pa.published; +"#, ) .bind::(previous_batch_last_published) .bind::(update_batch_size) .get_results::(conn) - .await.map_err(|e| LemmyErrorType::Unknown(format!("Failed to update {} hot_ranks: {}", "post_aggregates", e)))?; + .await + .map_err(|e| { + LemmyErrorType::Unknown(format!("Failed to update post_aggregates hot_ranks: {}", e)) + })?; processed_rows_count += updated_rows.len(); previous_batch_result = updated_rows.last().map(|row| row.published); @@ -371,7 +376,7 @@ async fn active_counts(pool: &mut DbPool<'_>) -> LemmyResult<()> { for (full_form, abbr) in &intervals { let update_site_stmt = format!( - "update site_aggregates set users_active_{} = (select * from r.site_aggregates_activity('{}')) where site_id = 1", + "update site_aggregates set users_active_{} = (select r.site_aggregates_activity('{}')) where site_id = 1", abbr, full_form ); sql_query(update_site_stmt).execute(&mut conn).await?; @@ -380,6 +385,11 @@ async fn active_counts(pool: &mut DbPool<'_>) -> LemmyResult<()> { sql_query(update_community_stmt).execute(&mut conn).await?; } + let update_interactions_stmt = "update community_aggregates ca set interactions_month = mv.count_ from r.community_aggregates_interactions('1 month') mv where ca.community_id = mv.community_id_"; + sql_query(update_interactions_stmt) + .execute(&mut conn) + .await?; + info!("Done."); Ok(()) } diff --git a/migrations/2025-01-21-000000_interactions_per_month_schema/down.sql b/migrations/2025-01-21-000000_interactions_per_month_schema/down.sql new file mode 100644 index 000000000..22f2af14e --- /dev/null +++ b/migrations/2025-01-21-000000_interactions_per_month_schema/down.sql @@ -0,0 +1,3 @@ +ALTER TABLE community_aggregates + DROP COLUMN interactions_month; + diff --git a/migrations/2025-01-21-000000_interactions_per_month_schema/up.sql b/migrations/2025-01-21-000000_interactions_per_month_schema/up.sql new file mode 100644 index 000000000..2ad5bf688 --- /dev/null +++ b/migrations/2025-01-21-000000_interactions_per_month_schema/up.sql @@ -0,0 +1,4 @@ +-- Add the interactions_month column +ALTER TABLE community_aggregates + ADD COLUMN interactions_month bigint NOT NULL DEFAULT 0; +