mirror of
https://github.com/LemmyNet/lemmy.git
synced 2025-02-04 16:14:48 +00:00
Use different counts for denominator in scaled rank for posts (#5261)
* Change 'Scaled' sort to use total interactions instead of monthly users * Misc SQL fixes * Fix formatting of error message * Run rustfmt
This commit is contained in:
parent
8f73c10e01
commit
5240576de8
8 changed files with 66 additions and 23 deletions
|
@ -33,16 +33,16 @@ now() - published) < '7 days' THEN
|
||||||
0.0
|
0.0
|
||||||
END;
|
END;
|
||||||
|
|
||||||
CREATE FUNCTION r.scaled_rank (score numeric, published timestamp with time zone, users_active_month numeric)
|
CREATE FUNCTION r.scaled_rank (score numeric, published timestamp with time zone, interactions_month numeric)
|
||||||
RETURNS double precision
|
RETURNS double precision
|
||||||
LANGUAGE sql
|
LANGUAGE sql
|
||||||
IMMUTABLE PARALLEL SAFE
|
IMMUTABLE PARALLEL SAFE
|
||||||
-- Add 2 to avoid divide by zero errors
|
-- Add 2 to avoid divide by zero errors
|
||||||
-- Default for score = 1, active users = 1, and now, is (0.1728 / log(2 + 1)) = 0.3621
|
-- Default for score = 1, active users = 1, and now, is (0.1728 / log(2 + 1)) = 0.3621
|
||||||
-- There may need to be a scale factor multiplied to users_active_month, to make
|
-- There may need to be a scale factor multiplied to interactions_month, to make
|
||||||
-- the log curve less pronounced. This can be tuned in the future.
|
-- the log curve less pronounced. This can be tuned in the future.
|
||||||
RETURN (
|
RETURN (
|
||||||
r.hot_rank (score, published) / log(2 + users_active_month)
|
r.hot_rank (score, published) / log(2 + interactions_month)
|
||||||
);
|
);
|
||||||
|
|
||||||
-- For tables with `deleted` and `removed` columns, this function determines which rows to include in a count.
|
-- For tables with `deleted` and `removed` columns, this function determines which rows to include in a count.
|
||||||
|
@ -212,6 +212,27 @@ GROUP BY
|
||||||
END;
|
END;
|
||||||
$$;
|
$$;
|
||||||
|
|
||||||
|
-- Community aggregate function for adding up total number of interactions
|
||||||
|
CREATE OR REPLACE FUNCTION r.community_aggregates_interactions (i text)
|
||||||
|
RETURNS TABLE (
|
||||||
|
count_ bigint,
|
||||||
|
community_id_ integer)
|
||||||
|
LANGUAGE plpgsql
|
||||||
|
AS $$
|
||||||
|
BEGIN
|
||||||
|
RETURN query
|
||||||
|
SELECT
|
||||||
|
COALESCE(sum(comments + upvotes + downvotes)::bigint, 0) AS count_,
|
||||||
|
community_id AS community_id_
|
||||||
|
FROM
|
||||||
|
post_aggregates
|
||||||
|
WHERE
|
||||||
|
published >= (CURRENT_TIMESTAMP - i::interval)
|
||||||
|
GROUP BY
|
||||||
|
community_id;
|
||||||
|
END;
|
||||||
|
$$;
|
||||||
|
|
||||||
-- Edit site aggregates to include voters and people who have read posts as active users
|
-- Edit site aggregates to include voters and people who have read posts as active users
|
||||||
CREATE OR REPLACE FUNCTION r.site_aggregates_activity (i text)
|
CREATE OR REPLACE FUNCTION r.site_aggregates_activity (i text)
|
||||||
RETURNS integer
|
RETURNS integer
|
||||||
|
|
|
@ -22,9 +22,10 @@ impl PostAggregates {
|
||||||
|
|
||||||
// Diesel can't update based on a join, which is necessary for the scaled_rank
|
// Diesel can't update based on a join, which is necessary for the scaled_rank
|
||||||
// https://github.com/diesel-rs/diesel/issues/1478
|
// https://github.com/diesel-rs/diesel/issues/1478
|
||||||
// Just select the users_active_month manually for now, since its a single post anyway
|
// Just select the metrics we need manually, for now, since its a single post anyway
|
||||||
let users_active_month = community_aggregates::table
|
|
||||||
.select(community_aggregates::users_active_month)
|
let interactions_month = community_aggregates::table
|
||||||
|
.select(community_aggregates::interactions_month)
|
||||||
.inner_join(post::table.on(community_aggregates::community_id.eq(post::community_id)))
|
.inner_join(post::table.on(community_aggregates::community_id.eq(post::community_id)))
|
||||||
.filter(post::id.eq(post_id))
|
.filter(post::id.eq(post_id))
|
||||||
.first::<i64>(conn)
|
.first::<i64>(conn)
|
||||||
|
@ -40,7 +41,7 @@ impl PostAggregates {
|
||||||
post_aggregates::scaled_rank.eq(scaled_rank(
|
post_aggregates::scaled_rank.eq(scaled_rank(
|
||||||
post_aggregates::score,
|
post_aggregates::score,
|
||||||
post_aggregates::published,
|
post_aggregates::published,
|
||||||
users_active_month,
|
interactions_month,
|
||||||
)),
|
)),
|
||||||
))
|
))
|
||||||
.get_result::<Self>(conn)
|
.get_result::<Self>(conn)
|
||||||
|
|
|
@ -70,6 +70,9 @@ pub struct CommunityAggregates {
|
||||||
pub users_active_month: i64,
|
pub users_active_month: i64,
|
||||||
/// The number of users with any activity in the last year.
|
/// The number of users with any activity in the last year.
|
||||||
pub users_active_half_year: i64,
|
pub users_active_half_year: i64,
|
||||||
|
/// Number of any interactions over the last month.
|
||||||
|
#[serde(skip)]
|
||||||
|
pub interactions_month: i64,
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
pub hot_rank: f64,
|
pub hot_rank: f64,
|
||||||
pub subscribers_local: i64,
|
pub subscribers_local: i64,
|
||||||
|
|
|
@ -252,6 +252,7 @@ diesel::table! {
|
||||||
users_active_week -> Int8,
|
users_active_week -> Int8,
|
||||||
users_active_month -> Int8,
|
users_active_month -> Int8,
|
||||||
users_active_half_year -> Int8,
|
users_active_half_year -> Int8,
|
||||||
|
interactions_month -> Int8,
|
||||||
hot_rank -> Float8,
|
hot_rank -> Float8,
|
||||||
subscribers_local -> Int8,
|
subscribers_local -> Int8,
|
||||||
report_count -> Int2,
|
report_count -> Int2,
|
||||||
|
|
|
@ -531,7 +531,7 @@ pub mod functions {
|
||||||
|
|
||||||
define_sql_function! {
|
define_sql_function! {
|
||||||
#[sql_name = "r.scaled_rank"]
|
#[sql_name = "r.scaled_rank"]
|
||||||
fn scaled_rank(score: BigInt, time: Timestamptz, users_active_month: BigInt) -> Double;
|
fn scaled_rank(score: BigInt, time: Timestamptz, interactions_month: BigInt) -> Double;
|
||||||
}
|
}
|
||||||
|
|
||||||
define_sql_function! {
|
define_sql_function! {
|
||||||
|
|
|
@ -265,15 +265,20 @@ async fn process_post_aggregates_ranks_in_batches(conn: &mut AsyncPgConnection)
|
||||||
UPDATE post_aggregates pa
|
UPDATE post_aggregates pa
|
||||||
SET hot_rank = r.hot_rank(pa.score, pa.published),
|
SET hot_rank = r.hot_rank(pa.score, pa.published),
|
||||||
hot_rank_active = r.hot_rank(pa.score, pa.newest_comment_time_necro),
|
hot_rank_active = r.hot_rank(pa.score, pa.newest_comment_time_necro),
|
||||||
scaled_rank = r.scaled_rank(pa.score, pa.published, ca.users_active_month)
|
scaled_rank = r.scaled_rank(pa.score, pa.published, ca.interactions_month)
|
||||||
FROM batch, community_aggregates ca
|
FROM batch, community_aggregates ca
|
||||||
WHERE pa.post_id = batch.post_id and pa.community_id = ca.community_id RETURNING pa.published;
|
WHERE pa.post_id = batch.post_id
|
||||||
"#,
|
AND pa.community_id = ca.community_id
|
||||||
|
RETURNING pa.published;
|
||||||
|
"#,
|
||||||
)
|
)
|
||||||
.bind::<Timestamptz, _>(previous_batch_last_published)
|
.bind::<Timestamptz, _>(previous_batch_last_published)
|
||||||
.bind::<Integer, _>(update_batch_size)
|
.bind::<Integer, _>(update_batch_size)
|
||||||
.get_results::<HotRanksUpdateResult>(conn)
|
.get_results::<HotRanksUpdateResult>(conn)
|
||||||
.await.map_err(|e| LemmyErrorType::Unknown(format!("Failed to update {} hot_ranks: {}", "post_aggregates", e)))?;
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
LemmyErrorType::Unknown(format!("Failed to update post_aggregates hot_ranks: {}", e))
|
||||||
|
})?;
|
||||||
|
|
||||||
processed_rows_count += updated_rows.len();
|
processed_rows_count += updated_rows.len();
|
||||||
previous_batch_result = updated_rows.last().map(|row| row.published);
|
previous_batch_result = updated_rows.last().map(|row| row.published);
|
||||||
|
@ -371,7 +376,7 @@ async fn active_counts(pool: &mut DbPool<'_>) -> LemmyResult<()> {
|
||||||
|
|
||||||
for (full_form, abbr) in &intervals {
|
for (full_form, abbr) in &intervals {
|
||||||
let update_site_stmt = format!(
|
let update_site_stmt = format!(
|
||||||
"update site_aggregates set users_active_{} = (select * from r.site_aggregates_activity('{}')) where site_id = 1",
|
"update site_aggregates set users_active_{} = (select r.site_aggregates_activity('{}')) where site_id = 1",
|
||||||
abbr, full_form
|
abbr, full_form
|
||||||
);
|
);
|
||||||
sql_query(update_site_stmt).execute(&mut conn).await?;
|
sql_query(update_site_stmt).execute(&mut conn).await?;
|
||||||
|
@ -380,6 +385,11 @@ async fn active_counts(pool: &mut DbPool<'_>) -> LemmyResult<()> {
|
||||||
sql_query(update_community_stmt).execute(&mut conn).await?;
|
sql_query(update_community_stmt).execute(&mut conn).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let update_interactions_stmt = "update community_aggregates ca set interactions_month = mv.count_ from r.community_aggregates_interactions('1 month') mv where ca.community_id = mv.community_id_";
|
||||||
|
sql_query(update_interactions_stmt)
|
||||||
|
.execute(&mut conn)
|
||||||
|
.await?;
|
||||||
|
|
||||||
info!("Done.");
|
info!("Done.");
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
ALTER TABLE community_aggregates
|
||||||
|
DROP COLUMN interactions_month;
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
-- Add the interactions_month column
|
||||||
|
ALTER TABLE community_aggregates
|
||||||
|
ADD COLUMN interactions_month bigint NOT NULL DEFAULT 0;
|
||||||
|
|
Loading…
Reference in a new issue