From d8f9e8a64ca1ccbdfb486935c2ca54fdac76786a Mon Sep 17 00:00:00 2001 From: dullbananas Date: Wed, 24 Jan 2024 08:50:11 -0700 Subject: [PATCH] Post view: move cursor pagination to separate library, add backward pagination to PostQuery (#4320) * stuff * stuff * crates.io * Update up.sql * Rerun federation tests * Update post_view.rs * Update post_view.rs * Update up.sql * Update utils.rs * Fix precision loss * Update up.sql * Update down.sql * remove unwrap * Update post_view.rs --------- Co-authored-by: Dessalines --- Cargo.lock | 22 ++ Cargo.toml | 1 + crates/db_schema/Cargo.toml | 2 + crates/db_schema/src/aggregates/structs.rs | 12 +- crates/db_schema/src/utils.rs | 24 +- crates/db_views/Cargo.toml | 2 + crates/db_views/src/post_view.rs | 249 ++++++++---------- .../down.sql | 4 + .../up.sql | 18 ++ 9 files changed, 195 insertions(+), 139 deletions(-) create mode 100644 migrations/2023-12-22-040137_make-mixed-sorting-directions-work-with-tuple-comparison/down.sql create mode 100644 migrations/2023-12-22-040137_make-mixed-sorting-directions-work-with-tuple-comparison/up.sql diff --git a/Cargo.lock b/Cargo.lock index ba0a5cfdc9..7c8f8eb87a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2298,6 +2298,26 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "i-love-jesus" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39fa60e3281e1529cc56d96cca925215f51f9b39a96bc677982fbfdf2663cc84" +dependencies = [ + "diesel", + "i-love-jesus-macros", +] + +[[package]] +name = "i-love-jesus-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8215279f83f9b829403812f845aa2d0dd5966332aa2fd0334a375256f3dd0322" +dependencies = [ + "quote", + "syn 2.0.40", +] + [[package]] name = "iana-time-zone" version = "0.1.58" @@ -2683,6 +2703,7 @@ dependencies = [ "diesel_ltree", "diesel_migrations", "futures-util", + "i-love-jesus", "lemmy_utils", "once_cell", "pretty_assertions", @@ -2713,6 +2734,7 @@ dependencies = [ "diesel", "diesel-async", "diesel_ltree", + "i-love-jesus", "lemmy_db_schema", "lemmy_utils", "pretty_assertions", diff --git a/Cargo.toml b/Cargo.toml index c8b5b93d7b..05403de9c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -157,6 +157,7 @@ tokio-postgres = "0.7.10" tokio-postgres-rustls = "0.10.0" enum-map = "2.7" moka = { version = "0.12.1", features = ["future"] } +i-love-jesus = { version = "0.1.0" } clap = { version = "4.4.11", features = ["derive"] } pretty_assertions = "1.4.0" diff --git a/crates/db_schema/Cargo.toml b/crates/db_schema/Cargo.toml index d01864a8db..91ff39bfef 100644 --- a/crates/db_schema/Cargo.toml +++ b/crates/db_schema/Cargo.toml @@ -36,6 +36,7 @@ full = [ "tokio-postgres", "tokio-postgres-rustls", "rustls", + "i-love-jesus", ] [dependencies] @@ -76,6 +77,7 @@ tokio-postgres = { workspace = true, optional = true } tokio-postgres-rustls = { workspace = true, optional = true } rustls = { workspace = true, optional = true } uuid = { workspace = true, features = ["v4"] } +i-love-jesus = { workspace = true, optional = true } anyhow = { workspace = true } [dev-dependencies] diff --git a/crates/db_schema/src/aggregates/structs.rs b/crates/db_schema/src/aggregates/structs.rs index 7ca9429f65..45a43adf8f 100644 --- a/crates/db_schema/src/aggregates/structs.rs +++ b/crates/db_schema/src/aggregates/structs.rs @@ -9,6 +9,8 @@ use crate::schema::{ site_aggregates, }; use chrono::{DateTime, Utc}; +#[cfg(feature = "full")] +use i_love_jesus::CursorKeysModule; use serde::{Deserialize, Serialize}; #[cfg(feature = "full")] use ts_rs::TS; @@ -93,13 +95,21 @@ pub struct PersonAggregates { #[derive(PartialEq, Debug, Serialize, Deserialize, Clone)] #[cfg_attr( feature = "full", - derive(Queryable, Selectable, Associations, Identifiable, TS) + derive( + Queryable, + Selectable, + Associations, + Identifiable, + TS, + CursorKeysModule + ) )] #[cfg_attr(feature = "full", diesel(table_name = post_aggregates))] #[cfg_attr(feature = "full", diesel(belongs_to(crate::source::post::Post)))] #[cfg_attr(feature = "full", diesel(primary_key(post_id)))] #[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))] #[cfg_attr(feature = "full", ts(export))] +#[cfg_attr(feature = "full", cursor_keys_module(name = post_aggregates_keys))] /// Aggregate data for a post. pub struct PostAggregates { pub post_id: PostId, diff --git a/crates/db_schema/src/utils.rs b/crates/db_schema/src/utils.rs index e7300201f8..945a006479 100644 --- a/crates/db_schema/src/utils.rs +++ b/crates/db_schema/src/utils.rs @@ -18,7 +18,7 @@ use diesel::{ query_dsl::methods::LimitDsl, result::{ConnectionError, ConnectionResult, Error as DieselError, Error::QueryBuilderError}, serialize::{Output, ToSql}, - sql_types::{Text, Timestamptz}, + sql_types::{self, Text, Timestamptz}, IntoSql, PgConnection, }; @@ -32,6 +32,7 @@ use diesel_async::{ }; use diesel_migrations::EmbeddedMigrations; use futures_util::{future::BoxFuture, Future, FutureExt}; +use i_love_jesus::CursorKey; use lemmy_utils::{ error::{LemmyError, LemmyErrorExt, LemmyErrorType}, settings::SETTINGS, @@ -153,6 +154,25 @@ macro_rules! try_join_with_pool { }}; } +pub struct ReverseTimestampKey(pub K); + +impl CursorKey for ReverseTimestampKey +where + K: CursorKey, +{ + type SqlType = sql_types::BigInt; + type CursorValue = functions::reverse_timestamp_sort::HelperType; + type SqlValue = functions::reverse_timestamp_sort::HelperType; + + fn get_cursor_value(cursor: &C) -> Self::CursorValue { + functions::reverse_timestamp_sort(K::get_cursor_value(cursor)) + } + + fn get_sql_value() -> Self::SqlValue { + functions::reverse_timestamp_sort(K::get_sql_value()) + } +} + /// Includes an SQL comment before `T`, which can be used to label auto_explain output #[derive(QueryId)] pub struct Commented { @@ -424,6 +444,8 @@ pub mod functions { fn controversy_rank(upvotes: BigInt, downvotes: BigInt, score: BigInt) -> Double; } + sql_function!(fn reverse_timestamp_sort(time: Timestamptz) -> BigInt); + sql_function!(fn lower(x: Text) -> Text); // really this function is variadic, this just adds the two-argument version diff --git a/crates/db_views/Cargo.toml b/crates/db_views/Cargo.toml index e5bebed36d..3f0ba5afff 100644 --- a/crates/db_views/Cargo.toml +++ b/crates/db_views/Cargo.toml @@ -23,6 +23,7 @@ full = [ "tracing", "ts-rs", "actix-web", + "i-love-jesus", "lemmy_db_schema/full", ] @@ -37,6 +38,7 @@ serde_with = { workspace = true } tracing = { workspace = true, optional = true } ts-rs = { workspace = true, optional = true } actix-web = { workspace = true, optional = true } +i-love-jesus = { workspace = true, optional = true } [dev-dependencies] serial_test = { workspace = true } diff --git a/crates/db_views/src/post_view.rs b/crates/db_views/src/post_view.rs index 4848b5c4e6..f91e768cbe 100644 --- a/crates/db_views/src/post_view.rs +++ b/crates/db_views/src/post_view.rs @@ -3,6 +3,7 @@ use diesel::{ debug_query, dsl::{exists, not, IntervalDsl}, pg::Pg, + query_builder::AsQuery, result::Error, sql_types, BoolExpressionMethods, @@ -16,8 +17,9 @@ use diesel::{ QueryDsl, }; use diesel_async::RunQueryDsl; +use i_love_jesus::PaginatedQueryBuilder; use lemmy_db_schema::{ - aggregates::structs::PostAggregates, + aggregates::structs::{post_aggregates_keys as key, PostAggregates}, newtypes::{CommunityId, LocalUserId, PersonId, PostId}, schema::{ community, @@ -49,40 +51,13 @@ use lemmy_db_schema::{ ListFn, Queries, ReadFn, + ReverseTimestampKey, }, ListingType, SortType, }; use tracing::debug; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum Ord { - Desc, - Asc, -} - -struct PaginationCursorField { - then_order_by_desc: fn(Q) -> Q, - then_order_by_asc: fn(Q) -> Q, - le: fn(&PostAggregates) -> Box>, - ge: fn(&PostAggregates) -> Box>, - ne: fn(&PostAggregates) -> Box>, -} - -/// Returns `PaginationCursorField<_, _>` for the given name -macro_rules! field { - ($name:ident) => { - // Type inference doesn't work if normal method call syntax is used - PaginationCursorField { - then_order_by_desc: |query| QueryDsl::then_order_by(query, post_aggregates::$name.desc()), - then_order_by_asc: |query| QueryDsl::then_order_by(query, post_aggregates::$name.asc()), - le: |e| Box::new(post_aggregates::$name.le(e.$name)), - ge: |e| Box::new(post_aggregates::$name.ge(e.$name)), - ne: |e| Box::new(post_aggregates::$name.ne(e.$name)), - } - }; -} - fn queries<'a>() -> Queries< impl ReadFn<'a, PostView, (PostId, Option, bool)>, impl ListFn<'a, PostView, PostQuery<'a>>, @@ -462,110 +437,71 @@ fn queries<'a>() -> Queries< query = query.filter(not(is_creator_blocked(person_id))); } - let featured_field = if options.community_id.is_none() || options.community_id_just_for_prefetch - { - field!(featured_local) - } else { - field!(featured_community) - }; - - let (main_sort, top_sort_interval) = match options.sort.unwrap_or(SortType::Hot) { - SortType::Active => ((Ord::Desc, field!(hot_rank_active)), None), - SortType::Hot => ((Ord::Desc, field!(hot_rank)), None), - SortType::Scaled => ((Ord::Desc, field!(scaled_rank)), None), - SortType::Controversial => ((Ord::Desc, field!(controversy_rank)), None), - SortType::New => ((Ord::Desc, field!(published)), None), - SortType::Old => ((Ord::Asc, field!(published)), None), - SortType::NewComments => ((Ord::Desc, field!(newest_comment_time)), None), - SortType::MostComments => ((Ord::Desc, field!(comments)), None), - SortType::TopAll => ((Ord::Desc, field!(score)), None), - SortType::TopYear => ((Ord::Desc, field!(score)), Some(1.years())), - SortType::TopMonth => ((Ord::Desc, field!(score)), Some(1.months())), - SortType::TopWeek => ((Ord::Desc, field!(score)), Some(1.weeks())), - SortType::TopDay => ((Ord::Desc, field!(score)), Some(1.days())), - SortType::TopHour => ((Ord::Desc, field!(score)), Some(1.hours())), - SortType::TopSixHour => ((Ord::Desc, field!(score)), Some(6.hours())), - SortType::TopTwelveHour => ((Ord::Desc, field!(score)), Some(12.hours())), - SortType::TopThreeMonths => ((Ord::Desc, field!(score)), Some(3.months())), - SortType::TopSixMonths => ((Ord::Desc, field!(score)), Some(6.months())), - SortType::TopNineMonths => ((Ord::Desc, field!(score)), Some(9.months())), - }; - - if let Some(interval) = top_sort_interval { - query = query.filter(post_aggregates::published.gt(now() - interval)); - } - - let sorts = [ - // featured posts first - Some((Ord::Desc, featured_field)), - // then use the main sort - Some(main_sort), - // hot rank reaches zero after some days, use publish as fallback. necessary because old - // posts can be fetched over federation and inserted with high post id - Some((Ord::Desc, field!(published))), - // finally use unique post id as tie breaker - Some((Ord::Desc, field!(post_id))), - ]; - let sorts_iter = sorts.iter().flatten(); - - // This loop does almost the same thing as sorting by and comparing tuples. If the rows were - // only sorted by 1 field called `foo` in descending order, then it would be like this: - // - // ``` - // query = query.then_order_by(foo.desc()); - // if let Some(first) = &options.page_after { - // query = query.filter(foo.le(first.foo)); - // } - // if let Some(last) = &page_before_or_equal { - // query = query.filter(foo.ge(last.foo)); - // } - // ``` - // - // If multiple rows have the same value for a sorted field, then they are - // grouped together, and the rows in that group are sorted by the next fields. - // When checking if a row is within the range determined by the cursors, a field - // that's sorted after other fields is only compared if the row and the cursor - // are in the same group created by the previous sort, which is checked by using - // `or` to skip the comparison if any previously sorted field is not equal. - for (i, (order, field)) in sorts_iter.clone().enumerate() { - // Both cursors are treated as inclusive here. `page_after` is made exclusive - // by adding `1` to the offset. - let (then_order_by_field, compare_first, compare_last) = match order { - Ord::Desc => (field.then_order_by_desc, field.le, field.ge), - Ord::Asc => (field.then_order_by_asc, field.ge, field.le), - }; - - query = then_order_by_field(query); - - for (cursor_data, compare) in [ - (&options.page_after, compare_first), - (&options.page_before_or_equal, compare_last), - ] { - let Some(cursor_data) = cursor_data else { - continue; - }; - let mut condition: Box> = - Box::new(compare(&cursor_data.0)); - - // For each field that was sorted before the current one, skip the filter by changing - // `condition` to `true` if the row's value doesn't equal the cursor's value. - for (_, other_field) in sorts_iter.clone().take(i) { - condition = Box::new(condition.or((other_field.ne)(&cursor_data.0))); - } - - query = query.filter(condition); - } - } - - let (limit, mut offset) = limit_and_offset(options.page, options.limit)?; - if options.page_after.is_some() { - // always skip exactly one post because that's the last post of the previous page - // fixing the where clause is more difficult because we'd have to change only the last order-by-where clause - // e.g. WHERE (featured_local<=, hot_rank<=, published<=) to WHERE (<=, <=, <) - offset = 1; - } + let (limit, offset) = limit_and_offset(options.page, options.limit)?; query = query.limit(limit).offset(offset); + let mut query = PaginatedQueryBuilder::new(query); + + let page_after = options.page_after.map(|c| c.0); + let page_before_or_equal = options.page_before_or_equal.map(|c| c.0); + + if options.page_back { + query = query + .before(page_after) + .after_or_equal(page_before_or_equal) + .limit_and_offset_from_end(); + } else { + query = query + .after(page_after) + .before_or_equal(page_before_or_equal); + } + + // featured posts first + query = if options.community_id.is_none() || options.community_id_just_for_prefetch { + query.then_desc(key::featured_local) + } else { + query.then_desc(key::featured_community) + }; + + let time = |interval| post_aggregates::published.gt(now() - interval); + + // then use the main sort + query = match options.sort.unwrap_or(SortType::Hot) { + SortType::Active => query.then_desc(key::hot_rank_active), + SortType::Hot => query.then_desc(key::hot_rank), + SortType::Scaled => query.then_desc(key::scaled_rank), + SortType::Controversial => query.then_desc(key::controversy_rank), + SortType::New => query.then_desc(key::published), + SortType::Old => query.then_desc(ReverseTimestampKey(key::published)), + SortType::NewComments => query.then_desc(key::newest_comment_time), + SortType::MostComments => query.then_desc(key::comments), + SortType::TopAll => query.then_desc(key::score), + SortType::TopYear => query.then_desc(key::score).filter(time(1.years())), + SortType::TopMonth => query.then_desc(key::score).filter(time(1.months())), + SortType::TopWeek => query.then_desc(key::score).filter(time(1.weeks())), + SortType::TopDay => query.then_desc(key::score).filter(time(1.days())), + SortType::TopHour => query.then_desc(key::score).filter(time(1.hours())), + SortType::TopSixHour => query.then_desc(key::score).filter(time(6.hours())), + SortType::TopTwelveHour => query.then_desc(key::score).filter(time(12.hours())), + SortType::TopThreeMonths => query.then_desc(key::score).filter(time(3.months())), + SortType::TopSixMonths => query.then_desc(key::score).filter(time(6.months())), + SortType::TopNineMonths => query.then_desc(key::score).filter(time(9.months())), + }; + + // use publish as fallback. especially useful for hot rank which reaches zero after some days. + // necessary because old posts can be fetched over federation and inserted with high post id + query = match options.sort.unwrap_or(SortType::Hot) { + // A second time-based sort would not be very useful + SortType::New | SortType::Old | SortType::NewComments => query, + _ => query.then_desc(key::published), + }; + + // finally use unique post id as tie breaker + query = query.then_desc(key::post_id); + + // Not done by debug_query + let query = query.as_query(); + debug!("Post View Query: {:?}", debug_query::(&query)); Commented::new(query) @@ -642,6 +578,7 @@ pub struct PostQuery<'a> { pub limit: Option, pub page_after: Option, pub page_before_or_equal: Option, + pub page_back: bool, } impl<'a> PostQuery<'a> { @@ -709,9 +646,15 @@ impl<'a> PostQuery<'a> { if (v.len() as i64) < limit { Ok(Some(self.clone())) } else { - let page_before_or_equal = Some(PaginationCursorData(v.pop().expect("else case").counts)); + let item = if self.page_back { + // for backward pagination, get first element instead + v.into_iter().next() + } else { + v.pop() + }; + let limit_cursor = Some(PaginationCursorData(item.expect("else case").counts)); Ok(Some(PostQuery { - page_before_or_equal, + page_before_or_equal: limit_cursor, ..self.clone() })) } @@ -1388,15 +1331,19 @@ mod tests { } } + let options = PostQuery { + community_id: Some(inserted_community.id), + sort: Some(SortType::MostComments), + limit: Some(10), + ..Default::default() + }; + let mut listed_post_ids = vec![]; let mut page_after = None; loop { let post_listings = PostQuery { - community_id: Some(inserted_community.id), - sort: Some(SortType::MostComments), - limit: Some(10), page_after, - ..Default::default() + ..options.clone() } .list(pool) .await?; @@ -1410,6 +1357,34 @@ mod tests { } } + // Check that backward pagination matches forward pagination + let mut listed_post_ids_forward = listed_post_ids.clone(); + let mut page_before = None; + loop { + let post_listings = PostQuery { + page_after: page_before, + page_back: true, + ..options.clone() + } + .list(pool) + .await?; + + let listed_post_ids = post_listings.iter().map(|p| p.post.id).collect::>(); + + let index = listed_post_ids_forward.len() - listed_post_ids.len(); + assert_eq!( + listed_post_ids_forward.get(index..), + listed_post_ids.get(..) + ); + listed_post_ids_forward.truncate(index); + + if let Some(p) = post_listings.into_iter().next() { + page_before = Some(PaginationCursorData(p.counts)); + } else { + break; + } + } + inserted_post_ids.sort_unstable_by_key(|id| id.0); listed_post_ids.sort_unstable_by_key(|id| id.0); diff --git a/migrations/2023-12-22-040137_make-mixed-sorting-directions-work-with-tuple-comparison/down.sql b/migrations/2023-12-22-040137_make-mixed-sorting-directions-work-with-tuple-comparison/down.sql new file mode 100644 index 0000000000..0b84b2ba1a --- /dev/null +++ b/migrations/2023-12-22-040137_make-mixed-sorting-directions-work-with-tuple-comparison/down.sql @@ -0,0 +1,4 @@ +DROP INDEX idx_post_aggregates_community_published_asc, idx_post_aggregates_featured_community_published_asc, idx_post_aggregates_featured_local_published_asc, idx_post_aggregates_published_asc; + +DROP FUNCTION reverse_timestamp_sort (t timestamp with time zone); + diff --git a/migrations/2023-12-22-040137_make-mixed-sorting-directions-work-with-tuple-comparison/up.sql b/migrations/2023-12-22-040137_make-mixed-sorting-directions-work-with-tuple-comparison/up.sql new file mode 100644 index 0000000000..450a6b5827 --- /dev/null +++ b/migrations/2023-12-22-040137_make-mixed-sorting-directions-work-with-tuple-comparison/up.sql @@ -0,0 +1,18 @@ +CREATE FUNCTION reverse_timestamp_sort (t timestamp with time zone) + RETURNS bigint + AS $$ +BEGIN + RETURN (-1000000 * EXTRACT(EPOCH FROM t))::bigint; +END; +$$ +LANGUAGE plpgsql +IMMUTABLE PARALLEL SAFE; + +CREATE INDEX idx_post_aggregates_community_published_asc ON public.post_aggregates USING btree (community_id, featured_local DESC, reverse_timestamp_sort (published) DESC); + +CREATE INDEX idx_post_aggregates_featured_community_published_asc ON public.post_aggregates USING btree (community_id, featured_community DESC, reverse_timestamp_sort (published) DESC); + +CREATE INDEX idx_post_aggregates_featured_local_published_asc ON public.post_aggregates USING btree (featured_local DESC, reverse_timestamp_sort (published) DESC); + +CREATE INDEX idx_post_aggregates_published_asc ON public.post_aggregates USING btree (reverse_timestamp_sort (published) DESC); +