From 917e408735cd9347096e1d6f1e5a2bcfd3cf745f Mon Sep 17 00:00:00 2001 From: Dessalines Date: Wed, 6 Nov 2024 10:58:40 -0500 Subject: [PATCH] Fix postgres connection options causing slow query speed. (#5150) * Adding a query speed check. * Fixing slow queries due to connection config options. * Remove pointless set_config sql function. * Removing pointless bool. * Removing comment * Removing test.sh changes. * Add analyze to speed up query * Trying to fix DB perf connection try #1 * Try encoding option * Fix woodpecker * Try to use path character. * Fixing lemmy config location. * Removing pointless connection options. * Use OnceLock to create a once-init psql connection. * Fixing comment. * Fix host encoding for dev DB. * Address PR comments. * Revert query mut change. --- .woodpecker.yml | 4 +- crates/db_schema/src/utils.rs | 56 ++++++++++++++++------------ crates/db_views/src/post_view.rs | 64 +++++++++++++++++++++++++++++++- scripts/start_dev_db.sh | 6 ++- 4 files changed, 100 insertions(+), 30 deletions(-) diff --git a/.woodpecker.yml b/.woodpecker.yml index 16cd49375..8930c21fc 100644 --- a/.woodpecker.yml +++ b/.woodpecker.yml @@ -122,7 +122,6 @@ steps: environment: CARGO_HOME: .cargo_home commands: - - export LEMMY_CONFIG_LOCATION=./config/config.hjson - ./scripts/update_config_defaults.sh config/defaults_current.hjson - diff config/defaults.hjson config/defaults_current.hjson when: *slow_check_paths @@ -147,7 +146,6 @@ steps: CARGO_HOME: .cargo_home commands: # same as scripts/db_perf.sh but without creating a new database server - - export LEMMY_CONFIG_LOCATION=config/config.hjson - cargo run --package lemmy_db_perf -- --posts 10 --read-post-pages 1 when: *slow_check_paths @@ -176,8 +174,8 @@ steps: RUST_BACKTRACE: "1" CARGO_HOME: .cargo_home LEMMY_TEST_FAST_FEDERATION: "1" + LEMMY_CONFIG_LOCATION: ../../config/config.hjson commands: - - export LEMMY_CONFIG_LOCATION=../../config/config.hjson - cargo test --workspace --no-fail-fast when: *slow_check_paths diff --git a/crates/db_schema/src/utils.rs b/crates/db_schema/src/utils.rs index 1e56563bc..6c5b792eb 100644 --- a/crates/db_schema/src/utils.rs +++ b/crates/db_schema/src/utils.rs @@ -22,7 +22,6 @@ use diesel_async::{ ManagerConfig, }, AsyncConnection, - RunQueryDsl, }; use futures_util::{future::BoxFuture, Future, FutureExt}; use i_love_jesus::CursorKey; @@ -47,7 +46,7 @@ use rustls::{ }; use std::{ ops::{Deref, DerefMut}, - sync::{Arc, LazyLock}, + sync::{Arc, LazyLock, OnceLock}, time::Duration, }; use tracing::error; @@ -59,6 +58,8 @@ pub const SITEMAP_LIMIT: i64 = 50000; pub const SITEMAP_DAYS: Option = TimeDelta::try_days(31); pub const RANK_DEFAULT: f64 = 0.0001; +/// Some connection options to speed up queries +const CONNECTION_OPTIONS: [&str; 1] = ["geqo_threshold=12"]; pub type ActualDbPool = Pool; /// References a pool or connection. Functions must take `&mut DbPool<'_>` to allow implicit @@ -345,10 +346,37 @@ pub fn diesel_url_create(opt: Option<&str>) -> LemmyResult> { } } +/// Sets a few additional config options necessary for starting lemmy +fn build_config_options_uri_segment(config: &str) -> String { + let mut url = Url::parse(config).expect("Couldn't parse postgres connection URI"); + + // Set `lemmy.protocol_and_hostname` so triggers can use it + let lemmy_protocol_and_hostname_option = + "lemmy.protocol_and_hostname=".to_owned() + &SETTINGS.get_protocol_and_hostname(); + let mut options = CONNECTION_OPTIONS.to_vec(); + options.push(&lemmy_protocol_and_hostname_option); + + // Create the connection uri portion + let options_segments = options + .iter() + .map(|o| "-c ".to_owned() + o) + .collect::>() + .join(" "); + + url.set_query(Some(&format!("options={options_segments}"))); + url.into() +} + fn establish_connection(config: &str) -> BoxFuture> { let fut = async { + /// Use a once_lock to create the postgres connection config, since this config never changes + static POSTGRES_CONFIG_WITH_OPTIONS: OnceLock = OnceLock::new(); + + let config = + POSTGRES_CONFIG_WITH_OPTIONS.get_or_init(|| build_config_options_uri_segment(config)); + // We only support TLS with sslmode=require currently - let mut conn = if config.contains("sslmode=require") { + let conn = if config.contains("sslmode=require") { let rustls_config = DangerousClientConfigBuilder { cfg: ClientConfig::builder(), } @@ -369,24 +397,6 @@ fn establish_connection(config: &str) -> BoxFuture = LazyLock::new(|| { }); pub mod functions { - use diesel::sql_types::{BigInt, Bool, Text, Timestamptz}; + use diesel::sql_types::{BigInt, Text, Timestamptz}; sql_function! { #[sql_name = "r.hot_rank"] @@ -521,8 +531,6 @@ pub mod functions { // really this function is variadic, this just adds the two-argument version sql_function!(fn coalesce(x: diesel::sql_types::Nullable, y: T) -> T); - - sql_function!(fn set_config(setting_name: Text, new_value: Text, is_local: Bool) -> Text); } pub const DELETED_REPLACEMENT_TEXT: &str = "*Permanently Deleted*"; diff --git a/crates/db_views/src/post_view.rs b/crates/db_views/src/post_view.rs index 13520f1cf..dc00b0438 100644 --- a/crates/db_views/src/post_view.rs +++ b/crates/db_views/src/post_view.rs @@ -734,6 +734,7 @@ mod tests { structs::LocalUserView, }; use chrono::Utc; + use diesel_async::SimpleAsyncConnection; use lemmy_db_schema::{ aggregates::structs::PostAggregates, impls::actor_language::UNDETERMINED_ID, @@ -774,7 +775,7 @@ mod tests { site::Site, }, traits::{Bannable, Blockable, Crud, Followable, Joinable, Likeable, Saveable}, - utils::{build_db_pool, build_db_pool_for_tests, DbPool, RANK_DEFAULT}, + utils::{build_db_pool, build_db_pool_for_tests, get_conn, DbPool, RANK_DEFAULT}, CommunityVisibility, PostSortType, SubscribedType, @@ -782,7 +783,10 @@ mod tests { use lemmy_utils::error::LemmyResult; use pretty_assertions::assert_eq; use serial_test::serial; - use std::{collections::HashSet, time::Duration}; + use std::{ + collections::HashSet, + time::{Duration, Instant}, + }; use url::Url; const POST_WITH_ANOTHER_TITLE: &str = "Another title"; @@ -1995,6 +1999,62 @@ mod tests { cleanup(data, pool).await } + #[tokio::test] + #[serial] + async fn speed_check() -> LemmyResult<()> { + let pool = &build_db_pool().await?; + let pool = &mut pool.into(); + let data = init_data(pool).await?; + + // Make sure the post_view query is less than this time + let duration_max = Duration::from_millis(40); + + // Create some dummy posts + let num_posts = 1000; + for x in 1..num_posts { + let name = format!("post_{x}"); + let url = Some(Url::parse(&format!("https://google.com/{name}"))?.into()); + + let post_form = PostInsertForm { + url, + ..PostInsertForm::new( + name, + data.local_user_view.person.id, + data.inserted_community.id, + ) + }; + Post::create(pool, &post_form).await?; + } + + // Manually trigger and wait for a statistics update to ensure consistent and high amount of + // accuracy in the statistics used for query planning + println!("🧮 updating database statistics"); + let conn = &mut get_conn(pool).await?; + conn.batch_execute("ANALYZE;").await?; + + // Time how fast the query took + let now = Instant::now(); + PostQuery { + sort: Some(PostSortType::Active), + local_user: Some(&data.local_user_view.local_user), + ..Default::default() + } + .list(&data.site, pool) + .await?; + + let elapsed = now.elapsed(); + println!("Elapsed: {:.0?}", elapsed); + + assert!( + elapsed.lt(&duration_max), + "Query took {:.0?}, longer than the max of {:.0?}", + elapsed, + duration_max + ); + + cleanup(data, pool).await + } + #[tokio::test] #[serial] async fn post_listings_no_comments_only() -> LemmyResult<()> { diff --git a/scripts/start_dev_db.sh b/scripts/start_dev_db.sh index 5965316ba..1cbe9e16a 100644 --- a/scripts/start_dev_db.sh +++ b/scripts/start_dev_db.sh @@ -2,8 +2,12 @@ export PGDATA="$PWD/dev_pgdata" export PGHOST=$PWD + +# Necessary to encode the dev db path into proper URL params +export ENCODED_HOST=$(printf $PWD | jq -sRr @uri) + export PGUSER=postgres -export DATABASE_URL="postgresql://lemmy:password@/lemmy?host=$PWD" +export DATABASE_URL="postgresql://lemmy:password@$ENCODED_HOST/lemmy" export LEMMY_DATABASE_URL=$DATABASE_URL export PGDATABASE=lemmy