Fix postgres connection options causing slow query speed. (#5150)

* Adding a query speed check.

* Fixing slow queries due to connection config options.

* Remove pointless set_config sql function.

* Removing pointless bool.

* Removing comment

* Removing test.sh changes.

* Add analyze to speed up query

* Trying to fix DB perf connection try #1

* Try encoding option

* Fix woodpecker

* Try to use path character.

* Fixing lemmy config location.

* Removing pointless connection options.

* Use OnceLock to create a once-init psql connection.

* Fixing comment.

* Fix host encoding for dev DB.

* Address PR comments.

* Revert query mut change.
This commit is contained in:
Dessalines 2024-11-06 10:58:40 -05:00 committed by GitHub
parent a55e7fd9fe
commit 917e408735
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 100 additions and 30 deletions

View file

@ -122,7 +122,6 @@ steps:
environment:
CARGO_HOME: .cargo_home
commands:
- export LEMMY_CONFIG_LOCATION=./config/config.hjson
- ./scripts/update_config_defaults.sh config/defaults_current.hjson
- diff config/defaults.hjson config/defaults_current.hjson
when: *slow_check_paths
@ -147,7 +146,6 @@ steps:
CARGO_HOME: .cargo_home
commands:
# same as scripts/db_perf.sh but without creating a new database server
- export LEMMY_CONFIG_LOCATION=config/config.hjson
- cargo run --package lemmy_db_perf -- --posts 10 --read-post-pages 1
when: *slow_check_paths
@ -176,8 +174,8 @@ steps:
RUST_BACKTRACE: "1"
CARGO_HOME: .cargo_home
LEMMY_TEST_FAST_FEDERATION: "1"
LEMMY_CONFIG_LOCATION: ../../config/config.hjson
commands:
- export LEMMY_CONFIG_LOCATION=../../config/config.hjson
- cargo test --workspace --no-fail-fast
when: *slow_check_paths

View file

@ -22,7 +22,6 @@ use diesel_async::{
ManagerConfig,
},
AsyncConnection,
RunQueryDsl,
};
use futures_util::{future::BoxFuture, Future, FutureExt};
use i_love_jesus::CursorKey;
@ -47,7 +46,7 @@ use rustls::{
};
use std::{
ops::{Deref, DerefMut},
sync::{Arc, LazyLock},
sync::{Arc, LazyLock, OnceLock},
time::Duration,
};
use tracing::error;
@ -59,6 +58,8 @@ pub const SITEMAP_LIMIT: i64 = 50000;
pub const SITEMAP_DAYS: Option<TimeDelta> = TimeDelta::try_days(31);
pub const RANK_DEFAULT: f64 = 0.0001;
/// Some connection options to speed up queries.
///
/// Each entry is a `name=value` setting. `build_config_options_uri_segment` prefixes every entry
/// with `-c ` and joins them into the `options` query parameter of the connection URI.
const CONNECTION_OPTIONS: [&str; 1] = ["geqo_threshold=12"];
pub type ActualDbPool = Pool<AsyncPgConnection>;
/// References a pool or connection. Functions must take `&mut DbPool<'_>` to allow implicit
@ -345,10 +346,37 @@ pub fn diesel_url_create(opt: Option<&str>) -> LemmyResult<Option<DbUrl>> {
}
}
/// Sets a few additional config options necessary for starting lemmy.
///
/// Parses `config` as a postgres connection URI and adds an `options` query parameter to it.
/// The value is a space-separated list of `-c name=value` settings, made of every entry in
/// `CONNECTION_OPTIONS` plus `lemmy.protocol_and_hostname`.
///
/// Query parameters already present in `config` (for example `sslmode=require`) are kept, and
/// `options` is appended after them. If `config` already has its own `options` parameter, both
/// are emitted.
/// NOTE(review): how the driver resolves duplicate `options` keys is not visible here, confirm.
///
/// # Panics
///
/// Panics if `config` can't be parsed as a URL.
fn build_config_options_uri_segment(config: &str) -> String {
  let mut url = Url::parse(config).expect("Couldn't parse postgres connection URI");

  // Set `lemmy.protocol_and_hostname` so triggers can use it
  let lemmy_protocol_and_hostname_option =
    "lemmy.protocol_and_hostname=".to_owned() + &SETTINGS.get_protocol_and_hostname();
  let mut options = CONNECTION_OPTIONS.to_vec();
  options.push(&lemmy_protocol_and_hostname_option);

  // Create the connection uri portion
  let options_segments = options
    .iter()
    .map(|o| format!("-c {o}"))
    .collect::<Vec<String>>()
    .join(" ");

  // `set_query` replaces the whole query string, so carry over whatever is already there.
  // Otherwise settings such as `sslmode=require` would be silently dropped from the URI.
  let query = match url.query() {
    Some(existing) if !existing.is_empty() => format!("{existing}&options={options_segments}"),
    _ => format!("options={options_segments}"),
  };
  url.set_query(Some(&query));
  url.into()
}
fn establish_connection(config: &str) -> BoxFuture<ConnectionResult<AsyncPgConnection>> {
let fut = async {
/// Use a once_lock to create the postgres connection config, since this config never changes
static POSTGRES_CONFIG_WITH_OPTIONS: OnceLock<String> = OnceLock::new();
let config =
POSTGRES_CONFIG_WITH_OPTIONS.get_or_init(|| build_config_options_uri_segment(config));
// We only support TLS with sslmode=require currently
let mut conn = if config.contains("sslmode=require") {
let conn = if config.contains("sslmode=require") {
let rustls_config = DangerousClientConfigBuilder {
cfg: ClientConfig::builder(),
}
@ -369,24 +397,6 @@ fn establish_connection(config: &str) -> BoxFuture<ConnectionResult<AsyncPgConne
AsyncPgConnection::establish(config).await?
};
diesel::select((
// Change geqo_threshold back to default value if it was changed, so it's higher than the
// collapse limits
functions::set_config("geqo_threshold", "12", false),
// Change collapse limits from 8 to 11 so the query planner can find a better table join
// order for more complicated queries
functions::set_config("from_collapse_limit", "11", false),
functions::set_config("join_collapse_limit", "11", false),
// Set `lemmy.protocol_and_hostname` so triggers can use it
functions::set_config(
"lemmy.protocol_and_hostname",
SETTINGS.get_protocol_and_hostname(),
false,
),
))
.execute(&mut conn)
.await
.map_err(ConnectionError::CouldntSetupConfiguration)?;
Ok(conn)
};
fut.boxed()
@ -498,7 +508,7 @@ static EMAIL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
});
pub mod functions {
use diesel::sql_types::{BigInt, Bool, Text, Timestamptz};
use diesel::sql_types::{BigInt, Text, Timestamptz};
sql_function! {
#[sql_name = "r.hot_rank"]
@ -521,8 +531,6 @@ pub mod functions {
// really this function is variadic, this just adds the two-argument version
sql_function!(fn coalesce<T: diesel::sql_types::SqlType + diesel::sql_types::SingleValue>(x: diesel::sql_types::Nullable<T>, y: T) -> T);
sql_function!(fn set_config(setting_name: Text, new_value: Text, is_local: Bool) -> Text);
}
pub const DELETED_REPLACEMENT_TEXT: &str = "*Permanently Deleted*";

View file

@ -734,6 +734,7 @@ mod tests {
structs::LocalUserView,
};
use chrono::Utc;
use diesel_async::SimpleAsyncConnection;
use lemmy_db_schema::{
aggregates::structs::PostAggregates,
impls::actor_language::UNDETERMINED_ID,
@ -774,7 +775,7 @@ mod tests {
site::Site,
},
traits::{Bannable, Blockable, Crud, Followable, Joinable, Likeable, Saveable},
utils::{build_db_pool, build_db_pool_for_tests, DbPool, RANK_DEFAULT},
utils::{build_db_pool, build_db_pool_for_tests, get_conn, DbPool, RANK_DEFAULT},
CommunityVisibility,
PostSortType,
SubscribedType,
@ -782,7 +783,10 @@ mod tests {
use lemmy_utils::error::LemmyResult;
use pretty_assertions::assert_eq;
use serial_test::serial;
use std::{collections::HashSet, time::Duration};
use std::{
collections::HashSet,
time::{Duration, Instant},
};
use url::Url;
const POST_WITH_ANOTHER_TITLE: &str = "Another title";
@ -1995,6 +1999,62 @@ mod tests {
cleanup(data, pool).await
}
/// Checks that listing posts sorted by `Active` stays below a fixed time budget after a batch
/// of dummy posts has been inserted and the database statistics have been refreshed.
#[tokio::test]
#[serial]
async fn speed_check() -> LemmyResult<()> {
  let pool = &build_db_pool().await?;
  let pool = &mut pool.into();
  let data = init_data(pool).await?;

  // Make sure the post_view query is less than this time
  let duration_max = Duration::from_millis(40);

  // Create some dummy posts
  let num_posts = 1000;
  // Inclusive upper bound, so that exactly `num_posts` posts are inserted
  for x in 1..=num_posts {
    let name = format!("post_{x}");
    let url = Some(Url::parse(&format!("https://google.com/{name}"))?.into());

    let post_form = PostInsertForm {
      url,
      ..PostInsertForm::new(
        name,
        data.local_user_view.person.id,
        data.inserted_community.id,
      )
    };
    Post::create(pool, &post_form).await?;
  }

  // Manually trigger and wait for a statistics update to ensure consistent and high amount of
  // accuracy in the statistics used for query planning
  println!("🧮 updating database statistics");
  let conn = &mut get_conn(pool).await?;
  conn.batch_execute("ANALYZE;").await?;

  // Time how fast the query took
  let now = Instant::now();
  PostQuery {
    sort: Some(PostSortType::Active),
    local_user: Some(&data.local_user_view.local_user),
    ..Default::default()
  }
  .list(&data.site, pool)
  .await?;

  let elapsed = now.elapsed();
  println!("Elapsed: {:.0?}", elapsed);

  // Run `cleanup` before asserting, so that it still runs when the timing check fails
  cleanup(data, pool).await?;

  assert!(
    elapsed < duration_max,
    "Query took {:.0?}, longer than the max of {:.0?}",
    elapsed,
    duration_max
  );

  Ok(())
}
#[tokio::test]
#[serial]
async fn post_listings_no_comments_only() -> LemmyResult<()> {

View file

@ -2,8 +2,12 @@
export PGDATA="$PWD/dev_pgdata"
export PGHOST=$PWD
# Necessary to encode the dev db path into proper URL params.
# $PWD is quoted and passed as an argument to '%s' rather than used as the printf format, so
# that spaces, '%' or backslashes in the path are encoded literally instead of being word-split
# or interpreted as format directives.
export ENCODED_HOST=$(printf '%s' "$PWD" | jq -sRr @uri)
export PGUSER=postgres
export DATABASE_URL="postgresql://lemmy:password@/lemmy?host=$PWD"
export DATABASE_URL="postgresql://lemmy:password@$ENCODED_HOST/lemmy"
export LEMMY_DATABASE_URL=$DATABASE_URL
export PGDATABASE=lemmy