From ad2af54384a6d915f5e832a1ab93795e037be47f Mon Sep 17 00:00:00 2001
From: dull b
Date: Wed, 20 Dec 2023 20:56:37 +0000
Subject: [PATCH] Run post insert triggers once per statement for tolerable batch insert speed

---
 crates/db_perf/src/main.rs |  50 +++++--
 .../down.sql               |  46 +++++++
 .../up.sql                 | 122 ++++++++++++++++--
 scripts/start_dev_db.sh    |   8 +-
 4 files changed, 201 insertions(+), 25 deletions(-)

diff --git a/crates/db_perf/src/main.rs b/crates/db_perf/src/main.rs
index 5ec4e843c..d071779a9 100644
--- a/crates/db_perf/src/main.rs
+++ b/crates/db_perf/src/main.rs
@@ -12,7 +12,7 @@ use lemmy_db_schema::{
   utils::{
     build_db_pool,
     get_conn,
-    series::{self, ValuesFromSeries},
+    series::{self, ValuesFromSeries}, DbConn, DbPool,
   },
   SortType,
 };
@@ -31,8 +31,10 @@ struct CmdArgs {
   people: NonZeroU32,
   #[arg(long, default_value_t = 100000.try_into().unwrap())]
   posts: NonZeroU32,
+  #[arg(long, default_value_t = 0)]
+  read_post_pages: u32,
   #[arg(long)]
-  read_posts: bool,
+  explain_insertions: bool,
 }
 
 #[tokio::main]
@@ -41,6 +43,14 @@ async fn main() -> LemmyResult<()> {
   let pool = &build_db_pool().await?;
   let pool = &mut pool.into();
 
+  let conn = &mut get_conn(pool).await?;
+  if args.explain_insertions {
+    sql_query("SET auto_explain.log_min_duration = 0")
+      .execute(conn)
+      .await?;
+  }
+  let pool = &mut conn.into();
+
   let instance = Instance::read_or_create(pool, "reddit.com".to_owned()).await?;
 
   println!("🫃 creating {} people", args.people);
@@ -120,10 +130,10 @@
     .execute(conn)
     .await?;
   let pool = &mut conn.into();
-
-  if args.read_posts {
+
+  {
     let mut page_after = None;
-    for page_num in 1..=2 {
+    for page_num in 1..=args.read_post_pages {
       println!(
         "👀 getting page {page_num} of posts (pagination cursor used: {})",
         page_after.is_some()
@@ -138,19 +148,31 @@
       }
       .list(pool)
       .await?;
-      if let Some(post_view) = post_views.into_iter().last() {
-        println!("👀 getting pagination cursor data for next page ");
-        let cursor_data = PaginationCursor::after_post(&post_view).read(pool).await?;
-        page_after = Some(cursor_data);
-      } else {
-        break;
-      }
+      if let Some(post_view) = post_views.into_iter().last() {
+        println!("👀 getting pagination cursor data for next page");
+        let cursor_data = PaginationCursor::after_post(&post_view).read(pool).await?;
+        page_after = Some(cursor_data);
+      } else {
+        println!("🚫 reached empty page");
+        break;
+      }
     }
   }
-
+
+  // TODO show this path when there's an error
   if let Ok(path) = std::env::var("PGDATA") {
     println!("🪵 query plans written in {path}/log");
  }
-
+
   Ok(())
 }
+
+async fn conn_with_auto_explain<'a, 'b: 'a>(pool: &'a mut DbPool<'b>) -> LemmyResult<DbConn<'a>> {
+  let mut conn = get_conn(pool).await?;
+
+  sql_query("SET auto_explain.log_min_duration = 0")
+    .execute(&mut conn)
+    .await?;
+
+  Ok(conn)
+}
diff --git a/migrations/2023-12-19-210053_tolerable-batch-insert-speed/down.sql b/migrations/2023-12-19-210053_tolerable-batch-insert-speed/down.sql
index 9c1ae769e..f9282175f 100644
--- a/migrations/2023-12-19-210053_tolerable-batch-insert-speed/down.sql
+++ b/migrations/2023-12-19-210053_tolerable-batch-insert-speed/down.sql
@@ -30,6 +30,46 @@ CREATE OR REPLACE TRIGGER post_aggregates_post
     FOR EACH ROW
     EXECUTE PROCEDURE post_aggregates_post ();
 
+CREATE OR REPLACE TRIGGER community_aggregates_post_count
+    AFTER INSERT OR DELETE OR UPDATE OF removed,
+    deleted ON post
+    FOR EACH ROW
+    EXECUTE PROCEDURE community_aggregates_post_count ();
+
+DROP FUNCTION IF EXISTS community_aggregates_post_count_insert CASCADE;
+
+DROP FUNCTION IF EXISTS community_aggregates_post_update CASCADE;
+
+DROP FUNCTION IF EXISTS site_aggregates_post_update CASCADE;
+
+DROP FUNCTION IF EXISTS person_aggregates_post_insert CASCADE;
+
+CREATE OR REPLACE FUNCTION site_aggregates_post_insert ()
+    RETURNS TRIGGER
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    IF (was_restored_or_created (TG_OP, OLD, NEW)) THEN
+        UPDATE
+            site_aggregates sa
+        SET
+            posts = posts + 1
+        FROM
+            site s
+        WHERE
+            sa.site_id = s.id;
+    END IF;
+    RETURN NULL;
+END
+$$;
+
+CREATE OR REPLACE TRIGGER site_aggregates_post_insert
+    AFTER INSERT OR UPDATE OF removed,
+    deleted ON post
+    FOR EACH ROW
+    WHEN (NEW.local = TRUE)
+    EXECUTE PROCEDURE site_aggregates_post_insert ();
+
 CREATE OR REPLACE FUNCTION generate_unique_changeme ()
     RETURNS text
     LANGUAGE sql
@@ -38,5 +78,11 @@ CREATE OR REPLACE FUNCTION generate_unique_changeme ()
     'http://changeme.invalid/' || substr(md5(random()::text), 0, 25);
 $$;
 
+CREATE OR REPLACE TRIGGER person_aggregates_post_count
+    AFTER INSERT OR DELETE OR UPDATE OF removed,
+    deleted ON post
+    FOR EACH ROW
+    EXECUTE PROCEDURE person_aggregates_post_count ();
+
 DROP SEQUENCE IF EXISTS changeme_seq;
 
diff --git a/migrations/2023-12-19-210053_tolerable-batch-insert-speed/up.sql b/migrations/2023-12-19-210053_tolerable-batch-insert-speed/up.sql
index 0157e4bb7..593f9638e 100644
--- a/migrations/2023-12-19-210053_tolerable-batch-insert-speed/up.sql
+++ b/migrations/2023-12-19-210053_tolerable-batch-insert-speed/up.sql
@@ -1,5 +1,6 @@
--- Change post_aggregates trigger to run once per statement instead of once per row.
--- The trigger doesn't need to handle deletion because the post_id column has ON DELETE CASCADE.
+-- Change triggers to run once per statement instead of once per row
+
+-- post_aggregates_post trigger doesn't need to handle deletion because the post_id column has ON DELETE CASCADE
 CREATE OR REPLACE FUNCTION post_aggregates_post ()
     RETURNS TRIGGER
@@ -8,25 +9,128 @@ CREATE OR REPLACE FUNCTION post_aggregates_post ()
 BEGIN
     INSERT INTO post_aggregates (post_id, published, newest_comment_time, newest_comment_time_necro, community_id, creator_id, instance_id)
     SELECT
-        new_post.id,
-        new_post.published,
-        new_post.published,
-        new_post.published,
-        new_post.community_id,
-        new_post.creator_id,
-        (SELECT community.instance_id FROM community WHERE community.id = new_post.community_id LIMIT 1)
+        id,
+        published,
+        published,
+        published,
+        community_id,
+        creator_id,
+        (SELECT community.instance_id FROM community WHERE community.id = community_id LIMIT 1)
     FROM new_post;
     RETURN NULL;
 END
 $$;
 
+CREATE OR REPLACE FUNCTION community_aggregates_post_count_insert ()
+    RETURNS TRIGGER
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    UPDATE community_aggregates
+    SET posts = posts + post_group.count
+    FROM (SELECT community_id, count(*) FROM new_post GROUP BY community_id) post_group
+    WHERE community_aggregates.community_id = post_group.community_id;
+    RETURN NULL;
+END
+$$;
+
+CREATE OR REPLACE FUNCTION person_aggregates_post_insert ()
+    RETURNS TRIGGER
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    UPDATE person_aggregates
+    SET post_count = post_count + post_group.count
+    FROM (SELECT creator_id, count(*) FROM new_post GROUP BY creator_id) post_group
+    WHERE person_aggregates.person_id = post_group.creator_id;
+    RETURN NULL;
+END
+$$;
+
 CREATE OR REPLACE TRIGGER post_aggregates_post
     AFTER INSERT ON post
     REFERENCING NEW TABLE AS new_post
     FOR EACH STATEMENT
     EXECUTE PROCEDURE post_aggregates_post ();
 
+-- Don't run the old row-level triggers on insert
+
+CREATE OR REPLACE TRIGGER community_aggregates_post_count
+    AFTER DELETE OR UPDATE OF removed,
+    deleted ON post
+    FOR EACH ROW
+    EXECUTE PROCEDURE community_aggregates_post_count ();
+
+CREATE OR REPLACE TRIGGER community_aggregates_post_count_insert
+    AFTER INSERT ON post
+    REFERENCING NEW TABLE AS new_post
+    FOR EACH STATEMENT
+    EXECUTE PROCEDURE community_aggregates_post_count_insert ();
+
+DROP FUNCTION IF EXISTS site_aggregates_community_delete CASCADE;
+
+CREATE OR REPLACE FUNCTION site_aggregates_post_update ()
+    RETURNS TRIGGER
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    IF (was_restored_or_created (TG_OP, OLD, NEW)) THEN
+        UPDATE
+            site_aggregates sa
+        SET
+            posts = posts + 1
+        FROM
+            site s
+        WHERE
+            sa.site_id = s.id;
+    END IF;
+    RETURN NULL;
+END
+$$;
+
+CREATE OR REPLACE FUNCTION site_aggregates_post_insert ()
+    RETURNS TRIGGER
+    LANGUAGE plpgsql
+    AS $$
+BEGIN
+    UPDATE
+        site_aggregates sa
+    SET
+        posts = posts + (SELECT count(*) FROM new_post WHERE local = TRUE)
+    FROM
+        site s
+    WHERE
+        sa.site_id = s.id;
+    RETURN NULL;
+END
+$$;
+
+CREATE OR REPLACE TRIGGER site_aggregates_post_update
+    AFTER UPDATE OF removed,
+    deleted ON post
+    FOR EACH ROW
+    WHEN (NEW.local = TRUE)
+    EXECUTE PROCEDURE site_aggregates_post_update ();
+
+CREATE OR REPLACE TRIGGER site_aggregates_post_insert
+    AFTER INSERT ON post
+    REFERENCING NEW TABLE AS new_post
+    FOR EACH STATEMENT
+    EXECUTE PROCEDURE site_aggregates_post_insert ();
+
+CREATE OR REPLACE TRIGGER person_aggregates_post_count
+    AFTER DELETE OR UPDATE OF removed,
+    deleted ON post
+    FOR EACH ROW
+    EXECUTE PROCEDURE person_aggregates_post_count ();
+
+CREATE OR REPLACE TRIGGER person_aggregates_post_insert
+    AFTER INSERT ON post
+    REFERENCING NEW TABLE AS new_post
+    FOR EACH STATEMENT
+    EXECUTE PROCEDURE person_aggregates_post_insert ();
+
 -- Avoid running hash function and random number generation for default ap_id
 CREATE SEQUENCE IF NOT EXISTS changeme_seq AS bigint CYCLE;
 
diff --git a/scripts/start_dev_db.sh b/scripts/start_dev_db.sh
index 839b68b5e..f6b4e2a9a 100644
--- a/scripts/start_dev_db.sh
+++ b/scripts/start_dev_db.sh
@@ -2,12 +2,14 @@
 export PGDATA="$PWD/dev_pgdata"
 export PGHOST=$PWD
-export LEMMY_DATABASE_URL="postgresql://lemmy:password@/lemmy?host=$PWD"
+export DATABASE_URL="postgresql://lemmy:password@/lemmy?host=$PWD"
+export LEMMY_DATABASE_URL=$DATABASE_URL
 
 # If cluster exists, stop the server and delete the cluster
 if [[ -d $PGDATA ]]
 then
   # Only stop server if it is running
+  pg_status_exit_code=0
   (pg_ctl status > /dev/null) || pg_status_exit_code=$?
   if [[ ${pg_status_exit_code} -ne 3 ]]
   then
@@ -27,7 +29,9 @@ config_args=(
   # Allow auto_explain to be turned on
   -c session_preload_libraries=auto_explain
-  #-c auto_explain.log_min_duration=0
+
+  # Log statements that run inside triggers and functions
+  -c auto_explain.log_nested_statements=on
 
   # Include actual row amounts and run times for query plan nodes
   -c auto_explain.log_analyze=on
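
Note on the technique: up.sql relies on PostgreSQL statement-level AFTER triggers with transition tables (REFERENCING NEW TABLE AS ..., available since PostgreSQL 10). The trigger function fires once per INSERT statement and sees all inserted rows as an ordinary relation, so a batch insert performs one aggregate UPDATE instead of one per row. A minimal self-contained sketch of the same pattern follows; the item and owner_counts tables are hypothetical stand-ins for post and the *_aggregates tables, not part of this patch:

CREATE TABLE item (
    id serial PRIMARY KEY,
    owner_id int NOT NULL
);

CREATE TABLE owner_counts (
    owner_id int PRIMARY KEY,
    item_count int NOT NULL DEFAULT 0
);

CREATE FUNCTION owner_counts_item_insert ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    -- new_item is the transition table holding every row inserted by the triggering statement
    UPDATE owner_counts
    SET item_count = item_count + item_group.count
    FROM (SELECT owner_id, count(*) FROM new_item GROUP BY owner_id) item_group
    WHERE owner_counts.owner_id = item_group.owner_id;
    RETURN NULL;
END
$$;

CREATE TRIGGER owner_counts_item_insert
    AFTER INSERT ON item
    REFERENCING NEW TABLE AS new_item
    FOR EACH STATEMENT
    EXECUTE PROCEDURE owner_counts_item_insert ();

-- A 10000-row batch insert now runs the aggregate UPDATE once, not 10000 times:
-- INSERT INTO item (owner_id) SELECT 1 FROM generate_series(1, 10000);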
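Note on verifying the speedup: start_dev_db.sh preloads auto_explain into every session (session_preload_libraries), and db_perf switches plan logging on per session before insertions. A sketch of the session-level settings involved (they require superuser, which the dev cluster user has; the exact values are illustrative, not mandated by the patch):

SET auto_explain.log_min_duration = 0;
-- ^ log the plan of every completed statement

SET auto_explain.log_analyze = on;
-- ^ include actual row counts and per-node run times

SET auto_explain.log_nested_statements = on;
-- ^ also log statements executed inside functions and triggers,
--   which is what exposes per-row vs. per-statement trigger cost

-- Any statement run afterwards has its plan written to $PGDATA/log:
SELECT count(*) FROM pg_class;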