From 6b1b29419d33a9cbff9c45d980cb741249e7cc5f Mon Sep 17 00:00:00 2001 From: dullbananas Date: Wed, 8 Jan 2025 03:07:32 -0700 Subject: [PATCH] Add custom migration runner, forbid some `diesel migration` commands, fix old migrations (#4673) * Update schema.rs * rename * stuff * finish new implementation of schema_setup::run (not including revert, test, etc.) * fmt * refactor * fix sql * migriation run command * use trigger on migrations table * add Options with disable_migrations field for test * rename to enable_forbid_diesel_cli_trigger * fix * fix merge * diff_checker (partial) * Revert "diff_checker (partial)" This reverts commit 6709882e148ceff48d6f097895af18d13fbdf4bf. * Revert "Revert "diff_checker (partial)"" This reverts commit d4bdda5d11216f4acd13b9585d2392ca8c252a73. * diff check * improve schema diff * timestamp replacement * ignore column order * remove fedi_name default * stuff * improve diff * stuff * attempt parallel pg_dump * attempt 2 * Revert "attempt 2" This reverts commit a909d2d6438d0b53382f762cf916ee92286a9965. * Revert "attempt parallel pg_dump" This reverts commit 592a12795428000ab15b572f89cbd9c6e25fb76c. * improve diff check * finish fixing migrations * stuff * use advisory lock * stuff * Update lib.rs * fmt * fmt * clippy * Update diff_check.rs * Update .woodpecker.yml * Update lib.rs * Update lib.rs * Update lib.rs * Update .woodpecker.yml * Update .woodpecker.yml * Update lib.rs * re-run ci * fmt * fmt * Update .woodpecker.yml * Update .woodpecker.yml * create separate database in ci * Update .woodpecker.yml * Update .woodpecker.yml * Update .woodpecker.yml * Update .woodpecker.yml * try to fix env var * Update diff_check.rs * Remove condition that's not needed anymore * clippy * exclude views and fast tables * revert some migration changes * fix * fmt * re-attempt checking character after skipped trigger name, and make code less confusing * fmt * fix * rerun ci * rerun ci * fix strip_prefix order * fix weird big Cargo.lock change by running `git checkout upstream/main Cargo.lock` then letting it auto update again * fix * remove installation commands that were removed in main branch * Revert "remove installation commands that were removed in main branch" This reverts commit fd65234a760b8b1bb42bc5b59b0ec972b08ded13. * move create_database_user woodpecker step to make diff less weird * fix clippy * Make diff check work just like before * Move new migrations to the end * Revert changes to old migrations * don't assume that migrations are already sorted * retry CI * fix merge * find migrations dir in debug mode using CARGO_MANIFEST_DIR variable instead of current working directory * always use embedded migrations * improve doc comments for migration subcommand * clippy fix * move cfg(test) attribute to diff_check.rs * copy `o` variable instead of calling `o` function * use chrono::TimeDelta Display implementation to show migration duration --- .woodpecker.yml | 63 ++- Cargo.lock | 14 + crates/db_schema/Cargo.toml | 1 + crates/db_schema/src/lib.rs | 7 +- crates/db_schema/src/schema.rs | 8 + crates/db_schema/src/schema_setup.rs | 393 +++++++++++++++--- .../db_schema/src/schema_setup/diff_check.rs | 41 ++ crates/db_schema/src/utils.rs | 6 +- .../down.sql | 2 + .../up.sql | 33 ++ .../down.sql | 2 + .../up.sql | 12 + scripts/dump_schema.sh | 4 +- src/lib.rs | 76 +++- 14 files changed, 548 insertions(+), 114 deletions(-) create mode 100644 crates/db_schema/src/schema_setup/diff_check.rs create mode 100644 migrations/2024-11-18-012112_forbid_diesel_cli/down.sql create mode 100644 migrations/2024-11-18-012112_forbid_diesel_cli/up.sql create mode 100644 migrations/2024-11-18-012113_custom_migration_runner/down.sql create mode 100644 migrations/2024-11-18-012113_custom_migration_runner/up.sql diff --git a/.woodpecker.yml b/.woodpecker.yml index 2eb7d277e..fe549aa8a 100644 --- a/.woodpecker.yml +++ b/.woodpecker.yml @@ -104,6 +104,18 @@ steps: - cargo clippy --workspace --tests --all-targets -- -D warnings when: *slow_check_paths + # `DROP OWNED` doesn't work for default user + create_database_user: + image: postgres:16-alpine + environment: + PGUSER: postgres + PGPASSWORD: password + PGHOST: database + PGDATABASE: lemmy + commands: + - psql -c "CREATE USER lemmy WITH PASSWORD 'password' SUPERUSER;" + when: *slow_check_paths + cargo_test: image: *rust_image environment: @@ -113,6 +125,12 @@ steps: LEMMY_TEST_FAST_FEDERATION: "1" LEMMY_CONFIG_LOCATION: ../../config/config.hjson commands: + # Install pg_dump for the schema setup test (must match server version) + - apt update && apt install -y lsb-release + - sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' + - wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - + - apt update && apt install -y postgresql-client-16 + # Run tests - cargo test --workspace --no-fail-fast when: *slow_check_paths @@ -160,18 +178,6 @@ steps: - diff config/defaults.hjson config/defaults_current.hjson when: *slow_check_paths - check_diesel_schema: - image: *rust_image - environment: - CARGO_HOME: .cargo_home - DATABASE_URL: postgres://lemmy:password@database:5432/lemmy - commands: - - <<: *install_diesel_cli - - cp crates/db_schema/src/schema.rs tmp.schema - - diesel migration run - - diff tmp.schema crates/db_schema/src/schema.rs - when: *slow_check_paths - cargo_build: image: *rust_image environment: @@ -181,37 +187,19 @@ steps: - mv target/debug/lemmy_server target/lemmy_server when: *slow_check_paths - check_diesel_migration: - # TODO: use willsquire/diesel-cli image when shared libraries become optional in lemmy_server + check_diesel_schema: image: *rust_image environment: LEMMY_DATABASE_URL: postgres://lemmy:password@database:5432/lemmy + DATABASE_URL: postgres://lemmy:password@database:5432/lemmy RUST_BACKTRACE: "1" CARGO_HOME: .cargo_home - DATABASE_URL: postgres://lemmy:password@database:5432/lemmy - PGUSER: lemmy - PGPASSWORD: password - PGHOST: database - PGDATABASE: lemmy commands: - # Install diesel_cli + - cp crates/db_schema/src/schema.rs tmp.schema + - target/lemmy_server migration --all run - <<: *install_diesel_cli - # Run all migrations - - diesel migration run - - psql -c "DROP SCHEMA IF EXISTS r CASCADE;" - - pg_dump --no-owner --no-privileges --no-table-access-method --schema-only --no-sync -f before.sqldump - # Make sure that the newest migration is revertable without the `r` schema - - diesel migration redo - # Run schema setup twice, which fails on the 2nd time if `DROP SCHEMA IF EXISTS r CASCADE` drops the wrong things - - alias lemmy_schema_setup="target/lemmy_server --disable-scheduled-tasks --disable-http-server --disable-activity-sending" - - lemmy_schema_setup - - lemmy_schema_setup - # Make sure that the newest migration is revertable with the `r` schema - - diesel migration redo - # Check for changes in the schema, which would be caused by an incorrect migration - - psql -c "DROP SCHEMA IF EXISTS r CASCADE;" - - pg_dump --no-owner --no-privileges --no-table-access-method --schema-only --no-sync -f after.sqldump - - diff before.sqldump after.sqldump + - diesel print-schema + - diff tmp.schema crates/db_schema/src/schema.rs when: *slow_check_paths check_db_perf_tool: @@ -318,5 +306,6 @@ services: # 15-alpine image necessary because of diesel tests image: pgautoupgrade/pgautoupgrade:15-alpine environment: - POSTGRES_USER: lemmy + POSTGRES_DB: lemmy + POSTGRES_USER: postgres POSTGRES_PASSWORD: password diff --git a/Cargo.lock b/Cargo.lock index 3fa8ac655..7afc8ec0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1359,6 +1359,19 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "diffutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d7ce619b5c0e13f7543dc2c203a7e6fa37e0111d876339aada7ec9540a58d5" +dependencies = [ + "chrono", + "diff", + "regex", + "same-file", + "unicode-width", +] + [[package]] name = "digest" version = "0.10.7" @@ -2649,6 +2662,7 @@ dependencies = [ "diesel-derive-newtype", "diesel_ltree", "diesel_migrations", + "diffutils", "futures-util", "i-love-jesus", "lemmy_utils", diff --git a/crates/db_schema/Cargo.toml b/crates/db_schema/Cargo.toml index a511508f8..cf260f326 100644 --- a/crates/db_schema/Cargo.toml +++ b/crates/db_schema/Cargo.toml @@ -85,3 +85,4 @@ tuplex = { workspace = true, optional = true } [dev-dependencies] serial_test = { workspace = true } pretty_assertions = { workspace = true } +diffutils = "0.4.2" diff --git a/crates/db_schema/src/lib.rs b/crates/db_schema/src/lib.rs index 9a292cf82..19a106761 100644 --- a/crates/db_schema/src/lib.rs +++ b/crates/db_schema/src/lib.rs @@ -11,11 +11,6 @@ extern crate diesel_derive_newtype; #[macro_use] extern crate diesel_derive_enum; -// this is used in tests -#[cfg(feature = "full")] -#[macro_use] -extern crate diesel_migrations; - #[cfg(feature = "full")] #[macro_use] extern crate async_trait; @@ -44,7 +39,7 @@ pub mod traits; pub mod utils; #[cfg(feature = "full")] -mod schema_setup; +pub mod schema_setup; use serde::{Deserialize, Serialize}; use strum::{Display, EnumString}; diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index 7bdeeb6f2..59ba7be69 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -853,6 +853,13 @@ diesel::table! { } } +diesel::table! { + previously_run_sql (id) { + id -> Bool, + content -> Text, + } +} + diesel::table! { private_message (id) { id -> Int4, @@ -1161,6 +1168,7 @@ diesel::allow_tables_to_appear_in_same_query!( post_aggregates, post_report, post_tag, + previously_run_sql, private_message, private_message_report, received_activity, diff --git a/crates/db_schema/src/schema_setup.rs b/crates/db_schema/src/schema_setup.rs index fb4affa91..762612d18 100644 --- a/crates/db_schema/src/schema_setup.rs +++ b/crates/db_schema/src/schema_setup.rs @@ -1,65 +1,358 @@ -use anyhow::Context; -use diesel::{connection::SimpleConnection, Connection, PgConnection}; -use diesel_migrations::{EmbeddedMigrations, MigrationHarness}; -use lemmy_utils::error::LemmyError; +mod diff_check; -const MIGRATIONS: EmbeddedMigrations = embed_migrations!(); +use crate::schema::previously_run_sql; +use anyhow::{anyhow, Context}; +use chrono::TimeDelta; +use diesel::{ + connection::SimpleConnection, + dsl::exists, + migration::{Migration, MigrationVersion}, + pg::Pg, + select, + update, + BoolExpressionMethods, + Connection, + ExpressionMethods, + PgConnection, + QueryDsl, + RunQueryDsl, +}; +use diesel_migrations::MigrationHarness; +use lemmy_utils::{error::LemmyResult, settings::SETTINGS}; +use std::time::Instant; + +diesel::table! { + pg_namespace (nspname) { + nspname -> Text, + } +} + +fn migrations() -> diesel_migrations::EmbeddedMigrations { + // Using `const` here is required by the borrow checker + const MIGRATIONS: diesel_migrations::EmbeddedMigrations = diesel_migrations::embed_migrations!(); + MIGRATIONS +} /// This SQL code sets up the `r` schema, which contains things that can be safely dropped and /// replaced instead of being changed using migrations. It may not create or modify things outside /// of the `r` schema (indicated by `r.` before the name), unless a comment says otherwise. -/// -/// Currently, this code is only run after the server starts and there's at least 1 pending -/// migration to run. This means every time you change something here, you must also create a -/// migration (a blank up.sql file works fine). This behavior will be removed when we implement a -/// better way to avoid useless schema updates and locks. -/// -/// If you add something that depends on something (such as a table) created in a new migration, -/// then down.sql must use `CASCADE` when dropping it. This doesn't need to be fixed in old -/// migrations because the "replaceable-schema" migration runs `DROP SCHEMA IF EXISTS r CASCADE` in -/// down.sql. -const REPLACEABLE_SCHEMA: &[&str] = &[ - "DROP SCHEMA IF EXISTS r CASCADE;", - "CREATE SCHEMA r;", - include_str!("../replaceable_schema/utils.sql"), - include_str!("../replaceable_schema/triggers.sql"), -]; +fn replaceable_schema() -> String { + [ + "CREATE SCHEMA r;", + include_str!("../replaceable_schema/utils.sql"), + include_str!("../replaceable_schema/triggers.sql"), + ] + .join("\n") +} -pub fn run(db_url: &str) -> Result<(), LemmyError> { - // Migrations don't support async connection - let mut conn = PgConnection::establish(db_url).with_context(|| "Error connecting to database")?; +const REPLACEABLE_SCHEMA_PATH: &str = "crates/db_schema/replaceable_schema"; - // Run all pending migrations except for the newest one, then run the newest one in the same - // transaction as `REPLACEABLE_SCHEMA`. This code will be becone less hacky when the conditional - // setup of things in `REPLACEABLE_SCHEMA` is done without using the number of pending - // migrations. - println!("Running Database migrations (This may take a long time)..."); - let migrations = conn - .pending_migrations(MIGRATIONS) - .map_err(|e| anyhow::anyhow!("Couldn't determine pending migrations: {e}"))?; - for migration in migrations.iter().rev().skip(1).rev() { - conn - .run_migration(migration) - .map_err(|e| anyhow::anyhow!("Couldn't run DB Migrations: {e}"))?; +struct MigrationHarnessWrapper<'a> { + conn: &'a mut PgConnection, + #[cfg(test)] + diff_checked_migration_name: Option, +} + +impl MigrationHarnessWrapper<'_> { + fn run_migration_inner( + &mut self, + migration: &dyn Migration, + ) -> diesel::migration::Result> { + let start_time = Instant::now(); + + let result = self.conn.run_migration(migration); + + let duration = TimeDelta::from_std(start_time.elapsed()) + .map(|d| d.to_string()) + .unwrap_or_default(); + let name = migration.name(); + println!("{duration} run {name}"); + + result } - conn.transaction::<_, LemmyError, _>(|conn| { - if let Some(migration) = migrations.last() { - // Migration is run with a savepoint since there's already a transaction - conn - .run_migration(migration) - .map_err(|e| anyhow::anyhow!("Couldn't run DB Migrations: {e}"))?; - } else if !cfg!(debug_assertions) { - // In production, skip running `REPLACEABLE_SCHEMA` to avoid locking things in the schema. In - // CI, always run it because `diesel migration` commands would otherwise prevent it. - return Ok(()); +} + +impl MigrationHarness for MigrationHarnessWrapper<'_> { + fn run_migration( + &mut self, + migration: &dyn Migration, + ) -> diesel::migration::Result> { + #[cfg(test)] + if self.diff_checked_migration_name == Some(migration.name().to_string()) { + let before = diff_check::get_dump(); + + self.run_migration_inner(migration)?; + self.revert_migration(migration)?; + + let after = diff_check::get_dump(); + + diff_check::check_dump_diff( + after, + before, + &format!( + "These changes need to be applied in migrations/{}/down.sql:", + migration.name() + ), + ); } + + self.run_migration_inner(migration) + } + + fn revert_migration( + &mut self, + migration: &dyn Migration, + ) -> diesel::migration::Result> { + let start_time = Instant::now(); + + let result = self.conn.revert_migration(migration); + + let duration = TimeDelta::from_std(start_time.elapsed()) + .map(|d| d.to_string()) + .unwrap_or_default(); + let name = migration.name(); + println!("{duration} revert {name}"); + + result + } + + fn applied_migrations(&mut self) -> diesel::migration::Result>> { + self.conn.applied_migrations() + } +} + +#[derive(Default, Clone, Copy)] +pub struct Options { + #[cfg(test)] + enable_diff_check: bool, + revert: bool, + run: bool, + limit: Option, +} + +impl Options { + #[cfg(test)] + fn enable_diff_check(mut self) -> Self { + self.enable_diff_check = true; + self + } + + pub fn run(mut self) -> Self { + self.run = true; + self + } + + pub fn revert(mut self) -> Self { + self.revert = true; + self + } + + pub fn limit(mut self, limit: u64) -> Self { + self.limit = Some(limit); + self + } +} + +/// Checked by tests +#[derive(PartialEq, Eq, Debug)] +pub enum Branch { + EarlyReturn, + ReplaceableSchemaRebuilt, + ReplaceableSchemaNotRebuilt, +} + +pub fn run(options: Options) -> LemmyResult { + let db_url = SETTINGS.get_database_url(); + + // Migrations don't support async connection, and this function doesn't need to be async + let mut conn = PgConnection::establish(&db_url)?; + + // If possible, skip getting a lock and recreating the "r" schema, so + // lemmy_server processes in a horizontally scaled setup can start without causing locks + if !options.revert + && options.run + && options.limit.is_none() + && !conn + .has_pending_migration(migrations()) + .map_err(convert_err)? + { + // The condition above implies that the migration that creates the previously_run_sql table was + // already run + let sql_unchanged = exists( + previously_run_sql::table.filter(previously_run_sql::content.eq(replaceable_schema())), + ); + + let schema_exists = exists(pg_namespace::table.find("r")); + + if select(sql_unchanged.and(schema_exists)).get_result(&mut conn)? { + return Ok(Branch::EarlyReturn); + } + } + + // Block concurrent attempts to run migrations until `conn` is closed, and disable the + // trigger that prevents the Diesel CLI from running migrations + println!("Waiting for lock..."); + conn.batch_execute("SELECT pg_advisory_lock(0);")?; + println!("Running Database migrations (This may take a long time)..."); + + // Drop `r` schema, so migrations don't need to be made to work both with and without things in + // it existing + revert_replaceable_schema(&mut conn)?; + + run_selected_migrations(&mut conn, &options).map_err(convert_err)?; + + // Only run replaceable_schema if newest migration was applied + let output = if (options.run && options.limit.is_none()) + || !conn + .has_pending_migration(migrations()) + .map_err(convert_err)? + { + #[cfg(test)] + if options.enable_diff_check { + let before = diff_check::get_dump(); + + run_replaceable_schema(&mut conn)?; + revert_replaceable_schema(&mut conn)?; + + let after = diff_check::get_dump(); + + diff_check::check_dump_diff(before, after, "The code in crates/db_schema/replaceable_schema incorrectly created or modified things outside of the `r` schema, causing these changes to be left behind after dropping the schema:"); + } + + run_replaceable_schema(&mut conn)?; + + Branch::ReplaceableSchemaRebuilt + } else { + Branch::ReplaceableSchemaNotRebuilt + }; + + println!("Database migrations complete."); + + Ok(output) +} + +fn run_replaceable_schema(conn: &mut PgConnection) -> LemmyResult<()> { + conn.transaction(|conn| { conn - .batch_execute(&REPLACEABLE_SCHEMA.join("\n")) - .context("Couldn't run SQL files in crates/db_schema/replaceable_schema")?; + .batch_execute(&replaceable_schema()) + .with_context(|| format!("Failed to run SQL files in {REPLACEABLE_SCHEMA_PATH}"))?; + + let num_rows_updated = update(previously_run_sql::table) + .set(previously_run_sql::content.eq(replaceable_schema())) + .execute(conn)?; + + debug_assert_eq!(num_rows_updated, 1); Ok(()) - })?; - println!("Database migrations complete."); + }) +} + +fn revert_replaceable_schema(conn: &mut PgConnection) -> LemmyResult<()> { + conn + .batch_execute("DROP SCHEMA IF EXISTS r CASCADE;") + .with_context(|| format!("Failed to revert SQL files in {REPLACEABLE_SCHEMA_PATH}"))?; + + // Value in `previously_run_sql` table is not set here because the table might not exist, + // and that's fine because the existence of the `r` schema is also checked Ok(()) } + +fn run_selected_migrations( + conn: &mut PgConnection, + options: &Options, +) -> diesel::migration::Result<()> { + let mut wrapper = MigrationHarnessWrapper { + conn, + #[cfg(test)] + diff_checked_migration_name: options + .enable_diff_check + .then(|| diesel::migration::MigrationSource::::migrations(&migrations())) + .transpose()? + // Get the migration with the highest version + .and_then(|migrations| { + migrations + .into_iter() + .map(|migration| migration.name().to_string()) + .max() + }), + }; + + if options.revert { + if let Some(limit) = options.limit { + for _ in 0..limit { + wrapper.revert_last_migration(migrations())?; + } + } else { + wrapper.revert_all_migrations(migrations())?; + } + } + + if options.run { + if let Some(limit) = options.limit { + for _ in 0..limit { + wrapper.run_next_migration(migrations())?; + } + } else { + wrapper.run_pending_migrations(migrations())?; + } + } + + Ok(()) +} + +/// Makes `diesel::migration::Result` work with `anyhow` and `LemmyError` +fn convert_err(e: Box) -> anyhow::Error { + anyhow!(e) +} + +#[cfg(test)] +mod tests { + use super::{ + Branch::{EarlyReturn, ReplaceableSchemaNotRebuilt, ReplaceableSchemaRebuilt}, + *, + }; + use lemmy_utils::{error::LemmyResult, settings::SETTINGS}; + use serial_test::serial; + + #[test] + #[serial] + fn test_schema_setup() -> LemmyResult<()> { + let o = Options::default(); + let db_url = SETTINGS.get_database_url(); + let mut conn = PgConnection::establish(&db_url)?; + + // Start with consistent state by dropping everything + conn.batch_execute("DROP OWNED BY CURRENT_USER;")?; + + // Run all migrations, make sure the newest migration can be redone, and check the newest + // down.sql file + assert_eq!(run(o.run().enable_diff_check())?, ReplaceableSchemaRebuilt); + + // Check for early return + assert_eq!(run(o.run())?, EarlyReturn); + + // Test `limit` + assert_eq!(run(o.revert().limit(1))?, ReplaceableSchemaNotRebuilt); + assert_eq!( + conn + .pending_migrations(migrations()) + .map_err(convert_err)? + .len(), + 1 + ); + assert_eq!(run(o.run().limit(1))?, ReplaceableSchemaRebuilt); + + // This should throw an error saying to use lemmy_server instead of diesel CLI + conn.batch_execute("DROP OWNED BY CURRENT_USER;")?; + assert!(matches!( + conn.run_pending_migrations(migrations()), + Err(e) if e.to_string().contains("lemmy_server") + )); + + // Diesel CLI's way of running migrations shouldn't break the custom migration runner + assert_eq!(run(o.run())?, ReplaceableSchemaRebuilt); + + Ok(()) + } +} diff --git a/crates/db_schema/src/schema_setup/diff_check.rs b/crates/db_schema/src/schema_setup/diff_check.rs new file mode 100644 index 000000000..0268d4dbc --- /dev/null +++ b/crates/db_schema/src/schema_setup/diff_check.rs @@ -0,0 +1,41 @@ +#![cfg(test)] +#![expect(clippy::expect_used)] +use lemmy_utils::settings::SETTINGS; +use std::process::{Command, Stdio}; + +// It's not possible to call `export_snapshot()` for each dump and run the dumps in parallel with +// the `--snapshot` flag. Don't waste your time!!!! + +pub fn get_dump() -> String { + let db_url = SETTINGS.get_database_url(); + let output = Command::new("pg_dump") + .args([ + // Specify database URL + "--dbname", + &db_url, + // Disable some things + "--no-owner", + "--no-privileges", + "--no-table-access-method", + "--schema-only", + "--no-sync", + ]) + .stderr(Stdio::inherit()) + .output() + .expect("failed to start pg_dump process"); + + // TODO: use exit_ok method when it's stable + assert!(output.status.success()); + + String::from_utf8(output.stdout).expect("pg_dump output is not valid UTF-8 text") +} + +pub fn check_dump_diff(before: String, after: String, label: &str) { + if before != after { + let diff_bytes = + diffutilslib::unified_diff(before.as_bytes(), after.as_bytes(), &Default::default()); + let diff = String::from_utf8_lossy(&diff_bytes); + + panic!("{label}\n\n{diff}"); + } +} diff --git a/crates/db_schema/src/utils.rs b/crates/db_schema/src/utils.rs index 5bbf007ae..043f669f3 100644 --- a/crates/db_schema/src/utils.rs +++ b/crates/db_schema/src/utils.rs @@ -1,6 +1,6 @@ pub mod uplete; -use crate::{newtypes::DbUrl, CommentSortType, PostSortType}; +use crate::{newtypes::DbUrl, schema_setup, CommentSortType, PostSortType}; use chrono::TimeDelta; use deadpool::Runtime; use diesel::{ @@ -475,7 +475,7 @@ pub fn build_db_pool() -> LemmyResult { // provide a setup function which handles creating the connection let mut config = ManagerConfig::default(); config.custom_setup = Box::new(establish_connection); - let manager = AsyncDieselConnectionManager::::new_with_config(&db_url, config); + let manager = AsyncDieselConnectionManager::::new_with_config(db_url, config); let pool = Pool::builder(manager) .max_size(SETTINGS.database.pool_size) .runtime(Runtime::Tokio1) @@ -493,7 +493,7 @@ pub fn build_db_pool() -> LemmyResult { })) .build()?; - crate::schema_setup::run(&db_url)?; + schema_setup::run(schema_setup::Options::default().run())?; Ok(pool) } diff --git a/migrations/2024-11-18-012112_forbid_diesel_cli/down.sql b/migrations/2024-11-18-012112_forbid_diesel_cli/down.sql new file mode 100644 index 000000000..d6e8132be --- /dev/null +++ b/migrations/2024-11-18-012112_forbid_diesel_cli/down.sql @@ -0,0 +1,2 @@ +DROP FUNCTION forbid_diesel_cli CASCADE; + diff --git a/migrations/2024-11-18-012112_forbid_diesel_cli/up.sql b/migrations/2024-11-18-012112_forbid_diesel_cli/up.sql new file mode 100644 index 000000000..a2f839156 --- /dev/null +++ b/migrations/2024-11-18-012112_forbid_diesel_cli/up.sql @@ -0,0 +1,33 @@ +-- This trigger prevents using the Diesel CLI to run or revert migrations, so the custom migration runner +-- can drop and recreate the `r` schema for new migrations. +-- +-- This migration being seperate from the next migration (created in the same PR) guarantees that the +-- Diesel CLI will fail to bring the number of pending migrations to 0, which is one of the conditions +-- required to skip running replaceable_schema. +-- +-- If the Diesel CLI could run or revert migrations, this scenario would be possible: +-- +-- Run `diesel migration redo` when the newest migration has a new table with triggers. End up with triggers +-- being dropped and not replaced because triggers are created outside of up.sql. The custom migration runner +-- sees that there are no pending migrations and the value in the `previously_run_sql` trigger is correct, so +-- it doesn't rebuild the `r` schema. There is now incorrect behavior but no error messages. +CREATE FUNCTION forbid_diesel_cli () + RETURNS TRIGGER + LANGUAGE plpgsql + AS $$ +BEGIN + IF NOT EXISTS ( + SELECT + FROM + pg_locks + WHERE (locktype, pid, objid) = ('advisory', pg_backend_pid(), 0)) THEN +RAISE 'migrations must be managed using lemmy_server instead of diesel CLI'; +END IF; + RETURN NULL; +END; +$$; + +CREATE TRIGGER forbid_diesel_cli + BEFORE INSERT OR UPDATE OR DELETE OR TRUNCATE ON __diesel_schema_migrations + EXECUTE FUNCTION forbid_diesel_cli (); + diff --git a/migrations/2024-11-18-012113_custom_migration_runner/down.sql b/migrations/2024-11-18-012113_custom_migration_runner/down.sql new file mode 100644 index 000000000..6ea5a2e22 --- /dev/null +++ b/migrations/2024-11-18-012113_custom_migration_runner/down.sql @@ -0,0 +1,2 @@ +DROP TABLE previously_run_sql; + diff --git a/migrations/2024-11-18-012113_custom_migration_runner/up.sql b/migrations/2024-11-18-012113_custom_migration_runner/up.sql new file mode 100644 index 000000000..61985c2cf --- /dev/null +++ b/migrations/2024-11-18-012113_custom_migration_runner/up.sql @@ -0,0 +1,12 @@ +DROP SCHEMA IF EXISTS r CASCADE; + +CREATE TABLE previously_run_sql ( + -- For compatibility with Diesel + id boolean PRIMARY KEY, + -- Too big to be used as primary key + content text NOT NULL +); + +INSERT INTO previously_run_sql (id, content) + VALUES (TRUE, ''); + diff --git a/scripts/dump_schema.sh b/scripts/dump_schema.sh index c32cf20e5..da34defb7 100755 --- a/scripts/dump_schema.sh +++ b/scripts/dump_schema.sh @@ -9,8 +9,8 @@ cd "$CWD/../" source scripts/start_dev_db.sh -diesel migration run -pg_dump --no-owner --no-privileges --no-table-access-method --schema-only --no-sync -f schema.sqldump +cargo run --package lemmy_server -- migration run +pg_dump --no-owner --no-privileges --no-table-access-method --schema-only --exclude-schema=r --no-sync -f schema.sqldump pg_ctl stop rm -rf $PGDATA diff --git a/src/lib.rs b/src/lib.rs index f344f2927..2f4af9ea3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,7 @@ use actix_web::{ HttpServer, }; use actix_web_prom::PrometheusMetricsBuilder; -use clap::Parser; +use clap::{Parser, Subcommand}; use lemmy_api_common::{ context::LemmyContext, lemmy_db_views::structs::SiteView, @@ -34,7 +34,7 @@ use lemmy_apub::{ VerifyUrlData, FEDERATION_HTTP_FETCH_LIMIT, }; -use lemmy_db_schema::{source::secret::Secret, utils::build_db_pool}; +use lemmy_db_schema::{schema_setup, source::secret::Secret, utils::build_db_pool}; use lemmy_federate::{Opts, SendManager}; use lemmy_routes::{feeds, images, nodeinfo, webfinger}; use lemmy_utils::{ @@ -103,6 +103,31 @@ pub struct CmdArgs { /// If set, make sure to set --federate-process-index differently for each. #[arg(long, default_value_t = 1, env = "LEMMY_FEDERATE_PROCESS_COUNT")] federate_process_count: i32, + #[command(subcommand)] + subcommand: Option, +} + +#[derive(Subcommand, Debug)] +enum CmdSubcommand { + /// Do something with migrations, then exit. + Migration { + #[command(subcommand)] + subcommand: MigrationSubcommand, + /// Stop after there's no remaining migrations. + #[arg(long, default_value_t = false)] + all: bool, + /// Stop after the given number of migrations. + #[arg(long, default_value_t = 1)] + number: u64, + }, +} + +#[derive(Subcommand, Debug)] +enum MigrationSubcommand { + /// Run up.sql for pending migrations, oldest to newest. + Run, + /// Run down.sql for non-pending migrations, newest to oldest. + Revert, } /// Placing the main function in lib.rs allows other crates to import it and embed Lemmy @@ -110,6 +135,26 @@ pub async fn start_lemmy_server(args: CmdArgs) -> LemmyResult<()> { // Print version number to log println!("Starting Lemmy v{VERSION}"); + if let Some(CmdSubcommand::Migration { + subcommand, + all, + number, + }) = args.subcommand + { + let mut options = match subcommand { + MigrationSubcommand::Run => schema_setup::Options::default().run(), + MigrationSubcommand::Revert => schema_setup::Options::default().revert(), + }; + + if !all { + options = options.limit(number); + } + + schema_setup::run(options)?; + + return Ok(()); + } + // return error 503 while running db migrations and startup tasks let mut startup_server_handle = None; if !args.disable_http_server { @@ -186,10 +231,11 @@ pub async fn start_lemmy_server(args: CmdArgs) -> LemmyResult<()> { request_data.reset_request_count(), )); - let scheduled_tasks = (!args.disable_scheduled_tasks).then(|| { + if !args.disable_scheduled_tasks { // Schedules various cleanup tasks for the DB - tokio::task::spawn(scheduled_tasks::setup(request_data.reset_request_count())) - }); + let _scheduled_tasks = + tokio::task::spawn(scheduled_tasks::setup(request_data.reset_request_count())); + } let server = if !args.disable_http_server { if let Some(startup_server_handle) = startup_server_handle { @@ -227,17 +273,15 @@ pub async fn start_lemmy_server(args: CmdArgs) -> LemmyResult<()> { let mut interrupt = tokio::signal::unix::signal(SignalKind::interrupt())?; let mut terminate = tokio::signal::unix::signal(SignalKind::terminate())?; - if server.is_some() || federate.is_some() || scheduled_tasks.is_some() { - tokio::select! { - _ = tokio::signal::ctrl_c() => { - tracing::warn!("Received ctrl-c, shutting down gracefully..."); - } - _ = interrupt.recv() => { - tracing::warn!("Received interrupt, shutting down gracefully..."); - } - _ = terminate.recv() => { - tracing::warn!("Received terminate, shutting down gracefully..."); - } + tokio::select! { + _ = tokio::signal::ctrl_c() => { + tracing::warn!("Received ctrl-c, shutting down gracefully..."); + } + _ = interrupt.recv() => { + tracing::warn!("Received interrupt, shutting down gracefully..."); + } + _ = terminate.recv() => { + tracing::warn!("Received terminate, shutting down gracefully..."); } } if let Some(server) = server {