Make diff check work just like before

This commit is contained in:
Dull Bananas 2024-11-18 17:14:55 -07:00
parent 2aead25bc6
commit 8390c0eb30
4 changed files with 34 additions and 270 deletions

15
Cargo.lock generated
View file

@@ -1361,6 +1361,19 @@ version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "diffutils"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8d7ce619b5c0e13f7543dc2c203a7e6fa37e0111d876339aada7ec9540a58d5"
dependencies = [
"chrono",
"diff",
"regex",
"same-file",
"unicode-width",
]
[[package]] [[package]]
name = "digest" name = "digest"
version = "0.10.7" version = "0.10.7"
@@ -2627,7 +2640,7 @@ dependencies = [
"diesel-derive-newtype", "diesel-derive-newtype",
"diesel_ltree", "diesel_ltree",
"diesel_migrations", "diesel_migrations",
"diff", "diffutils",
"futures-util", "futures-util",
"i-love-jesus", "i-love-jesus",
"lemmy_utils", "lemmy_utils",

View file

@@ -86,4 +86,4 @@ tuplex = { workspace = true, optional = true }
[dev-dependencies] [dev-dependencies]
serial_test = { workspace = true } serial_test = { workspace = true }
pretty_assertions = { workspace = true } pretty_assertions = { workspace = true }
diff = "0.1.13" diffutils = "0.4.2"

View file

@@ -60,7 +60,7 @@ const REPLACEABLE_SCHEMA_PATH: &str = "crates/db_schema/replaceable_schema";
struct MigrationHarnessWrapper<'a> { struct MigrationHarnessWrapper<'a> {
conn: &'a mut PgConnection, conn: &'a mut PgConnection,
#[cfg(test)] #[cfg(test)]
enable_diff_check: bool, diff_checked_migration_name: Option<String>,
} }
impl<'a> MigrationHarnessWrapper<'a> { impl<'a> MigrationHarnessWrapper<'a> {
@@ -86,7 +86,7 @@ impl<'a> MigrationHarness<Pg> for MigrationHarnessWrapper<'a> {
migration: &dyn Migration<Pg>, migration: &dyn Migration<Pg>,
) -> diesel::migration::Result<MigrationVersion<'static>> { ) -> diesel::migration::Result<MigrationVersion<'static>> {
#[cfg(test)] #[cfg(test)]
if self.enable_diff_check { if self.diff_checked_migration_name == Some(migration.name().to_string()) {
let before = diff_check::get_dump(); let before = diff_check::get_dump();
self.run_migration_inner(migration)?; self.run_migration_inner(migration)?;
@@ -271,7 +271,11 @@ fn run_selected_migrations(
let mut wrapper = MigrationHarnessWrapper { let mut wrapper = MigrationHarnessWrapper {
conn, conn,
#[cfg(test)] #[cfg(test)]
enable_diff_check: options.enable_diff_check, diff_checked_migration_name: options
.enable_diff_check
.then(|| diesel::migration::MigrationSource::<Pg>::migrations(&migrations()))
.transpose()?
.and_then(|migrations| Some(migrations.last()?.name().to_string())),
}; };
if options.revert { if options.revert {
@@ -325,7 +329,8 @@ mod tests {
// Start with consistent state by dropping everything // Start with consistent state by dropping everything
conn.batch_execute("DROP OWNED BY CURRENT_USER;")?; conn.batch_execute("DROP OWNED BY CURRENT_USER;")?;
// Run all migrations and check for mistakes in down.sql files // Run all migrations, make sure the newest migration can be redone, and check the newest
// down.sql file
assert_eq!( assert_eq!(
run(o().run().enable_diff_check())?, run(o().run().enable_diff_check())?,
ReplaceableSchemaRebuilt ReplaceableSchemaRebuilt
@ -345,10 +350,8 @@ mod tests {
); );
assert_eq!(run(o().run().limit(1))?, ReplaceableSchemaRebuilt); assert_eq!(run(o().run().limit(1))?, ReplaceableSchemaRebuilt);
// Revert all migrations
assert_eq!(run(o().revert())?, ReplaceableSchemaNotRebuilt);
// This should throw an error saying to use lemmy_server instead of diesel CLI // This should throw an error saying to use lemmy_server instead of diesel CLI
conn.batch_execute("DROP OWNED BY CURRENT_USER;")?;
assert!(matches!( assert!(matches!(
conn.run_pending_migrations(migrations()), conn.run_pending_migrations(migrations()),
Err(e) if e.to_string().contains("lemmy_server") Err(e) if e.to_string().contains("lemmy_server")

View file

@@ -1,10 +1,6 @@
#![expect(clippy::expect_used)] #![expect(clippy::expect_used)]
use lemmy_utils::settings::SETTINGS; use lemmy_utils::settings::SETTINGS;
use std::{ use std::process::{Command, Stdio};
borrow::Cow,
collections::btree_set::{self, BTreeSet},
process::{Command, Stdio},
};
// It's not possible to call `export_snapshot()` for each dump and run the dumps in parallel with // It's not possible to call `export_snapshot()` for each dump and run the dumps in parallel with
// the `--snapshot` flag. Don't waste your time!!!! // the `--snapshot` flag. Don't waste your time!!!!
@@ -16,21 +12,12 @@ pub fn get_dump() -> String {
// Specify database URL // Specify database URL
"--dbname", "--dbname",
&db_url, &db_url,
// Allow differences in row data and old fast tables // Disable some things
"--schema-only",
"--exclude-table=comment_aggregates_fast",
"--exclude-table=community_aggregates_fast",
"--exclude-table=post_aggregates_fast",
"--exclude-table=user_fast",
// Ignore some things to reduce the amount of queries done by pg_dump
"--no-owner", "--no-owner",
"--no-privileges", "--no-privileges",
"--no-comments",
"--no-publications",
"--no-security-labels",
"--no-subscriptions",
"--no-table-access-method", "--no-table-access-method",
"--no-tablespaces", "--schema-only",
"--no-sync",
]) ])
.stderr(Stdio::inherit()) .stderr(Stdio::inherit())
.output() .output()
@@ -42,251 +29,12 @@ pub fn get_dump() -> String {
String::from_utf8(output.stdout).expect("pg_dump output is not valid UTF-8 text") String::from_utf8(output.stdout).expect("pg_dump output is not valid UTF-8 text")
} }
const PATTERN_LEN: usize = 19;
pub fn check_dump_diff(before: String, after: String, label: &str) { pub fn check_dump_diff(before: String, after: String, label: &str) {
// Performance optimization if before != after {
if after == before { let diff_bytes =
return; diffutilslib::unified_diff(before.as_bytes(), after.as_bytes(), &Default::default());
} let diff = String::from_utf8_lossy(&diff_bytes);
// Borrow checker requires this to be a separate variable panic!("{label}\n\n{diff}");
let normalized_chunk_vecs = [&before, &after]
// Remove identical items
.map(|dump| chunks(dump).collect::<BTreeSet<_>>())
.differences()
// Remove items without unwanted types of differences (if migrations are correct, then this
// removes everything)
.map(|chunks| chunks.map(|&i| normalize_chunk(i)).collect::<BTreeSet<_>>());
let [only_in_before, only_in_after] = normalized_chunk_vecs // Imagine that this line doesn't exist
.differences()
.map(|chunks| chunks.map(|i| &**i).collect::<Vec<_>>());
if only_in_before.is_empty() && only_in_after.is_empty() {
return;
}
// Build the panic message
let after_has_more = only_in_before.len() < only_in_after.len();
let [chunks, mut other_chunks] = if after_has_more {
[only_in_before, only_in_after]
} else {
[only_in_after, only_in_before]
};
let diffs = chunks
.into_iter()
.chain(std::iter::repeat(""))
.map_while(|chunk| {
let (most_similar_chunk_index, most_similar_chunk) = other_chunks
.iter()
.enumerate()
.max_by_key(|(_, other_chunk)| {
if sql_command_name(chunk) != sql_command_name(other_chunk) {
0
} else {
similarity(chunk, other_chunk)
}
})?;
let diff_lines = if after_has_more {
diff::lines(most_similar_chunk, chunk)
} else {
diff::lines(chunk, most_similar_chunk)
};
other_chunks.swap_remove(most_similar_chunk_index);
Some(
diff_lines
.into_iter()
.flat_map(|line| match line {
diff::Result::Left(s) => ["- ", s, "\n"],
diff::Result::Right(s) => ["+ ", s, "\n"],
diff::Result::Both(s, _) => [" ", s, "\n"],
})
.chain(["\n"])
.collect::<String>(),
)
});
panic!(
"{}",
std::iter::once(format!("{label}\n\n"))
.chain(diffs)
.collect::<String>()
);
}
trait Differences<T> {
fn differences(&self) -> [btree_set::Difference<'_, T>; 2];
}
impl<T: Ord> Differences<T> for [BTreeSet<T>; 2] {
/// Items only in `a`, and items only in `b`
fn differences(&self) -> [btree_set::Difference<'_, T>; 2] {
let [a, b] = self;
[a.difference(b), b.difference(a)]
} }
} }
fn sql_command_name(chunk: &str) -> &str {
chunk
.split_once(|c: char| c.is_lowercase())
.unwrap_or_default()
.0
}
fn similarity(chunk: &str, other_chunk: &str) -> usize {
diff::chars(chunk, other_chunk)
.into_iter()
.filter(|i| {
match i {
diff::Result::Both(c, _) => {
// Prevent whitespace from affecting similarity level
!c.is_whitespace()
&& (
// Increase accuracy for some trigger function diffs
c.is_lowercase()
// Preserve differences in names that contain a number
|| c.is_numeric()
)
}
_ => false,
}
})
.count()
}
fn normalize_chunk(chunk: &str) -> Cow<'_, str> {
let mut chunk = Cow::Borrowed(chunk);
let stripped_lines = chunk
.lines()
.map(|line| line.strip_suffix(',').unwrap_or(line));
// Sort column names, so differences in column order are ignored
if chunk.starts_with("CREATE TABLE ") {
let mut lines = stripped_lines.collect::<Vec<_>>();
sort_within_sections(&mut lines, |line| {
match line.chars().next() {
// CREATE
Some('C') => 0,
// Indented column name
Some(' ') => 1,
// End
Some(')') => 2,
_ => panic!("unrecognized part of `CREATE TABLE` statement: {line}"),
}
});
chunk = Cow::Owned(lines.join("\n"));
}
// Replace timestamps with a constant string, so differences in timestamps are ignored
for index in 0.. {
// Performance optimization
let Some(byte) = chunk.as_bytes().get(index) else {
break;
};
if !byte.is_ascii_digit() {
continue;
}
// Check for this pattern: 0000-00-00 00:00:00
let Some((
&[a0, a1, a2, a3, b0, a4, a5, b1, a6, a7, b2, a8, a9, b3, a10, a11, b4, a12, a13],
remaining,
)) = chunk
.get(index..)
.and_then(|s| s.as_bytes().split_first_chunk::<PATTERN_LEN>())
else {
break;
};
if [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13]
.into_iter()
.all(|byte| byte.is_ascii_digit())
&& [b0, b1, b2, b3, b4] == *b"-- ::"
{
// Replace the part of the string that has the checked pattern and an optional fractional part
let len_after = if let Some((b'.', s)) = remaining.split_first() {
1 + s.iter().position(|c| !c.is_ascii_digit()).unwrap_or(0)
} else {
0
};
// Length of replacement string is likely to match previous string
// (there's up to 6 digits after the decimal point)
chunk.to_mut().replace_range(
index..(index + PATTERN_LEN + len_after),
"AAAAAAAAAAAAAAAAAAAAAAAAAA",
);
}
}
chunk
}
fn sort_within_sections<T: Ord + ?Sized>(vec: &mut [&T], mut section: impl FnMut(&T) -> u8) {
vec.sort_unstable_by_key(|&i| (section(i), i));
}
fn chunks(dump: &str) -> impl Iterator<Item = &str> {
let mut remaining = dump;
std::iter::from_fn(move || {
remaining = remaining.trim_start();
while let Some(s) = remove_skipped_item_from_beginning(remaining) {
remaining = s.trim_start();
}
// `trim_start` guarantees that `result` is not empty
let (result, after_result) = remaining.split_once("\n\n")?;
remaining = after_result;
Some(result)
})
}
fn remove_skipped_item_from_beginning(s: &str) -> Option<&str> {
// Skip commented line
if let Some(after) = s.strip_prefix("--") {
Some(after_first_occurence(after, "\n"))
}
// Skip old views and fast table triggers
else if let Some(after) = s
.strip_prefix("CREATE VIEW ")
.or_else(|| s.strip_prefix("CREATE OR REPLACE VIEW "))
.or_else(|| s.strip_prefix("CREATE MATERIALIZED VIEW "))
.or_else(|| {
s.strip_prefix("CREATE FUNCTION public.")
.and_then(after_skipped_trigger_name)
.and_then(|a| a.strip_prefix('('))
})
.or_else(|| {
s.strip_prefix("CREATE TRIGGER ")
.and_then(after_skipped_trigger_name)
.and_then(|a| a.strip_prefix(' '))
})
{
Some(after_first_occurence(after, "\n\n"))
} else {
None
}
}
fn after_first_occurence<'a>(s: &'a str, pat: &str) -> &'a str {
s.split_once(pat).unwrap_or_default().1
}
fn after_skipped_trigger_name(s: &str) -> Option<&str> {
s.strip_prefix("refresh_comment_like")
.or_else(|| s.strip_prefix("refresh_comment"))
.or_else(|| s.strip_prefix("refresh_community_follower"))
.or_else(|| s.strip_prefix("refresh_community_user_ban"))
.or_else(|| s.strip_prefix("refresh_community"))
.or_else(|| s.strip_prefix("refresh_post_like"))
.or_else(|| s.strip_prefix("refresh_post"))
.or_else(|| s.strip_prefix("refresh_private_message"))
.or_else(|| s.strip_prefix("refresh_user"))
}