improve diff check

This commit is contained in:
Dull Bananas 2024-05-20 21:15:08 +00:00
parent 9a528fb38a
commit 5596cb79cb

View file

@ -22,7 +22,7 @@ pub fn get_dump(conn: &mut PgConnection) -> String {
.expect("pg_export_snapshot failed"); .expect("pg_export_snapshot failed");
let snapshot_arg = format!("--snapshot={snapshot}");*/ let snapshot_arg = format!("--snapshot={snapshot}");*/
let output = Command::new("pg_dump") let output = Command::new("pg_dump")
.args(["--schema-only"]) .args(["--schema-only", "--no-owner", "--no-privileges", "--no-comments", "--no-publications", "--no-security-labels", "--no-subscriptions", "--no-table-access-method", "--no-tablespaces"])
.env("DATABASE_URL", SETTINGS.get_database_url()) .env("DATABASE_URL", SETTINGS.get_database_url())
.stderr(Stdio::inherit()) .stderr(Stdio::inherit())
.output() .output()
@ -45,6 +45,8 @@ pub fn check_dump_diff(conn: &mut PgConnection, mut before: String, name: &str)
// Ignore timestamp differences by removing timestamps // Ignore timestamp differences by removing timestamps
for dump in [&mut before, &mut after] { for dump in [&mut before, &mut after] {
for index in 0.. { for index in 0.. {
let Some(byte)=dump.as_bytes().get(index) else{break};
if !byte.is_ascii_digit() {continue;}
// Check for this pattern: 0000-00-00 00:00:00 // Check for this pattern: 0000-00-00 00:00:00
let Some(( let Some((
&[a0, a1, a2, a3, b0, a4, a5, b1, a6, a7, b2, a8, a9, b3, a10, a11, b4, a12, a13], &[a0, a1, a2, a3, b0, a4, a5, b1, a6, a7, b2, a8, a9, b3, a10, a11, b4, a12, a13],
@ -52,16 +54,16 @@ pub fn check_dump_diff(conn: &mut PgConnection, mut before: String, name: &str)
)) = dump )) = dump
.get(index..) .get(index..)
.and_then(|s| s.as_bytes().split_first_chunk::<PATTERN_LEN>()) .and_then(|s| s.as_bytes().split_first_chunk::<PATTERN_LEN>())
else { else {
break; break;
}; };
if [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13] if [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13]
.into_iter() .into_iter()
.all(|byte| byte.is_ascii_digit()) .all(|byte| byte.is_ascii_digit())
&& [b0, b1, b2, b3, b4] == *b"-- ::" && [b0, b1, b2, b3, b4] == *b"-- ::"
{ {
// Replace the part of the string that has the checked pattern and an optional fractional part // Replace the part of the string that has the checked pattern and an optional fractional part
let len_after = if let Some((b'.', s)) = remaining.split_first() { let len_after = if let Some((b'.', s)) = remaining.split_first() {
1 + s.iter().position(|c| !c.is_ascii_digit()).unwrap_or(0) 1 + s.iter().position(|c| !c.is_ascii_digit()).unwrap_or(0)
} else { } else {
0 0
@ -77,12 +79,14 @@ pub fn check_dump_diff(conn: &mut PgConnection, mut before: String, name: &str)
} }
let [before_chunks, after_chunks] = let [before_chunks, after_chunks] =
[&before, &after].map(|dump| chunks(dump).collect::<BTreeSet<_>>()); [&before, &after].map(|dump| chunks(dump).collect::<BTreeSet<_>>());
let only_b = before_chunks.difference(&after_chunks).copied().map(process_chunk).collect::<BTreeSet<_>>();
let only_a = after_chunks.difference(&before_chunks).copied().map(process_chunk).collect::<BTreeSet<_>>();
// todo dont collect only_in_before? // todo dont collect only_in_before?
let [mut only_in_before, mut only_in_after] = [ let [mut only_in_before, mut only_in_after] = [
before_chunks.difference(&after_chunks), only_b.difference(&only_a),
after_chunks.difference(&before_chunks), only_a.difference(&only_b),
] ]
.map(|chunks| { .map(|chunks| {
chunks chunks
@ -121,12 +125,15 @@ pub fn check_dump_diff(conn: &mut PgConnection, mut before: String, name: &str)
.iter() .iter()
.enumerate() .enumerate()
.max_by_key(|(_, (after_chunk, after_chunk_filtered))| { .max_by_key(|(_, (after_chunk, after_chunk_filtered))| {
if
after_chunk.split_once(|c:char|c.is_lowercase()).unwrap_or_default().0 !=
before_chunk.split_once(|c:char|c.is_lowercase()).unwrap_or_default().0 {0}else{
diff::chars(after_chunk_filtered, &before_chunk_filtered) diff::chars(after_chunk_filtered, &before_chunk_filtered)
.into_iter() .into_iter()
.filter(|i| matches!(i, diff::Result::Both(c, _) .filter(|i| matches!(i, diff::Result::Both(c, _)
// `is_lowercase` increases accuracy for some trigger function diffs // `is_lowercase` increases accuracy for some trigger function diffs
if c.is_lowercase() || c.is_numeric())) if c.is_lowercase() || c.is_numeric()))
.count() .count()}
}) })
.unwrap_or((0,&default)); .unwrap_or((0,&default));
@ -151,8 +158,60 @@ pub fn check_dump_diff(conn: &mut PgConnection, mut before: String, name: &str)
panic!("{output}"); panic!("{output}");
} }
/// Rewrites one statement of a schema dump so that a pure change in column order does not make
/// two otherwise identical statements compare as different.
///
/// * A statement starting with `CREATE TABLE ` has one trailing comma stripped from every line
///   and its lines sorted: the line starting with `C` first, lines starting with a space next
///   (ordered by their text), and lines starting with `)` last.
/// * A statement starting with `CREATE VIEW` or `CREATE OR REPLACE VIEW` gets the same treatment,
///   but only when every line looks like part of a plain `SELECT ... FROM ...;` (see
///   `is_simple_select_view`). Otherwise it is returned unchanged.
/// * Any other statement is returned unchanged, borrowed from `result`.
///
/// # Panics
///
/// Panics if a multi-line `CREATE TABLE ` statement contains a line whose first character is not
/// `C`, a space, or `)`.
fn process_chunk<'a>(result: &'a str) -> Cow<'a, str> {
    if result.starts_with("CREATE TABLE ") {
        // Allow column order to change
        Cow::Owned(sort_statement_lines(result, |line| {
            match line.chars().next() {
                Some('C') => 0,
                Some(' ') => 1,
                Some(')') => 2,
                _ => panic!("unrecognized part of `CREATE TABLE` statement: {line}"),
            }
        }))
    } else if (result.starts_with("CREATE VIEW") || result.starts_with("CREATE OR REPLACE VIEW"))
        && is_simple_select_view(result)
    {
        // Allow column order to change
        Cow::Owned(sort_statement_lines(result, |line| {
            match line.trim_start().chars().next() {
                Some('C') => 0,
                Some('S') => 1,
                Some('F') => 3,
                // Everything between the `SELECT` line and the `FROM` line
                _ => 2,
            }
        }))
    } else {
        Cow::Borrowed(result)
    }
}

/// Returns `true` when every line of a view definition, ignoring leading whitespace, is one of:
/// the first line starting with `C`, the second line starting with `S`, a line starting with `F`
/// that ends with `;`, or a line starting with a lowercase character.
fn is_simple_select_view(statement: &str) -> bool {
    statement.lines().enumerate().all(|(index, line)| {
        let line = line.trim_start();
        match (index, line.chars().next()) {
            (0, Some('C')) => true,                        // create
            (1, Some('S')) => true,                        // select
            (_, Some('F')) if line.ends_with(';') => true, // from
            (_, Some(c)) if c.is_lowercase() => true,      // column name
            _ => false,
        }
    })
}

/// Strips one trailing comma from each line of `statement`, sorts the lines by
/// `(placement(line), line)` and joins them back together with `\n`.
///
/// The comma is removed before sorting so that the last column (which has no comma) and the other
/// columns produce the same text regardless of which position they end up in.
fn sort_statement_lines(statement: &str, placement: impl Fn(&str) -> u8) -> String {
    let mut lines = statement
        .lines()
        .map(|line| line.strip_suffix(',').unwrap_or(line))
        .collect::<Vec<_>>();
    lines.sort_unstable_by_key(|line| (placement(*line), *line));
    lines.join("\n")
}
fn chunks(dump: &str) -> impl Iterator<Item = &str> {
let mut remaining = dump; let mut remaining = dump;
std::iter::from_fn(move || { std::iter::from_fn(move || {
remaining = remaining.trim_start(); remaining = remaining.trim_start();
@ -162,56 +221,7 @@ fn chunks<'a>(dump: &'a str) -> impl Iterator<Item = Cow<'a, str>> {
// `a` can't be empty because of trim_start // `a` can't be empty because of trim_start
let (result, after_result) = remaining.split_once("\n\n")?; let (result, after_result) = remaining.split_once("\n\n")?;
remaining = after_result; remaining = after_result;
Some(if result.starts_with("CREATE TABLE ") { Some(result)
// Allow column order to change
let mut lines = result
.lines()
.map(|line| line.strip_suffix(',').unwrap_or(line))
.collect::<Vec<_>>();
lines.sort_unstable_by_key(|line| -> (u8, &str) {
let placement = match line.chars().next() {
Some('C') => 0,
Some(' ') => 1,
Some(')') => 2,
_ => panic!("unrecognized part of `CREATE TABLE` statement: {line}"),
};
(placement, line)
});
Cow::Owned(lines.join("\n"))
} else if result.starts_with("CREATE VIEW") || result.starts_with("CREATE OR REPLACE VIEW") {
// Allow column order to change
let is_simple_select_statement = result
.lines()
.enumerate()
.all(|(i, mut line)| {
line = line.trim_start();
match (i, line.chars().next()) {
(0, Some('C')) => true, // create
(1, Some('S')) => true, // select
(_, Some('F')) if line.ends_with(';') => true, // from
(_, Some(c)) if c.is_lowercase() => true, // column name
_ => false
}
});
if is_simple_select_statement {
let mut lines = result
.lines()
.map(|line| line.strip_suffix(',').unwrap_or(line))
.collect::<Vec<_>>();
lines.sort_unstable_by_key(|line| -> (u8, &str) {
let placement = match line.trim_start().chars().next() {
Some('C') => 0,
Some('S') => 1,
Some('F') => 3,
_ => 2,
};
(placement, line)
});
Cow::Owned(lines.join("\n"))
}else{Cow::Borrowed(result)}
} else {
Cow::Borrowed(result)
})
}) })
} }