mirror of
https://github.com/LemmyNet/lemmy.git
synced 2025-01-07 10:42:19 +00:00
Make diff check work just like before
This commit is contained in:
parent
2aead25bc6
commit
8390c0eb30
4 changed files with 34 additions and 270 deletions
15
Cargo.lock
generated
15
Cargo.lock
generated
|
@ -1361,6 +1361,19 @@ version = "0.1.13"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
|
||||
|
||||
[[package]]
|
||||
name = "diffutils"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8d7ce619b5c0e13f7543dc2c203a7e6fa37e0111d876339aada7ec9540a58d5"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"diff",
|
||||
"regex",
|
||||
"same-file",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
|
@ -2627,7 +2640,7 @@ dependencies = [
|
|||
"diesel-derive-newtype",
|
||||
"diesel_ltree",
|
||||
"diesel_migrations",
|
||||
"diff",
|
||||
"diffutils",
|
||||
"futures-util",
|
||||
"i-love-jesus",
|
||||
"lemmy_utils",
|
||||
|
|
|
@ -86,4 +86,4 @@ tuplex = { workspace = true, optional = true }
|
|||
[dev-dependencies]
|
||||
serial_test = { workspace = true }
|
||||
pretty_assertions = { workspace = true }
|
||||
diff = "0.1.13"
|
||||
diffutils = "0.4.2"
|
||||
|
|
|
@ -60,7 +60,7 @@ const REPLACEABLE_SCHEMA_PATH: &str = "crates/db_schema/replaceable_schema";
|
|||
struct MigrationHarnessWrapper<'a> {
|
||||
conn: &'a mut PgConnection,
|
||||
#[cfg(test)]
|
||||
enable_diff_check: bool,
|
||||
diff_checked_migration_name: Option<String>,
|
||||
}
|
||||
|
||||
impl<'a> MigrationHarnessWrapper<'a> {
|
||||
|
@ -86,7 +86,7 @@ impl<'a> MigrationHarness<Pg> for MigrationHarnessWrapper<'a> {
|
|||
migration: &dyn Migration<Pg>,
|
||||
) -> diesel::migration::Result<MigrationVersion<'static>> {
|
||||
#[cfg(test)]
|
||||
if self.enable_diff_check {
|
||||
if self.diff_checked_migration_name == Some(migration.name().to_string()) {
|
||||
let before = diff_check::get_dump();
|
||||
|
||||
self.run_migration_inner(migration)?;
|
||||
|
@ -271,7 +271,11 @@ fn run_selected_migrations(
|
|||
let mut wrapper = MigrationHarnessWrapper {
|
||||
conn,
|
||||
#[cfg(test)]
|
||||
enable_diff_check: options.enable_diff_check,
|
||||
diff_checked_migration_name: options
|
||||
.enable_diff_check
|
||||
.then(|| diesel::migration::MigrationSource::<Pg>::migrations(&migrations()))
|
||||
.transpose()?
|
||||
.and_then(|migrations| Some(migrations.last()?.name().to_string())),
|
||||
};
|
||||
|
||||
if options.revert {
|
||||
|
@ -325,7 +329,8 @@ mod tests {
|
|||
// Start with consistent state by dropping everything
|
||||
conn.batch_execute("DROP OWNED BY CURRENT_USER;")?;
|
||||
|
||||
// Run all migrations and check for mistakes in down.sql files
|
||||
// Run all migrations, make sure the newest migration can be redone, and check the newest
|
||||
// down.sql file
|
||||
assert_eq!(
|
||||
run(o().run().enable_diff_check())?,
|
||||
ReplaceableSchemaRebuilt
|
||||
|
@ -345,10 +350,8 @@ mod tests {
|
|||
);
|
||||
assert_eq!(run(o().run().limit(1))?, ReplaceableSchemaRebuilt);
|
||||
|
||||
// Revert all migrations
|
||||
assert_eq!(run(o().revert())?, ReplaceableSchemaNotRebuilt);
|
||||
|
||||
// This should throw an error saying to use lemmy_server instead of diesel CLI
|
||||
conn.batch_execute("DROP OWNED BY CURRENT_USER;")?;
|
||||
assert!(matches!(
|
||||
conn.run_pending_migrations(migrations()),
|
||||
Err(e) if e.to_string().contains("lemmy_server")
|
||||
|
|
|
@ -1,10 +1,6 @@
|
|||
#![expect(clippy::expect_used)]
|
||||
use lemmy_utils::settings::SETTINGS;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
collections::btree_set::{self, BTreeSet},
|
||||
process::{Command, Stdio},
|
||||
};
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
// It's not possible to call `export_snapshot()` for each dump and run the dumps in parallel with
|
||||
// the `--snapshot` flag. Don't waste your time!!!!
|
||||
|
@ -16,21 +12,12 @@ pub fn get_dump() -> String {
|
|||
// Specify database URL
|
||||
"--dbname",
|
||||
&db_url,
|
||||
// Allow differences in row data and old fast tables
|
||||
"--schema-only",
|
||||
"--exclude-table=comment_aggregates_fast",
|
||||
"--exclude-table=community_aggregates_fast",
|
||||
"--exclude-table=post_aggregates_fast",
|
||||
"--exclude-table=user_fast",
|
||||
// Ignore some things to reduce the amount of queries done by pg_dump
|
||||
// Disable some things
|
||||
"--no-owner",
|
||||
"--no-privileges",
|
||||
"--no-comments",
|
||||
"--no-publications",
|
||||
"--no-security-labels",
|
||||
"--no-subscriptions",
|
||||
"--no-table-access-method",
|
||||
"--no-tablespaces",
|
||||
"--schema-only",
|
||||
"--no-sync",
|
||||
])
|
||||
.stderr(Stdio::inherit())
|
||||
.output()
|
||||
|
@ -42,251 +29,12 @@ pub fn get_dump() -> String {
|
|||
String::from_utf8(output.stdout).expect("pg_dump output is not valid UTF-8 text")
|
||||
}
|
||||
|
||||
const PATTERN_LEN: usize = 19;
|
||||
|
||||
pub fn check_dump_diff(before: String, after: String, label: &str) {
|
||||
// Performance optimization
|
||||
if after == before {
|
||||
return;
|
||||
}
|
||||
if before != after {
|
||||
let diff_bytes =
|
||||
diffutilslib::unified_diff(before.as_bytes(), after.as_bytes(), &Default::default());
|
||||
let diff = String::from_utf8_lossy(&diff_bytes);
|
||||
|
||||
// Borrow checker requires this to be a separate variable
|
||||
let normalized_chunk_vecs = [&before, &after]
|
||||
// Remove identical items
|
||||
.map(|dump| chunks(dump).collect::<BTreeSet<_>>())
|
||||
.differences()
|
||||
// Remove items without unwanted types of differences (if migrations are correct, then this
|
||||
// removes everything)
|
||||
.map(|chunks| chunks.map(|&i| normalize_chunk(i)).collect::<BTreeSet<_>>());
|
||||
let [only_in_before, only_in_after] = normalized_chunk_vecs // Imagine that this line doesn't exist
|
||||
.differences()
|
||||
.map(|chunks| chunks.map(|i| &**i).collect::<Vec<_>>());
|
||||
|
||||
if only_in_before.is_empty() && only_in_after.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Build the panic message
|
||||
|
||||
let after_has_more = only_in_before.len() < only_in_after.len();
|
||||
let [chunks, mut other_chunks] = if after_has_more {
|
||||
[only_in_before, only_in_after]
|
||||
} else {
|
||||
[only_in_after, only_in_before]
|
||||
};
|
||||
|
||||
let diffs = chunks
|
||||
.into_iter()
|
||||
.chain(std::iter::repeat(""))
|
||||
.map_while(|chunk| {
|
||||
let (most_similar_chunk_index, most_similar_chunk) = other_chunks
|
||||
.iter()
|
||||
.enumerate()
|
||||
.max_by_key(|(_, other_chunk)| {
|
||||
if sql_command_name(chunk) != sql_command_name(other_chunk) {
|
||||
0
|
||||
} else {
|
||||
similarity(chunk, other_chunk)
|
||||
}
|
||||
})?;
|
||||
|
||||
let diff_lines = if after_has_more {
|
||||
diff::lines(most_similar_chunk, chunk)
|
||||
} else {
|
||||
diff::lines(chunk, most_similar_chunk)
|
||||
};
|
||||
|
||||
other_chunks.swap_remove(most_similar_chunk_index);
|
||||
|
||||
Some(
|
||||
diff_lines
|
||||
.into_iter()
|
||||
.flat_map(|line| match line {
|
||||
diff::Result::Left(s) => ["- ", s, "\n"],
|
||||
diff::Result::Right(s) => ["+ ", s, "\n"],
|
||||
diff::Result::Both(s, _) => [" ", s, "\n"],
|
||||
})
|
||||
.chain(["\n"])
|
||||
.collect::<String>(),
|
||||
)
|
||||
});
|
||||
|
||||
panic!(
|
||||
"{}",
|
||||
std::iter::once(format!("{label}\n\n"))
|
||||
.chain(diffs)
|
||||
.collect::<String>()
|
||||
);
|
||||
}
|
||||
|
||||
trait Differences<T> {
|
||||
fn differences(&self) -> [btree_set::Difference<'_, T>; 2];
|
||||
}
|
||||
|
||||
impl<T: Ord> Differences<T> for [BTreeSet<T>; 2] {
|
||||
/// Items only in `a`, and items only in `b`
|
||||
fn differences(&self) -> [btree_set::Difference<'_, T>; 2] {
|
||||
let [a, b] = self;
|
||||
[a.difference(b), b.difference(a)]
|
||||
panic!("{label}\n\n{diff}");
|
||||
}
|
||||
}
|
||||
|
||||
fn sql_command_name(chunk: &str) -> &str {
|
||||
chunk
|
||||
.split_once(|c: char| c.is_lowercase())
|
||||
.unwrap_or_default()
|
||||
.0
|
||||
}
|
||||
|
||||
fn similarity(chunk: &str, other_chunk: &str) -> usize {
|
||||
diff::chars(chunk, other_chunk)
|
||||
.into_iter()
|
||||
.filter(|i| {
|
||||
match i {
|
||||
diff::Result::Both(c, _) => {
|
||||
// Prevent whitespace from affecting similarity level
|
||||
!c.is_whitespace()
|
||||
&& (
|
||||
// Increase accuracy for some trigger function diffs
|
||||
c.is_lowercase()
|
||||
// Preserve differences in names that contain a number
|
||||
|| c.is_numeric()
|
||||
)
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
})
|
||||
.count()
|
||||
}
|
||||
|
||||
fn normalize_chunk(chunk: &str) -> Cow<'_, str> {
|
||||
let mut chunk = Cow::Borrowed(chunk);
|
||||
|
||||
let stripped_lines = chunk
|
||||
.lines()
|
||||
.map(|line| line.strip_suffix(',').unwrap_or(line));
|
||||
|
||||
// Sort column names, so differences in column order are ignored
|
||||
if chunk.starts_with("CREATE TABLE ") {
|
||||
let mut lines = stripped_lines.collect::<Vec<_>>();
|
||||
|
||||
sort_within_sections(&mut lines, |line| {
|
||||
match line.chars().next() {
|
||||
// CREATE
|
||||
Some('C') => 0,
|
||||
// Indented column name
|
||||
Some(' ') => 1,
|
||||
// End
|
||||
Some(')') => 2,
|
||||
_ => panic!("unrecognized part of `CREATE TABLE` statement: {line}"),
|
||||
}
|
||||
});
|
||||
|
||||
chunk = Cow::Owned(lines.join("\n"));
|
||||
}
|
||||
|
||||
// Replace timestamps with a constant string, so differences in timestamps are ignored
|
||||
for index in 0.. {
|
||||
// Performance optimization
|
||||
let Some(byte) = chunk.as_bytes().get(index) else {
|
||||
break;
|
||||
};
|
||||
if !byte.is_ascii_digit() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for this pattern: 0000-00-00 00:00:00
|
||||
let Some((
|
||||
&[a0, a1, a2, a3, b0, a4, a5, b1, a6, a7, b2, a8, a9, b3, a10, a11, b4, a12, a13],
|
||||
remaining,
|
||||
)) = chunk
|
||||
.get(index..)
|
||||
.and_then(|s| s.as_bytes().split_first_chunk::<PATTERN_LEN>())
|
||||
else {
|
||||
break;
|
||||
};
|
||||
|
||||
if [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13]
|
||||
.into_iter()
|
||||
.all(|byte| byte.is_ascii_digit())
|
||||
&& [b0, b1, b2, b3, b4] == *b"-- ::"
|
||||
{
|
||||
// Replace the part of the string that has the checked pattern and an optional fractional part
|
||||
let len_after = if let Some((b'.', s)) = remaining.split_first() {
|
||||
1 + s.iter().position(|c| !c.is_ascii_digit()).unwrap_or(0)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
// Length of replacement string is likely to match previous string
|
||||
// (there's up to 6 digits after the decimal point)
|
||||
chunk.to_mut().replace_range(
|
||||
index..(index + PATTERN_LEN + len_after),
|
||||
"AAAAAAAAAAAAAAAAAAAAAAAAAA",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
chunk
|
||||
}
|
||||
|
||||
fn sort_within_sections<T: Ord + ?Sized>(vec: &mut [&T], mut section: impl FnMut(&T) -> u8) {
|
||||
vec.sort_unstable_by_key(|&i| (section(i), i));
|
||||
}
|
||||
|
||||
fn chunks(dump: &str) -> impl Iterator<Item = &str> {
|
||||
let mut remaining = dump;
|
||||
std::iter::from_fn(move || {
|
||||
remaining = remaining.trim_start();
|
||||
while let Some(s) = remove_skipped_item_from_beginning(remaining) {
|
||||
remaining = s.trim_start();
|
||||
}
|
||||
|
||||
// `trim_start` guarantees that `result` is not empty
|
||||
let (result, after_result) = remaining.split_once("\n\n")?;
|
||||
remaining = after_result;
|
||||
Some(result)
|
||||
})
|
||||
}
|
||||
|
||||
fn remove_skipped_item_from_beginning(s: &str) -> Option<&str> {
|
||||
// Skip commented line
|
||||
if let Some(after) = s.strip_prefix("--") {
|
||||
Some(after_first_occurence(after, "\n"))
|
||||
}
|
||||
// Skip old views and fast table triggers
|
||||
else if let Some(after) = s
|
||||
.strip_prefix("CREATE VIEW ")
|
||||
.or_else(|| s.strip_prefix("CREATE OR REPLACE VIEW "))
|
||||
.or_else(|| s.strip_prefix("CREATE MATERIALIZED VIEW "))
|
||||
.or_else(|| {
|
||||
s.strip_prefix("CREATE FUNCTION public.")
|
||||
.and_then(after_skipped_trigger_name)
|
||||
.and_then(|a| a.strip_prefix('('))
|
||||
})
|
||||
.or_else(|| {
|
||||
s.strip_prefix("CREATE TRIGGER ")
|
||||
.and_then(after_skipped_trigger_name)
|
||||
.and_then(|a| a.strip_prefix(' '))
|
||||
})
|
||||
{
|
||||
Some(after_first_occurence(after, "\n\n"))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn after_first_occurence<'a>(s: &'a str, pat: &str) -> &'a str {
|
||||
s.split_once(pat).unwrap_or_default().1
|
||||
}
|
||||
|
||||
fn after_skipped_trigger_name(s: &str) -> Option<&str> {
|
||||
s.strip_prefix("refresh_comment_like")
|
||||
.or_else(|| s.strip_prefix("refresh_comment"))
|
||||
.or_else(|| s.strip_prefix("refresh_community_follower"))
|
||||
.or_else(|| s.strip_prefix("refresh_community_user_ban"))
|
||||
.or_else(|| s.strip_prefix("refresh_community"))
|
||||
.or_else(|| s.strip_prefix("refresh_post_like"))
|
||||
.or_else(|| s.strip_prefix("refresh_post"))
|
||||
.or_else(|| s.strip_prefix("refresh_private_message"))
|
||||
.or_else(|| s.strip_prefix("refresh_user"))
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue