mirror of
https://github.com/LemmyNet/lemmy.git
synced 2024-12-04 18:21:19 +00:00
293 lines
8.2 KiB
Rust
293 lines
8.2 KiB
Rust
use lemmy_utils::settings::SETTINGS;
|
|
use std::{
|
|
borrow::Cow,
|
|
collections::btree_set::{self, BTreeSet},
|
|
process::{Command, Stdio},
|
|
};
|
|
|
|
// It's not possible to call `export_snapshot()` for each dump and run the dumps in parallel with
|
|
// the `--snapshot` flag. Don't waste your time!!!!
|
|
|
|
pub fn get_dump() -> String {
|
|
let output = Command::new("pg_dump")
|
|
.args([
|
|
"--schema-only",
|
|
"--no-owner",
|
|
"--no-privileges",
|
|
"--no-comments",
|
|
"--no-publications",
|
|
"--no-security-labels",
|
|
"--no-subscriptions",
|
|
"--no-table-access-method",
|
|
"--no-tablespaces",
|
|
])
|
|
.env("DATABASE_URL", SETTINGS.get_database_url())
|
|
.stderr(Stdio::inherit())
|
|
.output()
|
|
.expect("failed to start pg_dump process");
|
|
|
|
// TODO: use exit_ok method when it's stable
|
|
assert!(output.status.success());
|
|
|
|
String::from_utf8(output.stdout).expect("pg_dump output is not valid UTF-8 text")
|
|
}
|
|
|
|
const PATTERN_LEN: usize = 19;
|
|
|
|
pub fn check_dump_diff(before: String, after: String, label: &str) {
|
|
// Performance optimization
|
|
if after == before {
|
|
return;
|
|
}
|
|
|
|
let normalized_chunk_vecs = [&before, &after]
|
|
// Remove identical items
|
|
.map(|dump| chunks(dump).collect::<BTreeSet<_>>())
|
|
.differences()
|
|
// Remove items without unwanted types of differences (if migrations are correct, then this
|
|
// removes everything)
|
|
.map(|chunks| chunks.map(|&i| normalize_chunk(i)).collect::<BTreeSet<_>>());
|
|
|
|
let [only_in_before, only_in_after] = normalized_chunk_vecs
|
|
.differences()
|
|
.map(|chunks| chunks.map(|i| &**i).collect::<Vec<_>>());
|
|
|
|
if only_in_before.is_empty() && only_in_after.is_empty() {
|
|
return;
|
|
}
|
|
|
|
// Build the panic message
|
|
|
|
let after_has_more = only_in_before.len() < only_in_after.len();
|
|
let [chunks, mut other_chunks] = if after_has_more {
|
|
[only_in_before, only_in_after]
|
|
} else {
|
|
[only_in_after, only_in_before]
|
|
};
|
|
|
|
let diffs = chunks
|
|
.into_iter()
|
|
.chain(std::iter::repeat(""))
|
|
.map_while(|chunk| {
|
|
let (most_similar_chunk_index, most_similar_chunk) = other_chunks
|
|
.iter()
|
|
.enumerate()
|
|
.max_by_key(|(_, other_chunk)| {
|
|
if sql_command_name(chunk) != sql_command_name(other_chunk) {
|
|
0
|
|
} else {
|
|
similarity(chunk, other_chunk)
|
|
}
|
|
})?;
|
|
|
|
let lines = if after_has_more {
|
|
diff::lines(most_similar_chunk, chunk)
|
|
} else {
|
|
diff::lines(chunk, most_similar_chunk)
|
|
};
|
|
|
|
other_chunks.swap_remove(most_similar_chunk_index);
|
|
|
|
Some(
|
|
lines
|
|
.into_iter()
|
|
.map(|line| {
|
|
Cow::Owned(match line {
|
|
diff::Result::Left(s) => format!("- {s}\n"),
|
|
diff::Result::Right(s) => format!("+ {s}\n"),
|
|
diff::Result::Both(s, _) => format!(" {s}\n"),
|
|
})
|
|
})
|
|
.chain([Cow::Borrowed("\n")])
|
|
.collect::<String>(),
|
|
)
|
|
});
|
|
|
|
panic!(
|
|
"{}",
|
|
std::iter::once(format!("{label}\n\n"))
|
|
.chain(diffs)
|
|
.collect::<String>()
|
|
);
|
|
}
|
|
|
|
trait Differences<T> {
|
|
fn differences(&self) -> [btree_set::Difference<'_, T>; 2];
|
|
}
|
|
|
|
impl<T: Ord> Differences<T> for [BTreeSet<T>; 2] {
|
|
/// Items only in `a`, and items only in `b`
|
|
fn differences(&self) -> [btree_set::Difference<'_, T>; 2] {
|
|
let [a, b] = self;
|
|
[a.difference(b), b.difference(a)]
|
|
}
|
|
}
|
|
|
|
fn sql_command_name(chunk: &str) -> &str {
|
|
chunk
|
|
.split_once(|c: char| c.is_lowercase())
|
|
.unwrap_or_default()
|
|
.0
|
|
}
|
|
|
|
fn similarity(chunk: &str, other_chunk: &str) -> usize {
|
|
diff::chars(chunk, other_chunk)
|
|
.into_iter()
|
|
.filter(|i| {
|
|
match i {
|
|
diff::Result::Both(c, _) => {
|
|
// Prevent whitespace from affecting similarity level
|
|
!c.is_whitespace()
|
|
&& (
|
|
// Increase accuracy for some trigger function diffs
|
|
c.is_lowercase()
|
|
// Preserve differences in names that contain a number
|
|
|| c.is_numeric()
|
|
)
|
|
}
|
|
_ => false,
|
|
}
|
|
})
|
|
.count()
|
|
}
|
|
|
|
fn normalize_chunk(chunk: &str) -> Cow<'_, str> {
|
|
let mut chunk = Cow::Borrowed(chunk);
|
|
|
|
let stripped_lines = chunk
|
|
.lines()
|
|
.map(|line| line.strip_suffix(',').unwrap_or(line));
|
|
|
|
// Sort column names, so differences in column order are ignored
|
|
if chunk.starts_with("CREATE TABLE ") {
|
|
let mut lines = stripped_lines.collect::<Vec<_>>();
|
|
|
|
sort_within_sections(&mut lines, |line| {
|
|
match line.chars().next() {
|
|
// CREATE
|
|
Some('C') => 0,
|
|
// Indented column name
|
|
Some(' ') => 1,
|
|
// End
|
|
Some(')') => 2,
|
|
_ => panic!("unrecognized part of `CREATE TABLE` statement: {line}"),
|
|
}
|
|
});
|
|
|
|
chunk = Cow::Owned(lines.join("\n"));
|
|
} else if chunk.starts_with("CREATE VIEW ") || chunk.starts_with("CREATE OR REPLACE VIEW ") {
|
|
let is_simple_select_statement = chunk.lines().enumerate().all(|(i, line)| {
|
|
match (i, line.trim_start().chars().next()) {
|
|
// CREATE
|
|
(0, Some('C')) => true,
|
|
// SELECT
|
|
(1, Some('S')) => true,
|
|
// FROM
|
|
(_, Some('F')) if line.ends_with(';') => true,
|
|
// Column name
|
|
(_, Some(c)) if c.is_lowercase() => true,
|
|
_ => false,
|
|
}
|
|
});
|
|
|
|
if is_simple_select_statement {
|
|
let mut lines = stripped_lines.collect::<Vec<_>>();
|
|
|
|
sort_within_sections(&mut lines, |line| {
|
|
match line.trim_start().chars().next() {
|
|
// CREATE
|
|
Some('C') => 0,
|
|
// SELECT
|
|
Some('S') => 1,
|
|
// FROM
|
|
Some('F') => 3,
|
|
// Column name
|
|
_ => 2,
|
|
}
|
|
});
|
|
|
|
chunk = Cow::Owned(lines.join("\n"));
|
|
}
|
|
}
|
|
|
|
// Replace timestamps with a constant string, so differences in timestamps are ignored
|
|
for index in 0.. {
|
|
// Performance optimization
|
|
let Some(byte) = chunk.as_bytes().get(index) else {
|
|
break;
|
|
};
|
|
if !byte.is_ascii_digit() {
|
|
continue;
|
|
}
|
|
|
|
// Check for this pattern: 0000-00-00 00:00:00
|
|
let Some((
|
|
&[a0, a1, a2, a3, b0, a4, a5, b1, a6, a7, b2, a8, a9, b3, a10, a11, b4, a12, a13],
|
|
remaining,
|
|
)) = chunk
|
|
.get(index..)
|
|
.and_then(|s| s.as_bytes().split_first_chunk::<PATTERN_LEN>())
|
|
else {
|
|
break;
|
|
};
|
|
|
|
if [a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13]
|
|
.into_iter()
|
|
.all(|byte| byte.is_ascii_digit())
|
|
&& [b0, b1, b2, b3, b4] == *b"-- ::"
|
|
{
|
|
// Replace the part of the string that has the checked pattern and an optional fractional part
|
|
let len_after = if let Some((b'.', s)) = remaining.split_first() {
|
|
1 + s.iter().position(|c| !c.is_ascii_digit()).unwrap_or(0)
|
|
} else {
|
|
0
|
|
};
|
|
// Length of replacement string is likely to match previous string
|
|
// (there's up to 6 digits after the decimal point)
|
|
chunk.to_mut().replace_range(
|
|
index..(index + PATTERN_LEN + len_after),
|
|
"AAAAAAAAAAAAAAAAAAAAAAAAAA",
|
|
);
|
|
}
|
|
}
|
|
|
|
chunk
|
|
}
|
|
|
|
fn sort_within_sections<T: Ord + ?Sized>(vec: &mut [&T], mut section: impl FnMut(&T) -> u8) {
|
|
vec.sort_unstable_by_key(|&i| (section(i), i));
|
|
}
|
|
|
|
fn chunks(dump: &str) -> impl Iterator<Item = &str> {
|
|
let mut remaining = dump;
|
|
std::iter::from_fn(move || {
|
|
remaining = remaining.trim_start();
|
|
while let Some(s) = remove_skipped_item_from_beginning(remaining) {
|
|
remaining = s.trim_start();
|
|
}
|
|
|
|
// `trim_start` guarantees that `result` is not empty
|
|
let (result, after_result) = remaining.split_once("\n\n")?;
|
|
remaining = after_result;
|
|
Some(result)
|
|
})
|
|
}
|
|
|
|
fn remove_skipped_item_from_beginning(s: &str) -> Option<&str> {
|
|
// Skip commented line
|
|
if let Some(after) = s.strip_prefix("--") {
|
|
Some(after_first_occurence(after, "\n"))
|
|
}
|
|
// Skip view definition that's replaced later (the first definition selects all nulls)
|
|
else if let Some(after) = s.strip_prefix("CREATE VIEW ") {
|
|
let (name, after_name) = after.split_once(' ').unwrap_or_default();
|
|
Some(after_first_occurence(after_name, "\n\n"))
|
|
.filter(|after_view| after_view.contains(&format!("\nCREATE OR REPLACE VIEW {name} ")))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
fn after_first_occurence<'a>(s: &'a str, pat: &str) -> &'a str {
|
|
s.split_once(pat).unwrap_or_default().1
|
|
}
|