diff --git a/CHANGELOG.md b/CHANGELOG.md index 79cb37e..9ce1cbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ - `Patch::from_multiple` no longer returns an error on an input that contains no patches, including an empty string. It instead returns an empty vector. +- `Patch` now has an additional `binary` field indicating a "binary files differ" message. + ### Fixed - Issue #4: Fixed parsing of “No newline at end of file” markers so they are recognized even when not the final line of a hunk. diff --git a/src/ast.rs b/src/ast.rs index ae036c2..09ed367 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -14,25 +14,35 @@ pub struct Patch<'a> { pub new: File<'a>, /// hunks of differences; each hunk shows one area where the files differ pub hunks: Vec>, + /// If there was a `No newline at end of file` indicator after the last line of the old version of the file pub old_missing_newline: bool, /// If there was a `No newline at end of file` indicator after the last line of the new version of the file pub new_missing_newline: bool, + + /// True if this patch is between two binary files. + /// + /// For binary files, `hunks` is empty and the newline indicators are false. + pub binary: bool, } impl fmt::Display for Patch<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // Display implementations typically hold up the invariant that there is no trailing // newline. This isn't enforced, but it allows them to work well with `println!` - - write!(f, "--- {}", self.old)?; - write!(f, "\n+++ {}", self.new)?; - for (i, hunk) in self.hunks.iter().enumerate() { - writeln!(f)?; - if i == self.hunks.len() - 1 { - hunk.fmt(f, self.old_missing_newline, self.new_missing_newline)?; - } else { - hunk.fmt(f, false, false)?; + if self.binary { + assert!(self.hunks.is_empty()); + write!(f, "Binary files {} and {} differ", self.old, self.new)?; + } else { + writeln!(f, "--- {}", self.old)?; + write!(f, "+++ {}", self.new)?; + for (i, hunk) in self.hunks.iter().enumerate() { + writeln!(f)?; + if i == self.hunks.len() - 1 { + hunk.fmt(f, self.old_missing_newline, self.new_missing_newline)?; + } else { + hunk.fmt(f, false, false)?; + } } } Ok(()) diff --git a/src/parser.rs b/src/parser.rs index 2ca42c7..22ca43f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,16 +2,19 @@ use std::borrow::Cow; use std::error::Error; use chrono::DateTime; +use nom::character::complete::{hex_digit1, space1}; +use nom::sequence::separated_pair; use nom::*; use nom::{ branch::alt, bytes::complete::{is_not, tag, take_until}, character::complete::{char, digit1, line_ending, none_of, not_line_ending, one_of}, - combinator::{all_consuming, map, map_opt, not, opt, verify}, + combinator::{all_consuming, map, map_opt, not, opt, recognize, verify}, error::context, multi::{many0, many1}, - sequence::{delimited, preceded, terminated, tuple}, + sequence::{delimited, pair, preceded, terminated, tuple}, }; +use nom_locate::LocatedSpan; use crate::ast::*; @@ -50,8 +53,8 @@ impl std::fmt::Display for ParseError<'_> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, - "Line {}: Error while parsing: {}", - self.line, self.fragment + "Line {}: Error while parsing: {:?}: {:?}", + self.line, self.fragment, self.kind ) } } @@ -99,10 +102,12 @@ fn multiple_patches(input: Input) -> IResult> { } fn patch(input: Input) -> IResult { - if let Ok(patch) = binary_files_differ(input) { + if let Ok(patch) = file_rename_only(input) { return Ok(patch); } - if let Ok(patch) = file_rename_only(input) { + let (input, _diff_command) = diff_command(input)?; + let (input, _git_index) = git_index_line(input)?; + if let Ok(patch) = binary_files_differ(input) { return Ok(patch); } let (input, files) = headers(input)?; @@ -126,6 +131,7 @@ fn patch(input: Input) -> IResult { hunks, old_missing_newline, new_missing_newline, + binary: false, }, )) } @@ -144,7 +150,7 @@ fn binary_files_differ(input: Input) -> IResult { .strip_suffix(" differ") .and_then(|s| s.split_once(" and ")) }), - line_ending, + many1(line_ending), ), )(input)?; Ok(( @@ -161,6 +167,7 @@ fn binary_files_differ(input: Input) -> IResult { hunks: Vec::new(), old_missing_newline: false, new_missing_newline: false, + binary: true, }, )) } @@ -192,10 +199,43 @@ fn file_rename_only(input: Input<'_>) -> IResult, Patch<'_>> { hunks: Vec::new(), old_missing_newline: false, new_missing_newline: false, + binary: false, }, )) } +/// Parse a diff command line from the input, if one is present. +/// +/// git inserts e.g. +/// `diff --git a/file1 b/file2` +fn diff_command(input: Input<'_>) -> IResult, Option>> { + context( + "diff command line", + opt(recognize(pair( + tag("diff "), + terminated(not_line_ending, opt(line_ending)), + ))), + )(input) +} + +/// Parse a git "index" line from the input, if one is present. +/// +/// e.g. `index 4805bf6..e807873 100644` +fn git_index_line(input: Input<'_>) -> IResult, Option>> { + // TODO: We could return the hashes, and maybe more usefully the unix file + // mode into the AST. + context( + "git index line", + opt(recognize(tuple(( + tag("index "), + separated_pair(hex_digit1, tag(".."), hex_digit1), + space1, + digit1, + opt(line_ending), + )))), + )(input) +} + // Header lines fn headers(input: Input) -> IResult { // Ignore any preamble lines in produced diffs @@ -766,6 +806,7 @@ mod tests { ], old_missing_newline: false, new_missing_newline: false, + binary: false, }; test_parser!(patch(sample) -> expected); diff --git a/tests/parse_samples.rs b/tests/parse_samples.rs index c062dea..36395cf 100644 --- a/tests/parse_samples.rs +++ b/tests/parse_samples.rs @@ -24,16 +24,14 @@ where // patch file. fn verify_patch_roundtrip(data: &str, path: &PathBuf) { let patches = Patch::from_multiple(data) - .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err)); + .unwrap_or_else(|err| panic!("failed to parse {path:?}, error: {err:?}")); - #[allow(clippy::format_collect)] // Display::fmt is the only way to resolve Patch->str + #[allow(clippy::format_collect)] // it's not performance-sensitive let patch_file: String = patches.iter().map(|patch| format!("{}\n", patch)).collect(); - println!("{}", patch_file); + println!("Emitting parsed file:\n{}", patch_file); + let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| { - panic!( - "failed to re-parse {:?} after formatting, error: {}", - path, err - ) + panic!("failed to re-parse {path:?} after formatting, error: {err:?}",) }); assert_eq!(patches, patches2); } diff --git a/tests/regressions.rs b/tests/regressions.rs index f02afb7..f80d140 100644 --- a/tests/regressions.rs +++ b/tests/regressions.rs @@ -42,7 +42,8 @@ fn crlf_breaks_stuff_17() -> Result<(), ParseError<'static>> { lines: vec![Line::Context("x")], }], old_missing_newline: false, - new_missing_newline: false + new_missing_newline: false, + binary: false, } ); Ok(()) diff --git a/tests/wild-samples/gitbinary.patch b/tests/wild-samples/gitbinary.patch new file mode 100644 index 0000000..0afaf5f --- /dev/null +++ b/tests/wild-samples/gitbinary.patch @@ -0,0 +1,3 @@ +diff --git a/test-renderers/expected/renderers/fog-None-wgpu.png b/test-renderers/expected/renderers/fog-None-wgpu.png +index 616d6ea8..afd1b043 100644 +Binary files a/test-renderers/expected/renderers/fog-None-wgpu.png and b/test-renderers/expected/renderers/fog-None-wgpu.png differ diff --git a/tests/wild-samples/gitsmallbinary.patch b/tests/wild-samples/gitsmallbinary.patch new file mode 100644 index 0000000..975477b --- /dev/null +++ b/tests/wild-samples/gitsmallbinary.patch @@ -0,0 +1,3 @@ +diff --git a/bin b/bin +index 4805bf6..e807873 100644 +Binary files a/bin and b/bin differ