Skip to content

Commit 5c7e155

Browse files
Update GitHub pages site with latest diff.js
1 parent 0c1e5f9 commit 5c7e155

File tree

1 file changed

+105
-38
lines changed

1 file changed

+105
-38
lines changed

diff.js

Lines changed: 105 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@
370370
}
371371
function trailingWs(string) {
372372
// Yes, this looks overcomplicated and dumb - why not replace the whole function with
373-
// return string match(/\s*$/)[0]
373+
// return string.match(/\s*$/)[0]
374374
// you ask? Because:
375375
// 1. the trap described at https://markamery.com/blog/quadratic-time-regexes/ would mean doing
376376
// this would cause this function to take O(n²) time in the worst case (specifically when
@@ -396,27 +396,29 @@
396396

397397
// Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode
398398
//
399-
// Ranges and exceptions:
400-
// Latin-1 Supplement, 0080–00FF
401-
// - U+00D7 × Multiplication sign
402-
// - U+00F7 ÷ Division sign
403-
// Latin Extended-A, 0100–017F
404-
// Latin Extended-B, 0180–024F
405-
// IPA Extensions, 0250–02AF
406-
// Spacing Modifier Letters, 02B0–02FF
407-
// - U+02C7 ˇ ˇ Caron
408-
// - U+02D8 ˘ ˘ Breve
409-
// - U+02D9 ˙ ˙ Dot Above
410-
// - U+02DA ˚ ˚ Ring Above
411-
// - U+02DB ˛ ˛ Ogonek
412-
// - U+02DC ˜ ˜ Small Tilde
413-
// - U+02DD ˝ ˝ Double Acute Accent
414-
// Latin Extended Additional, 1E00–1EFF
415-
const extendedWordChars = 'a-zA-Z0-9_\\u{C0}-\\u{FF}\\u{D8}-\\u{F6}\\u{F8}-\\u{2C6}\\u{2C8}-\\u{2D7}\\u{2DE}-\\u{2FF}\\u{1E00}-\\u{1EFF}';
399+
// Chars/ranges counted as "word" characters by this regex are as follows:
400+
//
401+
// + U+00AD Soft hyphen
402+
// + 00C0–00FF (letters with diacritics from the Latin-1 Supplement), except:
403+
// - U+00D7 × Multiplication sign
404+
// - U+00F7 ÷ Division sign
405+
// + Latin Extended-A, 0100–017F
406+
// + Latin Extended-B, 0180–024F
407+
// + IPA Extensions, 0250–02AF
408+
// + Spacing Modifier Letters, 02B0–02FF, except:
409+
// - U+02C7 ˇ ˇ Caron
410+
// - U+02D8 ˘ ˘ Breve
411+
// - U+02D9 ˙ ˙ Dot Above
412+
// - U+02DA ˚ ˚ Ring Above
413+
// - U+02DB ˛ ˛ Ogonek
414+
// - U+02DC ˜ ˜ Small Tilde
415+
// - U+02DD ˝ ˝ Double Acute Accent
416+
// + Latin Extended Additional, 1E00–1EFF
417+
const extendedWordChars = 'a-zA-Z0-9_\\u{AD}\\u{C0}-\\u{D6}\\u{D8}-\\u{F6}\\u{F8}-\\u{2C6}\\u{2C8}-\\u{2D7}\\u{2DE}-\\u{2FF}\\u{1E00}-\\u{1EFF}';
416418
// Each token is one of the following:
417419
// - A punctuation mark plus the surrounding whitespace
418420
// - A word plus the surrounding whitespace
419-
// - Pure whitespace (but only in the special case where this the entire text
421+
// - Pure whitespace (but only in the special case where the entire text
420422
// is just whitespace)
421423
//
422424
// We have to include surrounding whitespace in the tokens because the two
@@ -453,7 +455,25 @@
453455
if (segmenter.resolvedOptions().granularity != 'word') {
454456
throw new Error('The segmenter passed must have a granularity of "word"');
455457
}
456-
parts = Array.from(segmenter.segment(value), segment => segment.segment);
458+
// We want `parts` to be an array whose elements alternate between being
459+
// pure whitespace and being pure non-whitespace. This is ALMOST what the
460+
// segments returned by a word-based Intl.Segmenter already look like,
461+
// and therefore we can ALMOST get what we want by simply doing...
462+
// parts = Array.from(segmenter.segment(value), segment => segment.segment);
463+
// ... but not QUITE, because there's one annoying special case: every
464+
// newline character gets its own segment, instead of sharing a segment
465+
// with other surrounding whitespace. We therefore need to manually merge
466+
// consecutive segments of whitespace into a single part:
467+
parts = [];
468+
for (const segmentObj of Array.from(segmenter.segment(value))) {
469+
const segment = segmentObj.segment;
470+
if (parts.length && (/\s/).test(parts[parts.length - 1]) && (/\s/).test(segment)) {
471+
parts[parts.length - 1] += segment;
472+
}
473+
else {
474+
parts.push(segment);
475+
}
476+
}
457477
}
458478
else {
459479
parts = value.match(tokenizeIncludingWhitespace) || [];
@@ -656,7 +676,7 @@
656676
class WordsWithSpaceDiff extends Diff {
657677
tokenize(value) {
658678
// Slightly different to the tokenizeIncludingWhitespace regex used above in
659-
// that this one treats each individual newline as a distinct tokens, rather
679+
// that this one treats each individual newline as a distinct token, rather
660680
// than merging them into other surrounding whitespace. This was requested
661681
// in https://github.com/kpdecker/jsdiff/issues/180 &
662682
// https://github.com/kpdecker/jsdiff/issues/211
@@ -957,10 +977,27 @@
957977
if ((/^(---|\+\+\+|@@)\s/).test(line)) {
958978
break;
959979
}
960-
// Diff index
961-
const header = (/^(?:Index:|diff(?: -r \w+)+)\s+(.+?)\s*$/).exec(line);
962-
if (header) {
963-
index.index = header[1];
980+
// Try to parse the line as a diff header, like
981+
// Index: README.md
982+
// or
983+
// diff -r 9117c6561b0b -r 273ce12ad8f1 .hgignore
984+
// or
985+
// Index: something with multiple words
986+
// and extract the filename (or whatever else is used as an index name)
987+
// from the end (i.e. 'README.md', '.hgignore', or
988+
// 'something with multiple words' in the examples above).
989+
//
990+
// TODO: It seems awkward that we indiscriminately trim off trailing
991+
// whitespace here. Theoretically, couldn't that be meaningful -
992+
// e.g. if the patch represents a diff of a file whose name ends
993+
// with a space? Seems wrong to nuke it.
994+
// But this behaviour has been around since v2.2.1 in 2015, so if
995+
// it's going to change, it should be done cautiously and in a new
996+
// major release, for backwards-compat reasons.
997+
// -- ExplodingCabbage
998+
const headerMatch = (/^(?:Index:|diff(?: -r \w+)+)\s+/).exec(line);
999+
if (headerMatch) {
1000+
index.index = line.substring(headerMatch[0].length).trim();
9641001
}
9651002
i++;
9661003
}
@@ -989,14 +1026,14 @@
9891026
// Parses the --- and +++ headers; if none are found, no lines
9901027
// are consumed.
9911028
function parseFileHeader(index) {
992-
const fileHeader = (/^(---|\+\+\+)\s+(.*)\r?$/).exec(diffstr[i]);
993-
if (fileHeader) {
994-
const data = fileHeader[2].split('\t', 2), header = (data[1] || '').trim();
1029+
const fileHeaderMatch = (/^(---|\+\+\+)\s+/).exec(diffstr[i]);
1030+
if (fileHeaderMatch) {
1031+
const prefix = fileHeaderMatch[1], data = diffstr[i].substring(3).trim().split('\t', 2), header = (data[1] || '').trim();
9951032
let fileName = data[0].replace(/\\\\/g, '\\');
996-
if ((/^".*"$/).test(fileName)) {
1033+
if (fileName.startsWith('"') && fileName.endsWith('"')) {
9971034
fileName = fileName.substr(1, fileName.length - 2);
9981035
}
999-
if (fileHeader[1] === '---') {
1036+
if (prefix === '---') {
10001037
index.oldFileName = fileName;
10011038
index.oldHeader = header;
10021039
}
@@ -1386,6 +1423,21 @@
13861423
}) });
13871424
}
13881425

1426+
const INCLUDE_HEADERS = {
1427+
includeIndex: true,
1428+
includeUnderline: true,
1429+
includeFileHeaders: true
1430+
};
1431+
const FILE_HEADERS_ONLY = {
1432+
includeIndex: false,
1433+
includeUnderline: false,
1434+
includeFileHeaders: true
1435+
};
1436+
const OMIT_HEADERS = {
1437+
includeIndex: false,
1438+
includeUnderline: false,
1439+
includeFileHeaders: false
1440+
};
13891441
function structuredPatch(oldFileName, newFileName, oldStr, newStr, oldHeader, newHeader, options) {
13901442
let optionsObj;
13911443
if (!options) {
@@ -1515,17 +1567,29 @@
15151567
* creates a unified diff patch.
15161568
* @param patch either a single structured patch object (as returned by `structuredPatch`) or an array of them (as returned by `parsePatch`)
15171569
*/
1518-
function formatPatch(patch) {
1570+
function formatPatch(patch, headerOptions) {
1571+
if (!headerOptions) {
1572+
headerOptions = INCLUDE_HEADERS;
1573+
}
15191574
if (Array.isArray(patch)) {
1520-
return patch.map(formatPatch).join('\n');
1575+
if (patch.length > 1 && !headerOptions.includeFileHeaders) {
1576+
throw new Error('Cannot omit file headers on a multi-file patch. '
1577+
+ '(The result would be unparseable; how would a tool trying to apply '
1578+
+ 'the patch know which changes are to which file?)');
1579+
}
1580+
return patch.map(p => formatPatch(p, headerOptions)).join('\n');
15211581
}
15221582
const ret = [];
1523-
if (patch.oldFileName == patch.newFileName) {
1583+
if (headerOptions.includeIndex && patch.oldFileName == patch.newFileName) {
15241584
ret.push('Index: ' + patch.oldFileName);
15251585
}
1526-
ret.push('===================================================================');
1527-
ret.push('--- ' + patch.oldFileName + (typeof patch.oldHeader === 'undefined' ? '' : '\t' + patch.oldHeader));
1528-
ret.push('+++ ' + patch.newFileName + (typeof patch.newHeader === 'undefined' ? '' : '\t' + patch.newHeader));
1586+
if (headerOptions.includeUnderline) {
1587+
ret.push('===================================================================');
1588+
}
1589+
if (headerOptions.includeFileHeaders) {
1590+
ret.push('--- ' + patch.oldFileName + (typeof patch.oldHeader === 'undefined' ? '' : '\t' + patch.oldHeader));
1591+
ret.push('+++ ' + patch.newFileName + (typeof patch.newHeader === 'undefined' ? '' : '\t' + patch.newHeader));
1592+
}
15291593
for (let i = 0; i < patch.hunks.length; i++) {
15301594
const hunk = patch.hunks[i];
15311595
// Unified Diff Format quirk: If the chunk size is 0,
@@ -1555,7 +1619,7 @@
15551619
if (!patchObj) {
15561620
return;
15571621
}
1558-
return formatPatch(patchObj);
1622+
return formatPatch(patchObj, options === null || options === void 0 ? void 0 : options.headerOptions);
15591623
}
15601624
else {
15611625
const { callback } = options;
@@ -1564,7 +1628,7 @@
15641628
callback(undefined);
15651629
}
15661630
else {
1567-
callback(formatPatch(patchObj));
1631+
callback(formatPatch(patchObj, options.headerOptions));
15681632
}
15691633
} }));
15701634
}
@@ -1642,6 +1706,9 @@
16421706
}
16431707

16441708
exports.Diff = Diff;
1709+
exports.FILE_HEADERS_ONLY = FILE_HEADERS_ONLY;
1710+
exports.INCLUDE_HEADERS = INCLUDE_HEADERS;
1711+
exports.OMIT_HEADERS = OMIT_HEADERS;
16451712
exports.applyPatch = applyPatch;
16461713
exports.applyPatches = applyPatches;
16471714
exports.arrayDiff = arrayDiff;

0 commit comments

Comments
 (0)