|
370 | 370 | } |
371 | 371 | function trailingWs(string) { |
372 | 372 | // Yes, this looks overcomplicated and dumb - why not replace the whole function with |
373 | | - // return string match(/\s*$/)[0] |
| 373 | + // return string.match(/\s*$/)[0] |
374 | 374 | // you ask? Because: |
375 | 375 | // 1. the trap described at https://markamery.com/blog/quadratic-time-regexes/ would mean doing |
376 | 376 | // this would cause this function to take O(n²) time in the worst case (specifically when |
|
396 | 396 |
|
397 | 397 | // Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode |
398 | 398 | // |
399 | | - // Ranges and exceptions: |
400 | | - // Latin-1 Supplement, 0080–00FF |
401 | | - // - U+00D7 × Multiplication sign |
402 | | - // - U+00F7 ÷ Division sign |
403 | | - // Latin Extended-A, 0100–017F |
404 | | - // Latin Extended-B, 0180–024F |
405 | | - // IPA Extensions, 0250–02AF |
406 | | - // Spacing Modifier Letters, 02B0–02FF |
407 | | - // - U+02C7 ˇ ˇ Caron |
408 | | - // - U+02D8 ˘ ˘ Breve |
409 | | - // - U+02D9 ˙ ˙ Dot Above |
410 | | - // - U+02DA ˚ ˚ Ring Above |
411 | | - // - U+02DB ˛ ˛ Ogonek |
412 | | - // - U+02DC ˜ ˜ Small Tilde |
413 | | - // - U+02DD ˝ ˝ Double Acute Accent |
414 | | - // Latin Extended Additional, 1E00–1EFF |
415 | | - const extendedWordChars = 'a-zA-Z0-9_\\u{C0}-\\u{FF}\\u{D8}-\\u{F6}\\u{F8}-\\u{2C6}\\u{2C8}-\\u{2D7}\\u{2DE}-\\u{2FF}\\u{1E00}-\\u{1EFF}'; |
| 399 | + // Chars/ranges counted as "word" characters by this regex are as follows: |
| 400 | + // |
| 401 | + // + U+00AD Soft hyphen |
| 402 | + // + 00C0–00FF (letters with diacritics from the Latin-1 Supplement), except: |
| 403 | + // - U+00D7 × Multiplication sign |
| 404 | + // - U+00F7 ÷ Division sign |
| 405 | + // + Latin Extended-A, 0100–017F |
| 406 | + // + Latin Extended-B, 0180–024F |
| 407 | + // + IPA Extensions, 0250–02AF |
| 408 | + // + Spacing Modifier Letters, 02B0–02FF, except: |
| 409 | + // - U+02C7 ˇ ˇ Caron |
| 410 | + // - U+02D8 ˘ ˘ Breve |
| 411 | + // - U+02D9 ˙ ˙ Dot Above |
| 412 | + // - U+02DA ˚ ˚ Ring Above |
| 413 | + // - U+02DB ˛ ˛ Ogonek |
| 414 | + // - U+02DC ˜ ˜ Small Tilde |
| 415 | + // - U+02DD ˝ ˝ Double Acute Accent |
| 416 | + // + Latin Extended Additional, 1E00–1EFF |
| 417 | + const extendedWordChars = 'a-zA-Z0-9_\\u{AD}\\u{C0}-\\u{D6}\\u{D8}-\\u{F6}\\u{F8}-\\u{2C6}\\u{2C8}-\\u{2D7}\\u{2DE}-\\u{2FF}\\u{1E00}-\\u{1EFF}'; |
416 | 418 | // Each token is one of the following: |
417 | 419 | // - A punctuation mark plus the surrounding whitespace |
418 | 420 | // - A word plus the surrounding whitespace |
419 | | - // - Pure whitespace (but only in the special case where this the entire text |
| 421 | + // - Pure whitespace (but only in the special case where the entire text |
420 | 422 | // is just whitespace) |
421 | 423 | // |
422 | 424 | // We have to include surrounding whitespace in the tokens because the two |
|
453 | 455 | if (segmenter.resolvedOptions().granularity != 'word') { |
454 | 456 | throw new Error('The segmenter passed must have a granularity of "word"'); |
455 | 457 | } |
456 | | - parts = Array.from(segmenter.segment(value), segment => segment.segment); |
| 458 | + // We want `parts` to be an array whose elements alternate between being |
| 459 | + // pure whitespace and being pure non-whitespace. This is ALMOST what the |
| 460 | + // segments returned by a word-based Intl.Segmenter already look like, |
| 461 | + // and therefore we can ALMOST get what we want by simply doing... |
| 462 | + // parts = Array.from(segmenter.segment(value), segment => segment.segment); |
| 463 | + // ... but not QUITE, because of one annoying special case: every |
| 464 | + // newline character gets its own segment, instead of sharing a segment |
| 465 | + // with other surrounding whitespace. We therefore need to manually merge |
| 466 | + // consecutive segments of whitespace into a single part: |
| 467 | + parts = []; |
| 468 | + for (const segmentObj of Array.from(segmenter.segment(value))) { |
| 469 | + const segment = segmentObj.segment; |
| 470 | + if (parts.length && (/\s/).test(parts[parts.length - 1]) && (/\s/).test(segment)) { |
| 471 | + parts[parts.length - 1] += segment; |
| 472 | + } |
| 473 | + else { |
| 474 | + parts.push(segment); |
| 475 | + } |
| 476 | + } |
457 | 477 | } |
458 | 478 | else { |
459 | 479 | parts = value.match(tokenizeIncludingWhitespace) || []; |
|
656 | 676 | class WordsWithSpaceDiff extends Diff { |
657 | 677 | tokenize(value) { |
658 | 678 | // Slightly different to the tokenizeIncludingWhitespace regex used above in |
659 | | - // that this one treats each individual newline as a distinct tokens, rather |
| 679 | + // that this one treats each individual newline as a distinct token, rather |
660 | 680 | // than merging them into other surrounding whitespace. This was requested |
661 | 681 | // in https://github.com/kpdecker/jsdiff/issues/180 & |
662 | 682 | // https://github.com/kpdecker/jsdiff/issues/211 |
|
957 | 977 | if ((/^(---|\+\+\+|@@)\s/).test(line)) { |
958 | 978 | break; |
959 | 979 | } |
960 | | - // Diff index |
961 | | - const header = (/^(?:Index:|diff(?: -r \w+)+)\s+(.+?)\s*$/).exec(line); |
962 | | - if (header) { |
963 | | - index.index = header[1]; |
| 980 | + // Try to parse the line as a diff header, like |
| 981 | + // Index: README.md |
| 982 | + // or |
| 983 | + // diff -r 9117c6561b0b -r 273ce12ad8f1 .hgignore |
| 984 | + // or |
| 985 | + // Index: something with multiple words |
| 986 | + // and extract the filename (or whatever else is used as an index name) |
| 987 | + // from the end (i.e. 'README.md', '.hgignore', or |
| 988 | + // 'something with multiple words' in the examples above). |
| 989 | + // |
| 990 | + // TODO: It seems awkward that we indiscriminately trim off trailing |
| 991 | + // whitespace here. Theoretically, couldn't that be meaningful - |
| 992 | + // e.g. if the patch represents a diff of a file whose name ends |
| 993 | + // with a space? Seems wrong to nuke it. |
| 994 | + // But this behaviour has been around since v2.2.1 in 2015, so if |
| 995 | + // it's going to change, it should be done cautiously and in a new |
| 996 | + // major release, for backwards-compat reasons. |
| 997 | + // -- ExplodingCabbage |
| 998 | + const headerMatch = (/^(?:Index:|diff(?: -r \w+)+)\s+/).exec(line); |
| 999 | + if (headerMatch) { |
| 1000 | + index.index = line.substring(headerMatch[0].length).trim(); |
964 | 1001 | } |
965 | 1002 | i++; |
966 | 1003 | } |
|
989 | 1026 | // Parses the --- and +++ headers, if none are found, no lines |
990 | 1027 | // are consumed. |
991 | 1028 | function parseFileHeader(index) { |
992 | | - const fileHeader = (/^(---|\+\+\+)\s+(.*)\r?$/).exec(diffstr[i]); |
993 | | - if (fileHeader) { |
994 | | - const data = fileHeader[2].split('\t', 2), header = (data[1] || '').trim(); |
| 1029 | + const fileHeaderMatch = (/^(---|\+\+\+)\s+/).exec(diffstr[i]); |
| 1030 | + if (fileHeaderMatch) { |
| 1031 | + const prefix = fileHeaderMatch[1], data = diffstr[i].substring(3).trim().split('\t', 2), header = (data[1] || '').trim(); |
995 | 1032 | let fileName = data[0].replace(/\\\\/g, '\\'); |
996 | | - if ((/^".*"$/).test(fileName)) { |
| 1033 | + if (fileName.startsWith('"') && fileName.endsWith('"')) { |
997 | 1034 | fileName = fileName.substr(1, fileName.length - 2); |
998 | 1035 | } |
999 | | - if (fileHeader[1] === '---') { |
| 1036 | + if (prefix === '---') { |
1000 | 1037 | index.oldFileName = fileName; |
1001 | 1038 | index.oldHeader = header; |
1002 | 1039 | } |
|
1386 | 1423 | }) }); |
1387 | 1424 | } |
1388 | 1425 |
|
| 1426 | + const INCLUDE_HEADERS = { |
| 1427 | + includeIndex: true, |
| 1428 | + includeUnderline: true, |
| 1429 | + includeFileHeaders: true |
| 1430 | + }; |
| 1431 | + const FILE_HEADERS_ONLY = { |
| 1432 | + includeIndex: false, |
| 1433 | + includeUnderline: false, |
| 1434 | + includeFileHeaders: true |
| 1435 | + }; |
| 1436 | + const OMIT_HEADERS = { |
| 1437 | + includeIndex: false, |
| 1438 | + includeUnderline: false, |
| 1439 | + includeFileHeaders: false |
| 1440 | + }; |
1389 | 1441 | function structuredPatch(oldFileName, newFileName, oldStr, newStr, oldHeader, newHeader, options) { |
1390 | 1442 | let optionsObj; |
1391 | 1443 | if (!options) { |
|
1515 | 1567 | * creates a unified diff patch. |
1516 | 1568 | * @param patch either a single structured patch object (as returned by `structuredPatch`) or an array of them (as returned by `parsePatch`) |
1517 | 1569 | */ |
1518 | | - function formatPatch(patch) { |
| 1570 | + function formatPatch(patch, headerOptions) { |
| 1571 | + if (!headerOptions) { |
| 1572 | + headerOptions = INCLUDE_HEADERS; |
| 1573 | + } |
1519 | 1574 | if (Array.isArray(patch)) { |
1520 | | - return patch.map(formatPatch).join('\n'); |
| 1575 | + if (patch.length > 1 && !headerOptions.includeFileHeaders) { |
| 1576 | + throw new Error('Cannot omit file headers on a multi-file patch. ' |
| 1577 | + + '(The result would be unparseable; how would a tool trying to apply ' |
| 1578 | + + 'the patch know which changes are to which file?)'); |
| 1579 | + } |
| 1580 | + return patch.map(p => formatPatch(p, headerOptions)).join('\n'); |
1521 | 1581 | } |
1522 | 1582 | const ret = []; |
1523 | | - if (patch.oldFileName == patch.newFileName) { |
| 1583 | + if (headerOptions.includeIndex && patch.oldFileName == patch.newFileName) { |
1524 | 1584 | ret.push('Index: ' + patch.oldFileName); |
1525 | 1585 | } |
1526 | | - ret.push('==================================================================='); |
1527 | | - ret.push('--- ' + patch.oldFileName + (typeof patch.oldHeader === 'undefined' ? '' : '\t' + patch.oldHeader)); |
1528 | | - ret.push('+++ ' + patch.newFileName + (typeof patch.newHeader === 'undefined' ? '' : '\t' + patch.newHeader)); |
| 1586 | + if (headerOptions.includeUnderline) { |
| 1587 | + ret.push('==================================================================='); |
| 1588 | + } |
| 1589 | + if (headerOptions.includeFileHeaders) { |
| 1590 | + ret.push('--- ' + patch.oldFileName + (typeof patch.oldHeader === 'undefined' ? '' : '\t' + patch.oldHeader)); |
| 1591 | + ret.push('+++ ' + patch.newFileName + (typeof patch.newHeader === 'undefined' ? '' : '\t' + patch.newHeader)); |
| 1592 | + } |
1529 | 1593 | for (let i = 0; i < patch.hunks.length; i++) { |
1530 | 1594 | const hunk = patch.hunks[i]; |
1531 | 1595 | // Unified Diff Format quirk: If the chunk size is 0, |
|
1555 | 1619 | if (!patchObj) { |
1556 | 1620 | return; |
1557 | 1621 | } |
1558 | | - return formatPatch(patchObj); |
| 1622 | + return formatPatch(patchObj, options === null || options === void 0 ? void 0 : options.headerOptions); |
1559 | 1623 | } |
1560 | 1624 | else { |
1561 | 1625 | const { callback } = options; |
|
1564 | 1628 | callback(undefined); |
1565 | 1629 | } |
1566 | 1630 | else { |
1567 | | - callback(formatPatch(patchObj)); |
| 1631 | + callback(formatPatch(patchObj, options.headerOptions)); |
1568 | 1632 | } |
1569 | 1633 | } })); |
1570 | 1634 | } |
|
1642 | 1706 | } |
1643 | 1707 |
|
1644 | 1708 | exports.Diff = Diff; |
| 1709 | + exports.FILE_HEADERS_ONLY = FILE_HEADERS_ONLY; |
| 1710 | + exports.INCLUDE_HEADERS = INCLUDE_HEADERS; |
| 1711 | + exports.OMIT_HEADERS = OMIT_HEADERS; |
1645 | 1712 | exports.applyPatch = applyPatch; |
1646 | 1713 | exports.applyPatches = applyPatches; |
1647 | 1714 | exports.arrayDiff = arrayDiff; |
|
0 commit comments