diff --git a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java index ec50fdb3e..44ff974c3 100644 --- a/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java +++ b/unicodetools/src/main/java/org/unicode/props/PropertyParsingInfo.java @@ -1642,6 +1642,42 @@ private static void parseFields( // 21EA..21F3;;⇪..⇳;;;; 21EA-21F3 are keyboard value = "None"; } + if (line.getParts().length == 3 + && (propInfo.property == UcdProperty.Block + || propInfo.property == UcdProperty.Pretty_Block)) { + // The old Blocks files had First; Last; Block. + IntRange range = new IntRange(); + range.start = Utility.codePointFromHex(line.getParts()[0]); + range.end = Utility.codePointFromHex(line.getParts()[1]); + // Unicode 2 puts FEFF both in Arabic Presentation Forms-B and in Specials. + // We are not going to make Block multivalued for that, so we let the second + // assignment win. + // This fits with assignments in Unicode 2.1.4..3.1.1 where + // Arabic Presentation Forms-B ended on FEFE and Specials was a + // split Block of FEFF & FFF0..FFFD. + // Since Unicode 3.2, blocks were contiguous xxx0..yyyF: + // https://www.unicode.org/reports/tr28/tr28-3.html#database + // The normative blocks defined in Blocks.txt have been adjusted slightly, + // in accordance with Unicode Technical Committee decisions. + // - Every block starts and ends on a column boundary. + // That is, the last digit of the first code point in the block is always 0, + // and the last digit of the final code point in the block is always F. + // - Every block is contiguous. [...] + propInfo.put( + data, + line.getMissingSet(), + range, + line.getParts()[2], + indexUnicodeProperties.ucdVersion.getMajor() == 2 + ? new PropertyUtilities.Overrider() + : null, + false, + nextProperties == null + ? 
null + : nextProperties.getProperty(propInfo.property), + indexUnicodeProperties.getUcdVersion()); + continue; + } if (propInfo.getFieldMapping(indexUnicodeProperties.ucdVersion).keyField == 0) { propInfo.put( data, @@ -1725,36 +1761,7 @@ private static void parseSimpleFieldFile( } } Merge merger = null; - if (line.getParts().length == 3 && propInfo.property == UcdProperty.Block) { - // The old Blocks files had First; Last; Block. - IntRange range = new IntRange(); - range.start = Utility.codePointFromHex(line.getParts()[0]); - range.end = Utility.codePointFromHex(line.getParts()[1]); - // Unicode 2 puts FEFF both in Arabic Presentation Forms-B and in Specials. - // We are not going to make Block multivalued for that, so we let the second - // assignment win. - // This fits with assignments in Unicode 2.1.4..3.1.1 where - // Arabic Presentation Forms-B ended on FEFE and Specials was a - // split Block of FEFF & FFF0..FFFD. - // Since Unicode 3.2, blocks were contiguous xxx0..yyyF: - // https://www.unicode.org/reports/tr28/tr28-3.html#database - // The normative blocks defined in Blocks.txt have been adjusted slightly, - // in accordance with Unicode Technical Committee decisions. - // - Every block starts and ends on a column boundary. - // That is, the last digit of the first code point in the block is always 0, - // and the last digit of the final code point in the block is always F. - // - Every block is contiguous. [...] - propInfo.put( - data, - line.getMissingSet(), - range, - line.getParts()[2], - version.getMajor() == 2 ? 
new PropertyUtilities.Overrider() : null, - false, - nextVersion, - indexUnicodeProperties.getUcdVersion()); - continue; - } else if (propInfo.property == UcdProperty.Numeric_Value) { + if (propInfo.property == UcdProperty.Numeric_Value) { String extractedValue = line.getParts()[1]; for (int cp = line.getRange().start; cp <= line.getRange().end; ++cp) { String unicodeDataValue = diff --git a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java index 0760983d6..08b2b4a11 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdProperty.java @@ -199,6 +199,7 @@ public enum UcdProperty { PropertyType.Miscellaneous, DerivedPropertyStatus.UCDNonProperty, "Names_List_Subheader_Notice"), + Pretty_Block(PropertyType.Miscellaneous, DerivedPropertyStatus.UCDNonProperty, "Pretty_Block"), Standardized_Variant( PropertyType.Miscellaneous, DerivedPropertyStatus.UCDNonProperty, diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 030fce126..7aaf4b121 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -2214,6 +2214,7 @@ public static Other_Joining_Type_Values forName(String name) { } } + // Pretty_Block public enum RGI_Emoji_Qualification_Values implements Named { None("None"), Fully_Qualified("FQE"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java index 82f7f5350..bf04edf3b 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java @@ -483,7 +483,7 @@ private static void propertywiseAlikeLine( final var iup 
= IndexUnicodeProperties.make(Settings.latestVersion); final List errorMessageLines = new ArrayList<>(); for (var p : UcdProperty.values()) { - if (p.name().startsWith("Names_List_")) { + if (p.name().startsWith("Names_List_") || p == UcdProperty.Pretty_Block) { continue; } final var property = iup.getProperty(p); @@ -654,7 +654,7 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue } while (Lookahead.oneToken(pp, source).accept(",")); } for (var p : UcdProperty.values()) { - if (p.name().startsWith("Names_List_")) { + if (p.name().startsWith("Names_List_") || p == UcdProperty.Pretty_Block) { continue; } final var property = iup.getProperty(p); diff --git a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt index 3def1d830..a86cec2d3 100644 --- a/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt +++ b/unicodetools/src/main/resources/org/unicode/props/ExtraPropertyAliases.txt @@ -96,6 +96,12 @@ Link_Bracket ; Link_Bracket ; NonUCDProperty # Miscellaneous Properties # ================================================ +# The spelling of the block name in Blocks.txt, which is not exactly the +# matching one in PropertyValueAliases.txt (the latter has _ instead of both +# spaces and hyphens, and they also differ in case, e.g. Greek_And_Coptic vs. +# Greek and Coptic). 
+Pretty_Block ; Pretty_Block ; UCDNonProperty + Arabic_Shaping_Schematic_Name ; Arabic_Shaping_Schematic_Name ; UCDNonProperty CJKR ; CJK_Radical ; UCDNonProperty diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt index 93557469c..e740392ad 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexUnicodeProperties.txt @@ -125,6 +125,7 @@ DerivedJoiningType; Joining_Type; 1 ArabicShaping; Joining_Group; 3 BidiMirroring; Bidi_Mirroring_Glyph; Blocks ; Block +Blocks ; Pretty_Block CompositionExclusions ; Composition_Exclusion DerivedAge ; Age EastAsianWidth ; East_Asian_Width diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index e76eb8a96..6a0cca00e 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -1538,6 +1538,10 @@ In \P{U6.0:Math_Class_Ex=None}, U6.0:Math_Class = U6.0:Math_Class_Ex \p{U6.1:Math_Class=None} ⊂ \p{U6.1:Math_Class_Ex=None} \p{U6.0:Math_Class=None} ⊂ \p{U6.0:Math_Class_Ex=None} +# Pretty_Block is just Block, but prettier. + +OnPairsOf $code_points, EqualityOf Block ⇔ EqualityOf Pretty_Block + # Basic Propertywise tests. 
Ignoring Name: diff --git a/unicodetools/src/test/java/org/unicode/propstest/TestInvariants.java b/unicodetools/src/test/java/org/unicode/propstest/TestInvariants.java index eb1ef4e4e..12b4553a5 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/TestInvariants.java +++ b/unicodetools/src/test/java/org/unicode/propstest/TestInvariants.java @@ -149,6 +149,7 @@ public void TestUniformUnassigned() { UcdProperty.NFKC_Simple_Casefold, UcdProperty.Age, UcdProperty.Block, + UcdProperty.Pretty_Block, UcdProperty.Bidi_Class, UcdProperty.East_Asian_Width, UcdProperty.Grapheme_Cluster_Break, @@ -166,8 +167,10 @@ public void TestUniformUnassigned() { UcdProperty.Names_List_Subheader_Notice)); exceptions.putAll( General_Category_Values.Private_Use, - Arrays.asList(UcdProperty.Age, UcdProperty.Block)); - exceptions.put(General_Category_Values.Surrogate, UcdProperty.Block); + Arrays.asList(UcdProperty.Age, UcdProperty.Block, UcdProperty.Pretty_Block)); + exceptions.putAll( + General_Category_Values.Surrogate, + Arrays.asList(UcdProperty.Block, UcdProperty.Pretty_Block)); List ordered = new ArrayList<>(); // ordered.add(UcdProperty.Bidi_Class);