-
Notifications
You must be signed in to change notification settings - Fork 16
Feature/add inline flags #34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
797f157
507971f
83e573e
30f0342
58594eb
6c2e804
d78992c
ce46ec1
43c3443
fb2ff5b
9236c84
92d9c6e
fa5858c
12f24ab
e44b940
a690981
b326621
0534cd3
48a529f
38dee25
09af9a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -300,6 +300,123 @@ class NegativeLookbehind(ParensSyntax): | |
| Lookbehind.INVERTED = NegativeLookbehind | ||
|
|
||
|
|
||
class InlineFlag(namedtuple("InlineFlag", ["flag_character", "name", "sub"]), Asm):
    """Inline regex flag applied to a sub-expression, e.g. ``(?i:...)``.

    Fields:
        flag_character: the regex flag letter (see the constants below).
        name: ``None`` to set the flag, ``InlineFlag.UNSET`` to unset it.
            Any other value is rejected with ``CompileError``.
        sub: the wrapped node. When it is itself an ``InlineFlag`` the
            consecutive flags are merged into a single
            ``(?<set>-<unset>:...)`` group.
    """

    A = "a"
    L = "L"
    U = "u"
    I = "i"
    M = "m"
    S = "s"
    # Regex flag letter -> kleenexp flag name (used in error messages).
    regex_to_kleenexp = {
        A: "ascii_only",
        L: "locale_dependent",
        U: "unicode",
        I: "ignore_case",
        M: "multiline",
        S: "any_matches_all",
    }
    UNSET = "unset"
    # Only these flags may appear after the '-' in an inline flag group.
    UNSETTABLE = {I, M, S}
    # At most one of these may appear in a single flag group.
    INCOMPATIBLE = {A, L, U}
    # Unicode flag CANNOT be used in bytes-like patterns,
    # locale_dependent can ONLY be used in bytes-like patterns.
    # This value is set before compilation starts.
    PATTERN_IS_BYTES_LIKE = False

    def to_regex(self, flavor, capture_names, wrap=False):
        """Return the regex for this flag group and everything below it.

        Here we break the tree structure, using a specialized function to
        create one flagging expression for the rest of the subtree. The
        only flag of a flagging sequence whose to_regex() is reached is the
        first one in the sequence (there may be other sequences). This is
        necessary because the parenthesis wrapping and regex legality
        depend on the whole flagging expression.
        """
        # NOTE(review): the PR author remarked that if the validations were
        # dropped, "there is always the option to simply compile" (the
        # remark is truncated in the source) - presumably meaning the regex
        # engine would reject illegal combinations; confirm before relying
        # on it.
        return self.flag_sub_expression(flavor, capture_names, wrap)

    def flag_sub_expression(self, flavor, capture_names, wrap):
        """Merge the chain of nested flags starting at self into one group.

        Returns the string ``(?<set>-<unset>:<body>)`` where ``<body>`` is
        the regex of the first non-``InlineFlag`` descendant. Raises
        ``CompileError`` when any merged flag fails one of the validations.
        """
        # A dict rather than a list so that a later declaration of the same
        # flag overrides an earlier one - for example
        # 'ignore_case ignore_case:unset' gives '(?-i:)' rather than
        # '(?i-i:)' (this behaviour might be replaced with an error in the
        # future). Overriding keeps the position of the first declaration.
        true_flags = {self.flag_character: self}
        curr = self.sub
        while isinstance(curr, InlineFlag):
            true_flags[curr.flag_character] = curr
            curr = curr.sub

        setting = []
        unsetting = []
        got_one_incompatible = False
        for flag in true_flags.values():
            self.validate_parameter_is_valid(flag)
            self.validate_unset_flag_is_unsettable(flag)
            self.validate_bytes_like_against_unicode_or_locale_only(flag)
            got_one_incompatible = self.validate_no_incompatibles_together(
                flag, got_one_incompatible
            )
            target = unsetting if flag.name == InlineFlag.UNSET else setting
            target.append(flag.flag_character)

        setting_str = "".join(setting)
        unsetting_str = "-" + "".join(unsetting) if unsetting else ""
        body = curr.to_regex(flavor, capture_names, wrap)
        return f"(?{setting_str}{unsetting_str}:{body})"

    @staticmethod
    def _display_name(flag_character):
        """Return the kleenexp name of a flag letter, or the letter itself."""
        # Fall back to the raw letter so an unknown flag still produces a
        # CompileError message instead of a KeyError.
        return InlineFlag.regex_to_kleenexp.get(flag_character, flag_character)

    def validate_parameter_is_valid(self, flag):
        """Raise CompileError unless flag.name is None or InlineFlag.UNSET."""
        if flag.name is not None and flag.name != InlineFlag.UNSET:
            # Report the flag being validated, not the head of the chain.
            raise CompileError(
                "Unrecognized token passed to flag: "
                f"{self._display_name(flag.flag_character)} does not accept {flag.name}"
            )

    def validate_unset_flag_is_unsettable(self, flag):
        """Raise CompileError when flag is unset but is not in UNSETTABLE."""
        if flag.name != InlineFlag.UNSET:
            return
        if flag.flag_character not in InlineFlag.UNSETTABLE:
            # Report the flag being validated, not the head of the chain.
            raise CompileError(
                "Unsetting not supported for this flag: "
                f"{self._display_name(flag.flag_character)}"
            )

    def validate_bytes_like_against_unicode_or_locale_only(self, flag):
        """Raise CompileError when flag contradicts PATTERN_IS_BYTES_LIKE.

        The unicode flag is rejected for bytes patterns; the
        locale_dependent flag is rejected for non-bytes patterns.
        """
        if InlineFlag.PATTERN_IS_BYTES_LIKE and flag.flag_character == InlineFlag.U:
            raise CompileError(
                f"Cannot use {self._display_name(flag.flag_character)} flag "
                "with a bytes pattern"
            )
        if not InlineFlag.PATTERN_IS_BYTES_LIKE and flag.flag_character == InlineFlag.L:
            raise CompileError(
                f"Can only use {self._display_name(flag.flag_character)} flag "
                "with a bytes pattern"
            )

    def validate_no_incompatibles_together(self, flag, got_one_incompatible):
        """Track whether a flag from INCOMPATIBLE has been seen so far.

        Raises CompileError if flag is a second incompatible flag;
        otherwise returns True once any incompatible flag has been seen
        (including on earlier calls), and False before that.
        """
        if flag.flag_character not in InlineFlag.INCOMPATIBLE:
            # Carry the state forward: returning False here would forget an
            # earlier incompatible flag, so e.g. 'a', 'i', 'u' would pass.
            return got_one_incompatible
        if got_one_incompatible:
            raise CompileError(
                f"{InlineFlag.regex_to_kleenexp[InlineFlag.A]}, "
                f"{InlineFlag.regex_to_kleenexp[InlineFlag.L]} and "
                f"{InlineFlag.regex_to_kleenexp[InlineFlag.U]} are "
                "incompatible and cannot be used in the same flag "
                "setting expression"
            )
        return True

    def is_empty(self):
        """Return whether the wrapped sub-expression is empty."""
        return self.sub.is_empty()
|
|
||
|
|
||
| class Setting(namedtuple("Setting", ["setting", "sub"]), Asm): | ||
| def to_regex(self, flavor, capture_names, wrap=False): | ||
| if not self.setting: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ | |
| START_LINE, | ||
| START_STRING, | ||
| WORD_BOUNDARY, | ||
| InlineFlag, | ||
| ) | ||
| from ke._errors import CompileError | ||
|
|
||
|
|
@@ -125,3 +126,113 @@ def test_boundary(): | |
| assert assemble(START_LINE) == r"^" | ||
| assert assemble(START_STRING) == r"\A" | ||
| assert assemble(WORD_BOUNDARY) == r"\b" | ||
|
|
||
|
|
||
def test_inline_flags():
    """Check assembling of InlineFlag chains in str and bytes-like modes.

    InlineFlag.PATTERN_IS_BYTES_LIKE is class-level state, so it is restored
    in a ``finally`` clause; otherwise a failing assertion would leak the
    modified value into the tests that run afterwards.
    """

    # The PR author noted that pre-commit reformatted the originally
    # one-call-per-line nested InlineFlag(...) expressions into very
    # line-heavy trees; building the nesting from flat tuples avoids that.
    def chain(*specs):
        """Nest (flag_character, name) pairs around Literal("test"), outermost first."""
        node = Literal("test")
        for flag_character, name in reversed(specs):
            node = InlineFlag(flag_character, name, node)
        return node

    unset = "unset"

    # (pattern is bytes-like, flag chain, expected regex)
    accepted = [
        (False, [("a", None)], r"(?a:test)"),
        (True, [("L", None)], r"(?L:test)"),
        (False, [("u", None)], r"(?u:test)"),
        (False, [("i", None)], r"(?i:test)"),
        (False, [("m", None)], r"(?m:test)"),
        (False, [("s", None)], r"(?s:test)"),
        (False, [("i", unset)], r"(?-i:test)"),
        (False, [("m", unset)], r"(?-m:test)"),
        (False, [("s", unset)], r"(?-s:test)"),
        # Repeating a flag collapses into a single occurrence.
        (False, [("s", None), ("s", None)], r"(?s:test)"),
        (False, [("s", unset), ("s", unset)], r"(?-s:test)"),
        (True, [("L", None), ("L", None)], r"(?L:test)"),
        # A later declaration of a flag overrides an earlier one.
        (
            False,
            [
                ("u", None),
                ("i", None),
                ("m", None),
                ("s", None),
                ("i", unset),
                ("m", unset),
                ("s", unset),
            ],
            r"(?u-ims:test)",
        ),
        (
            False,
            [
                ("i", unset),
                ("a", None),
                ("m", unset),
                ("m", None),
                ("i", None),
                ("s", None),
                ("s", unset),
            ],
            r"(?iam-s:test)",
        ),
    ]

    # (pattern is bytes-like, flag chain) - each must raise CompileError.
    rejected = [
        # a, L and u cannot be unset.
        (False, [("a", unset)]),
        (True, [("L", unset)]),
        (False, [("u", unset)]),
        # Only None and "unset" are valid parameters.
        (False, [("i", "garbage-nothing")]),
        (False, [("i", "")]),
        # a, L and u cannot be combined (these chains also contain L in a
        # non-bytes pattern where applicable).
        (False, [("a", None), ("L", None)]),
        (False, [("L", None), ("u", None)]),
        (False, [("u", None), ("a", None)]),
    ]

    save_PATTERN_IS_BYTES_LIKE = InlineFlag.PATTERN_IS_BYTES_LIKE
    try:
        for bytes_like, specs, expected in accepted:
            InlineFlag.PATTERN_IS_BYTES_LIKE = bytes_like
            assert assemble(chain(*specs)) == expected, specs
        for bytes_like, specs in rejected:
            InlineFlag.PATTERN_IS_BYTES_LIKE = bytes_like
            with pytest.raises(CompileError):
                assemble(chain(*specs))
    finally:
        InlineFlag.PATTERN_IS_BYTES_LIKE = save_PATTERN_IS_BYTES_LIKE
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might be better placed in some global values-per-compilation dictionary somewhere.