Skip to content

Commit fd00e60

Browse files
authored
Extract: Merge in per-format keywords and auto_comments (#1243)
* Merge in per-format keywords and auto_comments * Update documentation for extraction configuration Fixes #1224 Fixes #71
1 parent 12a14b6 commit fd00e60

File tree

8 files changed

+313
-18
lines changed

8 files changed

+313
-18
lines changed

babel/messages/extract.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,13 +316,31 @@ def check_and_call_extract_file(
316316
if pathmatch(opattern, filename):
317317
options = odict
318318
break
319+
320+
# Merge keywords and comment_tags from per-format options if present.
321+
file_keywords = keywords
322+
file_comment_tags = comment_tags
323+
if keywords_opt := options.get("keywords"):
324+
if not isinstance(keywords_opt, dict): # pragma: no cover
325+
raise TypeError(
326+
f"The `keywords` option must be a dict of parsed keywords, not {keywords_opt!r}",
327+
)
328+
file_keywords = {**keywords, **keywords_opt}
329+
330+
if comments_opt := options.get("add_comments"):
331+
if not isinstance(comments_opt, (list, tuple, set)): # pragma: no cover
332+
raise TypeError(
333+
f"The `add_comments` option must be a collection of comment tags, not {comments_opt!r}.",
334+
)
335+
file_comment_tags = tuple(set(comment_tags) | set(comments_opt))
336+
319337
if callback:
320338
callback(filename, method, options)
321339
for message_tuple in extract_from_file(
322340
method,
323341
filepath,
324-
keywords=keywords,
325-
comment_tags=comment_tags,
342+
keywords=file_keywords,
343+
comment_tags=file_comment_tags,
326344
options=options,
327345
strip_comment_tags=strip_comment_tags,
328346
):

babel/messages/frontend.py

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import warnings
2424
from configparser import RawConfigParser
2525
from io import StringIO
26-
from typing import BinaryIO, Iterable, Literal
26+
from typing import Any, BinaryIO, Iterable, Literal
2727

2828
from babel import Locale, localedata
2929
from babel import __version__ as VERSION
@@ -584,7 +584,7 @@ def _get_mappings(self):
584584
method_map, options_map = [], {}
585585
for pattern, method, options in mapping:
586586
method_map.append((pattern, method))
587-
options_map[pattern] = options or {}
587+
options_map[pattern] = _parse_string_options(options or {})
588588
mappings.append((path, method_map, options_map))
589589

590590
else:
@@ -1075,7 +1075,7 @@ def parse_mapping_cfg(fileobj, filename=None):
10751075
else:
10761076
method, pattern = (part.strip() for part in section.split(':', 1))
10771077
method_map.append((pattern, method))
1078-
options_map[pattern] = dict(parser.items(section))
1078+
options_map[pattern] = _parse_string_options(dict(parser.items(section)))
10791079

10801080
if extractors:
10811081
for idx, (pattern, method) in enumerate(method_map):
@@ -1086,6 +1086,25 @@ def parse_mapping_cfg(fileobj, filename=None):
10861086
return method_map, options_map
10871087

10881088

1089+
def _parse_string_options(options: dict[str, str]) -> dict[str, Any]:
1090+
"""
1091+
Parse string-formatted options from a mapping configuration.
1092+
1093+
The `keywords` and `add_comments` options are parsed into a canonical
1094+
internal format, so they can be merged with global keywords/comment tags
1095+
during extraction.
1096+
"""
1097+
options: dict[str, Any] = options.copy()
1098+
1099+
if keywords_val := options.pop("keywords", None):
1100+
options['keywords'] = parse_keywords(listify_value(keywords_val))
1101+
1102+
if comments_val := options.pop("add_comments", None):
1103+
options['add_comments'] = listify_value(comments_val)
1104+
1105+
return options
1106+
1107+
10891108
def _parse_config_object(config: dict, *, filename="(unknown)"):
10901109
extractors = {}
10911110
method_map = []
@@ -1140,6 +1159,26 @@ def _parse_config_object(config: dict, *, filename="(unknown)"):
11401159
if not isinstance(pattern, list):
11411160
pattern = [pattern]
11421161

1162+
if keywords_val := entry.pop("keywords", None):
1163+
if isinstance(keywords_val, str):
1164+
entry["keywords"] = parse_keywords(listify_value(keywords_val))
1165+
elif isinstance(keywords_val, list):
1166+
entry["keywords"] = parse_keywords(keywords_val)
1167+
else:
1168+
raise ConfigurationError(
1169+
f"{filename}: mappings[{idx}]: 'keywords' must be a string or list, got {keywords_val!r}",
1170+
)
1171+
1172+
if comments_val := entry.pop("add_comments", None):
1173+
if isinstance(comments_val, str):
1174+
entry["add_comments"] = [comments_val]
1175+
elif isinstance(comments_val, list):
1176+
entry["add_comments"] = comments_val
1177+
else:
1178+
raise ConfigurationError(
1179+
f"{filename}: mappings[{idx}]: 'add_comments' must be a string or list, got {comments_val!r}",
1180+
)
1181+
11431182
for pat in pattern:
11441183
if not isinstance(pat, str):
11451184
raise ConfigurationError(

docs/messages.rst

Lines changed: 123 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,6 @@ Genshi markup templates and text templates:
139139
[javascript: **.js]
140140
extract_messages = $._, jQuery._
141141
142-
The configuration file syntax is based on the format commonly found in ``.INI``
143-
files on Windows systems, and as supported by the ``ConfigParser`` module in
144-
the Python standard library. Section names (the strings enclosed in square
145-
brackets) specify both the name of the extraction method, and the extended glob
146-
pattern to specify the files that this extraction method should be used for,
147-
separated by a colon. The options in the sections are passed to the extraction
148-
method. Which options are available is specific to the extraction method used.
149-
150142
The extended glob patterns used in this configuration are similar to the glob
151143
patterns provided by most shells. A single asterisk (``*``) is a wildcard for
152144
any number of characters (except for the pathname component separator "/"),
@@ -155,9 +147,132 @@ two subsequent asterisk characters (``**``) can be used to make the wildcard
155147
match any directory level, so the pattern ``**.txt`` matches any file with the
156148
extension ``.txt`` in any directory.
157149

150+
Babel supports two configuration file formats: INI and TOML.
151+
152+
INI Configuration Format
153+
^^^^^^^^^^^^^^^^^^^^^^^^
154+
155+
The INI configuration file syntax is based on the format commonly found in ``.INI``
156+
files on Windows systems, and as supported by the ``ConfigParser`` module in
157+
the Python standard library. Section names (the strings enclosed in square
158+
brackets) specify both the name of the extraction method, and the extended glob
159+
pattern to specify the files that this extraction method should be used for,
160+
separated by a colon. The options in the sections are passed to the extraction
161+
method. Which options are available is specific to the extraction method used.
162+
158163
Lines that start with a ``#`` or ``;`` character are ignored and can be used
159164
for comments. Empty lines are ignored, too.
160165

166+
TOML Configuration Format
167+
^^^^^^^^^^^^^^^^^^^^^^^^^^
168+
169+
Babel also supports TOML format for configuration files, when the ``tomllib``
170+
module is available (Python 3.11+), or when the ``tomli`` package is installed
171+
(for Python versions prior to 3.11).
172+
173+
TOML provides a more structured format and is particularly useful when combined
174+
with ``pyproject.toml``.
175+
176+
The same configuration examples shown above can be written in TOML format:
177+
178+
.. code-block:: toml
179+
180+
# Extraction from Python source files
181+
[[mappings]]
182+
method = "python"
183+
pattern = "**.py"
184+
185+
# Extraction from Genshi HTML and text templates
186+
[[mappings]]
187+
method = "genshi"
188+
pattern = "**/templates/**.html"
189+
ignore_tags = "script,style"
190+
include_attrs = "alt title summary"
191+
192+
[[mappings]]
193+
method = "genshi"
194+
pattern = "**/templates/**.txt"
195+
template_class = "genshi.template:TextTemplate"
196+
encoding = "ISO-8859-15"
197+
198+
# Extraction from JavaScript files
199+
[[mappings]]
200+
method = "javascript"
201+
pattern = "**.js"
202+
extract_messages = "$._, jQuery._"
203+
204+
In TOML format, each ``[[mappings]]`` section defines a mapping. The ``method``
205+
and ``pattern`` fields are required. The ``pattern`` field can be a string or
206+
an array of strings to match multiple patterns with the same configuration.
207+
208+
If you're using ``pyproject.toml``, nest the configuration under ``[tool.babel]``:
209+
210+
.. code-block:: toml
211+
212+
[tool.babel]
213+
[[tool.babel.mappings]]
214+
method = "python"
215+
pattern = "**.py"
216+
217+
You can reference custom extractors in both formats. In TOML:
218+
219+
.. code-block:: toml
220+
221+
[extractors]
222+
custom = "mypackage.module:extract_custom"
223+
224+
[[mappings]]
225+
method = "custom"
226+
pattern = "**.ctm"
227+
some_option = "foo"
228+
229+
Common Options
230+
^^^^^^^^^^^^^^
231+
232+
In addition to extractor-specific options, the following options can be specified
233+
in any mapping section and will be merged with global settings:
234+
235+
``keywords``
236+
A list of keywords (function names) to extract messages from.
237+
This uses the same syntax as the ``--keyword`` command-line option.
238+
Keywords specified here are added to (rather than replacing) the default keywords or
239+
those specified via command-line.
240+
241+
In INI format, whitespace-separated: ``keywords = _ gettext ngettext:1,2 pgettext:1c,2``
242+
243+
In TOML format, use either a whitespace-separated string or an array:
244+
``keywords = "_ gettext ngettext:1,2"`` or
245+
``keywords = ["_", "gettext", "ngettext:1,2"]``
246+
247+
``add_comments``
248+
A list of comment tag prefixes to extract and include in the
249+
output. This uses the same syntax as the ``--add-comments`` command-line option.
250+
Comment tags specified here are added to those specified via command-line.
251+
252+
In INI format, whitespace-separated: ``add_comments = TRANSLATOR: NOTE:``
253+
254+
In TOML format, use either a string or an array:
255+
``add_comments = "TRANSLATOR NOTE:"`` (a string is taken as one single tag and is not split on whitespace) or
256+
``add_comments = ["TRANSLATOR:", "NOTE:"]``
257+
258+
**Example in INI format:**
259+
260+
.. code-block:: ini
261+
262+
[python: **.py]
263+
keywords = _ _l _n:1,2
264+
add_comments = TRANSLATOR:
265+
266+
**Example in TOML format:**
267+
268+
.. code-block:: toml
269+
270+
[[mappings]]
271+
method = "python"
272+
pattern = "**.py"
273+
keywords = ["_", "_l", "_n:1,2"]
274+
add_comments = ["TRANSLATOR:"]
275+
161276
.. note:: if you're performing message extraction using the command Babel
162277
provides for integration into ``setup.py`` scripts, you can also
163278
provide this configuration in a different way, namely as a keyword
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Test mapping file with keywords option (issue #1224)
2+
3+
[python: **.py]
4+
encoding = utf-8
5+
keywords = _ _l _n:1,2 _nl:1,2 _p:1c,2 _pl:1c,2 _np:1c,2,3 _npl:1c,2,3
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Test mapping file with keywords and add_comments options (issue #1224)
2+
3+
[[mappings]]
4+
method = "python"
5+
pattern = "**.py"
6+
encoding = "utf-8"
7+
keywords = ["_", "_l", "_n:1,2"]
8+
add_comments = ["SPECIAL:"]
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from myproject.i18n import lazy_gettext as _l, lazy_ngettext as _n
2+
3+
4+
class Choices:
5+
# SPECIAL: This comment should be extracted
6+
CHOICE_X = 1, _l("Choice X")
7+
# SPECIAL: Another special comment
8+
CHOICE_Y = 2, _l("Choice Y")
9+
# No comment...
10+
OPTION_C = 3, _l("Option C")
11+
# Test for _n too! (but no comment... shush...)
12+
OPTION_A = 4, (_n("Option A", "Options of the A kind", 1))

tests/messages/frontend/test_extract.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,3 +281,54 @@ def test_extraction_add_location_file(extract_cmd, pot_file):
281281
282282
"""
283283
assert expected_content == pot_file.read_text()
284+
285+
286+
def test_extraction_with_mapping_file_with_keywords(extract_cmd, pot_file):
    """
    Keywords listed in an INI mapping file must be parsed and merged with
    the default keywords when extracting.
    """
    extract_cmd.mapping_file = 'mapping_with_keywords.cfg'
    extract_cmd.output_file = pot_file
    extract_cmd.input_paths = 'project'

    extract_cmd.finalize_options()
    extract_cmd.run()

    with pot_file.open() as pot_fp:
        catalog = read_po(pot_fp)

    plural_msgid = 'Option A'
    expected_msgids = ('bar', 'Choice X', 'Choice Y', 'Option C', 'Option A')
    for msgid in expected_msgids:
        message = catalog[msgid]
        # This mapping file sets no `add_comments`, so SPECIAL: comments
        # must not have been picked up.
        assert not message.auto_comments
        is_plural = msgid == plural_msgid
        assert message.pluralizable == is_plural
305+
306+
307+
def test_extraction_with_mapping_file_with_comments(extract_cmd, pot_file):
    """
    `add_comments` given in a mapping config file must be parsed and applied.
    The mapping file is TOML so that the TOML code path is exercised.
    """
    extract_cmd.mapping_file = 'mapping_with_keywords_and_comments.toml'
    extract_cmd.output_file = pot_file
    extract_cmd.input_paths = 'project/issue_1224_test.py'

    extract_cmd.finalize_options()
    extract_cmd.run()

    with pot_file.open() as pot_fp:
        catalog = read_po(pot_fp)

    # msgid -> text expected inside its SPECIAL: comment (None: no comment).
    expectations = {
        'Choice X': 'extracted',
        'Choice Y': 'special',
        'Option C': None,
        'Option A': None,
    }
    for msgid, fragment in expectations.items():
        message = catalog[msgid]
        if not fragment:
            assert not message.auto_comments
        else:
            assert any(
                'SPECIAL' in comment and fragment in comment
                for comment in message.auto_comments
            )
        assert message.pluralizable == (msgid == 'Option A')

0 commit comments

Comments
 (0)