https://github.com/python/cpython/commit/44213bc57c0a4b674463e170ad9d80896d866a64 commit: 44213bc57c0a4b674463e170ad9d80896d866a64 branch: main author: Tomas R. <tomas.ro...@gmail.com> committer: serhiy-storchaka <storch...@gmail.com> date: 2025-02-25T12:10:54+02:00 summary:
gh-130453: pygettext: Extend support for specifying custom keywords (GH-130463) files: A Lib/test/test_tools/i18n_data/custom_keywords.pot A Lib/test/test_tools/i18n_data/custom_keywords.py A Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst M Lib/test/test_tools/test_i18n.py M Tools/i18n/pygettext.py diff --git a/Lib/test/test_tools/i18n_data/custom_keywords.pot b/Lib/test/test_tools/i18n_data/custom_keywords.pot new file mode 100644 index 00000000000000..48df2e7f579cc7 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/custom_keywords.pot @@ -0,0 +1,45 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <l...@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: custom_keywords.py:9 custom_keywords.py:10 +msgid "bar" +msgstr "" + +#: custom_keywords.py:12 +msgid "cat" +msgid_plural "cats" +msgstr[0] "" +msgstr[1] "" + +#: custom_keywords.py:13 +msgid "dog" +msgid_plural "dogs" +msgstr[0] "" +msgstr[1] "" + +#: custom_keywords.py:15 +msgctxt "context" +msgid "bar" +msgstr "" + +#: custom_keywords.py:17 +msgctxt "context" +msgid "cat" +msgid_plural "cats" +msgstr[0] "" +msgstr[1] "" + diff --git a/Lib/test/test_tools/i18n_data/custom_keywords.py b/Lib/test/test_tools/i18n_data/custom_keywords.py new file mode 100644 index 00000000000000..01ea56c348cb55 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/custom_keywords.py @@ -0,0 +1,30 @@ +from gettext import ( + gettext as foo, + ngettext as nfoo, + pgettext as pfoo, + npgettext as npfoo, + gettext as bar, +) + +foo('bar') +foo('bar', 'baz') + +nfoo('cat', 'cats', 1) +nfoo('dog', 'dogs') + +pfoo('context', 'bar') + +npfoo('context', 'cat', 'cats', 1) + +# This is an unknown keyword and should be ignored +bar('baz') + +# 'nfoo' requires at least 2 arguments +nfoo('dog') + +# 'pfoo' requires at least 2 arguments +pfoo('context') + +# 'npfoo' requires at least 3 arguments +npfoo('context') +npfoo('context', 'cat') diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index d97fdb116fcd19..d73fcff4c9cb11 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -8,7 +8,7 @@ from pathlib import Path from test.support.script_helper import assert_python_ok -from test.test_tools import skip_if_missing, toolsdir +from test.test_tools import imports_under_tool, skip_if_missing, toolsdir from test.support.os_helper import temp_cwd, temp_dir @@ -17,6 +17,10 @@ DATA_DIR = Path(__file__).resolve().parent / 'i18n_data' +with imports_under_tool("i18n"): + from pygettext import parse_spec + + def normalize_POT_file(pot): """Normalize the POT creation timestamp, charset and file locations to make the POT file easier to compare. @@ -377,16 +381,8 @@ class _(object): def test_pygettext_output(self): """Test that the pygettext output exactly matches snapshots.""" - for input_file in DATA_DIR.glob('*.py'): - output_file = input_file.with_suffix('.pot') - with self.subTest(input_file=f'i18n_data/{input_file}'): - contents = input_file.read_text(encoding='utf-8') - with temp_cwd(None): - Path(input_file.name).write_text(contents) - assert_python_ok('-Xutf8', self.script, '--docstrings', - '--add-comments=i18n:', input_file.name) - output = Path('messages.pot').read_text(encoding='utf-8') - + for input_file, output_file, output in extract_from_snapshots(): + with self.subTest(input_file=input_file): expected = output_file.read_text(encoding='utf-8') self.assert_POT_equal(expected, output) @@ -485,17 +481,67 @@ def test_comments_not_extracted_without_tags(self): '''), raw=True) self.assertNotIn('#.', data) - -def update_POT_snapshots(): - for input_file in DATA_DIR.glob('*.py'): + def test_parse_keyword_spec(self): + valid = ( + ('foo', ('foo', {0: 'msgid'})), + ('foo:1', ('foo', {0: 'msgid'})), + ('foo:1,2', ('foo', {0: 'msgid', 1: 'msgid_plural'})), + ('foo:1, 2', ('foo', {0: 'msgid', 1: 'msgid_plural'})), + ('foo:1,2c', ('foo', {0: 'msgid', 1: 'msgctxt'})), + ('foo:2c,1', ('foo', {0: 'msgid', 1: 'msgctxt'})), + ('foo:2c ,1', ('foo', {0: 'msgid', 1: 'msgctxt'})), + ('foo:1,2,3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})), + ('foo:1, 2, 3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})), + ('foo:3c,1,2', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})), + ) + for spec, expected in valid: + with self.subTest(spec=spec): + self.assertEqual(parse_spec(spec), expected) + + invalid = ( + ('foo:', "Invalid keyword spec 'foo:': missing argument positions"), + ('foo:bar', "Invalid keyword spec 'foo:bar': position is not an integer"), + ('foo:0', "Invalid keyword spec 'foo:0': argument positions must be strictly positive"), + ('foo:-2', "Invalid keyword spec 'foo:-2': argument positions must be strictly positive"), + ('foo:1,1', "Invalid keyword spec 'foo:1,1': duplicate positions"), + ('foo:1,2,1', "Invalid keyword spec 'foo:1,2,1': duplicate positions"), + ('foo:1c,2,1c', "Invalid keyword spec 'foo:1c,2,1c': duplicate positions"), + ('foo:1c,2,3c', "Invalid keyword spec 'foo:1c,2,3c': msgctxt can only appear once"), + ('foo:1,2,3', "Invalid keyword spec 'foo:1,2,3': too many positions"), + ('foo:1c', "Invalid keyword spec 'foo:1c': msgctxt cannot appear without msgid"), + ) + for spec, message in invalid: + with self.subTest(spec=spec): + with self.assertRaises(ValueError) as cm: + parse_spec(spec) + self.assertEqual(str(cm.exception), message) + + +def extract_from_snapshots(): + snapshots = { + 'messages.py': (), + 'fileloc.py': ('--docstrings',), + 'docstrings.py': ('--docstrings',), + 'comments.py': ('--add-comments=i18n:',), + 'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2', + '--keyword=pfoo:1c,2', + '--keyword=npfoo:1c,2,3'), + } + + for filename, args in snapshots.items(): + input_file = DATA_DIR / filename output_file = input_file.with_suffix('.pot') contents = input_file.read_bytes() with temp_cwd(None): Path(input_file.name).write_bytes(contents) - assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', - '--add-comments=i18n:', input_file.name) - output = Path('messages.pot').read_text(encoding='utf-8') + assert_python_ok('-Xutf8', Test_pygettext.script, *args, + input_file.name) + yield (input_file, output_file, + Path('messages.pot').read_text(encoding='utf-8')) + +def update_POT_snapshots(): + for _, output_file, output in extract_from_snapshots(): output = normalize_POT_file(output) output_file.write_text(output, encoding='utf-8') diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst new file mode 100644 index 00000000000000..cb7b3d4cbdc8e1 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst @@ -0,0 +1 @@ +Extend support for specifying custom keywords in :program:`pygettext`. diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 4681c84387958e..0f5f32c7d6c18f 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -294,6 +294,88 @@ def getFilesForName(name): } +def parse_spec(spec): + """Parse a keyword spec string into a dictionary. + + The keyword spec format defines the name of the gettext function and the + positions of the arguments that correspond to msgid, msgid_plural, and + msgctxt. The format is as follows: + + name - the name of the gettext function, assumed to + have a single argument that is the msgid. + name:pos1 - the name of the gettext function and the position + of the msgid argument. + name:pos1,pos2 - the name of the gettext function and the positions + of the msgid and msgid_plural arguments. + name:pos1,pos2c - the name of the gettext function and the positions + of the msgid and msgctxt arguments. + name:pos1,pos2,pos3c - the name of the gettext function and the + positions of the msgid, msgid_plural, and + msgctxt arguments. + + As an example, the spec 'foo:1,2,3c' means that the function foo has three + arguments, the first one is the msgid, the second one is the msgid_plural, + and the third one is the msgctxt. The positions are 1-based. + + The msgctxt argument can appear in any position, but it can only appear + once. For example, the keyword specs 'foo:3c,1,2' and 'foo:1,2,3c' are + equivalent. + + See https://www.gnu.org/software/gettext/manual/gettext.html + for more information. + """ + parts = spec.strip().split(':', 1) + if len(parts) == 1: + name = parts[0] + return name, {0: 'msgid'} + + name, args = parts + if not args: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'missing argument positions') + + result = {} + for arg in args.split(','): + arg = arg.strip() + is_context = False + if arg.endswith('c'): + is_context = True + arg = arg[:-1] + + try: + pos = int(arg) - 1 + except ValueError as e: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'position is not an integer') from e + + if pos < 0: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'argument positions must be strictly positive') + + if pos in result.values(): + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'duplicate positions') + + if is_context: + if 'msgctxt' in result: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'msgctxt can only appear once') + result['msgctxt'] = pos + elif 'msgid' not in result: + result['msgid'] = pos + elif 'msgid_plural' not in result: + result['msgid_plural'] = pos + else: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'too many positions') + + if 'msgid' not in result and 'msgctxt' in result: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'msgctxt cannot appear without msgid') + + return name, {v: k for k, v in result.items()} + + @dataclass(frozen=True) class Location: filename: str @@ -646,7 +728,11 @@ class Options: make_escapes(not options.escape) # calculate all keywords - options.keywords = {kw: {0: 'msgid'} for kw in options.keywords} + try: + options.keywords = dict(parse_spec(spec) for spec in options.keywords) + except ValueError as e: + print(e, file=sys.stderr) + sys.exit(1) if not no_default_keywords: options.keywords |= DEFAULTKEYWORDS _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com