[Python-checkins] gh-130453: pygettext: Allow specifying multiple keywords with the same function name (GH-131380)

serhiy-storchaka Thu, 10 Apr 2025 04:07:10 -0700

https://github.com/python/cpython/commit/b6760b7fa5727d3c1f56dcc84e25c6b0efdc80a4
commit: b6760b7fa5727d3c1f56dcc84e25c6b0efdc80a4
branch: main
author: Tomas R. <tomas.ro...@gmail.com>
committer: serhiy-storchaka <storch...@gmail.com>
date: 2025-04-10T11:06:40Z
summary:


gh-130453: pygettext: Allow specifying multiple keywords with the same function name (GH-131380)

files:
A Lib/test/test_tools/i18n_data/multiple_keywords.pot
A Lib/test/test_tools/i18n_data/multiple_keywords.py
A Misc/NEWS.d/next/Tools-Demos/2025-03-10-08-19-22.gh-issue-130453.9B0x8k.rst
M Lib/test/test_tools/test_i18n.py
M Tools/i18n/pygettext.py

diff --git a/Lib/test/test_tools/i18n_data/multiple_keywords.pot b/Lib/test/test_tools/i18n_data/multiple_keywords.pot
new file mode 100644
index 00000000000000..954cb8e994838a
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/multiple_keywords.pot
@@ -0,0 +1,38 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <l...@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#: multiple_keywords.py:3
+msgid "bar"
+msgstr ""
+
+#: multiple_keywords.py:5
+msgctxt "baz"
+msgid "qux"
+msgstr ""
+
+#: multiple_keywords.py:9
+msgctxt "corge"
+msgid "grault"
+msgstr ""
+
+#: multiple_keywords.py:11
+msgctxt "xyzzy"
+msgid "foo"
+msgid_plural "foos"
+msgstr[0] ""
+msgstr[1] ""
+
diff --git a/Lib/test/test_tools/i18n_data/multiple_keywords.py b/Lib/test/test_tools/i18n_data/multiple_keywords.py
new file mode 100644
index 00000000000000..7bde349505b839
--- /dev/null
+++ b/Lib/test/test_tools/i18n_data/multiple_keywords.py
@@ -0,0 +1,11 @@
+from gettext import gettext as foo
+
+foo('bar')
+
+foo('baz', 'qux')
+
+# The 't' specifier is not supported, so the following
+# call is extracted as pgettext instead of ngettext.
+foo('corge', 'grault', 1)
+
+foo('xyzzy', 'foo', 'foos', 1)
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
index 66c33077423229..8416b1bad825eb 100644
--- a/Lib/test/test_tools/test_i18n.py
+++ b/Lib/test/test_tools/test_i18n.py
@@ -18,7 +18,8 @@
 
 
 with imports_under_tool("i18n"):
-    from pygettext import parse_spec
+    from pygettext import (parse_spec, process_keywords, DEFAULTKEYWORDS,
+                           unparse_spec)
 
 
 def normalize_POT_file(pot):
@@ -483,20 +484,22 @@ def test_comments_not_extracted_without_tags(self):
 
     def test_parse_keyword_spec(self):
         valid = (
-            ('foo', ('foo', {0: 'msgid'})),
-            ('foo:1', ('foo', {0: 'msgid'})),
-            ('foo:1,2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
-            ('foo:1, 2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
-            ('foo:1,2c', ('foo', {0: 'msgid', 1: 'msgctxt'})),
-            ('foo:2c,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
-            ('foo:2c ,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
-            ('foo:1,2,3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
-            ('foo:1, 2, 3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
-            ('foo:3c,1,2', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
+            ('foo', ('foo', {'msgid': 0})),
+            ('foo:1', ('foo', {'msgid': 0})),
+            ('foo:1,2', ('foo', {'msgid': 0, 'msgid_plural': 1})),
+            ('foo:1, 2', ('foo', {'msgid': 0, 'msgid_plural': 1})),
+            ('foo:1,2c', ('foo', {'msgid': 0, 'msgctxt': 1})),
+            ('foo:2c,1', ('foo', {'msgid': 0, 'msgctxt': 1})),
+            ('foo:2c ,1', ('foo', {'msgid': 0, 'msgctxt': 1})),
+            ('foo:1,2,3c', ('foo', {'msgid': 0, 'msgid_plural': 1, 'msgctxt': 2})),
+            ('foo:1, 2, 3c', ('foo', {'msgid': 0, 'msgid_plural': 1, 'msgctxt': 2})),
+            ('foo:3c,1,2', ('foo', {'msgid': 0, 'msgid_plural': 1, 'msgctxt': 2})),
         )
         for spec, expected in valid:
             with self.subTest(spec=spec):
                 self.assertEqual(parse_spec(spec), expected)
+                # test unparse-parse round-trip
+                self.assertEqual(parse_spec(unparse_spec(*expected)), expected)
 
         invalid = (
             ('foo:', "Invalid keyword spec 'foo:': missing argument positions"),
@@ -516,6 +519,70 @@ def test_parse_keyword_spec(self):
                     parse_spec(spec)
                 self.assertEqual(str(cm.exception), message)
 
+    def test_process_keywords(self):
+        default_keywords = {name: [spec] for name, spec
+                            in DEFAULTKEYWORDS.items()}
+        inputs = (
+            (['foo'], True),
+            (['_:1,2'], True),
+            (['foo', 'foo:1,2'], True),
+            (['foo'], False),
+            (['_:1,2', '_:1c,2,3', 'pgettext'], False),
+            # Duplicate entries
+            (['foo', 'foo'], True),
+            (['_'], False)
+        )
+        expected = (
+            {'foo': [{'msgid': 0}]},
+            {'_': [{'msgid': 0, 'msgid_plural': 1}]},
+            {'foo': [{'msgid': 0}, {'msgid': 0, 'msgid_plural': 1}]},
+            default_keywords | {'foo': [{'msgid': 0}]},
+            default_keywords | {'_': [{'msgid': 0, 'msgid_plural': 1},
+                                      {'msgctxt': 0, 'msgid': 1, 'msgid_plural': 2},
+                                      {'msgid': 0}],
+                                'pgettext': [{'msgid': 0},
+                                             {'msgctxt': 0, 'msgid': 1}]},
+            {'foo': [{'msgid': 0}]},
+            default_keywords,
+        )
+        for (keywords, no_default_keywords), expected in zip(inputs, expected):
+            with self.subTest(keywords=keywords,
+                              no_default_keywords=no_default_keywords):
+                processed = process_keywords(
+                    keywords,
+                    no_default_keywords=no_default_keywords)
+                self.assertEqual(processed, expected)
+
+    def test_multiple_keywords_same_funcname_errors(self):
+        # If at least one keyword spec for a given funcname matches,
+        # no error should be printed.
+        msgids, stderr = self.extract_from_str(dedent('''\
+        _("foo", 42)
+        _(42, "bar")
+        '''), args=('--keyword=_:1', '--keyword=_:2'), with_stderr=True)
+        self.assertIn('foo', msgids)
+        self.assertIn('bar', msgids)
+        self.assertEqual(stderr, b'')
+
+        # If no keyword spec for a given funcname matches,
+        # all errors are printed.
+        msgids, stderr = self.extract_from_str(dedent('''\
+        _(x, 42)
+        _(42, y)
+        '''), args=('--keyword=_:1', '--keyword=_:2'), with_stderr=True,
+              strict=False)
+        self.assertEqual(msgids, [''])
+        # Normalize line endings on Windows
+        stderr = stderr.decode('utf-8').replace('\r', '')
+        self.assertEqual(
+            stderr,
+            '*** test.py:1: No keywords matched gettext call "_":\n'
+            '\tkeyword="_": Expected a string constant for argument 1, got x\n'
+            '\tkeyword="_:2": Expected a string constant for argument 2, got 42\n'
+            '*** test.py:2: No keywords matched gettext call "_":\n'
+            '\tkeyword="_": Expected a string constant for argument 1, got 42\n'
+            '\tkeyword="_:2": Expected a string constant for argument 2, got y\n')
+
 
 def extract_from_snapshots():
     snapshots = {
@@ -526,6 +593,10 @@ def extract_from_snapshots():
         'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
                                '--keyword=pfoo:1c,2',
                                '--keyword=npfoo:1c,2,3', '--keyword=_:1,2'),
+        'multiple_keywords.py': ('--keyword=foo:1c,2,3', '--keyword=foo:1c,2',
+                                 '--keyword=foo:1,2',
+                                 # repeat a keyword to make sure it is extracted only once
+                                 '--keyword=foo', '--keyword=foo'),
         # == Test character escaping
         # Escape ascii and unicode:
         'escapes.py': ('--escape', '--add-comments='),
diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-03-10-08-19-22.gh-issue-130453.9B0x8k.rst b/Misc/NEWS.d/next/Tools-Demos/2025-03-10-08-19-22.gh-issue-130453.9B0x8k.rst
new file mode 100644
index 00000000000000..fdab48a2f7b25c
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2025-03-10-08-19-22.gh-issue-130453.9B0x8k.rst
@@ -0,0 +1,2 @@
+Allow passing multiple keyword arguments with the same function name in
+:program:`pygettext`.
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index a4af1d2be82914..351b47a160e999 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -282,15 +282,15 @@ def getFilesForName(name):
 # Key is the function name, value is a dictionary mapping argument positions to the
 # type of the argument. The type is one of 'msgid', 'msgid_plural', or 'msgctxt'.
 DEFAULTKEYWORDS = {
-    '_': {0: 'msgid'},
-    'gettext': {0: 'msgid'},
-    'ngettext': {0: 'msgid', 1: 'msgid_plural'},
-    'pgettext': {0: 'msgctxt', 1: 'msgid'},
-    'npgettext': {0: 'msgctxt', 1: 'msgid', 2: 'msgid_plural'},
-    'dgettext': {1: 'msgid'},
-    'dngettext': {1: 'msgid', 2: 'msgid_plural'},
-    'dpgettext': {1: 'msgctxt', 2: 'msgid'},
-    'dnpgettext': {1: 'msgctxt', 2: 'msgid', 3: 'msgid_plural'},
+    '_': {'msgid': 0},
+    'gettext': {'msgid': 0},
+    'ngettext': {'msgid': 0, 'msgid_plural': 1},
+    'pgettext': {'msgctxt': 0, 'msgid': 1},
+    'npgettext': {'msgctxt': 0, 'msgid': 1, 'msgid_plural': 2},
+    'dgettext': {'msgid': 1},
+    'dngettext': {'msgid': 1, 'msgid_plural': 2},
+    'dpgettext': {'msgctxt': 1, 'msgid': 2},
+    'dnpgettext': {'msgctxt': 1, 'msgid': 2, 'msgid_plural': 3},
 }
 
 
@@ -327,7 +327,7 @@ def parse_spec(spec):
     parts = spec.strip().split(':', 1)
     if len(parts) == 1:
         name = parts[0]
-        return name, {0: 'msgid'}
+        return name, {'msgid': 0}
 
     name, args = parts
     if not args:
@@ -373,7 +373,41 @@ def parse_spec(spec):
         raise ValueError(f'Invalid keyword spec {spec!r}: '
                          'msgctxt cannot appear without msgid')
 
-    return name, {v: k for k, v in result.items()}
+    return name, result
+
+
+def unparse_spec(name, spec):
+    """Unparse a keyword spec dictionary into a string."""
+    if spec == {'msgid': 0}:
+        return name
+
+    parts = []
+    for arg, pos in sorted(spec.items(), key=lambda x: x[1]):
+        if arg == 'msgctxt':
+            parts.append(f'{pos + 1}c')
+        else:
+            parts.append(str(pos + 1))
+    return f'{name}:{','.join(parts)}'
+
+
+def process_keywords(keywords, *, no_default_keywords):
+    custom_keywords = {}
+    for spec in dict.fromkeys(keywords):
+        name, spec = parse_spec(spec)
+        if name not in custom_keywords:
+            custom_keywords[name] = []
+        custom_keywords[name].append(spec)
+
+    if no_default_keywords:
+        return custom_keywords
+
+    # custom keywords override default keywords
+    for name, spec in DEFAULTKEYWORDS.items():
+        if name not in custom_keywords:
+            custom_keywords[name] = []
+        if spec not in custom_keywords[name]:
+            custom_keywords[name].append(spec)
+    return custom_keywords
 
 
 @dataclass(frozen=True)
@@ -459,32 +493,53 @@ def _extract_docstring(self, node):
 
     def _extract_message(self, node):
         func_name = self._get_func_name(node)
-        spec = self.options.keywords.get(func_name)
-        if spec is None:
+        errors = []
+        specs = self.options.keywords.get(func_name, [])
+        for spec in specs:
+            err = self._extract_message_with_spec(node, spec)
+            if err is None:
+                return
+            errors.append(err)
+
+        if not errors:
             return
+        if len(errors) == 1:
+            print(f'*** {self.filename}:{node.lineno}: {errors[0]}',
+                  file=sys.stderr)
+        else:
+            # There are multiple keyword specs for the function name and
+            # none of them could be extracted. Print a general error
+            # message and list the errors for each keyword spec.
+            print(f'*** {self.filename}:{node.lineno}: '
+                  f'No keywords matched gettext call "{func_name}":',
+                  file=sys.stderr)
+            for spec, err in zip(specs, errors, strict=True):
+                unparsed = unparse_spec(func_name, spec)
+                print(f'\tkeyword="{unparsed}": {err}', file=sys.stderr)
+
+    def _extract_message_with_spec(self, node, spec):
+        """Extract a gettext call with the given spec.
 
-        max_index = max(spec)
+        Return None if the gettext call was successfully extracted,
+        otherwise return an error message.
+        """
+        max_index = max(spec.values())
         has_var_positional = any(isinstance(arg, ast.Starred) for
                                  arg in node.args[:max_index+1])
         if has_var_positional:
-            print(f'*** {self.filename}:{node.lineno}: Variable positional '
-                  f'arguments are not allowed in gettext calls', file=sys.stderr)
-            return
+            return ('Variable positional arguments are not '
+                    'allowed in gettext calls')
 
         if max_index >= len(node.args):
-            print(f'*** {self.filename}:{node.lineno}: Expected at least '
-                  f'{max(spec) + 1} positional argument(s) in gettext call, '
-                  f'got {len(node.args)}', file=sys.stderr)
-            return
+            return (f'Expected at least {max_index + 1} positional '
+                    f'argument(s) in gettext call, got {len(node.args)}')
 
         msg_data = {}
-        for position, arg_type in spec.items():
+        for arg_type, position in spec.items():
             arg = node.args[position]
             if not self._is_string_const(arg):
-                print(f'*** {self.filename}:{arg.lineno}: Expected a string '
-                      f'constant for argument {position + 1}, '
-                      f'got {ast.unparse(arg)}', file=sys.stderr)
-                return
+                return (f'Expected a string constant for argument '
+                        f'{position + 1}, got {ast.unparse(arg)}')
             msg_data[arg_type] = arg.value
 
         lineno = node.lineno
@@ -729,15 +784,12 @@ class Options:
 
     # calculate all keywords
     try:
-        custom_keywords = dict(parse_spec(spec) for spec in options.keywords)
+        options.keywords = process_keywords(
+            options.keywords,
+            no_default_keywords=no_default_keywords)
     except ValueError as e:
         print(e, file=sys.stderr)
         sys.exit(1)
-    options.keywords = {}
-    if not no_default_keywords:
-        options.keywords |= DEFAULTKEYWORDS
-    # custom keywords override default keywords
-    options.keywords |= custom_keywords
 
     # initialize list of strings to exclude
     if options.excludefilename:

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

[Python-checkins] gh-130453: pygettext: Allow specifying multiple keywords with the same function name (GH-131380)

Reply via email to