https://github.com/python/cpython/commit/aa845af9bb39b3e2ed08bbb00a8e932a97be8fc0 commit: aa845af9bb39b3e2ed08bbb00a8e932a97be8fc0 branch: main author: Tomas R. <tomas.ro...@gmail.com> committer: serhiy-storchaka <storch...@gmail.com> date: 2025-02-17T12:41:28+02:00 summary:
gh-130057: Pygettext: Support translator comments (GH-130061) files: A Lib/test/test_tools/i18n_data/comments.pot A Lib/test/test_tools/i18n_data/comments.py A Misc/NEWS.d/next/Tools-Demos/2025-02-12-23-24-37.gh-issue-130057.TKUKI6.rst M Lib/test/test_tools/test_i18n.py M Tools/i18n/pygettext.py diff --git a/Lib/test/test_tools/i18n_data/comments.pot b/Lib/test/test_tools/i18n_data/comments.pot new file mode 100644 index 00000000000000..a1df46d453c546 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/comments.pot @@ -0,0 +1,110 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <l...@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: comments.py:4 +msgid "foo" +msgstr "" + +#. i18n: This is a translator comment +#: comments.py:7 +msgid "bar" +msgstr "" + +#. i18n: This is a translator comment +#. i18n: This is another translator comment +#: comments.py:11 +msgid "baz" +msgstr "" + +#. i18n: This is a translator comment +#. with multiple +#. lines +#: comments.py:16 +msgid "qux" +msgstr "" + +#. i18n: This is a translator comment +#: comments.py:21 +msgid "quux" +msgstr "" + +#. i18n: This is a translator comment +#. with multiple lines +#. i18n: This is another translator comment +#. with multiple lines +#: comments.py:27 +msgid "corge" +msgstr "" + +#: comments.py:31 +msgid "grault" +msgstr "" + +#. i18n: This is another translator comment +#: comments.py:36 +msgid "garply" +msgstr "" + +#: comments.py:40 +msgid "george" +msgstr "" + +#. i18n: This is another translator comment +#: comments.py:45 +msgid "waldo" +msgstr "" + +#. i18n: This is a translator comment +#. i18n: This is also a translator comment +#. i18n: This is another translator comment +#: comments.py:50 +msgid "waldo2" +msgstr "" + +#. i18n: This is a translator comment +#. i18n: This is another translator comment +#. i18n: This is yet another translator comment +#. i18n: This is a translator comment +#. with multiple lines +#: comments.py:53 comments.py:56 comments.py:59 comments.py:63 +msgid "fred" +msgstr "" + +#: comments.py:65 +msgid "plugh" +msgstr "" + +#: comments.py:67 +msgid "foobar" +msgstr "" + +#. i18n: This is a translator comment +#: comments.py:71 +msgid "xyzzy" +msgstr "" + +#: comments.py:72 +msgid "thud" +msgstr "" + +#. i18n: This is a translator comment +#. i18n: This is another translator comment +#. i18n: This is yet another translator comment +#: comments.py:78 +msgid "foos" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/comments.py b/Lib/test/test_tools/i18n_data/comments.py new file mode 100644 index 00000000000000..dca4dfa57b1dd9 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/comments.py @@ -0,0 +1,78 @@ +from gettext import gettext as _ + +# Not a translator comment +_('foo') + +# i18n: This is a translator comment +_('bar') + +# i18n: This is a translator comment +# i18n: This is another translator comment +_('baz') + +# i18n: This is a translator comment +# with multiple +# lines +_('qux') + +# This comment should not be included because +# it does not start with the prefix +# i18n: This is a translator comment +_('quux') + +# i18n: This is a translator comment +# with multiple lines +# i18n: This is another translator comment +# with multiple lines +_('corge') + +# i18n: This comment should be ignored + +_('grault') + +# i18n: This comment should be ignored + +# i18n: This is another translator comment +_('garply') + +# i18n: comment should be ignored +x = 1 +_('george') + +# i18n: This comment should be ignored +x = 1 +# i18n: This is another translator comment +_('waldo') + +# i18n: This is a translator comment +x = 1 # i18n: This is also a translator comment +# i18n: This is another translator comment +_('waldo2') + +# i18n: This is a translator comment +_('fred') + +# i18n: This is another translator comment +_('fred') + +# i18n: This is yet another translator comment +_('fred') + +# i18n: This is a translator comment +# with multiple lines +_('fred') + +_('plugh') # i18n: This comment should be ignored + +_('foo' # i18n: This comment should be ignored + 'bar') # i18n: This comment should be ignored + +# i18n: This is a translator comment +_('xyzzy') +_('thud') + + +## i18n: This is a translator comment +# # i18n: This is another translator comment +### ### i18n: This is yet another translator comment +_('foos') diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index f5aba31ed42c10..d97fdb116fcd19 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -87,7 +87,8 @@ def assert_POT_equal(self, expected, actual): self.maxDiff = None self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual)) - def extract_from_str(self, module_content, *, args=(), strict=True, with_stderr=False): + def extract_from_str(self, module_content, *, args=(), strict=True, + with_stderr=False, raw=False): """Return all msgids extracted from module_content.""" filename = 'test.py' with temp_cwd(None): @@ -98,10 +99,11 @@ def extract_from_str(self, module_content, *, args=(), strict=True, with_stderr= self.assertEqual(res.err, b'') with open('messages.pot', encoding='utf-8') as fp: data = fp.read() - msgids = self.get_msgids(data) + if not raw: + data = self.get_msgids(data) if not with_stderr: - return msgids - return msgids, res.err + return data + return data, res.err def extract_docstrings_from_str(self, module_content): """Return all docstrings extracted from module_content.""" @@ -381,7 +383,8 @@ def test_pygettext_output(self): contents = input_file.read_text(encoding='utf-8') with temp_cwd(None): Path(input_file.name).write_text(contents) - assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name) + assert_python_ok('-Xutf8', self.script, '--docstrings', + '--add-comments=i18n:', input_file.name) output = Path('messages.pot').read_text(encoding='utf-8') expected = output_file.read_text(encoding='utf-8') @@ -437,6 +440,51 @@ def test_error_messages(self): "*** test.py:3: Variable positional arguments are not allowed in gettext calls\n" ) + def test_extract_all_comments(self): + """ + Test that the --add-comments option without an + explicit tag extracts all translator comments. + """ + for arg in ('--add-comments', '-c'): + with self.subTest(arg=arg): + data = self.extract_from_str(dedent('''\ + # Translator comment + _("foo") + '''), args=(arg,), raw=True) + self.assertIn('#. Translator comment', data) + + def test_comments_with_multiple_tags(self): + """ + Test that multiple --add-comments tags can be specified. + """ + for arg in ('--add-comments={}', '-c{}'): + with self.subTest(arg=arg): + args = (arg.format('foo:'), arg.format('bar:')) + data = self.extract_from_str(dedent('''\ + # foo: comment + _("foo") + + # bar: comment + _("bar") + + # baz: comment + _("baz") + '''), args=args, raw=True) + self.assertIn('#. foo: comment', data) + self.assertIn('#. bar: comment', data) + self.assertNotIn('#. baz: comment', data) + + def test_comments_not_extracted_without_tags(self): + """ + Test that translator comments are not extracted without + specifying --add-comments. + """ + data = self.extract_from_str(dedent('''\ + # Translator comment + _("foo") + '''), raw=True) + self.assertNotIn('#.', data) + def update_POT_snapshots(): for input_file in DATA_DIR.glob('*.py'): @@ -444,7 +492,8 @@ def update_POT_snapshots(): contents = input_file.read_bytes() with temp_cwd(None): Path(input_file.name).write_bytes(contents) - assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name) + assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', + '--add-comments=i18n:', input_file.name) output = Path('messages.pot').read_text(encoding='utf-8') output = normalize_POT_file(output) diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-12-23-24-37.gh-issue-130057.TKUKI6.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-12-23-24-37.gh-issue-130057.TKUKI6.rst new file mode 100644 index 00000000000000..0e89fa652dcbba --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-02-12-23-24-37.gh-issue-130057.TKUKI6.rst @@ -0,0 +1 @@ +Add support for translator comments in :program:`pygettext.py`. diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 4177d46048f9b9..9654dd45067ff9 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -46,6 +46,12 @@ --extract-all Extract all strings. + -cTAG + --add-comments=TAG + Extract translator comments. Comments must start with TAG and + must precede the gettext call. Multiple -cTAG options are allowed. + In that case, any comment matching any of the TAGs will be extracted. + -d name --default-domain=name Rename the default output file from messages.pot to name.pot. @@ -141,7 +147,9 @@ import os import sys import time +import tokenize from dataclasses import dataclass, field +from io import BytesIO from operator import itemgetter __version__ = '1.5' @@ -302,12 +310,30 @@ class Message: msgctxt: str | None locations: set[Location] = field(default_factory=set) is_docstring: bool = False + comments: list[str] = field(default_factory=list) - def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=False): + def add_location(self, filename, lineno, msgid_plural=None, *, + is_docstring=False, comments=None): if self.msgid_plural is None: self.msgid_plural = msgid_plural self.locations.add(Location(filename, lineno)) self.is_docstring |= is_docstring + if comments: + self.comments.extend(comments) + + +def get_source_comments(source): + """ + Return a dictionary mapping line numbers to + comments in the source code. + """ + comments = {} + for token in tokenize.tokenize(BytesIO(source).readline): + if token.type == tokenize.COMMENT: + # Remove any leading combination of '#' and whitespace + comment = token.string.lstrip('# \t') + comments[token.start[0]] = comment + return comments class GettextVisitor(ast.NodeVisitor): @@ -316,10 +342,18 @@ def __init__(self, options): self.options = options self.filename = None self.messages = {} + self.comments = {} + + def visit_file(self, source, filename): + try: + module_tree = ast.parse(source) + except SyntaxError: + return - def visit_file(self, node, filename): self.filename = filename - self.visit(node) + if self.options.comment_tags: + self.comments = get_source_comments(source) + self.visit(module_tree) def visit_Module(self, node): self._extract_docstring(node) @@ -372,14 +406,51 @@ def _extract_message(self, node): msg_data[arg_type] = arg.value lineno = node.lineno - self._add_message(lineno, **msg_data) + comments = self._extract_comments(node) + self._add_message(lineno, **msg_data, comments=comments) + + def _extract_comments(self, node): + """Extract translator comments. + + Translator comments must precede the gettext call and + start with one of the comment prefixes defined by + --add-comments=TAG. See the tests for examples. + """ + if not self.options.comment_tags: + return [] + + comments = [] + lineno = node.lineno - 1 + # Collect an unbroken sequence of comments starting from + # the line above the gettext call. + while lineno >= 1: + comment = self.comments.get(lineno) + if comment is None: + break + comments.append(comment) + lineno -= 1 + + # Find the first translator comment in the sequence and + # return all comments starting from that comment. + comments = comments[::-1] + first_index = next((i for i, comment in enumerate(comments) + if self._is_translator_comment(comment)), None) + if first_index is None: + return [] + return comments[first_index:] + + def _is_translator_comment(self, comment): + return comment.startswith(self.options.comment_tags) def _add_message( self, lineno, msgid, msgid_plural=None, msgctxt=None, *, - is_docstring=False): + is_docstring=False, comments=None): if msgid in self.options.toexclude: return + if not comments: + comments = [] + key = self._key_for(msgid, msgctxt) message = self.messages.get(key) if message: @@ -388,6 +459,7 @@ def _add_message( lineno, msgid_plural, is_docstring=is_docstring, + comments=comments, ) else: self.messages[key] = Message( @@ -396,6 +468,7 @@ def _add_message( msgctxt=msgctxt, locations={Location(self.filename, lineno)}, is_docstring=is_docstring, + comments=comments, ) @staticmethod @@ -435,6 +508,10 @@ def write_pot_file(messages, options, fp): for key, locations in sorted_keys: msg = messages[key] + + for comment in msg.comments: + print(f'#. {comment}', file=fp) + if options.writelocations: # location comments are different b/w Solaris and GNU: if options.locationstyle == options.SOLARIS: @@ -473,9 +550,9 @@ def main(): try: opts, args = getopt.getopt( sys.argv[1:], - 'ad:DEhk:Kno:p:S:Vvw:x:X:', - ['extract-all', 'default-domain=', 'escape', 'help', - 'keyword=', 'no-default-keywords', + 'ac::d:DEhk:Kno:p:S:Vvw:x:X:', + ['extract-all', 'add-comments=?', 'default-domain=', 'escape', + 'help', 'keyword=', 'no-default-keywords', 'add-location', 'no-location', 'output=', 'output-dir=', 'style=', 'verbose', 'version', 'width=', 'exclude-file=', 'docstrings', 'no-docstrings', @@ -501,6 +578,7 @@ class Options: excludefilename = '' docstrings = 0 nodocstrings = {} + comment_tags = set() options = Options() locations = {'gnu' : options.GNU, @@ -513,6 +591,8 @@ class Options: usage(0) elif opt in ('-a', '--extract-all'): options.extractall = 1 + elif opt in ('-c', '--add-comments'): + options.comment_tags.add(arg) elif opt in ('-d', '--default-domain'): options.outfile = arg + '.pot' elif opt in ('-E', '--escape'): @@ -558,6 +638,8 @@ class Options: finally: fp.close() + options.comment_tags = tuple(options.comment_tags) + # calculate escapes make_escapes(not options.escape) @@ -600,12 +682,7 @@ class Options: with open(filename, 'rb') as fp: source = fp.read() - try: - module_tree = ast.parse(source) - except SyntaxError: - continue - - visitor.visit_file(module_tree, filename) + visitor.visit_file(source, filename) # write the output if options.outfile == '-': _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com