https://github.com/python/cpython/commit/e41ec8e18b078024b02a742272e675ae39778536
commit: e41ec8e18b078024b02a742272e675ae39778536
branch: main
author: Tomas R. <[email protected]>
committer: AA-Turner <[email protected]>
date: 2025-02-04T22:59:23Z
summary:
gh-104400: pygettext: Prepare to replace TokenEater with a NodeVisitor (#129672)
* Update the module docstring
* Move ``key_for`` inside the class
* Move ``write_pot_file`` outside the class
files:
M Tools/i18n/pygettext.py
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 81d9fdbb36017b..d8a0e379ab82cb 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -7,15 +7,9 @@
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.
-There's one problem though; xgettext is the program that scans source code
-looking for message strings, but it groks only C (or C++). Python
-introduces a few wrinkles, such as dual quoting characters, triple quoted
-strings, and raw strings. xgettext understands none of this.
-
-Enter pygettext, which uses Python's standard tokenize module to scan
-Python source code, generating .pot files identical to what GNU xgettext[2]
-generates for C and C++ code. From there, the standard GNU tools can be
-used.
+pygettext uses Python's standard tokenize module to scan Python source
+code, generating .pot files identical to what GNU xgettext[2] generates
+for C and C++ code. From there, the standard GNU tools can be used.
A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
@@ -41,6 +35,9 @@
option arguments is broken, and in these cases, pygettext just defines
additional switches.
+NOTE: The public interface of pygettext is limited to the command-line
+interface only. The internal API is subject to change without notice.
+
Usage: pygettext [options] inputfile ...
Options:
@@ -328,12 +325,6 @@ def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=Fals
self.is_docstring |= is_docstring
-def key_for(msgid, msgctxt=None):
- if msgctxt is not None:
- return (msgctxt, msgid)
- return msgid
-
-
class TokenEater:
def __init__(self, options):
self.__options = options
@@ -354,6 +345,10 @@ def __call__(self, ttype, tstring, stup, etup, line):
## file=sys.stderr)
self.__state(ttype, tstring, stup[0])
+ @property
+ def messages(self):
+ return self.__messages
+
def __waiting(self, ttype, tstring, lineno):
opts = self.__options
# Do docstring extractions, if enabled
@@ -513,7 +508,7 @@ def __addentry(self, msg, lineno=None, *, is_docstring=False):
lineno = self.__lineno
msgctxt = msg.get('msgctxt')
msgid_plural = msg.get('msgid_plural')
- key = key_for(msgid, msgctxt)
+ key = self._key_for(msgid, msgctxt)
if key in self.__messages:
self.__messages[key].add_location(
self.__curfile,
@@ -530,6 +525,12 @@ def __addentry(self, msg, lineno=None, *, is_docstring=False):
is_docstring=is_docstring,
)
+ @staticmethod
+ def _key_for(msgid, msgctxt=None):
+ if msgctxt is not None:
+ return (msgctxt, msgid)
+ return msgid
+
def warn_unexpected_token(self, token):
print((
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
@@ -543,58 +544,58 @@ def set_filename(self, filename):
self.__curfile = filename
self.__freshmodule = 1
- def write(self, fp):
- options = self.__options
- timestamp = time.strftime('%Y-%m-%d %H:%M%z')
- encoding = fp.encoding if fp.encoding else 'UTF-8'
- print(pot_header % {'time': timestamp, 'version': __version__,
- 'charset': encoding,
- 'encoding': '8bit'}, file=fp)
-
- # Sort locations within each message by filename and lineno
- sorted_keys = [
- (key, sorted(msg.locations))
- for key, msg in self.__messages.items()
- ]
- # Sort messages by locations
- # For example, a message with locations [('test.py', 1), ('test.py', 2)] will
- # appear before a message with locations [('test.py', 1), ('test.py', 3)]
- sorted_keys.sort(key=itemgetter(1))
-
- for key, locations in sorted_keys:
- msg = self.__messages[key]
- if options.writelocations:
- # location comments are different b/w Solaris and GNU:
- if options.locationstyle == options.SOLARIS:
- for location in locations:
- print(f'# File: {location.filename}, line: {location.lineno}', file=fp)
- elif options.locationstyle == options.GNU:
- # fit as many locations on one line, as long as the
- # resulting line length doesn't exceed 'options.width'
- locline = '#:'
- for location in locations:
- s = f' {location.filename}:{location.lineno}'
- if len(locline) + len(s) <= options.width:
- locline = locline + s
- else:
- print(locline, file=fp)
- locline = f'#:{s}'
- if len(locline) > 2:
+
+def write_pot_file(messages, options, fp):
+ timestamp = time.strftime('%Y-%m-%d %H:%M%z')
+ encoding = fp.encoding if fp.encoding else 'UTF-8'
+ print(pot_header % {'time': timestamp, 'version': __version__,
+ 'charset': encoding,
+ 'encoding': '8bit'}, file=fp)
+
+ # Sort locations within each message by filename and lineno
+ sorted_keys = [
+ (key, sorted(msg.locations))
+ for key, msg in messages.items()
+ ]
+ # Sort messages by locations
+ # For example, a message with locations [('test.py', 1), ('test.py', 2)] will
+ # appear before a message with locations [('test.py', 1), ('test.py', 3)]
+ sorted_keys.sort(key=itemgetter(1))
+
+ for key, locations in sorted_keys:
+ msg = messages[key]
+ if options.writelocations:
+ # location comments are different b/w Solaris and GNU:
+ if options.locationstyle == options.SOLARIS:
+ for location in locations:
+ print(f'# File: {location.filename}, line: {location.lineno}', file=fp)
+ elif options.locationstyle == options.GNU:
+ # fit as many locations on one line, as long as the
+ # resulting line length doesn't exceed 'options.width'
+ locline = '#:'
+ for location in locations:
+ s = f' {location.filename}:{location.lineno}'
+ if len(locline) + len(s) <= options.width:
+ locline = locline + s
+ else:
print(locline, file=fp)
- if msg.is_docstring:
- # If the entry was gleaned out of a docstring, then add a
- # comment stating so. This is to aid translators who may wish
- # to skip translating some unimportant docstrings.
- print('#, docstring', file=fp)
- if msg.msgctxt is not None:
- print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
- print('msgid', normalize(msg.msgid, encoding), file=fp)
- if msg.msgid_plural is not None:
- print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
- print('msgstr[0] ""', file=fp)
- print('msgstr[1] ""\n', file=fp)
- else:
- print('msgstr ""\n', file=fp)
+ locline = f'#:{s}'
+ if len(locline) > 2:
+ print(locline, file=fp)
+ if msg.is_docstring:
+ # If the entry was gleaned out of a docstring, then add a
+ # comment stating so. This is to aid translators who may wish
+ # to skip translating some unimportant docstrings.
+ print('#, docstring', file=fp)
+ if msg.msgctxt is not None:
+ print('msgctxt', normalize(msg.msgctxt, encoding), file=fp)
+ print('msgid', normalize(msg.msgid, encoding), file=fp)
+ if msg.msgid_plural is not None:
+ print('msgid_plural', normalize(msg.msgid_plural, encoding), file=fp)
+ print('msgstr[0] ""', file=fp)
+ print('msgstr[1] ""\n', file=fp)
+ else:
+ print('msgstr ""\n', file=fp)
def main():
@@ -752,7 +753,7 @@ class Options:
fp = open(options.outfile, 'w')
closep = 1
try:
- eater.write(fp)
+ write_pot_file(eater.messages, options, fp)
finally:
if closep:
fp.close()
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]