[Python-checkins] [3.14] gh-134752: Improve speed of test_tokenize.StringPrefixTest.test_prefixes. (GH-134766) (#134782)

ericvsmith Tue, 27 May 2025 02:16:51 -0700

https://github.com/python/cpython/commit/452d098c0b024266702c64d6a76d908d721c5067
commit: 452d098c0b024266702c64d6a76d908d721c5067
branch: 3.14
author: Miss Islington (bot) <[email protected]>
committer: ericvsmith <[email protected]>
date: 2025-05-27T09:16:23Z
summary:


[3.14] gh-134752: Improve speed of 
test_tokenize.StringPrefixTest.test_prefixes. (GH-134766) (#134782)

gh-134752: Improve speed of test_tokenize.StringPrefixTest.test_prefixes. 
(GH-134766)
(cherry picked from commit 579686d9fb1bccc74c694d569f0a8bf28d9ca85a)

Co-authored-by: Eric V. Smith <[email protected]>

files:
M Lib/test/test_tokenize.py

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index d4b51841891b28..865e0c5b40ddd3 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -3241,39 +3241,40 @@ def test_exact_flag(self):
 
 
 class StringPrefixTest(unittest.TestCase):
-    def test_prefixes(self):
-        # Get the list of defined string prefixes.  I don't see an
-        # obvious documented way of doing this, but probably the best
-        # thing is to split apart tokenize.StringPrefix.
-
-        # Make sure StringPrefix begins and ends in parens.
-        self.assertEqual(tokenize.StringPrefix[0], '(')
-        self.assertEqual(tokenize.StringPrefix[-1], ')')
-
-        # Then split apart everything else by '|'.
-        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
-
-        # Now compute the actual string prefixes, by exec-ing all
-        # valid prefix combinations, followed by an empty string.
-
-        # Try all prefix lengths until we find a length that has zero
-        # valid prefixes.  This will miss the case where for example
-        # there are no valid 3 character prefixes, but there are valid
-        # 4 character prefixes.  That seems extremely unlikely.
-
-        # Note that the empty prefix is being included, because length
-        # starts at 0.  That's expected, since StringPrefix includes
-        # the empty prefix.
+    @staticmethod
+    def determine_valid_prefixes():
+        # Try all lengths until we find a length that has zero valid
+        # prefixes.  This will miss the case where for example there
+        # are no valid 3 character prefixes, but there are valid 4
+        # character prefixes.  That seems unlikely.
+
+        single_char_valid_prefixes = set()
+
+        # Find all of the single character string prefixes. Just get
+        # the lowercase version, we'll deal with combinations of upper
+        # and lower case later.  I'm using this logic just in case
+        # some uppercase-only prefix is added.
+        for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase):
+            try:
+                eval(f'{letter}""')
+                single_char_valid_prefixes.add(letter.lower())
+            except SyntaxError:
+                pass
 
+        # This logic assumes that all combinations of valid prefixes only use
+        # the characters that are valid single character prefixes.  That seems
+        # like a valid assumption, but if it ever changes this will need
+        # adjusting.
         valid_prefixes = set()
         for length in itertools.count():
             num_at_this_length = 0
             for prefix in (
-                "".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length))
+                "".join(l)
+                for l in itertools.combinations(single_char_valid_prefixes, length)
             ):
                 for t in itertools.permutations(prefix):
                     for u in itertools.product(*[(c, c.upper()) for c in t]):
-                        p = ''.join(u)
+                        p = "".join(u)
                         if p == "not":
                             # 'not' can never be a string prefix,
                             # because it's a valid expression: not ""
@@ -3289,9 +3290,26 @@ def test_prefixes(self):
                         except SyntaxError:
                             pass
             if num_at_this_length == 0:
-                break
+                return valid_prefixes
+
+
+    def test_prefixes(self):
+        # Get the list of defined string prefixes.  I don't see an
+        # obvious documented way of doing this, but probably the best
+        # thing is to split apart tokenize.StringPrefix.
+
+        # Make sure StringPrefix begins and ends in parens.  We're
+        # assuming it's of the form "(a|b|ab)", if a, b, and ab are
+        # valid string prefixes.
+        self.assertEqual(tokenize.StringPrefix[0], '(')
+        self.assertEqual(tokenize.StringPrefix[-1], ')')
+
+        # Then split apart everything else by '|'.
+        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
 
-        self.assertEqual(defined_prefixes, valid_prefixes)
+        # Now compute the actual allowed string prefixes and compare
+        # to what is defined in the tokenize module.
+        self.assertEqual(defined_prefixes, self.determine_valid_prefixes())
 
 
 if __name__ == "__main__":

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

[Python-checkins] [3.14] gh-134752: Improve speed of test_tokenize.StringPrefixTest.test_prefixes. (GH-134766) (#134782)

Reply via email to