[Python-checkins] gh-133197: Improve error message for incompatible string / bytes prefixes (#133242)

sobolevn Fri, 02 May 2025 04:28:49 -0700

https://github.com/python/cpython/commit/1e9cc3d502773d2dd3b420ecf5b949d902e010c3
commit: 1e9cc3d502773d2dd3b420ecf5b949d902e010c3
branch: main
author: sobolevn <m...@sobolevn.me>
committer: sobolevn <m...@sobolevn.me>
date: 2025-05-02T14:28:17+03:00
summary:


gh-133197: Improve error message for incompatible string / bytes prefixes 
(#133242)

files:
A 
Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst
M Lib/test/test_fstring.py
M Lib/test/test_grammar.py
M Lib/test/test_syntax.py
M Parser/lexer/lexer.c

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index a10d1fd5fd2b1f..dd58e032a8befe 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1304,7 +1304,7 @@ def test_invalid_string_prefixes(self):
                              "Bf''",
                              "BF''",]
         double_quote_cases = [case.replace("'", '"') for case in 
single_quote_cases]
-        self.assertAllRaise(SyntaxError, 'invalid syntax',
+        self.assertAllRaise(SyntaxError, 'prefixes are incompatible',
                             single_quote_cases + double_quote_cases)
 
     def test_leading_trailing_spaces(self):
diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index c0681bccd9ec32..c39565144bf7f4 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -216,6 +216,27 @@ def test_string_literals(self):
 '
         self.assertEqual(x, y)
 
+    def test_string_prefixes(self):
+        def check(s):
+            parsed = eval(s)
+            self.assertIs(type(parsed), str)
+            self.assertGreater(len(parsed), 0)
+
+        check("u'abc'")
+        check("r'abc\t'")
+        check("rf'abc\a {1 + 1}'")
+        check("fr'abc\a {1 + 1}'")
+
+    def test_bytes_prefixes(self):
+        def check(s):
+            parsed = eval(s)
+            self.assertIs(type(parsed), bytes)
+            self.assertGreater(len(parsed), 0)
+
+        check("b'abc'")
+        check("br'abc\t'")
+        check("rb'abc\a'")
+
     def test_ellipsis(self):
         x = ...
         self.assertTrue(x is Ellipsis)
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 74e52ac8fd5e7d..0ee17849e28121 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -1877,21 +1877,77 @@
 Traceback (most recent call last):
 SyntaxError: cannot assign to f-string expression here. Maybe you meant '==' 
instead of '='?
 
->>> ft'abc'
+>>> ub''
 Traceback (most recent call last):
-SyntaxError: can't use 'f' and 't' string prefixes together
+SyntaxError: 'u' and 'b' prefixes are incompatible
 
->>> tf"{x=}"
+>>> bu"привет"
 Traceback (most recent call last):
-SyntaxError: can't use 'f' and 't' string prefixes together
+SyntaxError: 'u' and 'b' prefixes are incompatible
 
->>> tb''
+>>> ur''
+Traceback (most recent call last):
+SyntaxError: 'u' and 'r' prefixes are incompatible
+
+>>> ru"\t"
+Traceback (most recent call last):
+SyntaxError: 'u' and 'r' prefixes are incompatible
+
+>>> uf'{1 + 1}'
+Traceback (most recent call last):
+SyntaxError: 'u' and 'f' prefixes are incompatible
+
+>>> fu""
+Traceback (most recent call last):
+SyntaxError: 'u' and 'f' prefixes are incompatible
+
+>>> ut'{1}'
+Traceback (most recent call last):
+SyntaxError: 'u' and 't' prefixes are incompatible
+
+>>> tu"234"
+Traceback (most recent call last):
+SyntaxError: 'u' and 't' prefixes are incompatible
+
+>>> bf'{x!r}'
+Traceback (most recent call last):
+SyntaxError: 'b' and 'f' prefixes are incompatible
+
+>>> fb"text"
 Traceback (most recent call last):
-SyntaxError: can't use 'b' and 't' string prefixes together
+SyntaxError: 'b' and 'f' prefixes are incompatible
 
 >>> bt"text"
 Traceback (most recent call last):
-SyntaxError: can't use 'b' and 't' string prefixes together
+SyntaxError: 'b' and 't' prefixes are incompatible
+
+>>> tb''
+Traceback (most recent call last):
+SyntaxError: 'b' and 't' prefixes are incompatible
+
+>>> tf"{0.3:.02f}"
+Traceback (most recent call last):
+SyntaxError: 'f' and 't' prefixes are incompatible
+
+>>> ft'{x=}'
+Traceback (most recent call last):
+SyntaxError: 'f' and 't' prefixes are incompatible
+
+>>> tfu"{x=}"
+Traceback (most recent call last):
+SyntaxError: 'u' and 'f' prefixes are incompatible
+
+>>> turf"{x=}"
+Traceback (most recent call last):
+SyntaxError: 'u' and 'r' prefixes are incompatible
+
+>>> burft"{x=}"
+Traceback (most recent call last):
+SyntaxError: 'u' and 'b' prefixes are incompatible
+
+>>> brft"{x=}"
+Traceback (most recent call last):
+SyntaxError: 'b' and 'f' prefixes are incompatible
 
 >>> t'{x}' = 42
 Traceback (most recent call last):
diff --git 
a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst
 
b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst
new file mode 100644
index 00000000000000..009bc3760535d5
--- /dev/null
+++ 
b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-01-11-06-29.gh-issue-133197.BHjfh4.rst
@@ -0,0 +1,2 @@
+Improve :exc:`SyntaxError` error messages for incompatible string / bytes
+prefixes.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 98adf7447c5626..4d10bccf0a53f2 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -404,6 +404,51 @@ tok_continuation_line(struct tok_state *tok) {
     return c;
 }
 
+static int
+maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
+                                             int saw_b, int saw_r, int saw_u,
+                                             int saw_f, int saw_t) {
+    // Supported: rb, rf, rt (in any order)
+    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
+
+#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
+    do {                                                                  \
+        (void)_PyTokenizer_syntaxerror_known_range(                       \
+            tok, (int)(tok->start + 1 - tok->line_start),                 \
+            (int)(tok->cur - tok->line_start),                            \
+            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
+        return -1;                                                        \
+    } while (0)
+
+    if (saw_u && saw_b) {
+        RETURN_SYNTAX_ERROR("u", "b");
+    }
+    if (saw_u && saw_r) {
+        RETURN_SYNTAX_ERROR("u", "r");
+    }
+    if (saw_u && saw_f) {
+        RETURN_SYNTAX_ERROR("u", "f");
+    }
+    if (saw_u && saw_t) {
+        RETURN_SYNTAX_ERROR("u", "t");
+    }
+
+    if (saw_b && saw_f) {
+        RETURN_SYNTAX_ERROR("b", "f");
+    }
+    if (saw_b && saw_t) {
+        RETURN_SYNTAX_ERROR("b", "t");
+    }
+
+    if (saw_f && saw_t) {
+        RETURN_SYNTAX_ERROR("f", "t");
+    }
+
+#undef RETURN_SYNTAX_ERROR
+
+    return 0;
+}
+
 static int
 tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct 
token *token)
 {
@@ -648,22 +693,22 @@ tok_get_normal_mode(struct tok_state *tok, 
tokenizer_mode* current_tok, struct t
         /* Process the various legal combinations of b"", r"", u"", and f"". */
         int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
         while (1) {
-            if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
+            if (!saw_b && (c == 'b' || c == 'B')) {
                 saw_b = 1;
+            }
             /* Since this is a backwards compatibility support literal we don't
                want to support it in arbitrary order like byte literals. */
-            else if (!(saw_b || saw_u || saw_r || saw_f || saw_t)
-                     && (c == 'u'|| c == 'U')) {
+            else if (!saw_u && (c == 'u'|| c == 'U')) {
                 saw_u = 1;
             }
             /* ur"" and ru"" are not supported */
-            else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
+            else if (!saw_r && (c == 'r' || c == 'R')) {
                 saw_r = 1;
             }
-            else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
+            else if (!saw_f && (c == 'f' || c == 'F')) {
                 saw_f = 1;
             }
-            else if (!(saw_t || saw_u) && (c == 't' || c == 'T')) {
+            else if (!saw_t && (c == 't' || c == 'T')) {
                 saw_t = 1;
             }
             else {
@@ -671,17 +716,11 @@ tok_get_normal_mode(struct tok_state *tok, 
tokenizer_mode* current_tok, struct t
             }
             c = tok_nextc(tok);
             if (c == '"' || c == '\'') {
-                if (saw_b && saw_t) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
-                        tok, (int)(tok->start + 1 - tok->line_start),
-                        (int)(tok->cur - tok->line_start),
-                        "can't use 'b' and 't' string prefixes together"));
-                }
-                if (saw_f && saw_t) {
-                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
-                        tok, (int)(tok->start + 1 - tok->line_start),
-                        (int)(tok->cur - tok->line_start),
-                        "can't use 'f' and 't' string prefixes together"));
+                // Raise error on incompatible string prefixes:
+                int status = maybe_raise_syntax_error_for_string_prefixes(
+                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
+                if (status < 0) {
+                    return MAKE_TOKEN(ERRORTOKEN);
                 }
 
                 // Handle valid f or t string creation:

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

[Python-checkins] gh-133197: Improve error message for incompatible string / bytes prefixes (#133242)

Reply via email to