https://github.com/python/cpython/commit/539461d9ec8e5322ead638f7be733fd196aa6c79
commit: 539461d9ec8e5322ead638f7be733fd196aa6c79
branch: main
author: Tomasz Pytel <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-10-07T17:28:15+01:00
summary:
gh-139516: Fix lambda colon start format spec in f-string in tokenizer (#139657)
files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst
M Lib/test/test_fstring.py
M Lib/test/test_tokenize.py
M Parser/lexer/lexer.c
M Parser/lexer/state.h
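
The construct this commit repairs, written out as a plain script (a hedged
reproducer mirroring the new regression test below, with the test's embedded
\n expanded into a real line break):

    def f(a): pass
    f"{f(a=lambda: 'à'
    )}"

Before the fix, the tokenizer set the replacement field's debug flag (the
machinery behind f"{expr=}") on seeing the keyword argument's '=', even
though it is nested inside the call's parentheses; with that flag set, the
lambda's ':' could then be mistaken for the start of a format spec (hence
the issue title), and the script failed to parse.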
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 41cefe0e286d50..05d0cbd2445c4c 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1859,6 +1859,13 @@ def __format__(self, format):
         # Test multiple format specs in same raw f-string
         self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}",
                          '\\xFF \\n')
+    def test_gh139516(self):
+        with temp_cwd():
+            script = 'script.py'
+            with open(script, 'wb') as f:
+                f.write('''def f(a): pass\nf"{f(a=lambda: 'à'\n)}"'''.encode())
+            assert_python_ok(script)
+
 
 if __name__ == '__main__':
     unittest.main()
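
The test writes the script as UTF-8 bytes and runs it in a subprocess via
assert_python_ok. A rough in-process sketch of the same check (illustrative,
not part of the commit) is that compiling the identical source succeeds on
an interpreter carrying this fix:

    # This source fails to tokenize on interpreters without the fix.
    src = '''def f(a): pass\nf"{f(a=lambda: 'à'\n)}"'''
    compile(src, "<gh-139516>", "exec")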
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index d90a7659c4237c..8fdd03f347b632 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1216,6 +1216,23 @@ def test_multiline_non_ascii_fstring_with_expr(self):
     FSTRING_END "\'\'\'" (3, 1) (3, 4)
     """)
 
+        # gh-139516, the '\n' is explicit to ensure no trailing whitespace which would invalidate the test
+        self.check_tokenize('''f"{f(a=lambda: 'à'\n)}"''', """\
+    FSTRING_START \'f"\' (1, 0) (1, 2)
+    OP '{' (1, 2) (1, 3)
+    NAME 'f' (1, 3) (1, 4)
+    OP '(' (1, 4) (1, 5)
+    NAME 'a' (1, 5) (1, 6)
+    OP '=' (1, 6) (1, 7)
+    NAME 'lambda' (1, 7) (1, 13)
+    OP ':' (1, 13) (1, 14)
+    STRING "\'à\'" (1, 15) (1, 18)
+    NL '\\n' (1, 18) (1, 19)
+    OP ')' (2, 0) (2, 1)
+    OP '}' (2, 1) (2, 2)
+    FSTRING_END \'"\' (2, 2) (2, 3)
+    """)
+
 class GenerateTokensTest(TokenizeTest):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
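
The expected output above is check_tokenize's table format, which omits the
trailing NEWLINE and ENDMARKER tokens. A comparable token stream can be
dumped directly with the tokenize module (Python 3.12+, where f-strings
yield FSTRING_START/FSTRING_END tokens); an illustrative sketch:

    import io
    import tokenize

    src = '''f"{f(a=lambda: 'à'\n)}"'''
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string), tok.start, tok.end)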
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst
new file mode 100644
index 00000000000000..a709112306025f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-06-13-15-26.gh-issue-139516.d9Pkur.rst
@@ -0,0 +1 @@
+Fix lambda colon erroneously starting a format spec in an f-string in the tokenizer.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 81363cf8e810fe..a69994e9b3d005 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -1376,7 +1376,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
     }
 
-    if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
+    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
         current_tok->in_debug = 1;
     }
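
The change itself is a narrowing of the '=' check: the debug flag should
only be set when the '=' appears at the top level of the replacement field,
where the self-documenting f"{expr=}" form is legal, and not for an '='
nested inside brackets, such as a keyword argument. In Python terms, the two
cases the tokenizer now distinguishes (illustrative example, not from the
commit):

    def g(a):
        return a

    x = 1
    print(f"{x=}")      # top-level '=': debug form, prints  x=1
    print(f"{g(a=2)}")  # nested '=': keyword argument, prints  2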
diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h
index 5e8cac7249b21c..877127125a7652 100644
--- a/Parser/lexer/state.h
+++ b/Parser/lexer/state.h
@@ -9,6 +9,8 @@
 #define INSIDE_FSTRING(tok) (tok->tok_mode_stack_index > 0)
 #define INSIDE_FSTRING_EXPR(tok) (tok->curly_bracket_expr_start_depth >= 0)
+#define INSIDE_FSTRING_EXPR_AT_TOP(tok) \
+    (tok->curly_bracket_depth - tok->curly_bracket_expr_start_depth == 1)
 
 enum decoding_state {
     STATE_INIT,
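
The new macro encodes "at the top level of the replacement field": the
current bracket depth is exactly one more than the depth recorded when the
field's expression began, i.e. only the field's own '{' is still open.
(This assumes, as the surrounding lexer code suggests, that
curly_bracket_depth is incremented for every opening bracket while inside an
f-string.) The distinction is observable from Python with the tokenize
module; an illustrative sketch:

    import io
    import tokenize

    def token_kinds(src):
        # Names of all tokens produced for a one-line source string.
        return [tokenize.tok_name[t.type]
                for t in tokenize.generate_tokens(io.StringIO(src).readline)]

    print(token_kinds('f"{x:>4}"'))           # top-level ':' starts a format
                                              # spec: FSTRING_MIDDLE appears
    print(token_kinds('f"{(lambda: 1)()}"'))  # nested ':' stays a plain OP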