https://github.com/python/cpython/commit/23a3e892ee34484676ec3d4ba683ff747e74e778 commit: 23a3e892ee34484676ec3d4ba683ff747e74e778 branch: 3.13 author: Miss Islington (bot) <31488909+miss-isling...@users.noreply.github.com> committer: pablogsal <pablog...@gmail.com> date: 2025-08-03T16:58:49Z summary:
[3.13] gh-137314: Fix incorrect treatment of format specs in raw fstrings (GH-137328) (#137345) gh-137314: Fix incorrect treatment of format specs in raw fstrings (GH-137328) (cherry picked from commit 0153d82a5ab0c6ac16c046bdd4438ea11b58d59d) Co-authored-by: Pablo Galindo Salgado <pablog...@gmail.com> files: A Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst M Lib/test/test_fstring.py M Parser/action_helpers.c M Parser/lexer/lexer.c M Parser/lexer/state.h diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index c2ab603a8a775c..5e743d34d2a573 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1821,6 +1821,34 @@ def test_newlines_in_format_specifiers(self): for case in valid_cases: compile(case, "<string>", "exec") + def test_raw_fstring_format_spec(self): + # Test raw f-string format spec behavior (Issue #137314). + # + # Raw f-strings should preserve literal backslashes in format specifications, + # not interpret them as escape sequences. + class UnchangedFormat: + """Test helper that returns the format spec unchanged.""" + def __format__(self, format): + return format + + # Test basic escape sequences + self.assertEqual(f"{UnchangedFormat():\xFF}", 'ÿ') + self.assertEqual(rf"{UnchangedFormat():\xFF}", '\\xFF') + + # Test nested expressions with raw/non-raw combinations + self.assertEqual(rf"{UnchangedFormat():{'\xFF'}}", 'ÿ') + self.assertEqual(f"{UnchangedFormat():{r'\xFF'}}", '\\xFF') + self.assertEqual(rf"{UnchangedFormat():{r'\xFF'}}", '\\xFF') + + # Test continuation character in format specs + self.assertEqual(f"""{UnchangedFormat():{'a'\ + 'b'}}""", 'ab') + self.assertEqual(rf"""{UnchangedFormat():{'a'\ + 'b'}}""", 'ab') + + # Test multiple format specs in same raw f-string + self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n') + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst new file mode 100644 index 00000000000000..09d0c3e68fc1ed --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-02-23-04-57.gh-issue-137314.wjEdzD.rst @@ -0,0 +1,5 @@ +Fixed a regression where raw f-strings incorrectly interpreted +escape sequences in format specifications. Raw f-strings now properly preserve +literal backslashes in format specs, matching the behavior from Python 3.11. +For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead of +``'ÿ'``. Patch by Pablo Galindo. diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 99dd0976c6137d..6f841ef2e4f9c3 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1,6 +1,7 @@ #include <Python.h> #include "pegen.h" +#include "lexer/state.h" #include "string_parser.h" #include "pycore_runtime.h" // _PyRuntime #include "pycore_pystate.h" // _PyInterpreterState_GET() @@ -1369,7 +1370,15 @@ expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) { if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) { return NULL; } - PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok); + + // Check if we're inside a raw f-string for format spec decoding + int is_raw = 0; + if (INSIDE_FSTRING(p->tok)) { + tokenizer_mode *mode = TOK_GET_MODE(p->tok); + is_raw = mode->f_string_raw; + } + + PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok); if (str == NULL) { return NULL; } diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index d29b0bbd25d2ab..384239bd414c38 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -22,22 +22,6 @@ || c == '_'\ || (c >= 128)) -#ifdef Py_DEBUG -static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) { - assert(tok->tok_mode_stack_index >= 0); - assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL); - return &(tok->tok_mode_stack[tok->tok_mode_stack_index]); -} -static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) { - assert(tok->tok_mode_stack_index >= 0); - assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL); - return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]); -} -#else -#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index])) -#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index])) -#endif - #define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end) #define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\ _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end)) diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h index 9ed3babfdbfbf1..d8791d89dd02db 100644 --- a/Parser/lexer/state.h +++ b/Parser/lexer/state.h @@ -1,6 +1,7 @@ #ifndef _PY_LEXER_H_ #define _PY_LEXER_H_ +#include "Python.h" #include "object.h" #define MAXINDENT 100 /* Max indentation level */ @@ -138,5 +139,20 @@ void _PyTokenizer_Free(struct tok_state *); void _PyToken_Free(struct token *); void _PyToken_Init(struct token *); +#ifdef Py_DEBUG +static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) { + assert(tok->tok_mode_stack_index >= 0); + assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL); + return &(tok->tok_mode_stack[tok->tok_mode_stack_index]); +} +static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) { + assert(tok->tok_mode_stack_index >= 0); + assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL); + return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]); +} +#else +#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index])) +#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index])) +#endif #endif _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3//lists/python-checkins.python.org Member address: arch...@mail-archive.com