https://github.com/python/cpython/commit/60a3a0dd6fe140fdc87f6e769ee5bb17d92efe4e
commit: 60a3a0dd6fe140fdc87f6e769ee5bb17d92efe4e
branch: main
author: Pablo Galindo Salgado <[email protected]>
committer: pablogsal <[email protected]>
date: 2025-01-22T16:24:54Z
summary:
gh-124363: Treat debug expressions in f-string as raw strings (#128399)
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
files:
A
Misc/NEWS.d/next/Core_and_Builtins/2025-01-21-19-48-30.gh-issue-124363.vOFhHW.rst
M Lib/test/test_fstring.py
M Parser/action_helpers.c
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 31d91215889460..1d96b7a2c2459b 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1649,6 +1649,14 @@ def __repr__(self):
#self.assertEqual(f'X{x =}Y', 'Xx\t='+repr(x)+'Y')
#self.assertEqual(f'X{x = }Y', 'Xx\t=\t'+repr(x)+'Y')
+ def test_debug_expressions_are_raw_strings(self):
+
+ self.assertEqual(f'{b"\N{OX}"=}', 'b"\\N{OX}"=b\'\\\\N{OX}\'')
+ self.assertEqual(f'{r"\xff"=}', 'r"\\xff"=\'\\\\xff\'')
+ self.assertEqual(f'{r"\n"=}', 'r"\\n"=\'\\\\n\'')
+ self.assertEqual(f"{'\''=}", "'\\''=\"'\"")
+ self.assertEqual(f'{'\xc5'=}', r"'\xc5'='Å'")
+
def test_walrus(self):
x = 20
# This isn't an assignment expression, it's 'x', with a format
diff --git
a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-21-19-48-30.gh-issue-124363.vOFhHW.rst
b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-21-19-48-30.gh-issue-124363.vOFhHW.rst
new file mode 100644
index 00000000000000..553aa5a4dd573e
--- /dev/null
+++
b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-21-19-48-30.gh-issue-124363.vOFhHW.rst
@@ -0,0 +1 @@
+Treat debug expressions in f-string as raw strings. Patch by Pablo Galindo
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 5ac1dd7813689c..2fe8d11badcbac 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -969,8 +969,6 @@ _PyPegen_check_fstring_conversion(Parser *p, Token*
conv_token, expr_ty conv)
return result_token_with_metadata(p, conv, conv_token->metadata);
}
-static asdl_expr_seq *
-unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions);
ResultTokenWithMetadata *
_PyPegen_setup_full_format_spec(Parser *p, Token *colon, asdl_expr_seq *spec,
int lineno, int col_offset,
int end_lineno, int end_col_offset, PyArena
*arena)
@@ -1279,9 +1277,9 @@ _PyPegen_decode_fstring_part(Parser* p, int is_raw,
expr_ty constant, Token* tok
p->arena);
}
-static asdl_expr_seq *
-unpack_top_level_joined_strs(Parser *p, asdl_expr_seq *raw_expressions)
-{
+expr_ty
+_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b) {
+
/* The parser might put multiple f-string values into an individual
* JoinedStr node at the top level due to stuff like f-string debugging
* expressions. This function flattens those and promotes them to the
@@ -1289,44 +1287,14 @@ unpack_top_level_joined_strs(Parser *p, asdl_expr_seq
*raw_expressions)
* of the regular output, so this is not necessary if you are not going
* to expose the output AST to Python level. */
- Py_ssize_t i, req_size, raw_size;
-
- req_size = raw_size = asdl_seq_LEN(raw_expressions);
- expr_ty expr;
- for (i = 0; i < raw_size; i++) {
- expr = asdl_seq_GET(raw_expressions, i);
- if (expr->kind == JoinedStr_kind) {
- req_size += asdl_seq_LEN(expr->v.JoinedStr.values) - 1;
- }
- }
-
- asdl_expr_seq *expressions = _Py_asdl_expr_seq_new(req_size, p->arena);
- if (expressions == NULL) {
- return NULL;
- }
-
- Py_ssize_t raw_index, req_index = 0;
- for (raw_index = 0; raw_index < raw_size; raw_index++) {
- expr = asdl_seq_GET(raw_expressions, raw_index);
- if (expr->kind == JoinedStr_kind) {
- asdl_expr_seq *values = expr->v.JoinedStr.values;
- for (Py_ssize_t n = 0; n < asdl_seq_LEN(values); n++) {
- asdl_seq_SET(expressions, req_index, asdl_seq_GET(values, n));
- req_index++;
- }
- } else {
- asdl_seq_SET(expressions, req_index, expr);
- req_index++;
+ Py_ssize_t n_items = asdl_seq_LEN(expr);
+ Py_ssize_t total_items = n_items;
+ for (Py_ssize_t i = 0; i < n_items; i++) {
+ expr_ty item = asdl_seq_GET(expr, i);
+ if (item->kind == JoinedStr_kind) {
+ total_items += asdl_seq_LEN(item->v.JoinedStr.values) - 1;
}
}
- return expressions;
-}
-
-expr_ty
-_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions,
Token*b) {
-
- asdl_expr_seq *expr = unpack_top_level_joined_strs(p, raw_expressions);
- Py_ssize_t n_items = asdl_seq_LEN(expr);
const char* quote_str = PyBytes_AsString(a->bytes);
if (quote_str == NULL) {
@@ -1334,7 +1302,7 @@ _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq*
raw_expressions, Token*b
}
int is_raw = strpbrk(quote_str, "rR") != NULL;
- asdl_expr_seq *seq = _Py_asdl_expr_seq_new(n_items, p->arena);
+ asdl_expr_seq *seq = _Py_asdl_expr_seq_new(total_items, p->arena);
if (seq == NULL) {
return NULL;
}
@@ -1342,6 +1310,31 @@ _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq*
raw_expressions, Token*b
Py_ssize_t index = 0;
for (Py_ssize_t i = 0; i < n_items; i++) {
expr_ty item = asdl_seq_GET(expr, i);
+
+ // This should correspond to a JoinedStr node of two elements
+ // created _PyPegen_formatted_value. This situation can only be the
result of
+ // a f-string debug expression where the first element is a constant
with the text and the second
+ // a formatted value with the expression.
+ if (item->kind == JoinedStr_kind) {
+ asdl_expr_seq *values = item->v.JoinedStr.values;
+ if (asdl_seq_LEN(values) != 2) {
+ PyErr_Format(PyExc_SystemError,
+ "unexpected JoinedStr node without debug data in
f-string at line %d",
+ item->lineno);
+ return NULL;
+ }
+
+ expr_ty first = asdl_seq_GET(values, 0);
+ assert(first->kind == Constant_kind);
+ asdl_seq_SET(seq, index++, first);
+
+ expr_ty second = asdl_seq_GET(values, 1);
+ assert(second->kind == FormattedValue_kind);
+ asdl_seq_SET(seq, index++, second);
+
+ continue;
+ }
+
if (item->kind == Constant_kind) {
item = _PyPegen_decode_fstring_part(p, is_raw, item, b);
if (item == NULL) {
@@ -1360,7 +1353,7 @@ _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq*
raw_expressions, Token*b
}
asdl_expr_seq *resized_exprs;
- if (index != n_items) {
+ if (index != total_items) {
resized_exprs = _Py_asdl_expr_seq_new(index, p->arena);
if (resized_exprs == NULL) {
return NULL;
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]