https://github.com/python/cpython/commit/459d493ce3288cda7dcebb868970b199764502f5
commit: 459d493ce3288cda7dcebb868970b199764502f5
branch: main
author: Maurycy Pawłowski-Wieroński <[email protected]>
committer: vstinner <[email protected]>
date: 2025-10-16T19:24:34+02:00
summary:

gh-140149: Use PyBytesWriter in _build_concatenated_bytes() (#140150)

Use PyBytesWriter in action_helpers.c _build_concatenated_bytes().
3x faster bytes concat in the parser.

Co-authored-by: Victor Stinner <[email protected]>

files:
A Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst
M Parser/action_helpers.c

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst
new file mode 100644
index 00000000000000..e98e28802cfee9
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst
@@ -0,0 +1,2 @@
+Speed up parsing bytes literals concatenation by using PyBytesWriter API and
+a single memory allocation (about 3x faster).
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index 57e46b4399c66d..b7a5b9d5e307b1 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -1612,19 +1612,46 @@ _build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno,
     Py_ssize_t len = asdl_seq_LEN(strings);
     assert(len > 0);
 
-    PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
-
     /* Bytes literals never get a kind, but just for consistency
         since they are represented as Constant nodes, we'll mirror
         the same behavior as unicode strings for determining the
         kind. */
-    PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
+    PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind;
+
+    Py_ssize_t total = 0;
+    for (Py_ssize_t i = 0; i < len; i++) {
+        expr_ty elem = asdl_seq_GET(strings, i);
+        PyObject *bytes = elem->v.Constant.value;
+        Py_ssize_t part = PyBytes_GET_SIZE(bytes);
+        if (part > PY_SSIZE_T_MAX - total) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+        total += part;
+    }
+
+    PyBytesWriter *writer = PyBytesWriter_Create(total);
+    if (writer == NULL) {
+        return NULL;
+    }
+    char *out = PyBytesWriter_GetData(writer);
+
     for (Py_ssize_t i = 0; i < len; i++) {
         expr_ty elem = asdl_seq_GET(strings, i);
-        PyBytes_Concat(&res, elem->v.Constant.value);
+        PyObject *bytes = elem->v.Constant.value;
+        Py_ssize_t part = PyBytes_GET_SIZE(bytes);
+        if (part > 0) {
+            memcpy(out, PyBytes_AS_STRING(bytes), part);
+            out += part;
+        }
     }
-    if (!res || _PyArena_AddPyObject(arena, res) < 0) {
-        Py_XDECREF(res);
+
+    PyObject *res = PyBytesWriter_Finish(writer);
+    if (res == NULL) {
+        return NULL;
+    }
+    if (_PyArena_AddPyObject(arena, res) < 0) {
+        Py_DECREF(res);
         return NULL;
     }
     return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org
Member address: [email protected]

Reply via email to