https://github.com/python/cpython/commit/14e6052b438d99acaba7a39d454af98cfdf5cac7
commit: 14e6052b438d99acaba7a39d454af98cfdf5cac7
branch: main
author: Cody Maloney <[email protected]>
committer: vstinner <[email protected]>
date: 2025-12-15T13:10:31+01:00
summary:
gh-139871: Optimize bytearray construction with encoding (#142243)
When a `str` is encoded in `bytearray.__init__`, the encoder usually
creates a new, uniquely referenced bytes object. Rather than allocating new
memory and copying the bytes, use the already-created bytes object as the
bytearray's backing buffer. The bigger the `str`, the bigger the saving.
Mean +- std dev: [main_encoding] 497 us +- 9 us -> [encoding] 14.2 us +- 0.3
us: 34.97x faster
```python
import pyperf
runner = pyperf.Runner()
runner.timeit(
name="encode",
setup="a = 'a' * 1_000_000",
stmt="bytearray(a, encoding='utf8')")
```
files:
M Objects/bytearrayobject.c
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 99e1c9b13f7879..25cc0bfcbaba45 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -914,6 +914,10 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject
*arg,
return -1;
}
+ /* Should be caused by first init or the resize to 0. */
+ assert(self->ob_bytes_object ==
Py_GetConstantBorrowed(Py_CONSTANT_EMPTY_BYTES));
+ assert(self->ob_exports == 0);
+
/* Make a quick exit if no first argument */
if (arg == NULL) {
if (encoding != NULL || errors != NULL) {
@@ -935,9 +939,20 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject
*arg,
return -1;
}
encoded = PyUnicode_AsEncodedString(arg, encoding, errors);
- if (encoded == NULL)
+ if (encoded == NULL) {
return -1;
+ }
assert(PyBytes_Check(encoded));
+
+ /* Most encodes return a new unique bytes, just use it as buffer. */
+ if (_PyObject_IsUniquelyReferenced(encoded)
+ && PyBytes_CheckExact(encoded))
+ {
+ Py_ssize_t size = Py_SIZE(encoded);
+ self->ob_bytes_object = encoded;
+ bytearray_reinit_from_bytes(self, size, size);
+ return 0;
+ }
new = bytearray_iconcat((PyObject*)self, encoded);
Py_DECREF(encoded);
if (new == NULL)
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]