https://github.com/python/cpython/commit/7ac0868708f342b8990404174a4d200105a4f728
commit: 7ac0868708f342b8990404174a4d200105a4f728
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-02-18T13:20:31+02:00
summary:
gh-135573: Make pickled lists, sets and dicts a tiny bit smaller (GH-144162)
Ensure that APPENDS and SETITEMS are never used for a batch of size 1.
Ensure that ADDITEMS and SETITEMS are never used for a batch of size 0.
This harmonizes the C implementation with the Python implementation,
which already guarantees this, and makes a pickle a tiny bit smaller
in rare cases (about 0.1% of the time).
Saves 1 byte for lists and dicts of size 1001, 2001, ...
Saves 2 bytes for sets and dicts of size 1000, 2000, ...
files:
M Modules/_pickle.c
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index a897e45f00fab6..24d3443dd8abfe 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -3066,11 +3066,6 @@ batch_list(PickleState *state, PicklerObject *self,
PyObject *iter, PyObject *or
assert(iter != NULL);
- /* XXX: I think this function could be made faster by avoiding the
- iterator interface and fetching objects directly from list using
- PyList_GET_ITEM.
- */
-
if (self->proto == 0) {
/* APPENDS isn't available; do one at a time. */
for (;; total++) {
@@ -3192,24 +3187,24 @@ batch_list_exact(PickleState *state, PicklerObject
*self, PyObject *obj)
assert(obj != NULL);
assert(self->proto > 0);
assert(PyList_CheckExact(obj));
-
- if (PyList_GET_SIZE(obj) == 1) {
- item = PyList_GET_ITEM(obj, 0);
- Py_INCREF(item);
- int err = save(state, self, item, 0);
- Py_DECREF(item);
- if (err < 0) {
- _PyErr_FormatNote("when serializing %T item 0", obj);
- return -1;
- }
- if (_Pickler_Write(self, &append_op, 1) < 0)
- return -1;
- return 0;
- }
+ assert(PyList_GET_SIZE(obj));
/* Write in batches of BATCHSIZE. */
total = 0;
do {
+ if (PyList_GET_SIZE(obj) - total == 1) {
+ item = PyList_GET_ITEM(obj, total);
+ Py_INCREF(item);
+ int err = save(state, self, item, 0);
+ Py_DECREF(item);
+ if (err < 0) {
+ _PyErr_FormatNote("when serializing %T item %zd", obj, total);
+ return -1;
+ }
+ if (_Pickler_Write(self, &append_op, 1) < 0)
+ return -1;
+ return 0;
+ }
this_batch = 0;
if (_Pickler_Write(self, &mark_op, 1) < 0)
return -1;
@@ -3470,28 +3465,29 @@ batch_dict_exact(PickleState *state, PicklerObject
*self, PyObject *obj)
assert(self->proto > 0);
dict_size = PyDict_GET_SIZE(obj);
-
- /* Special-case len(d) == 1 to save space. */
- if (dict_size == 1) {
- PyDict_Next(obj, &ppos, &key, &value);
- Py_INCREF(key);
- Py_INCREF(value);
- if (save(state, self, key, 0) < 0) {
- goto error;
- }
- if (save(state, self, value, 0) < 0) {
- _PyErr_FormatNote("when serializing %T item %R", obj, key);
- goto error;
- }
- Py_CLEAR(key);
- Py_CLEAR(value);
- if (_Pickler_Write(self, &setitem_op, 1) < 0)
- return -1;
- return 0;
- }
+ assert(dict_size);
/* Write in batches of BATCHSIZE. */
+ Py_ssize_t total = 0;
do {
+ if (dict_size - total == 1) {
+ PyDict_Next(obj, &ppos, &key, &value);
+ Py_INCREF(key);
+ Py_INCREF(value);
+ if (save(state, self, key, 0) < 0) {
+ goto error;
+ }
+ if (save(state, self, value, 0) < 0) {
+ _PyErr_FormatNote("when serializing %T item %R", obj, key);
+ goto error;
+ }
+ Py_CLEAR(key);
+ Py_CLEAR(value);
+ if (_Pickler_Write(self, &setitem_op, 1) < 0)
+ return -1;
+ return 0;
+ }
+
i = 0;
if (_Pickler_Write(self, &mark_op, 1) < 0)
return -1;
@@ -3507,6 +3503,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self,
PyObject *obj)
}
Py_CLEAR(key);
Py_CLEAR(value);
+ total++;
if (++i == BATCHSIZE)
break;
}
@@ -3519,7 +3516,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self,
PyObject *obj)
return -1;
}
- } while (i == BATCHSIZE);
+ } while (total < dict_size);
return 0;
error:
Py_XDECREF(key);
@@ -3637,6 +3634,7 @@ save_set(PickleState *state, PicklerObject *self,
PyObject *obj)
return 0; /* nothing to do */
/* Write in batches of BATCHSIZE. */
+ Py_ssize_t total = 0;
do {
i = 0;
if (_Pickler_Write(self, &mark_op, 1) < 0)
@@ -3651,6 +3649,7 @@ save_set(PickleState *state, PicklerObject *self,
PyObject *obj)
_PyErr_FormatNote("when serializing %T element", obj);
break;
}
+ total++;
if (++i == BATCHSIZE)
break;
}
@@ -3666,7 +3665,7 @@ save_set(PickleState *state, PicklerObject *self,
PyObject *obj)
"set changed size during iteration");
return -1;
}
- } while (i == BATCHSIZE);
+ } while (total < set_size);
return 0;
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org
Member address: [email protected]