https://github.com/python/cpython/commit/7ac0868708f342b8990404174a4d200105a4f728
commit: 7ac0868708f342b8990404174a4d200105a4f728
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-02-18T13:20:31+02:00
summary:

gh-135573: Make pickled lists, sets and dicts a tiny bit smaller (GH-144162)

Ensure that APPENDS and SETITEMS are never used for a batch of size 1.
Ensure that ADDITEMS and SETITEMS are never used for a batch of size 0.

This harmonizes the C implementation with the Python implementation,
which already guarantees this, and makes a pickle a tiny bit smaller
in rare cases (about 0.1% of the time).

Saves 1 byte for list and dict with size 1001, 2001, ...
Saves 2 bytes for set and dict with size 1000, 2000, ...

files:
M Modules/_pickle.c

diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index a897e45f00fab6..24d3443dd8abfe 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -3066,11 +3066,6 @@ batch_list(PickleState *state, PicklerObject *self, PyObject *iter, PyObject *or
 
     assert(iter != NULL);
 
-    /* XXX: I think this function could be made faster by avoiding the
-       iterator interface and fetching objects directly from list using
-       PyList_GET_ITEM.
-    */
-
     if (self->proto == 0) {
         /* APPENDS isn't available; do one at a time. */
         for (;; total++) {
@@ -3192,24 +3187,24 @@ batch_list_exact(PickleState *state, PicklerObject *self, PyObject *obj)
     assert(obj != NULL);
     assert(self->proto > 0);
     assert(PyList_CheckExact(obj));
-
-    if (PyList_GET_SIZE(obj) == 1) {
-        item = PyList_GET_ITEM(obj, 0);
-        Py_INCREF(item);
-        int err = save(state, self, item, 0);
-        Py_DECREF(item);
-        if (err < 0) {
-            _PyErr_FormatNote("when serializing %T item 0", obj);
-            return -1;
-        }
-        if (_Pickler_Write(self, &append_op, 1) < 0)
-            return -1;
-        return 0;
-    }
+    assert(PyList_GET_SIZE(obj));
 
     /* Write in batches of BATCHSIZE. */
     total = 0;
     do {
+        if (PyList_GET_SIZE(obj) - total == 1) {
+            item = PyList_GET_ITEM(obj, total);
+            Py_INCREF(item);
+            int err = save(state, self, item, 0);
+            Py_DECREF(item);
+            if (err < 0) {
+                _PyErr_FormatNote("when serializing %T item %zd", obj, total);
+                return -1;
+            }
+            if (_Pickler_Write(self, &append_op, 1) < 0)
+                return -1;
+            return 0;
+        }
         this_batch = 0;
         if (_Pickler_Write(self, &mark_op, 1) < 0)
             return -1;
@@ -3470,28 +3465,29 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
     assert(self->proto > 0);
 
     dict_size = PyDict_GET_SIZE(obj);
-
-    /* Special-case len(d) == 1 to save space. */
-    if (dict_size == 1) {
-        PyDict_Next(obj, &ppos, &key, &value);
-        Py_INCREF(key);
-        Py_INCREF(value);
-        if (save(state, self, key, 0) < 0) {
-            goto error;
-        }
-        if (save(state, self, value, 0) < 0) {
-            _PyErr_FormatNote("when serializing %T item %R", obj, key);
-            goto error;
-        }
-        Py_CLEAR(key);
-        Py_CLEAR(value);
-        if (_Pickler_Write(self, &setitem_op, 1) < 0)
-            return -1;
-        return 0;
-    }
+    assert(dict_size);
 
     /* Write in batches of BATCHSIZE. */
+    Py_ssize_t total = 0;
     do {
+        if (dict_size - total == 1) {
+            PyDict_Next(obj, &ppos, &key, &value);
+            Py_INCREF(key);
+            Py_INCREF(value);
+            if (save(state, self, key, 0) < 0) {
+                goto error;
+            }
+            if (save(state, self, value, 0) < 0) {
+                _PyErr_FormatNote("when serializing %T item %R", obj, key);
+                goto error;
+            }
+            Py_CLEAR(key);
+            Py_CLEAR(value);
+            if (_Pickler_Write(self, &setitem_op, 1) < 0)
+                return -1;
+            return 0;
+        }
+
         i = 0;
         if (_Pickler_Write(self, &mark_op, 1) < 0)
             return -1;
@@ -3507,6 +3503,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
             }
             Py_CLEAR(key);
             Py_CLEAR(value);
+            total++;
             if (++i == BATCHSIZE)
                 break;
         }
@@ -3519,7 +3516,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
             return -1;
         }
 
-    } while (i == BATCHSIZE);
+    } while (total < dict_size);
     return 0;
 error:
     Py_XDECREF(key);
@@ -3637,6 +3634,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
         return 0;  /* nothing to do */
 
     /* Write in batches of BATCHSIZE. */
+    Py_ssize_t total = 0;
     do {
         i = 0;
         if (_Pickler_Write(self, &mark_op, 1) < 0)
@@ -3651,6 +3649,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
                 _PyErr_FormatNote("when serializing %T element", obj);
                 break;
             }
+            total++;
             if (++i == BATCHSIZE)
                 break;
         }
@@ -3666,7 +3665,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
                 "set changed size during iteration");
             return -1;
         }
-    } while (i == BATCHSIZE);
+    } while (total < set_size);
 
     return 0;
 }

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to