https://github.com/python/cpython/commit/737b4ba020ecaf4b30d5a4c8f99882ce0001ddd6
commit: 737b4ba020ecaf4b30d5a4c8f99882ce0001ddd6
branch: main
author: Bénédikt Tran <10796600+picn...@users.noreply.github.com>
committer: picnixz <10796600+picn...@users.noreply.github.com>
date: 2025-05-27T10:48:34+02:00
summary:

gh-134635: add `zlib.{adler32,crc32}_combine` to combine checksums (#134650)

files:
A Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst
M Doc/library/zlib.rst
M Doc/whatsnew/3.15.rst
M Lib/test/test_zlib.py
M Modules/clinic/zlibmodule.c.h
M Modules/zlibmodule.c

diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst
index 75ead3c4cb144c..7c5e9b086e170d 100644
--- a/Doc/library/zlib.rst
+++ b/Doc/library/zlib.rst
@@ -44,6 +44,20 @@ The available exception and functions in this module are:
    .. versionchanged:: 3.0
       The result is always unsigned.
 
+.. function:: adler32_combine(adler1, adler2, len2, /)
+
+   Combine two Adler-32 checksums into one.
+
+   Given the Adler-32 checksum *adler1* of a sequence ``A`` and the
+   Adler-32 checksum *adler2* of a sequence ``B`` of length *len2*,
+   return the Adler-32 checksum of ``A`` and ``B`` concatenated.
+
+   This function is typically useful to combine Adler-32 checksums
+   that were concurrently computed. To compute checksums sequentially, use
+   :func:`adler32` with the running checksum as the ``value`` argument.
+
+   .. versionadded:: next
+
 .. function:: compress(data, /, level=-1, wbits=MAX_WBITS)
 
    Compresses the bytes in *data*, returning a bytes object containing compressed data.
@@ -136,6 +150,20 @@ The available exception and functions in this module are:
    .. versionchanged:: 3.0
       The result is always unsigned.
 
+.. function:: crc32_combine(crc1, crc2, len2, /)
+
+   Combine two CRC-32 checksums into one.
+
+   Given the CRC-32 checksum *crc1* of a sequence ``A`` and the
+   CRC-32 checksum *crc2* of a sequence ``B`` of length *len2*,
+   return the CRC-32 checksum of ``A`` and ``B`` concatenated.
+
+   This function is typically useful to combine CRC-32 checksums
+   that were concurrently computed. To compute checksums sequentially, use
+   :func:`crc32` with the running checksum as the ``value`` argument.
+
+   .. versionadded:: next
+
 .. function:: decompress(data, /, wbits=MAX_WBITS, bufsize=DEF_BUF_SIZE)
 
    Decompresses the bytes in *data*, returning a bytes object containing the
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index bf186c191b04d1..cd4b2e8b3dd8ed 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -97,6 +97,16 @@ ssl
   (Contributed by Will Childs-Klein in :gh:`133624`.)
 
 
+zlib
+----
+
+* Allow combining two Adler-32 checksums via :func:`~zlib.adler32_combine`.
+  (Contributed by Callum Attryde and Bénédikt Tran in :gh:`134635`.)
+
+* Allow combining two CRC-32 checksums via :func:`~zlib.crc32_combine`.
+  (Contributed by Bénédikt Tran in :gh:`134635`.)
+
+
 .. Add improved modules above alphabetically, not here at the end.
 
 Optimizations
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index 4d97fe56f3a094..c57ab51eca16b4 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -119,6 +119,114 @@ def test_same_as_binascii_crc32(self):
         self.assertEqual(binascii.crc32(b'spam'), zlib.crc32(b'spam'))
 
 
+class ChecksumCombineMixin:
+    """Mixin class for testing checksum combination."""
+
+    N = 1000
+    default_iv: int
+
+    def parse_iv(self, iv):
+        """Parse an IV value.
+
+        - The default IV is returned if *iv* is None.
+        - A random IV is returned if *iv* is -1.
+        - Otherwise, *iv* is returned as is.
+        """
+        if iv is None:
+            return self.default_iv
+        if iv == -1:
+            return random.randint(1, 0x80000000)
+        return iv
+
+    def checksum(self, data, init=None):
+        """Compute the checksum of data with a given initial value.
+
+        The *init* value is parsed by ``parse_iv``.
+        """
+        iv = self.parse_iv(init)
+        return self._checksum(data, iv)
+
+    def _checksum(self, data, init):
+        raise NotImplementedError
+
+    def combine(self, a, b, blen):
+        """Combine two checksums together."""
+        raise NotImplementedError
+
+    def get_random_data(self, data_len, *, iv=None):
+        """Get a triplet (data, iv, checksum)."""
+        data = random.randbytes(data_len)
+        init = self.parse_iv(iv)
+        checksum = self.checksum(data, init)
+        return data, init, checksum
+
+    def test_combine_empty(self):
+        for _ in range(self.N):
+            a, iv, checksum = self.get_random_data(32, iv=-1)
+            res = self.combine(iv, self.checksum(a), len(a))
+            self.assertEqual(res, checksum)
+
+    def test_combine_no_iv(self):
+        for _ in range(self.N):
+            a, _, chk_a = self.get_random_data(32)
+            b, _, chk_b = self.get_random_data(64)
+            res = self.combine(chk_a, chk_b, len(b))
+            self.assertEqual(res, self.checksum(a + b))
+
+    def test_combine_no_iv_invalid_length(self):
+        a, _, chk_a = self.get_random_data(32)
+        b, _, chk_b = self.get_random_data(64)
+        checksum = self.checksum(a + b)
+        for invalid_len in [1, len(a), 48, len(b) + 1, 191]:
+            invalid_res = self.combine(chk_a, chk_b, invalid_len)
+            self.assertNotEqual(invalid_res, checksum)
+
+        self.assertRaises(TypeError, self.combine, 0, 0, "len")
+
+    def test_combine_with_iv(self):
+        for _ in range(self.N):
+            a, iv_a, chk_a_with_iv = self.get_random_data(32, iv=-1)
+            chk_a_no_iv = self.checksum(a)
+            b, iv_b, chk_b_with_iv = self.get_random_data(64, iv=-1)
+            chk_b_no_iv = self.checksum(b)
+
+            # We can represent c = COMBINE(CHK(a, iv_a), CHK(b, iv_b)) as:
+            #
+            #   c = CHK(CHK(b'', iv_a) + CHK(a) + CHK(b'', iv_b) + CHK(b))
+            #     = COMBINE(
+            #           COMBINE(CHK(b'', iv_a), CHK(a)),
+            #           COMBINE(CHK(b'', iv_b), CHK(b)),
+            #       )
+            #     = COMBINE(COMBINE(iv_a, CHK(a)), COMBINE(iv_b, CHK(b)))
+            tmp0 = self.combine(iv_a, chk_a_no_iv, len(a))
+            tmp1 = self.combine(iv_b, chk_b_no_iv, len(b))
+            expected = self.combine(tmp0, tmp1, len(b))
+            checksum = self.combine(chk_a_with_iv, chk_b_with_iv, len(b))
+            self.assertEqual(checksum, expected)
+
+
+class CRC32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
+
+    default_iv = 0
+
+    def _checksum(self, data, init):
+        return zlib.crc32(data, init)
+
+    def combine(self, a, b, blen):
+        return zlib.crc32_combine(a, b, blen)
+
+
+class Adler32CombineTestCase(ChecksumCombineMixin, unittest.TestCase):
+
+    default_iv = 1
+
+    def _checksum(self, data, init):
+        return zlib.adler32(data, init)
+
+    def combine(self, a, b, blen):
+        return zlib.adler32_combine(a, b, blen)
+
+
 # Issue #10276 - check that inputs >=4 GiB are handled correctly.
 class ChecksumBigBufferTestCase(unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst b/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst
new file mode 100644
index 00000000000000..4cabbf2f896917
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-05-25-13-46-37.gh-issue-134635.ZlPrlX.rst
@@ -0,0 +1,3 @@
+:mod:`zlib`: Allow to combine Adler-32 and CRC-32 checksums via
+:func:`~zlib.adler32_combine` and :func:`~zlib.crc32_combine`. Patch by
+Callum Attryde and Bénédikt Tran.
diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h
index 2710f65a840db9..146a7e250019f0 100644
--- a/Modules/clinic/zlibmodule.c.h
+++ b/Modules/clinic/zlibmodule.c.h
@@ -1044,6 +1044,65 @@ zlib_adler32(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
     return return_value;
 }
 
+PyDoc_STRVAR(zlib_adler32_combine__doc__,
+"adler32_combine($module, adler1, adler2, len2, /)\n"
+"--\n"
+"\n"
+"Combine two Adler-32 checksums into one.\n"
+"\n"
+"  adler1\n"
+"    Adler-32 checksum for sequence A\n"
+"  adler2\n"
+"    Adler-32 checksum for sequence B\n"
+"  len2\n"
+"    Length of sequence B\n"
+"\n"
+"Given the Adler-32 checksum \'adler1\' of a sequence A and the\n"
+"Adler-32 checksum \'adler2\' of a sequence B of length \'len2\',\n"
+"return the Adler-32 checksum of A and B concatenated.");
+
+#define ZLIB_ADLER32_COMBINE_METHODDEF    \
+    {"adler32_combine", _PyCFunction_CAST(zlib_adler32_combine), METH_FASTCALL, zlib_adler32_combine__doc__},
+
+static unsigned int
+zlib_adler32_combine_impl(PyObject *module, unsigned int adler1,
+                          unsigned int adler2, PyObject *len2);
+
+static PyObject *
+zlib_adler32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    unsigned int adler1;
+    unsigned int adler2;
+    PyObject *len2;
+    unsigned int _return_value;
+
+    if (!_PyArg_CheckPositional("adler32_combine", nargs, 3, 3)) {
+        goto exit;
+    }
+    adler1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]);
+    if (adler1 == (unsigned int)-1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    adler2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]);
+    if (adler2 == (unsigned int)-1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (!PyLong_Check(args[2])) {
+        _PyArg_BadArgument("adler32_combine", "argument 3", "int", args[2]);
+        goto exit;
+    }
+    len2 = args[2];
+    _return_value = zlib_adler32_combine_impl(module, adler1, adler2, len2);
+    if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyLong_FromUnsignedLong((unsigned long)_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(zlib_crc32__doc__,
 "crc32($module, data, value=0, /)\n"
 "--\n"
@@ -1098,6 +1157,65 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
     return return_value;
 }
 
+PyDoc_STRVAR(zlib_crc32_combine__doc__,
+"crc32_combine($module, crc1, crc2, len2, /)\n"
+"--\n"
+"\n"
+"Combine two CRC-32 checksums into one.\n"
+"\n"
+"  crc1\n"
+"    CRC-32 checksum for sequence A\n"
+"  crc2\n"
+"    CRC-32 checksum for sequence B\n"
+"  len2\n"
+"    Length of sequence B\n"
+"\n"
+"Given the CRC-32 checksum \'crc1\' of a sequence A and the\n"
+"CRC-32 checksum \'crc2\' of a sequence B of length \'len2\',\n"
+"return the CRC-32 checksum of A and B concatenated.");
+
+#define ZLIB_CRC32_COMBINE_METHODDEF    \
+    {"crc32_combine", _PyCFunction_CAST(zlib_crc32_combine), METH_FASTCALL, zlib_crc32_combine__doc__},
+
+static unsigned int
+zlib_crc32_combine_impl(PyObject *module, unsigned int crc1,
+                        unsigned int crc2, PyObject *len2);
+
+static PyObject *
+zlib_crc32_combine(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    unsigned int crc1;
+    unsigned int crc2;
+    PyObject *len2;
+    unsigned int _return_value;
+
+    if (!_PyArg_CheckPositional("crc32_combine", nargs, 3, 3)) {
+        goto exit;
+    }
+    crc1 = (unsigned int)PyLong_AsUnsignedLongMask(args[0]);
+    if (crc1 == (unsigned int)-1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    crc2 = (unsigned int)PyLong_AsUnsignedLongMask(args[1]);
+    if (crc2 == (unsigned int)-1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (!PyLong_Check(args[2])) {
+        _PyArg_BadArgument("crc32_combine", "argument 3", "int", args[2]);
+        goto exit;
+    }
+    len2 = args[2];
+    _return_value = zlib_crc32_combine_impl(module, crc1, crc2, len2);
+    if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyLong_FromUnsignedLong((unsigned long)_return_value);
+
+exit:
+    return return_value;
+}
+
 #ifndef ZLIB_COMPRESS_COPY_METHODDEF
     #define ZLIB_COMPRESS_COPY_METHODDEF
 #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */
@@ -1121,4 +1239,4 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
 #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
     #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
 #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
-/*[clinic end generated code: output=33938c7613a8c1c7 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=3f7692eb3b5d5a0c input=a9049054013a1b77]*/
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index d4b4b91697c08e..f7009364644b7e 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -17,6 +17,16 @@
 #error "At least zlib version 1.2.2.1 is required"
 #endif
 
+#if (SIZEOF_OFF_T == SIZEOF_SIZE_T)
+#  define convert_to_z_off_t  PyLong_AsSsize_t
+#elif (SIZEOF_OFF_T == SIZEOF_LONG_LONG)
+#  define convert_to_z_off_t  PyLong_AsLongLong
+#elif (SIZEOF_OFF_T == SIZEOF_LONG)
+#  define convert_to_z_off_t  PyLong_AsLong
+#else
+#  error off_t does not match either size_t, long, or long long!
+#endif
+
 // Blocks output buffer wrappers
 #include "pycore_blocks_output_buffer.h"
 
@@ -1876,6 +1886,44 @@ zlib_adler32_impl(PyObject *module, Py_buffer *data, unsigned int value)
     return PyLong_FromUnsignedLong(value & 0xffffffffU);
 }
 
+/*[clinic input]
+zlib.adler32_combine -> unsigned_int
+
+    adler1: unsigned_int(bitwise=True)
+        Adler-32 checksum for sequence A
+
+    adler2: unsigned_int(bitwise=True)
+        Adler-32 checksum for sequence B
+
+    len2: object(subclass_of='&PyLong_Type')
+        Length of sequence B
+    /
+
+Combine two Adler-32 checksums into one.
+
+Given the Adler-32 checksum 'adler1' of a sequence A and the
+Adler-32 checksum 'adler2' of a sequence B of length 'len2',
+return the Adler-32 checksum of A and B concatenated.
+[clinic start generated code]*/
+
+static unsigned int
+zlib_adler32_combine_impl(PyObject *module, unsigned int adler1,
+                          unsigned int adler2, PyObject *len2)
+/*[clinic end generated code: output=61842cefb16afb1b input=51bb045c95130c6f]*/
+{
+#if defined(Z_WANT64)
+    z_off64_t len = convert_to_z_off_t(len2);
+#else
+    z_off_t len = convert_to_z_off_t(len2);
+#endif
+    if (PyErr_Occurred()) {
+        return (unsigned int)-1;
+    }
+    return adler32_combine(adler1, adler2, len);
+}
+
+
+
 /*[clinic input]
 zlib.crc32 -> unsigned_int
 
@@ -1923,13 +1971,50 @@ zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
     return value;
 }
 
+/*[clinic input]
+zlib.crc32_combine -> unsigned_int
+
+    crc1: unsigned_int(bitwise=True)
+        CRC-32 checksum for sequence A
+
+    crc2: unsigned_int(bitwise=True)
+        CRC-32 checksum for sequence B
+
+    len2: object(subclass_of='&PyLong_Type')
+        Length of sequence B
+    /
+
+Combine two CRC-32 checksums into one.
+
+Given the CRC-32 checksum 'crc1' of a sequence A and the
+CRC-32 checksum 'crc2' of a sequence B of length 'len2',
+return the CRC-32 checksum of A and B concatenated.
+[clinic start generated code]*/
+
+static unsigned int
+zlib_crc32_combine_impl(PyObject *module, unsigned int crc1,
+                        unsigned int crc2, PyObject *len2)
+/*[clinic end generated code: output=c4def907c602e6eb input=9c8a065d9040dc66]*/
+{
+#if defined(Z_WANT64)
+    z_off64_t len = convert_to_z_off_t(len2);
+#else
+    z_off_t len = convert_to_z_off_t(len2);
+#endif
+    if (PyErr_Occurred()) {
+        return (unsigned int)-1;
+    }
+    return crc32_combine(crc1, crc2, len);
+}
 
 static PyMethodDef zlib_methods[] =
 {
     ZLIB_ADLER32_METHODDEF
+    ZLIB_ADLER32_COMBINE_METHODDEF
     ZLIB_COMPRESS_METHODDEF
     ZLIB_COMPRESSOBJ_METHODDEF
     ZLIB_CRC32_METHODDEF
+    ZLIB_CRC32_COMBINE_METHODDEF
     ZLIB_DECOMPRESS_METHODDEF
     ZLIB_DECOMPRESSOBJ_METHODDEF
     {NULL, NULL}
@@ -1981,14 +2066,17 @@ static PyType_Spec ZlibDecompressor_type_spec = {
     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
     .slots = ZlibDecompressor_type_slots,
 };
+
 PyDoc_STRVAR(zlib_module_documentation,
 "The functions in this module allow compression and decompression using the\n"
 "zlib library, which is based on GNU zip.\n"
 "\n"
 "adler32(string[, start]) -- Compute an Adler-32 checksum.\n"
+"adler32_combine(adler1, adler2, len2, /) -- Combine two Adler-32 checksums.\n"
 "compress(data[, level]) -- Compress data, with compression level 0-9 or -1.\n"
 "compressobj([level[, ...]]) -- Return a compressor object.\n"
 "crc32(string[, start]) -- Compute a CRC-32 checksum.\n"
+"crc32_combine(crc1, crc2, len2, /) -- Combine two CRC-32 checksums.\n"
 "decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n"
 "decompressobj([wbits[, zdict]]) -- Return a decompressor object.\n"
 "\n"

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: arch...@mail-archive.com

Reply via email to