Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r89185:f5d06c5fc573
Date: 2016-12-19 17:12 +0100
http://bitbucket.org/pypy/pypy/changeset/f5d06c5fc573/

Log:    zlib.crc32() and zlib.adler32(): support strings longer than 2**32 bytes

diff --git a/rpython/rlib/rzlib.py b/rpython/rlib/rzlib.py
--- a/rpython/rlib/rzlib.py
+++ b/rpython/rlib/rzlib.py
@@ -172,6 +172,18 @@
 
 # ____________________________________________________________
 
+def _crc_or_adler(string, start, function):
+    with rffi.scoped_nonmovingbuffer(string) as bytes:
+        remaining = len(string)
+        checksum = start
+        ptr = rffi.cast(Bytefp, bytes)
+        while remaining > 0:
+            count = min(remaining, 32*1024*1024)
+            checksum = function(checksum, ptr, count)
+            ptr = rffi.ptradd(ptr, count)
+            remaining -= count
+    return checksum
+
 CRC32_DEFAULT_START = 0
 
 def crc32(string, start=CRC32_DEFAULT_START):
@@ -179,13 +191,18 @@
     Compute the CRC32 checksum of the string, possibly with the given
     start value, and return it as a unsigned 32 bit integer.
     """
-    with rffi.scoped_nonmovingbuffer(string) as bytes:
-        checksum = _crc32(start, rffi.cast(Bytefp, bytes), len(string))
-    return checksum
-
+    return _crc_or_adler(string, start, _crc32)
 
 ADLER32_DEFAULT_START = 1
 
+def adler32(string, start=ADLER32_DEFAULT_START):
+    """
+    Compute the Adler-32 checksum of the string, possibly with the given
+    start value, and return it as a unsigned 32 bit integer.
+    """
+    return _crc_or_adler(string, start, _adler32)
+
+
 def deflateSetDictionary(stream, string):
     with rffi.scoped_nonmovingbuffer(string) as buf:
         err = _deflateSetDictionary(stream, rffi.cast(Bytefp, buf), len(string))
@@ -200,16 +217,6 @@
     elif err == Z_DATA_ERROR:
         raise RZlibError("The given dictionary doesn't match the expected one")
 
-    
-def adler32(string, start=ADLER32_DEFAULT_START):
-    """
-    Compute the Adler-32 checksum of the string, possibly with the given
-    start value, and return it as a unsigned 32 bit integer.
-    """
-    with rffi.scoped_nonmovingbuffer(string) as bytes:
-        checksum = _adler32(start, rffi.cast(Bytefp, bytes), len(string))
-    return checksum
-
 def zlibVersion():
     """Return the runtime version of zlib library"""
     return rffi.charp2str(_zlibVersion())
diff --git a/rpython/rlib/test/test_rzlib.py b/rpython/rlib/test/test_rzlib.py
--- a/rpython/rlib/test/test_rzlib.py
+++ b/rpython/rlib/test/test_rzlib.py
@@ -278,20 +278,29 @@
 def test_translate_and_large_input():
     from rpython.translator.c.test.test_genc import compile
 
-    def f(i):
+    def f(i, check):
         bytes = "s" * i
-        for j in range(3):
-            stream = rzlib.deflateInit()
-            bytes = rzlib.compress(stream, bytes, rzlib.Z_FINISH)
-            rzlib.deflateEnd(stream)
-        return bytes
+        if check == 1:
+            for j in range(3):
+                stream = rzlib.deflateInit()
+                bytes = rzlib.compress(stream, bytes, rzlib.Z_FINISH)
+                rzlib.deflateEnd(stream)
+            return bytes
+        if check == 2:
+            return str(rzlib.adler32(bytes))
+        if check == 3:
+            return str(rzlib.crc32(bytes))
+        return '?'
 
-    fc = compile(f, [int])
+    fc = compile(f, [int, int])
 
     test_list = [1, 2, 3, 5, 8, 87, 876, 8765, 87654, 876543, 8765432,
                  127329129]       # up to ~128MB
     if sys.maxint > 2**32:
-        test_list.append(2971215073)    # 3GB (greater than INPUT_BUFFER_MAX)
+        test_list.append(4305704715)    # 4.01GB
+        # XXX should we have a way to say "I don't have enough RAM,
+        # don't run this"?
+
     for a in test_list:
         print 'Testing compression of "s" * %d' % a
         z = zlib.compressobj()
@@ -306,4 +315,17 @@
         del pieces
         expected = zlib.compress(expected)
         expected = zlib.compress(expected)
-        assert fc(a) == expected
+        assert fc(a, 1) == expected
+
+        print 'Testing adler32 and crc32 of "s" * %d' % a
+        def compute(function, start):
+            count = a
+            while count > 0:
+                count1 = min(count, 1024*1024)
+                start = function("s" * count1, start)
+                count -= count1
+            return start
+        expected_adler32 = compute(zlib.adler32, 1) & (2**32-1)
+        expected_crc32 = compute(zlib.crc32, 0) & (2**32-1)
+        assert fc(a, 2) == str(expected_adler32)
+        assert fc(a, 3) == str(expected_crc32)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to