Author: Armin Rigo <[email protected]>
Branch:
Changeset: r89185:f5d06c5fc573
Date: 2016-12-19 17:12 +0100
http://bitbucket.org/pypy/pypy/changeset/f5d06c5fc573/
Log: zlib.crc32() and zlib.adler32(): support strings longer than 2**32 bytes
diff --git a/rpython/rlib/rzlib.py b/rpython/rlib/rzlib.py
--- a/rpython/rlib/rzlib.py
+++ b/rpython/rlib/rzlib.py
@@ -172,6 +172,18 @@
# ____________________________________________________________
+def _crc_or_adler(string, start, function):
+ with rffi.scoped_nonmovingbuffer(string) as bytes:
+ remaining = len(string)
+ checksum = start
+ ptr = rffi.cast(Bytefp, bytes)
+ while remaining > 0:
+ count = min(remaining, 32*1024*1024)
+ checksum = function(checksum, ptr, count)
+ ptr = rffi.ptradd(ptr, count)
+ remaining -= count
+ return checksum
+
CRC32_DEFAULT_START = 0
def crc32(string, start=CRC32_DEFAULT_START):
@@ -179,13 +191,18 @@
Compute the CRC32 checksum of the string, possibly with the given
start value, and return it as a unsigned 32 bit integer.
"""
- with rffi.scoped_nonmovingbuffer(string) as bytes:
- checksum = _crc32(start, rffi.cast(Bytefp, bytes), len(string))
- return checksum
-
+ return _crc_or_adler(string, start, _crc32)
ADLER32_DEFAULT_START = 1
+def adler32(string, start=ADLER32_DEFAULT_START):
+ """
+ Compute the Adler-32 checksum of the string, possibly with the given
+ start value, and return it as a unsigned 32 bit integer.
+ """
+ return _crc_or_adler(string, start, _adler32)
+
+
def deflateSetDictionary(stream, string):
with rffi.scoped_nonmovingbuffer(string) as buf:
err = _deflateSetDictionary(stream, rffi.cast(Bytefp, buf),
len(string))
@@ -200,16 +217,6 @@
elif err == Z_DATA_ERROR:
raise RZlibError("The given dictionary doesn't match the expected one")
-
-def adler32(string, start=ADLER32_DEFAULT_START):
- """
- Compute the Adler-32 checksum of the string, possibly with the given
- start value, and return it as a unsigned 32 bit integer.
- """
- with rffi.scoped_nonmovingbuffer(string) as bytes:
- checksum = _adler32(start, rffi.cast(Bytefp, bytes), len(string))
- return checksum
-
def zlibVersion():
"""Return the runtime version of zlib library"""
return rffi.charp2str(_zlibVersion())
diff --git a/rpython/rlib/test/test_rzlib.py b/rpython/rlib/test/test_rzlib.py
--- a/rpython/rlib/test/test_rzlib.py
+++ b/rpython/rlib/test/test_rzlib.py
@@ -278,20 +278,29 @@
def test_translate_and_large_input():
from rpython.translator.c.test.test_genc import compile
- def f(i):
+ def f(i, check):
bytes = "s" * i
- for j in range(3):
- stream = rzlib.deflateInit()
- bytes = rzlib.compress(stream, bytes, rzlib.Z_FINISH)
- rzlib.deflateEnd(stream)
- return bytes
+ if check == 1:
+ for j in range(3):
+ stream = rzlib.deflateInit()
+ bytes = rzlib.compress(stream, bytes, rzlib.Z_FINISH)
+ rzlib.deflateEnd(stream)
+ return bytes
+ if check == 2:
+ return str(rzlib.adler32(bytes))
+ if check == 3:
+ return str(rzlib.crc32(bytes))
+ return '?'
- fc = compile(f, [int])
+ fc = compile(f, [int, int])
test_list = [1, 2, 3, 5, 8, 87, 876, 8765, 87654, 876543, 8765432,
127329129] # up to ~128MB
if sys.maxint > 2**32:
- test_list.append(2971215073) # 3GB (greater than INPUT_BUFFER_MAX)
+ test_list.append(4305704715) # 4.01GB
+ # XXX should we have a way to say "I don't have enough RAM,
+ # don't run this"?
+
for a in test_list:
print 'Testing compression of "s" * %d' % a
z = zlib.compressobj()
@@ -306,4 +315,17 @@
del pieces
expected = zlib.compress(expected)
expected = zlib.compress(expected)
- assert fc(a) == expected
+ assert fc(a, 1) == expected
+
+ print 'Testing adler32 and crc32 of "s" * %d' % a
+ def compute(function, start):
+ count = a
+ while count > 0:
+ count1 = min(count, 1024*1024)
+ start = function("s" * count1, start)
+ count -= count1
+ return start
+ expected_adler32 = compute(zlib.adler32, 1) & (2**32-1)
+ expected_crc32 = compute(zlib.crc32, 0) & (2**32-1)
+ assert fc(a, 2) == str(expected_adler32)
+ assert fc(a, 3) == str(expected_crc32)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit