Author: Philip Jenvey <pjen...@underboss.org> Branch: py3k-newhash Changeset: r61581:32410af09b82 Date: 2013-02-21 16:42 -0800 http://bitbucket.org/pypy/pypy/changeset/32410af09b82/
Log: modernize float's hash diff --git a/pypy/module/sys/system.py b/pypy/module/sys/system.py --- a/pypy/module/sys/system.py +++ b/pypy/module/sys/system.py @@ -2,7 +2,7 @@ from pypy.interpreter import gateway from rpython.rlib import rfloat, rbigint from rpython.rtyper.lltypesystem import rffi, lltype -from pypy.objspace.std.floatobject import HASH_INF, HASH_NAN +from pypy.objspace.std.floatobject import HASH_INF, HASH_MODULUS, HASH_NAN from pypy.objspace.std.complexobject import HASH_IMAG @@ -65,11 +65,9 @@ return space.call_function(w_int_info, space.newtuple(info_w)) def get_hash_info(space): - # XXX our _hash_float() always give values that fit in 32bit - modulus = (1 << 31) - 1 # Must be a prime number info_w = [ space.wrap(8 * rffi.sizeof(lltype.Signed)), - space.wrap(modulus), + space.wrap(HASH_MODULUS), space.wrap(HASH_INF), space.wrap(HASH_NAN), space.wrap(HASH_IMAG), diff --git a/pypy/objspace/std/complexobject.py b/pypy/objspace/std/complexobject.py --- a/pypy/objspace/std/complexobject.py +++ b/pypy/objspace/std/complexobject.py @@ -129,7 +129,7 @@ hashreal = _hash_float(space, w_value.realval) hashimg = _hash_float(space, w_value.imagval) combined = intmask(hashreal + HASH_IMAG * hashimg) - return space.newint(combined) + return space.newint(-2 if combined == -1 else combined) def add__Complex_Complex(space, w_complex1, w_complex2): return W_ComplexObject(w_complex1.realval + w_complex2.realval, diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -1,4 +1,5 @@ import operator +import sys from pypy.interpreter import gateway from pypy.interpreter.error import OperationError from pypy.objspace.std import model, newformat @@ -7,10 +8,11 @@ from pypy.objspace.std.register_all import register_all from pypy.objspace.std.noneobject import W_NoneObject from pypy.objspace.std.longobject import W_LongObject -from rpython.rlib.rarithmetic import ovfcheck_float_to_int, intmask, LONG_BIT +from rpython.rlib.rarithmetic import ( + LONG_BIT, intmask, ovfcheck_float_to_int, r_uint) from rpython.rlib.rfloat import ( isinf, isnan, isfinite, INFINITY, NAN, copysign, formatd, - DTSF_ADD_DOT_0, DTSF_STR_PRECISION, float_as_rbigint_ratio) + DTSF_ADD_DOT_0, float_as_rbigint_ratio) from rpython.rlib.rbigint import rbigint from rpython.rlib import rfloat from rpython.tool.sourcetools import func_with_new_name @@ -21,6 +23,8 @@ HASH_INF = 314159 HASH_NAN = 0 +HASH_BITS = 61 if sys.maxsize > 2 ** 31 - 1 else 31 +HASH_MODULUS = (1 << HASH_BITS) - 1 class W_AbstractFloatObject(W_Object): __slots__ = () @@ -284,51 +288,37 @@ return space.wrap(_hash_float(space, w_value.floatval)) def _hash_float(space, v): - if isnan(v): + if not isfinite(v): + if isinf(v): + return HASH_INF if v > 0 else -HASH_INF return HASH_NAN - # This is designed so that Python numbers of different types - # that compare equal hash to the same value; otherwise comparisons - # of mapping keys will turn out weird. - fractpart, intpart = math.modf(v) + m, e = math.frexp(v) - if fractpart == 0.0: - # This must return the same hash as an equal int or long. - try: - x = ovfcheck_float_to_int(intpart) - # Fits in a C long == a Python int, so is its own hash. - return x - except OverflowError: - # Convert to long and use its hash. - try: - w_lval = W_LongObject.fromfloat(space, v) - except (OverflowError, ValueError): - # can't convert to long int -- arbitrary - if v < 0: - return -HASH_INF - else: - return HASH_INF - return space.int_w(space.hash(w_lval)) + sign = 1 + if m < 0: + sign = -1 + m = -m - # The fractional part is non-zero, so we don't have to worry about - # making this match the hash of some other type. - # Use frexp to get at the bits in the double. - # Since the VAX D double format has 56 mantissa bits, which is the - # most of any double format in use, each of these parts may have as - # many as (but no more than) 56 significant bits. - # So, assuming sizeof(long) >= 4, each part can be broken into two - # longs; frexp and multiplication are used to do that. - # Also, since the Cray double format has 15 exponent bits, which is - # the most of any double format in use, shifting the exponent field - # left by 15 won't overflow a long (again assuming sizeof(long) >= 4). + # process 28 bits at a time; this should work well both for binary + # and hexadecimal floating point. + x = r_uint(0) + while m: + x = ((x << 28) & HASH_MODULUS) | x >> (HASH_BITS - 28) + m *= 268435456.0 # 2**28 + e -= 28 + y = int(m) # pull out integer part + m -= y + x += y + if x >= HASH_MODULUS: + x -= HASH_MODULUS - v, expo = math.frexp(v) - v *= 2147483648.0 # 2**31 - hipart = int(v) # take the top 32 bits - v = (v - hipart) * 2147483648.0 # get the next 32 bits - x = intmask(hipart + int(v) + (expo << 15)) - return x + # adjust for the exponent; first reduce it modulo HASH_BITS + e = e % HASH_BITS if e >= 0 else HASH_BITS - 1 - ((-1 - e) % HASH_BITS) + x = ((x << e) & HASH_MODULUS) | x >> (HASH_BITS - e) + x = intmask(intmask(x) * sign) + return -2 if x == -1 else x def add__Float_Float(space, w_float1, w_float2): diff --git a/pypy/objspace/std/test/test_floatobject.py b/pypy/objspace/std/test/test_floatobject.py --- a/pypy/objspace/std/test/test_floatobject.py +++ b/pypy/objspace/std/test/test_floatobject.py @@ -100,16 +100,27 @@ # these are taken from standard Python, which produces # the same but for -1. import math + import sys + + assert hash(-1.0) == -2 + assert hash(-2.0) == -2 + assert hash(-3.0) == -3 assert hash(42.0) == 42 - assert hash(42.125) == 1413677056 - assert hash(math.ldexp(0.125, 1000)) in ( - 32, # answer on 32-bit machines - 137438953472) # answer on 64-bit machines - # testing special overflow values - inf = 1e200 * 1e200 + if sys.maxsize > 2 ** 31 - 1: + assert hash(42.125) == 288230376151711786 + assert hash(math.ldexp(0.125, 1000)) == 2097152 + assert hash(3.141593) == 326491229203594243 + assert hash(2.5) == 1152921504606846978 + else: + assert hash(42.125) == 268435498 + assert hash(math.ldexp(0.125, 1000)) == 32 + assert hash(3.141593) == 671854639 + assert hash(2.5) == 1073741826 + inf = float('inf') + nan = float('nan') assert hash(inf) == 314159 assert hash(-inf) == -314159 - assert hash(inf/inf) == 0 + assert hash(nan) == 0 def test_int_float(self): assert int(42.1234) == 42 _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit