Author: stian Branch: improve-rbigint Changeset: r56344:f89eae2a4218 Date: 2012-07-04 01:34 +0200 http://bitbucket.org/pypy/pypy/changeset/f89eae2a4218/
Log: Add some _always_inline_ (for some reason it doesn't always happen). This makes lshift 15% faster. diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py --- a/pypy/rlib/rbigint.py +++ b/pypy/rlib/rbigint.py @@ -89,7 +89,7 @@ return r_longlonglong(x) else: return r_longlong(x) - +_widen_digit._always_inline_ = True def _store_digit(x): """if not we_are_translated(): assert is_valid_int(x), "store_digit() takes an int, got a %r" % type(x)""" @@ -102,6 +102,7 @@ else: raise ValueError("SHIFT too large!") _store_digit._annspecialcase_ = 'specialize:argtype(0)' +_store_digit._always_inline_ = True def _load_digit(x): if SHIFT < LONG_BIT: # This would be the case for any SHIFT < LONG_BIT @@ -109,6 +110,7 @@ else: # x already is a type large enough, just not as fast. return x +_load_digit._always_inline_ = True def _load_unsigned_digit(x): if SHIFT < LONG_BIT: # This would be the case for any SHIFT < LONG_BIT @@ -117,6 +119,7 @@ # This needs a performance test on 32bit return rffi.cast(rffi.ULONGLONG, x) #return r_ulonglong(x) +_load_unsigned_digit._always_inline_ = True NULLDIGIT = _store_digit(0) ONEDIGIT = _store_digit(1) @@ -151,25 +154,30 @@ def digit(self, x): """Return the x'th digit, as an int.""" return _load_digit(self._digits[x]) - + digit._always_inline_ = True + def widedigit(self, x): """Return the x'th digit, as a long long int if needed to have enough room to contain two digits.""" return _widen_digit(_load_digit(self._digits[x])) - + widedigit._always_inline_ = True + def udigit(self, x): """Return the x'th digit, as an unsigned int.""" return _load_unsigned_digit(self._digits[x]) - + udigit._always_inline_ = True + def setdigit(self, x, val): val = _mask_digit(val) assert val >= 0 self._digits[x] = _store_digit(val) setdigit._annspecialcase_ = 'specialize:argtype(2)' + setdigit._always_inline_ = True def numdigits(self): return len(self._digits) - + numdigits._always_inline_ = True + @staticmethod @jit.elidable def fromint(intval): @@ -708,7 
+716,8 @@ z._normalize() return z - + lshift._always_inline_ = True # It's so fast that it's always benefitial. + @jit.elidable def lqshift(self, int_other): " A quicker one with much less checks, int_other is valid and for the most part constant." @@ -727,6 +736,7 @@ z.setdigit(oldsize, accum) z._normalize() return z + lqshift._always_inline_ = True # It's so fast that it's always benefitial. @jit.elidable def rshift(self, int_other, dont_invert=False): @@ -761,7 +771,8 @@ j += 1 z._normalize() return z - + rshift._always_inline_ = True # It's so fast that it's always benefitial. + @jit.elidable def and_(self, other): return _bitwise(self, '&', other) @@ -1690,15 +1701,15 @@ def _divrem(a, b): """ Long division with remainder, top-level routine """ - size_a = _load_unsigned_digit(a.numdigits()) - size_b = _load_unsigned_digit(b.numdigits()) + size_a = a.numdigits() + size_b = b.numdigits() if b.sign == 0: raise ZeroDivisionError("long division or modulo by zero") if (size_a < size_b or (size_a == size_b and - a.digit(size_a-1) < b.digit(size_b-1))): + a.digit(abs(size_a-1)) < b.digit(abs(size_b-1)))): # |a| < |b| return NULLRBIGINT, a# result is 0 if size_b == 1: diff --git a/pypy/translator/goal/targetbigintbenchmark.py b/pypy/translator/goal/targetbigintbenchmark.py --- a/pypy/translator/goal/targetbigintbenchmark.py +++ b/pypy/translator/goal/targetbigintbenchmark.py @@ -12,37 +12,38 @@ A cutout with some benchmarks. 
Pypy default: - 2.777119 - 2.316023 - 2.418211 - 5.147583 - 5.139127 - 484.5688 - 334.611903 - 8.637287 - 12.211942 - 18.270045 - 2.512140 - 14.148920 - 18.576713 - 6.647562 - + 2.803071 + 2.366586 + 2.428205 + 4.408400 + 4.424533 + 537.338 + 268.3339 + 8.548186 + 12.197392 + 17.629869 + 2.360716 + 14.315827 + 17.963899 + 6.604541 + Sum: 901.7231250000001 + Pypy with improvements: - 2.822389 # Little slower, divmod - 2.522946 # Little shower, rshift - 4.600970 # Much slower, lshift - 2.126048 # Twice as fast - 4.276203 # Little faster - 9.662745 # 50 times faster - 1.621029 # 200 times faster - 3.956685 # Twice as fast - 5.752223 # Twice as fast - 7.660295 # More than twice as fast - 0.039137 # 50 times faster - 4.437456 # 3 times faster - 9.078680 # Twice as fast - 4.995520 # 1/3 faster, add - + 2.884540 + 2.499774 + 3.796117 + 1.681326 + 4.060521 + 9.696996 + 1.643792 + 4.045248 + 4.714733 + 6.589811 + 0.039319 + 3.503355 + 8.266362 + 5.044856 + Sum: 58.466750 A pure python form of those tests where also run Improved pypy | Pypy | CPython 2.7.3 @@ -61,7 +62,8 @@ 9.19830608368 17.0125601292 11.1488289833 5.40441417694 6.59027791023 3.63601899147 """ - + sumTime = 0.0 + t = time() num = rbigint.pow(rbigint.fromint(100000000), rbigint.fromint(1024)) by = rbigint.pow(rbigint.fromint(2), rbigint.fromint(128)) @@ -69,7 +71,9 @@ rbigint.divmod(num, by) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() num = rbigint.fromint(1000000000) @@ -77,7 +81,9 @@ rbigint.rshift(num, 16) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() num = rbigint.fromint(1000000000) @@ -85,7 +91,9 @@ rbigint.lshift(num, 4) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() num = rbigint.fromint(100000000) @@ -94,7 +102,9 @@ rbigint.floordiv(num, V2) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() num = rbigint.fromint(100000000) @@ -103,7 +113,9 @@ 
rbigint.floordiv(num, V3) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() num = rbigint.fromint(10000000) @@ -111,7 +123,9 @@ rbigint.pow(V2, num) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() num = rbigint.fromint(100000000) @@ -119,7 +133,9 @@ rbigint.pow(rbigint.pow(V2, rbigint.fromint(n)), num) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() num = rbigint.pow(rbigint.fromint(10000), rbigint.fromint(2 ** 8)) @@ -129,7 +145,9 @@ rbigint.pow(P10_4, num, V100) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() i = rbigint.fromint(2**31) @@ -137,7 +155,9 @@ for n in xrange(75000): i = i.mul(i2) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() @@ -145,7 +165,9 @@ rbigint.pow(rbigint.fromint(n), P10_4) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() @@ -154,7 +176,9 @@ rbigint.pow(V1024, V1024) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() @@ -164,7 +188,9 @@ v = v.mul(P62) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() v2 = rbigint.fromint(2**8) @@ -172,7 +198,9 @@ v2 = v2.mul(v2) - print time() - t + _time = time() - t + sumTime += _time + print _time t = time() v3 = rbigint.fromint(2**62) @@ -180,7 +208,11 @@ v3 = v3.add(v3) - print time() - t + _time = time() - t + sumTime += _time + print _time + + print "Sum: ", sumTime return 0 _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit