[pypy-commit] pypy improve-rbigint: Add some _always_inline_ (for some reason it doesn't always happen). This makes lshift 15% faster

stian Sat, 21 Jul 2012 09:46:25 -0700

Author: stian
Branch: improve-rbigint
Changeset: r56344:f89eae2a4218
Date: 2012-07-04 01:34 +0200
http://bitbucket.org/pypy/pypy/changeset/f89eae2a4218/


Log:    Add some _always_inline_ (for some reason it doesn't always
        happen). This makes lshift 15% faster

diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py
--- a/pypy/rlib/rbigint.py
+++ b/pypy/rlib/rbigint.py
@@ -89,7 +89,7 @@
         return r_longlonglong(x)
     else:
         return r_longlong(x)
-
+_widen_digit._always_inline_ = True
 def _store_digit(x):
     """if not we_are_translated():
         assert is_valid_int(x), "store_digit() takes an int, got a %r" % type(x)"""
@@ -102,6 +102,7 @@
     else:
         raise ValueError("SHIFT too large!")
 _store_digit._annspecialcase_ = 'specialize:argtype(0)'
+_store_digit._always_inline_ = True
 
 def _load_digit(x):
     if SHIFT < LONG_BIT: # This would be the case for any SHIFT < LONG_BIT
@@ -109,6 +110,7 @@
     else:
         # x already is a type large enough, just not as fast.
         return x
+_load_digit._always_inline_ = True
 
 def _load_unsigned_digit(x):
     if SHIFT < LONG_BIT: # This would be the case for any SHIFT < LONG_BIT
@@ -117,6 +119,7 @@
         # This needs a performance test on 32bit
         return rffi.cast(rffi.ULONGLONG, x)
         #return r_ulonglong(x)
+_load_unsigned_digit._always_inline_ = True
 
 NULLDIGIT = _store_digit(0)
 ONEDIGIT  = _store_digit(1)
@@ -151,25 +154,30 @@
     def digit(self, x):
         """Return the x'th digit, as an int."""
         return _load_digit(self._digits[x])
-
+    digit._always_inline_ = True
+    
     def widedigit(self, x):
         """Return the x'th digit, as a long long int if needed
         to have enough room to contain two digits."""
         return _widen_digit(_load_digit(self._digits[x]))
-
+    widedigit._always_inline_ = True
+    
     def udigit(self, x):
         """Return the x'th digit, as an unsigned int."""
         return _load_unsigned_digit(self._digits[x])
-
+    udigit._always_inline_ = True
+    
     def setdigit(self, x, val):
         val = _mask_digit(val)
         assert val >= 0
         self._digits[x] = _store_digit(val)
     setdigit._annspecialcase_ = 'specialize:argtype(2)'
+    setdigit._always_inline_ = True
 
     def numdigits(self):
         return len(self._digits)
-
+    numdigits._always_inline_ = True
+    
     @staticmethod
     @jit.elidable
     def fromint(intval):
@@ -708,7 +716,8 @@
 
         z._normalize()
         return z
-
+    lshift._always_inline_ = True # It's so fast that it's always benefitial.
+    
     @jit.elidable
     def lqshift(self, int_other):
         " A quicker one with much less checks, int_other is valid and for the most part constant."
@@ -727,6 +736,7 @@
         z.setdigit(oldsize, accum)
         z._normalize()
         return z
+    lqshift._always_inline_ = True # It's so fast that it's always benefitial.
     
     @jit.elidable
     def rshift(self, int_other, dont_invert=False):
@@ -761,7 +771,8 @@
             j += 1
         z._normalize()
         return z
-
+    rshift._always_inline_ = True # It's so fast that it's always benefitial.
+    
     @jit.elidable
     def and_(self, other):
         return _bitwise(self, '&', other)
@@ -1690,15 +1701,15 @@
         
 def _divrem(a, b):
     """ Long division with remainder, top-level routine """
-    size_a = _load_unsigned_digit(a.numdigits())
-    size_b = _load_unsigned_digit(b.numdigits())
+    size_a = a.numdigits()
+    size_b = b.numdigits()
 
     if b.sign == 0:
         raise ZeroDivisionError("long division or modulo by zero")
 
     if (size_a < size_b or
         (size_a == size_b and
-         a.digit(size_a-1) < b.digit(size_b-1))):
+         a.digit(abs(size_a-1)) < b.digit(abs(size_b-1)))):
         # |a| < |b|
         return NULLRBIGINT, a# result is 0
     if size_b == 1:
diff --git a/pypy/translator/goal/targetbigintbenchmark.py b/pypy/translator/goal/targetbigintbenchmark.py
--- a/pypy/translator/goal/targetbigintbenchmark.py
+++ b/pypy/translator/goal/targetbigintbenchmark.py
@@ -12,37 +12,38 @@
         
         A cutout with some benchmarks.
         Pypy default:
-        2.777119
-        2.316023
-        2.418211
-        5.147583
-        5.139127
-        484.5688
-        334.611903
-        8.637287
-        12.211942
-        18.270045
-        2.512140
-        14.148920
-        18.576713
-        6.647562
-
+        2.803071
+        2.366586
+        2.428205
+        4.408400
+        4.424533
+        537.338
+        268.3339
+        8.548186
+        12.197392
+        17.629869
+        2.360716
+        14.315827
+        17.963899
+        6.604541
+        Sum: 901.7231250000001
+        
         Pypy with improvements:
-        2.822389 # Little slower, divmod
-        2.522946 # Little shower, rshift
-        4.600970 # Much slower, lshift
-        2.126048 # Twice as fast
-        4.276203 # Little faster
-        9.662745 # 50 times faster
-        1.621029 # 200 times faster
-        3.956685 # Twice as fast
-        5.752223 # Twice as fast
-        7.660295 # More than twice as fast
-        0.039137 # 50 times faster
-        4.437456 # 3 times faster
-        9.078680 # Twice as fast
-        4.995520 # 1/3 faster, add
-
+        2.884540
+        2.499774
+        3.796117
+        1.681326
+        4.060521
+        9.696996
+        1.643792
+        4.045248
+        4.714733
+        6.589811
+        0.039319
+        3.503355
+        8.266362
+        5.044856
+        Sum:  58.466750
 
         A pure python form of those tests where also run
         Improved pypy           | Pypy                  | CPython 2.7.3
@@ -61,7 +62,8 @@
         9.19830608368           17.0125601292             11.1488289833
         5.40441417694           6.59027791023             3.63601899147
     """
-
+    sumTime = 0.0
+    
     t = time()
     num = rbigint.pow(rbigint.fromint(100000000), rbigint.fromint(1024))
     by = rbigint.pow(rbigint.fromint(2), rbigint.fromint(128))
@@ -69,7 +71,9 @@
         rbigint.divmod(num, by)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     num = rbigint.fromint(1000000000)
@@ -77,7 +81,9 @@
         rbigint.rshift(num, 16)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     num = rbigint.fromint(1000000000)
@@ -85,7 +91,9 @@
         rbigint.lshift(num, 4)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     num = rbigint.fromint(100000000)
@@ -94,7 +102,9 @@
         rbigint.floordiv(num, V2)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     num = rbigint.fromint(100000000)
@@ -103,7 +113,9 @@
         rbigint.floordiv(num, V3)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     num = rbigint.fromint(10000000)
@@ -111,7 +123,9 @@
         rbigint.pow(V2, num)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
 
     t = time()
     num = rbigint.fromint(100000000)
@@ -119,7 +133,9 @@
         rbigint.pow(rbigint.pow(V2, rbigint.fromint(n)), num)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     num = rbigint.pow(rbigint.fromint(10000), rbigint.fromint(2 ** 8))
@@ -129,7 +145,9 @@
         rbigint.pow(P10_4, num, V100)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     i = rbigint.fromint(2**31)
@@ -137,7 +155,9 @@
     for n in xrange(75000):
         i = i.mul(i2)
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     
@@ -145,7 +165,9 @@
         rbigint.pow(rbigint.fromint(n), P10_4)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     
@@ -154,7 +176,9 @@
         rbigint.pow(V1024, V1024)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     
     t = time()
@@ -164,7 +188,9 @@
         v = v.mul(P62)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     v2 = rbigint.fromint(2**8)
@@ -172,7 +198,9 @@
         v2 = v2.mul(v2)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
     
     t = time()
     v3 = rbigint.fromint(2**62)
@@ -180,7 +208,11 @@
         v3 = v3.add(v3)
         
 
-    print time() - t
+    _time = time() - t
+    sumTime += _time
+    print _time
+    
+    print "Sum: ", sumTime
     
     return 0
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy improve-rbigint: Add some _always_inline_ (for some reason it doesn't always happen). This makes lshift 15% faster

Reply via email to