[pypy-commit] pypy default: don't slice the string several times when doing str-to-int conversion

cfbolz Mon, 26 Aug 2019 12:15:19 -0700

Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: 
Changeset: r97281:520b3c9072f2
Date: 2019-08-26 21:05 +0200
http://bitbucket.org/pypy/pypy/changeset/520b3c9072f2/


Log:    don't slice the string several times when doing str-to-int
        conversion

diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -878,9 +878,8 @@
     Raises ParseStringOverflowError in case the result does not fit.
     """
     from rpython.rlib.rstring import (
-        NumberStringParser, ParseStringOverflowError, strip_spaces)
-    s = literal = strip_spaces(s)
-    p = NumberStringParser(s, literal, base, 'int',
+        NumberStringParser, ParseStringOverflowError)
+    p = NumberStringParser(s, s, base, 'int',
                            allow_underscores=allow_underscores,
                            no_implicit_octal=no_implicit_octal)
     base = p.base
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -296,14 +296,17 @@
     def fromstr(s, base=0, allow_underscores=False):
         """As string_to_int(), but ignores an optional 'l' or 'L' suffix
         and returns an rbigint."""
+        from rpython.rlib.rstring import NumberStringParser
         from rpython.rlib.rstring import NumberStringParser, \
             strip_spaces
-        s = literal = strip_spaces(s)
+        s = literal = strip_spaces(s) # XXX could get rid of this slice
+        end = len(s)
         if (s.endswith('l') or s.endswith('L')) and base < 22:
             # in base 22 and above, 'L' is a valid digit!  try: long('L',22)
-            s = s[:-1]
+            end -= 1
         parser = NumberStringParser(s, literal, base, 'long',
-                                    allow_underscores=allow_underscores)
+                                    allow_underscores=allow_underscores,
+                                    end=end)
         return rbigint._from_numberstring_parser(parser)
 
     @staticmethod
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -500,26 +500,34 @@
                                (self.fname, self.original_base))
 
     def __init__(self, s, literal, base, fname, allow_underscores=False,
-                 no_implicit_octal=False):
+                 no_implicit_octal=False, start=0, end=-1):
         self.fname = fname
         sign = 1
-        if s.startswith('-'):
+        self.s = s
+        self.start = start
+        if end == -1:
+            end = len(s)
+        self.end = end
+        self._strip_spaces()
+        if self._startswith('-'):
             sign = -1
-            s = strip_spaces(s[1:])
-        elif s.startswith('+'):
-            s = strip_spaces(s[1:])
+            self.start += 1
+            self._strip_spaces()
+        elif self._startswith('+'):
+            self.start += 1
+            self._strip_spaces()
         self.sign = sign
         self.original_base = base
         self.allow_underscores = allow_underscores
 
         if base == 0:
-            if s.startswith('0x') or s.startswith('0X'):
+            if self._startswith('0x') or self._startswith('0X'):
                 base = 16
-            elif s.startswith('0b') or s.startswith('0B'):
+            elif self._startswith('0b') or self._startswith('0B'):
                 base = 2
-            elif s.startswith('0'): # also covers the '0o' case
-                if no_implicit_octal and not (s.startswith('0o') or
-                                              s.startswith('0O')):
+            elif self._startswith('0'): # also covers the '0o' case
+                if no_implicit_octal and not (self._startswith('0o') or
+                                              self._startswith('0O')):
                     base = 1    # this makes only the digit '0' valid...
                 else:
                     base = 8
@@ -530,30 +538,44 @@
         self.base = base
 
         # Leading underscores are not allowed
-        if s.startswith('_'):
+        if self._startswith('_'):
             self.error()
 
-        if base == 16 and (s.startswith('0x') or s.startswith('0X')):
-            s = s[2:]
-        if base == 8 and (s.startswith('0o') or s.startswith('0O')):
-            s = s[2:]
-        if base == 2 and (s.startswith('0b') or s.startswith('0B')):
-            s = s[2:]
-        if not s:
+        if base == 16 and (self._startswith('0x') or self._startswith('0X')):
+            self.start += 2
+        if base == 8 and (self._startswith('0o') or self._startswith('0O')):
+            self.start += 2
+        if base == 2 and (self._startswith('0b') or self._startswith('0B')):
+            self.start += 2
+        if self.start == self.end:
             self.error()
-        self.s = s
-        self.n = len(s)
-        self.i = 0
+        self.i = self.start
+
+    def _startswith(self, prefix):
+        return startswith(self.s, prefix, start=self.start, end=self.end)
+
+    def _strip_spaces(self):
+        # XXX this is not locale-dependent
+        p = self.start
+        q = self.end
+        s = self.s
+        while p < q and s[p] in ' \f\n\r\t\v':
+            p += 1
+        while p < q and s[q-1] in ' \f\n\r\t\v':
+            q -= 1
+        assert q >= p
+        self.start = p
+        self.end = q
 
     def rewind(self):
         self.i = 0
 
     def next_digit(self): # -1 => exhausted
-        if self.i < self.n:
+        if self.i < self.end:
             c = self.s[self.i]
             if self.allow_underscores and c == '_':
                 self.i += 1
-                if self.i >= self.n:
+                if self.i >= self.end:
                     self.error()
                 c = self.s[self.i]
             digit = ord(c)
@@ -576,7 +598,7 @@
         # After exhausting all n digits in next_digit(), you can walk them
         # again in reverse order by calling prev_digit() exactly n times
         i = self.i - 1
-        assert i >= 0
+        assert i >= self.start
         self.i = i
         c = self.s[i]
         digit = ord(c)
diff --git a/rpython/rlib/test/test_rarithmetic.py 
b/rpython/rlib/test/test_rarithmetic.py
--- a/rpython/rlib/test/test_rarithmetic.py
+++ b/rpython/rlib/test/test_rarithmetic.py
@@ -337,6 +337,10 @@
         res = self.interpret(f, [123])
         assert res == 4 + 2
 
+    def test_string_to_int_translates(self):
+        def f(s):
+            return string_to_int(str(s))
+        self.interpret(f, [123]) == 123
 
 def test_int_real_union():
     from rpython.rtyper.lltypesystem.rffi import r_int_real
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: don't slice the string several times when doing str-to-int conversion

Reply via email to