Author: fijal
Branch: unicode-utf8
Changeset: r92889:81c556f8f8b4
Date: 2017-10-31 18:31 +0100
http://bitbucket.org/pypy/pypy/changeset/81c556f8f8b4/

Log:    rewrite to_decimal

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1746,21 +1746,22 @@
 def unicode_to_decimal_w(space, w_unistr):
     if not isinstance(w_unistr, W_UnicodeObject):
         raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
-    unistr = w_unistr._utf8.decode("utf8")
-    # XXX speed up
+    unistr = w_unistr._utf8
     result = ['\0'] * len(unistr)
     digits = ['0', '1', '2', '3', '4',
               '5', '6', '7', '8', '9']
-    for i in xrange(len(unistr)):
-        uchr = ord(unistr[i])
-        if unicodedb.isspace(uchr):
-            result[i] = ' '
+    i = 0
+    res_pos = 0
+    while i < len(unistr):
+        uchr = rutf8.codepoint_at_pos(unistr, i)
+        if rutf8.isspace(unistr, i):
+            result[res_pos] = ' '
             continue
         try:
-            result[i] = digits[unicodedb.decimal(uchr)]
+            result[res_pos] = digits[unicodedb.decimal(uchr)]
         except KeyError:
             if 0 < uchr < 256:
-                result[i] = chr(uchr)
+                result[res_pos] = chr(uchr)
             else:
                 w_encoding = space.newtext('decimal')
                 w_start = space.newint(i)
@@ -1770,6 +1771,8 @@
                                      space.newtuple([w_encoding, w_unistr,
                                                      w_start, w_end,
                                                      w_reason]))
+        i = rutf8.next_codepoint_pos(unistr, i)
+        res_pos += 1
     return ''.join(result)
 
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to