[pypy-commit] pypy unicode-utf8: try to improve latin1 handling

fijal Wed, 06 Dec 2017 11:19:19 -0800

Author: fijal
Branch: unicode-utf8
Changeset: r93289:a6a28d7e46a8
Date: 2017-12-06 21:17 +0200
http://bitbucket.org/pypy/pypy/changeset/a6a28d7e46a8/


Log:    try to improve latin1 handling

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -149,37 +149,32 @@
 
 def _utf8_encode_latin_1_slowpath(s, errors, errorhandler):
     res = StringBuilder(len(s))
-    size = len(s)
     cur = 0
-    i = 0
-    while i < size:
-        if ord(s[i]) <= 0x7F:
-            res.append(s[i])
-            i += 1
-            cur += 1
-        else:
-            oc = rutf8.codepoint_at_pos(s, i)
-            if oc <= 0xFF:
-                res.append(chr(oc))
+    iter = rutf8.Utf8StringIterator(s)
+    try:
+        while True:
+            ch = iter.next()
+            if ch <= 0xFF:
+                res.append(chr(ch))
                 cur += 1
-                i = rutf8.next_codepoint_pos(s, i)
             else:
                 r, pos = errorhandler(errors, 'latin1',
                                       'ordinal not in range(256)', s, cur,
                                       cur + 1)
-                for j in range(pos - cur):
-                    i = rutf8.next_codepoint_pos(s, i)
 
-                j = 0
-                while j < len(r):
-                    c = rutf8.codepoint_at_pos(r, j)
+                for c in rutf8.Utf8StringIterator(r):
                     if c > 0xFF:
                         errorhandler("strict", 'latin1',
                                      'ordinal not in range(256)', s,
                                      cur, cur + 1)
-                    j = rutf8.next_codepoint_pos(r, j)
                     res.append(chr(c))
+
+                for j in range(pos - cur - 1):
+                    iter.next()
+
                 cur = pos
+    except StopIteration:
+        pass
     r = res.build()
     return r
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: try to improve latin1 handling

Reply via email to