Author: Maciej Fijalkowski <[email protected]>
Branch: 
Changeset: r71520:af21a0ec95a5
Date: 2014-05-14 22:13 +0200
http://bitbucket.org/pypy/pypy/changeset/af21a0ec95a5/

Log:    make str_decode_utf_8_impl slightly more reusable in different
        contexts: the caller now passes in the result builder and gets back
        only the final position, e.g. for when we just want to calculate the
        size

diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -125,15 +125,18 @@
                      errorhandler=None, allow_surrogates=False):
     if errorhandler is None:
         errorhandler = default_unicode_error_decode
-    return str_decode_utf_8_impl(s, size, errors, final, errorhandler,
-                                 allow_surrogates=allow_surrogates)
+    result = UnicodeBuilder(size)
+    pos = str_decode_utf_8_impl(s, size, errors, final, errorhandler,
+                                 allow_surrogates=allow_surrogates,
+                                 result=result)
+    return result.build(), pos
 
+@specialize.argtype(6)
 def str_decode_utf_8_impl(s, size, errors, final, errorhandler,
-                          allow_surrogates):
+                          allow_surrogates, result):
     if size == 0:
-        return u'', 0
+        return 0
 
-    result = UnicodeBuilder(size)
     pos = 0
     while pos < size:
         ordch1 = ord(s[pos])
@@ -291,7 +294,7 @@
                 result.append(unichr(0xDC00 + (c & 0x03FF)))
             pos += 4
 
-    return result.build(), pos
+    return pos
 
 def _encodeUCS4(result, ch):
     # Encode UCS4 Unicode ordinals
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to