Author: Maciej Fijalkowski <[email protected]>
Branch:
Changeset: r71520:af21a0ec95a5
Date: 2014-05-14 22:13 +0200
http://bitbucket.org/pypy/pypy/changeset/af21a0ec95a5/
Log: make the utf_8_decode slightly more reusable in different contexts,
e.g. when we want to calculate the size
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -125,15 +125,18 @@
errorhandler=None, allow_surrogates=False):
if errorhandler is None:
errorhandler = default_unicode_error_decode
- return str_decode_utf_8_impl(s, size, errors, final, errorhandler,
- allow_surrogates=allow_surrogates)
+ result = UnicodeBuilder(size)
+ pos = str_decode_utf_8_impl(s, size, errors, final, errorhandler,
+ allow_surrogates=allow_surrogates,
+ result=result)
+ return result.build(), pos
+@specialize.argtype(6)
def str_decode_utf_8_impl(s, size, errors, final, errorhandler,
- allow_surrogates):
+ allow_surrogates, result):
if size == 0:
- return u'', 0
+ return 0
- result = UnicodeBuilder(size)
pos = 0
while pos < size:
ordch1 = ord(s[pos])
@@ -291,7 +294,7 @@
result.append(unichr(0xDC00 + (c & 0x03FF)))
pos += 4
- return result.build(), pos
+ return pos
def _encodeUCS4(result, ch):
# Encode UCS4 Unicode ordinals
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit