Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92251:d602bc94d49f
Date: 2017-08-24 15:03 +0200
http://bitbucket.org/pypy/pypy/changeset/d602bc94d49f/
Log: Tweaks tweaks, test_unicodeobject starts to pass again
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -63,8 +63,9 @@
# you still get two surrogate unicode characters in the result.
# These are the Python2 rules; Python3 differs.
try:
- consumed, length = rutf8.str_check_utf8(string, len(string), True)
- except rutf8.Utf8CheckError as e:
+ length = rutf8.check_utf8(string, allow_surrogates=True)
+ except rutf8.CheckError as e:
+ XXX
decode_error_handler(space)('strict', 'utf8', e.msg, string,
e.startpos,
e.endpos)
raise False, "unreachable"
diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py
--- a/pypy/module/__builtin__/operation.py
+++ b/pypy/module/__builtin__/operation.py
@@ -5,9 +5,8 @@
from pypy.interpreter import gateway
from pypy.interpreter.error import OperationError, oefmt
from pypy.interpreter.gateway import unwrap_spec, WrappedDefault
-from rpython.rlib.rutf8 import unichr_as_utf8
from rpython.rlib.rfloat import isfinite, isinf, round_double, round_away
-from rpython.rlib import rfloat
+from rpython.rlib import rfloat, rutf8
import __builtin__
def abs(space, w_val):
@@ -25,12 +24,11 @@
@unwrap_spec(code=int)
def unichr(space, code):
"Return a Unicode string of one character with the given ordinal."
- # XXX this assumes unichr would be happy to return you surrogates
try:
- s, lgt = unichr_as_utf8(code)
+ s = rutf8.unichr_as_utf8(code, allow_surrogates=True)
except ValueError:
raise oefmt(space.w_ValueError, "unichr() arg out of range")
- return space.newutf8(s, lgt)
+ return space.newutf8(s, 1)
def len(space, w_obj):
"len(object) -> integer\n\nReturn the number of items of a sequence or mapping."
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -34,12 +34,13 @@
@enforceargs(utf8str=str)
def __init__(self, utf8str, length, ucs4str=None):
assert isinstance(utf8str, str)
+ assert length >= 0
if ucs4str is not None:
assert isinstance(ucs4str, unicode)
self._utf8 = utf8str
self._length = length
self._ucs4 = ucs4str
- if not we_are_translated() and length != -1:
+ if not we_are_translated():
assert rutf8.compute_length_utf8(utf8str) == length
def __repr__(self):
@@ -133,8 +134,8 @@
return W_UnicodeObject.EMPTY
def _len(self):
- if self._length == -1:
- self._length = self._compute_length()
+ #if self._length == -1:
+ # self._length = self._compute_length()
return self._length
def _compute_length(self):
@@ -902,7 +903,7 @@
s = space.bytes_w(w_bytes)
try:
rutf8.check_ascii(s)
- except rutf8.AsciiCheckError:
+ except rutf8.CheckError:
# raising UnicodeDecodeError is messy, "please crash for me"
return unicode_from_encoded_object(space, w_bytes, "ascii", "strict")
return W_UnicodeObject(s, len(s))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit