Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r92251:d602bc94d49f
Date: 2017-08-24 15:03 +0200
http://bitbucket.org/pypy/pypy/changeset/d602bc94d49f/

Log:    Tweaks tweaks, test_unicodeobject starts to pass again

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -63,8 +63,9 @@
     # you still get two surrogate unicode characters in the result.
     # These are the Python2 rules; Python3 differs.
     try:
-        consumed, length = rutf8.str_check_utf8(string, len(string), True)
-    except rutf8.Utf8CheckError as e:
+        length = rutf8.check_utf8(string, allow_surrogates=True)
+    except rutf8.CheckError as e:
+        XXX
         decode_error_handler(space)('strict', 'utf8', e.msg, string, e.startpos,
                                     e.endpos)
         raise False, "unreachable"
diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py
--- a/pypy/module/__builtin__/operation.py
+++ b/pypy/module/__builtin__/operation.py
@@ -5,9 +5,8 @@
 from pypy.interpreter import gateway
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import unwrap_spec, WrappedDefault
-from rpython.rlib.rutf8 import unichr_as_utf8
 from rpython.rlib.rfloat import isfinite, isinf, round_double, round_away
-from rpython.rlib import rfloat
+from rpython.rlib import rfloat, rutf8
 import __builtin__
 
 def abs(space, w_val):
@@ -25,12 +24,11 @@
 @unwrap_spec(code=int)
 def unichr(space, code):
     "Return a Unicode string of one character with the given ordinal."
-    # XXX this assumes unichr would be happy to return you surrogates
     try:
-        s, lgt = unichr_as_utf8(code)
+        s = rutf8.unichr_as_utf8(code, allow_surrogates=True)
     except ValueError:
         raise oefmt(space.w_ValueError, "unichr() arg out of range")
-    return space.newutf8(s, lgt)
+    return space.newutf8(s, 1)
 
 def len(space, w_obj):
     "len(object) -> integer\n\nReturn the number of items of a sequence or mapping."
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -34,12 +34,13 @@
     @enforceargs(utf8str=str)
     def __init__(self, utf8str, length, ucs4str=None):
         assert isinstance(utf8str, str)
+        assert length >= 0
         if ucs4str is not None:
             assert isinstance(ucs4str, unicode)
         self._utf8 = utf8str
         self._length = length
         self._ucs4 = ucs4str
-        if not we_are_translated() and length != -1:
+        if not we_are_translated():
             assert rutf8.compute_length_utf8(utf8str) == length
 
     def __repr__(self):
@@ -133,8 +134,8 @@
         return W_UnicodeObject.EMPTY
 
     def _len(self):
-        if self._length == -1:
-            self._length = self._compute_length()
+        #if self._length == -1:
+        #    self._length = self._compute_length()
         return self._length
 
     def _compute_length(self):
@@ -902,7 +903,7 @@
     s = space.bytes_w(w_bytes)
     try:
         rutf8.check_ascii(s)
-    except rutf8.AsciiCheckError:
+    except rutf8.CheckError:
         # raising UnicodeDecodeError is messy, "please crash for me"
         return unicode_from_encoded_object(space, w_bytes, "ascii", "strict")
     return W_UnicodeObject(s, len(s))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to