Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94954:22f9db3e5c61
Date: 2018-08-05 14:52 -0700
http://bitbucket.org/pypy/pypy/changeset/22f9db3e5c61/

Log:    translation - len must be non-negative

diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -307,9 +307,9 @@
             # ascii only, fast path (ascii is a strict subset of
             # latin1, and we already checked that all the chars are <
             # 128)
-            assert end >= start
-            return self.space.newutf8(self.getslice(start, end),
-                                      end - start)
+            lgt = end - start
+            assert lgt >= 0
+            return self.space.newutf8(self.getslice(start, end), lgt)
 
     def decode_string_escaped(self, start):
         i = self.pos
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -494,8 +494,11 @@
         an array of some other type.
         """
         if self.typecode == 'u':
+            s = self.len
+            if s < 0:
+                s = 0
             buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned())
-            return space.newutf8(rffi.wcharpsize2utf8(buf, self.len), self.len)
+            return space.newutf8(rffi.wcharpsize2utf8(buf, s), s)
         else:
             raise oefmt(space.w_ValueError,
                         "tounicode() may only be called on type 'u' arrays")
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -85,7 +85,7 @@
     s = rffi.wcharpsize2unicode(get_wbuffer(py_obj), get_wsize(py_obj))
     w_type = from_ref(space, rffi.cast(PyObject, py_obj.c_ob_type))
     w_obj = space.allocate_instance(unicodeobject.W_UnicodeObject, w_type)
-    w_obj.__init__(s, len(s))
+    w_obj.__init__(s.encode('utf8'), len(s))
     track_reference(space, py_obj, w_obj)
     return w_obj
 
@@ -271,7 +271,7 @@
     assert isinstance(w_obj, unicodeobject.W_UnicodeObject)
     py_obj = as_pyobj(space, w_obj)
     assert get_kind(py_obj) == WCHAR_KIND
-    return _readify(space, py_obj, w_obj._value)
+    return _readify(space, py_obj, space.utf8_w(w_obj))
 
 def _readify(space, py_obj, value):
     maxchar = 0
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to