[pypy-commit] pypy unicode-utf8-py3: use a signature to find where len < 0, fix

mattip Sun, 05 Aug 2018 11:13:39 -0700

Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94950:9c9fca815956
Date: 2018-08-05 10:36 -0700
http://bitbucket.org/pypy/pypy/changeset/9c9fca815956/


Log:    use a signature to find where len < 0, fix

diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -307,6 +307,7 @@
             # ascii only, fast path (ascii is a strict subset of
             # latin1, and we already checked that all the chars are <
             # 128)
+            assert end >= start
             return self.space.newutf8(self.getslice(start, end),
                                       end - start)
 
diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py
--- a/pypy/module/cpyext/longobject.py
+++ b/pypy/module/cpyext/longobject.py
@@ -200,6 +200,8 @@
     string, length gives the number of characters, and base is the radix
     for the conversion.  The radix must be in the range [2, 36]; if it is
     out of range, ValueError will be raised."""
+    if length < 0:
+        length = 0
     w_value = space.newutf8(wcharpsize2utf8(space, u, length), length)
     return PyLong_FromUnicodeObject(space, w_value, base)
 
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -472,6 +472,8 @@
     object. If the buffer is not NULL, the return value might be a shared object.
     Therefore, modification of the resulting Unicode object is only allowed when u
     is NULL."""
+    if length < 0:
+        length = 0
     if wchar_p:
         s = wcharpsize2utf8(space, wchar_p, length)
         return make_ref(space, space.newutf8(s, length))
@@ -755,6 +757,8 @@
         """Encode the Py_UNICODE buffer of the given size and return a
         Python string object.  Return NULL if an exception was raised
         by the codec."""
+        if size < 0:
+            size = 0
         u = wcharpsize2utf8(space, s, size)
         w_u = space.newutf8(u, size)
         if errors:
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -13,7 +13,8 @@
 from rpython.rlib.rarithmetic import base_int, widen, is_valid_int
 from rpython.rlib.objectmodel import import_from_mixin, we_are_translated
 from rpython.rlib.objectmodel import not_rpython
-from rpython.rlib import jit, rutf8
+from rpython.rlib import jit, rutf8, types
+from rpython.rlib.signature import signature, finishsigs
 
 # Object imports
 from pypy.objspace.std.boolobject import W_BoolObject
@@ -36,7 +37,7 @@
 from pypy.objspace.std.typeobject import W_TypeObject, TypeCache
 from pypy.objspace.std.unicodeobject import W_UnicodeObject
 
-
+@finishsigs
 class StdObjSpace(ObjSpace):
     """The standard object space, implementing a general-purpose object
     library in Restricted Python."""
@@ -381,10 +382,9 @@
 
     @specialize.argtype(1)
     def newtext(self, s):
-        assert not isinstance(s, unicode)
-        #if isinstance(s, unicode):
-            #s, lgt = s.encode('utf8'), len(s)
-        if isinstance(s, str):
+        if isinstance(s, unicode):
+            s, lgt = s.encode('utf8'), len(s)
+        elif isinstance(s, str):
             s, lgt, codepoints = decode_utf8sp(self, s)
         elif isinstance(s, tuple):
             # result of decode_utf8
@@ -400,8 +400,9 @@
             return self.w_None
         return self.newtext(s)
 
+    # XXX find where length is annotated as negative int
+    @signature(types.any(), types.str(), types.int_nonneg(), returns=types.any())
     def newutf8(self, utf8s, length):
-        assert length >= 0
         assert isinstance(utf8s, str)
         return W_UnicodeObject(utf8s, length)
 
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -28,7 +28,7 @@
 
 
 # we need a way to accept both r_uint and int(nonneg=True)
-@signature(types.int_nonneg(), types.bool(), returns=types.str())
+#@signature(types.int_nonneg(), types.bool(), returns=types.str())
 def unichr_as_utf8(code, allow_surrogates=False):
     """Encode code (numeric value) as utf8 encoded string
     """
diff --git a/rpython/rlib/signature.py b/rpython/rlib/signature.py
--- a/rpython/rlib/signature.py
+++ b/rpython/rlib/signature.py
@@ -9,7 +9,7 @@
       def foo(...)
 
     The arguments paramNtype and returntype should be instances
-    of the classes in rpython.annotator.types.
+    of the classes in rpython.rlib.types.
     """
     returntype = kwargs.pop('returns', None)
     if returntype is None:
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: use a signature to find where len < 0, fix

Reply via email to