[pypy-commit] pypy unicode-utf8-py3: merge heads

mattip Sun, 16 Sep 2018 22:46:57 -0700

Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95136:fa0322eb4ef4
Date: 2018-09-17 08:04 +0300
http://bitbucket.org/pypy/pypy/changeset/fa0322eb4ef4/


Log:    merge heads

diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -4,7 +4,7 @@
 
 Welcome to PyPy!
 
-PyPy is an interperter that implements the Python programming language, based
+PyPy is an interpreter that implements the Python programming language, based
 on the RPython compiler framework for dynamic language implementations.
 
 The home page for the interpreter is:
@@ -15,29 +15,29 @@
 
     http://doc.pypy.org/
 
-More documentation about the RPython framework can be found here
+More documentation about the RPython framework can be found here:
 
-    http://rpython.readthedocs.io
+    http://rpython.readthedocs.io/
 
-The source for the documentation is in the pypy/doc directory 
+The source for the documentation is in the pypy/doc directory.
+
 
 Using PyPy instead of CPython
-=============================
+-----------------------------
 
-Please read the information at http://pypy.org to find the correct way to
+Please read the information at http://pypy.org/ to find the correct way to
 download and use PyPy as an alternative to CPython. 
 
+
 Building
-========
+--------
 
 Building PyPy is not the recommended way to obtain the PyPy alternative python
 interpreter. It is time-consuming and requires significant computing resources.
-More information can be found here
+More information can be found here:
 
     http://doc.pypy.org/en/latest/build.html
 
 Enjoy and send us feedback!
 
     the pypy-dev team <[email protected]>
-
-
diff --git a/pypy/interpreter/unicodehelper.py 
b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -282,7 +282,7 @@
                 pos = rutf8._pos_at_index(s, newindex)
     return result.build()
 
-def utf8_encode_ascii(s, errors, errorhandler):
+def utf8_encode_ascii(s, errors, errorhandler, allow_surrogates=False):
     """ Don't be confused - this is a slowpath for errors e.g. "ignore"
     or an obscure errorhandler
     """
@@ -1200,14 +1200,15 @@
         else:
             res_8, newindex = errorhandler(
                 errors, public_encoding_name, 'surrogates not allowed',
-                s, pos - 1, pos)
-            for cp in rutf8.Utf8StringIterator(res_8):
-                if cp < 0xD800:
+                s, pos, pos+1)
+            #for cp in rutf8.Utf8StringIterator(res_8):
+            for cp in res_8:
+                if cp < 0xD800 or allow_surrogates:
                     _STORECHAR(result, cp, byteorder)
                 else:
                     errorhandler('strict', public_encoding_name,
                                  'surrogates not allowed',
-                                 s, pos-1, pos)
+                                 s, pos, pos+1)
             if index != newindex:  # Should be uncommon
                 index = newindex
                 pos = rutf8._pos_at_index(s, newindex)
diff --git a/pypy/module/_cffi_backend/ctypeptr.py 
b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -313,9 +313,7 @@
             if isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveBool):
                 self._must_be_string_of_zero_or_one(value)
             keepalives[i] = value
-            buf, buf_flag = rffi.get_nonmovingbuffer_final_null(value)
-            rffi.cast(rffi.CCHARPP, cdata)[0] = buf
-            return ord(buf_flag)    # 4, 5 or 6
+            return misc.write_string_as_charp(cdata, value)
         #
         if (space.isinstance_w(w_init, space.w_list) or
             space.isinstance_w(w_init, space.w_tuple)):
diff --git a/pypy/module/_cffi_backend/misc.py 
b/pypy/module/_cffi_backend/misc.py
--- a/pypy/module/_cffi_backend/misc.py
+++ b/pypy/module/_cffi_backend/misc.py
@@ -3,7 +3,7 @@
 from pypy.interpreter.error import OperationError, oefmt
 
 from rpython.rlib import jit
-from rpython.rlib.objectmodel import specialize
+from rpython.rlib.objectmodel import specialize, we_are_translated
 from rpython.rlib.rarithmetic import r_uint, r_ulonglong
 from rpython.rlib.unroll import unrolling_iterable
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
@@ -102,6 +102,12 @@
 def write_raw_longdouble_data(target, source):
     rffi.cast(rffi.LONGDOUBLEP, target)[0] = source
 
[email protected]_look_inside    # lets get_nonmovingbuffer_final_null be inlined
+def write_string_as_charp(target, string):
+    buf, buf_flag = rffi.get_nonmovingbuffer_final_null(string)
+    rffi.cast(rffi.CCHARPP, target)[0] = buf
+    return ord(buf_flag)    # 4, 5 or 6
+
 # ____________________________________________________________
 
 sprintf_longdouble = rffi.llexternal(
@@ -129,21 +135,14 @@
     # This version accepts a Python int too, and does convertions from
     # other types of objects.  It refuses floats.
     try:
-        value = space.int_w(w_ob)
+        return space.int_w(w_ob, allow_conversion=False)
     except OperationError as e:
         if not (e.match(space, space.w_OverflowError) or
                 e.match(space, space.w_TypeError)):
             raise
-    else:
-        return value
-    try:
-        bigint = space.bigint_w(w_ob, allow_conversion=False)
-    except OperationError as e:
-        if not e.match(space, space.w_TypeError):
-            raise
         if _is_a_float(space, w_ob):
             raise
-        bigint = space.bigint_w(space.int(w_ob), allow_conversion=False)
+    bigint = space.bigint_w(w_ob, allow_conversion=True)
     try:
         return bigint.tolonglong()
     except OverflowError:
@@ -151,20 +150,15 @@
 
 def as_long(space, w_ob):
     # Same as as_long_long(), but returning an int instead.
-    if space.is_w(space.type(w_ob), space.w_int):   # shortcut
-        return space.int_w(w_ob)
     try:
-        bigint = space.bigint_w(w_ob, allow_conversion=False)
+        return space.int_w(w_ob, allow_conversion=False)
     except OperationError as e:
-        if not e.match(space, space.w_TypeError):
+        if not (e.match(space, space.w_OverflowError) or
+                e.match(space, space.w_TypeError)):
             raise
         if _is_a_float(space, w_ob):
             raise
-        bigint = space.bigint_w(space.int(w_ob), allow_conversion=False)
-    try:
-        return bigint.toint()
-    except OverflowError:
-        raise OperationError(space.w_OverflowError, space.newtext(ovf_msg))
+    return space.int_w(w_ob, allow_conversion=True)
 
 def as_unsigned_long_long(space, w_ob, strict):
     # (possibly) convert and cast a Python object to an unsigned long long.
@@ -172,23 +166,19 @@
     # objects.  If 'strict', complains with OverflowError; if 'not strict',
     # mask the result and round floats.
     try:
-        value = space.int_w(w_ob)
+        value = space.int_w(w_ob, allow_conversion=False)
     except OperationError as e:
         if not (e.match(space, space.w_OverflowError) or
                 e.match(space, space.w_TypeError)):
             raise
+        if strict and _is_a_float(space, w_ob):
+            raise
     else:
         if strict and value < 0:
             raise OperationError(space.w_OverflowError, space.newtext(neg_msg))
         return r_ulonglong(value)
-    try:
-        bigint = space.bigint_w(w_ob, allow_conversion=False)
-    except OperationError as e:
-        if not e.match(space, space.w_TypeError):
-            raise
-        if strict and _is_a_float(space, w_ob):
-            raise
-        bigint = space.bigint_w(space.int(w_ob), allow_conversion=False)
+    # note that if not 'strict', then space.int() will round down floats
+    bigint = space.bigint_w(space.int(w_ob), allow_conversion=False)
     if strict:
         try:
             return bigint.toulonglong()
@@ -202,13 +192,19 @@
 def as_unsigned_long(space, w_ob, strict):
     # same as as_unsigned_long_long(), but returning just an Unsigned
     try:
-        bigint = space.bigint_w(w_ob, allow_conversion=False)
+        value = space.int_w(w_ob, allow_conversion=False)
     except OperationError as e:
-        if not e.match(space, space.w_TypeError):
+        if not (e.match(space, space.w_OverflowError) or
+                e.match(space, space.w_TypeError)):
             raise
         if strict and _is_a_float(space, w_ob):
             raise
-        bigint = space.bigint_w(space.int(w_ob), allow_conversion=False)
+    else:
+        if strict and value < 0:
+            raise OperationError(space.w_OverflowError, space.newtext(neg_msg))
+        return r_uint(value)
+    # note that if not 'strict', then space.int() will round down floats
+    bigint = space.bigint_w(space.int(w_ob), allow_conversion=False)
     if strict:
         try:
             return bigint.touint()
@@ -241,7 +237,12 @@
 
 def _standard_object_as_bool(space, w_ob):
     if space.isinstance_w(w_ob, space.w_int):
-        return space.bigint_w(w_ob).tobool()
+        try:
+            return space.int_w(w_ob) != 0
+        except OperationError as e:
+            if not e.match(space, space.w_OverflowError):
+                raise
+            return space.bigint_w(w_ob).tobool()
     if space.isinstance_w(w_ob, space.w_float):
         return space.float_w(w_ob) != 0.0
     raise _NotStandardObject
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py 
b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -334,8 +334,18 @@
         max = (1 << (8*size-1)) - 1
         assert newp(pp, min)[0] == min
         assert newp(pp, max)[0] == max
+        py.test.raises(OverflowError, newp, pp, min - 2 ** 32)
+        py.test.raises(OverflowError, newp, pp, min - 2 ** 64)
+        py.test.raises(OverflowError, newp, pp, max + 2 ** 32)
+        py.test.raises(OverflowError, newp, pp, max + 2 ** 64)
         py.test.raises(OverflowError, newp, pp, min - 1)
         py.test.raises(OverflowError, newp, pp, max + 1)
+        py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 32)
+        py.test.raises(OverflowError, newp, pp, min - 1 - 2 ** 64)
+        py.test.raises(OverflowError, newp, pp, max + 1)
+        py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 32)
+        py.test.raises(OverflowError, newp, pp, max + 1 + 2 ** 64)
+        py.test.raises(TypeError, newp, pp, 1.0)
     for name in ['char', 'short', 'int', 'long', 'long long']:
         p = new_primitive_type('unsigned ' + name)
         pp = new_pointer_type(p)
diff --git a/pypy/module/_codecs/interp_codecs.py 
b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -649,9 +649,13 @@
         w_arg = space.convert_arg_to_w_unicode(w_arg, errors)
         if errors is None:
             errors = 'strict'
+        allow_surrogates = False
+        if errors in ('surrogatepass',):
+            allow_surrogates = True
         state = space.fromcache(CodecState)
         ulen = w_arg._length
-        result = func(w_arg._utf8, errors, state.encode_error_handler)
+        result = func(w_arg._utf8, errors, state.encode_error_handler,
+                      allow_surrogates=allow_surrogates)
         return space.newtuple([space.newbytes(result), space.newint(ulen)])
     wrap_encoder.__name__ = func.__name__
     globals()[name] = wrap_encoder
diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1170,7 +1170,7 @@
     first = u[0]
     it = rutf8.Utf8StringIterator(u)
     code = it.next()
-    if not (unicodedb.isxidstart(code) or first == u'_'):
+    if not (unicodedb.isxidstart(code) or first == '_'):
         return False
 
     for ch in it:
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: merge heads

Reply via email to