[pypy-commit] pypy unicode-utf8: merge from (old) default

rlamy Sun, 18 Mar 2018 04:21:35 -0700

Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: unicode-utf8
Changeset: r93985:542b2a7958bf
Date: 2018-03-18 11:20 +0000
http://bitbucket.org/pypy/pypy/changeset/542b2a7958bf/


Log:    merge from (old) default

diff --git a/extra_tests/test_json.py b/extra_tests/test_json.py
--- a/extra_tests/test_json.py
+++ b/extra_tests/test_json.py
@@ -1,5 +1,6 @@
 import pytest
 import json
+from hypothesis import given, strategies
 
 def is_(x, y):
     return type(x) is type(y) and x == y
@@ -18,3 +19,15 @@
 
 def test_issue2191():
     assert is_(json.dumps(u"xxx", ensure_ascii=False), u'"xxx"')
+
+jsondata = strategies.recursive(
+    strategies.none() |
+    strategies.booleans() |
+    strategies.floats(allow_nan=False) |
+    strategies.text(),
+    lambda children: strategies.lists(children) |
+        strategies.dictionaries(strategies.text(), children))
+
+@given(jsondata)
+def test_roundtrip(d):
+    assert json.loads(json.dumps(d)) == d
diff --git a/pypy/interpreter/unicodehelper.py 
b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -2,11 +2,10 @@
 
 from pypy.interpreter.error import OperationError, oefmt
 from rpython.rlib.objectmodel import specialize
+from rpython.rlib.rstring import StringBuilder
 from rpython.rlib import rutf8
 from rpython.rlib.rarithmetic import r_uint, intmask
-from rpython.rlib.rstring import StringBuilder
 from rpython.rtyper.lltypesystem import rffi
-from pypy.module._codecs import interp_codecs
 from pypy.module.unicodedata import unicodedb
 
 @specialize.memo()
@@ -64,6 +63,7 @@
 
 # These functions take and return unwrapped rpython strings
 def decode_unicode_escape(space, string):
+    from pypy.module._codecs import interp_codecs
     state = space.fromcache(interp_codecs.CodecState)
     unicodedata_handler = state.get_unicodedata_handler(space)
     result_utf8, consumed, length = str_decode_unicode_escape(
@@ -1268,6 +1268,41 @@
     return unicode_encode_utf_32_helper(s, errors, errorhandler,
                                         allow_surrogates, "little")
 
+def py3k_str_decode_utf_32(s, size, errors, final=True,
+                           errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2)
+    return result, length
+
+def py3k_str_decode_utf_32_be(s, size, errors, final=True,
+                              errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "big", 'utf-32-be')
+    return result, length
+
+def py3k_str_decode_utf_32_le(s, size, errors, final=True,
+                              errorhandler=None):
+    result, length, byteorder = str_decode_utf_32_helper(
+        s, size, errors, final, errorhandler, "little", 'utf-32-le')
+    return result, length
+
+def py3k_unicode_encode_utf_32(s, size, errors,
+                               errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "native",
+                                        'utf-32-' + BYTEORDER2)
+
+def py3k_unicode_encode_utf_32_be(s, size, errors,
+                                  errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "big",
+                                        'utf-32-be')
+
+def py3k_unicode_encode_utf_32_le(s, size, errors,
+                                  errorhandler=None, allow_surrogates=True):
+    return unicode_encode_utf_32_helper(s, size, errors, errorhandler,
+                                        allow_surrogates, "little",
+                                        'utf-32-le')
 # ____________________________________________________________
 # unicode-internal
 
diff --git a/pypy/module/_codecs/interp_codecs.py 
b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,9 +1,12 @@
 from rpython.rlib import jit, rutf8
 from rpython.rlib.objectmodel import we_are_translated, not_rpython
-from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
+from rpython.rlib import runicode
+from rpython.rlib.runicode import code_to_unichr, MAXUNICODE
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
+from pypy.interpreter import unicodehelper
 
 
 class VersionTag(object):
@@ -379,12 +382,15 @@
         raise oefmt(space.w_TypeError, "handler must be callable")
 
 # ____________________________________________________________
-# delegation to runicode
+# delegation to runicode/unicodehelper
 
-from rpython.rlib import runicode
+def _find_implementation(impl_name):
+    func = getattr(unicodehelper, impl_name)
+    return func
 
 def make_encoder_wrapper(name):
     rname = "utf8_encode_%s" % (name.replace("_encode", ""), )
+    func = _find_implementation(rname)
     @unwrap_spec(errors='text_or_none')
     def wrap_encoder(space, w_arg, errors="strict"):
         from pypy.interpreter import unicodehelper
@@ -393,7 +399,6 @@
         if errors is None:
             errors = 'strict'
         state = space.fromcache(CodecState)
-        func = getattr(unicodehelper, rname)
         utf8len = w_arg._length
         # XXX deal with func() returning length or not
         result = func(w_arg._utf8, errors, state.encode_error_handler)
@@ -403,7 +408,7 @@
 
 def make_decoder_wrapper(name):
     rname = "str_decode_%s" % (name.replace("_decode", ""), )
-    assert hasattr(runicode, rname)
+    func = _find_implementation(rname)
     @unwrap_spec(string='bufferstr', errors='text_or_none',
                  w_final=WrappedDefault(False))
     def wrap_decoder(space, string, errors="strict", w_final=None):
@@ -413,7 +418,6 @@
             errors = 'strict'
         final = space.is_true(w_final)
         state = space.fromcache(CodecState)
-        func = getattr(unicodehelper, rname)
         result, consumed, length = func(string, errors,
                                               final, 
state.decode_error_handler)
         return space.newtuple([space.newutf8(result, length),
diff --git a/pypy/module/cpyext/unicodeobject.py 
b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -18,6 +18,7 @@
 from pypy.module.cpyext.bytesobject import PyString_Check
 from pypy.module.sys.interp_encoding import setdefaultencoding
 from pypy.module._codecs.interp_codecs import CodecState
+from pypy.interpreter import unicodehelper
 from pypy.objspace.std import unicodeobject
 import sys
 
@@ -622,7 +623,7 @@
     else:
         errors = None
 
-    result, _,  length, byteorder = str_decode_utf_32_helper(
+    result, _,  length, byteorder = unicodehelper.str_decode_utf_32_helper(
         string, errors, final=True, errorhandler=None, byteorder=byteorder)
     if pbyteorder is not None:
         pbyteorder[0] = rffi.cast(rffi.INT, byteorder)
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -712,7 +712,7 @@
 
 
 # ____________________________________________________________
-# utf-32
+# utf-32 (not used in PyPy any more)
 
 def str_decode_utf_32(s, size, errors, final=True,
                       errorhandler=None):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: merge from (old) default

Reply via email to