[pypy-commit] pypy unicode-utf8-py3: cleanups: typos, test adjustments, encode/decode disambiguation

mattip Sat, 11 Aug 2018 23:40:41 -0700

Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94992:fcc25b6ffd38
Date: 2018-08-11 13:01 -0700
http://bitbucket.org/pypy/pypy/changeset/fcc25b6ffd38/


Log:    cleanups: typos, test adjustments, encode/decode disambiguation

diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -604,7 +604,7 @@
         if self.num_kwds == 1:
             if isinstance(self.kwd_name, unicode):
                 uname = unicode_encode_utf_8(self.kwd_name, len(self.kwd_name),
-                        'strict', allow_surroagates=False)
+                        'strict', allow_surrogates=False)
             else:
                 uname = self.kwd_name
             msg = "got an unexpected keyword argument '%s'" % uname
diff --git a/pypy/interpreter/test/test_error.py b/pypy/interpreter/test/test_error.py
--- a/pypy/interpreter/test/test_error.py
+++ b/pypy/interpreter/test/test_error.py
@@ -19,7 +19,7 @@
     assert strings == ("abc ", " def ", "")
     assert issubclass(cls, OperationError)
     inst = cls("w_type", strings, "hello", 42)
-    assert inst._compute_value(space) == "abc hello def 42"
+    assert inst._compute_value(space) == ("abc hello def 42", 16)
     cls2, strings2 = get_operrcls2('a %s b %d c')
     assert cls2 is cls     # caching
     assert strings2 == ("a ", " b ", " c")
@@ -30,8 +30,7 @@
     assert operr.w_type == "w_type"
     assert operr._w_value is None
     val = operr._compute_value(space)
-    assert val == u"abc foo def 42"
-    assert isinstance(val, unicode)
+    assert val == ("abc foo def 42", 14)
     operr2 = oefmt("w_type2", "a %s b %d c", "bar", 43)
     assert operr2.__class__ is operr.__class__
     operr3 = oefmt("w_type2", "a %s b %s c", "bar", "4b")
@@ -49,49 +48,49 @@
     operr = oefmt(space.w_AttributeError,
                   "'%T' object has no attribute '%s'",
                   space.wrap('foo'), 'foo')
-    assert operr._compute_value(space) == "'str' object has no attribute 'foo'"
+    assert operr._compute_value(space) == ("'str' object has no attribute 'foo'", 35)
     operr = oefmt("w_type",
                   "'%T' object has no attribute '%s'",
                   space.wrap('foo'), 'foo')
-    assert operr._compute_value(space) == "'str' object has no attribute 'foo'"
+    assert operr._compute_value(space) == ("'str' object has no attribute 'foo'", 35)
 
 def test_oefmt_N(space):
     operr = oefmt(space.w_AttributeError,
                   "'%N' object has no attribute '%s'",
                   space.type(space.wrap('foo')), 'foo')
-    assert operr._compute_value(space) == "'str' object has no attribute 'foo'"
+    assert operr._compute_value(space) == ("'str' object has no attribute 'foo'", 35)
     operr = oefmt("w_type",
                   "'%N' object has no attribute '%s'",
                   space.type(space.wrap('foo')), 'foo')
-    assert operr._compute_value(space) == "'str' object has no attribute 'foo'"
+    assert operr._compute_value(space) == ("'str' object has no attribute 'foo'", 35)
     operr = oefmt(space.w_AttributeError,
                   "'%N' object has no attribute '%s'",
                   space.wrap('foo'), 'foo')
-    assert operr._compute_value(space) == "'?' object has no attribute 'foo'"
+    assert operr._compute_value(space) == ("'?' object has no attribute 'foo'", 33)
     operr = oefmt("w_type",
                   "'%N' object has no attribute '%s'",
                   space.wrap('foo'), 'foo')
-    assert operr._compute_value(space) == "'?' object has no attribute 'foo'"
+    assert operr._compute_value(space) == ("'?' object has no attribute 'foo'", 33)
 
 def test_oefmt_R(space):
     operr = oefmt(space.w_ValueError,
                   "illegal newline value: %R", space.wrap('foo'))
-    assert operr._compute_value(space) == "illegal newline value: 'foo'"
+    assert operr._compute_value(space) == ("illegal newline value: 'foo'", 28)
     operr = oefmt(space.w_ValueError, "illegal newline value: %R",
                   space.wrap("'PyLadies'"))
-    expected = "illegal newline value: \"'PyLadies'\""
+    expected = ("illegal newline value: \"'PyLadies'\"", 35)
     assert operr._compute_value(space) == expected
 
 def test_oefmt_unicode(space):
     operr = oefmt("w_type", "abc %s", u"àèìòù")
     val = operr._compute_value(space)
-    assert val == u"abc àèìòù"
+    assert val == (u"abc àèìòù".encode('utf8'), 9)
 
 def test_oefmt_utf8(space):
     arg = u"àèìòù".encode('utf-8')
     operr = oefmt("w_type", "abc %8", arg)
     val = operr._compute_value(space)
-    assert val == u"abc àèìòù"
+    assert val == (u"abc àèìòù".encode('utf8'), 9)
     #
     # if the arg is a byte string and we specify '%s', then we
     # also get utf-8 encoding.  This should be the common case
@@ -99,7 +98,7 @@
     # sources of PyPy.
     operr = oefmt("w_type", "abc %s", arg)
     val = operr._compute_value(space)
-    assert val == u"abc àèìòù"
+    assert val == (u"abc àèìòù".encode('utf8'), 9)
     #
     # if the byte string is not valid utf-8, then don't crash
     arg = '\xe9'
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -503,14 +503,21 @@
     message = "%s with '%s' codec failed" % (action, encoding)
     return operr.try_set_from_cause(space, message)
 
-def _call_codec(space, w_decoder, w_obj, action, encoding, errors):
+def _call_codec(space, w_coder, w_obj, action, encoding, errors):
     try:
-        w_res = space.call_function(w_decoder, w_obj, space.newtext(errors))
+        w_res = space.call_function(w_coder, w_obj, space.newtext(errors))
     except OperationError as operr:
         raise _wrap_codec_error(space, operr, action, encoding)
     if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) != 2):
-        raise oefmt(space.w_TypeError,
+        if action[:2] == 'en':
+            raise oefmt(space.w_TypeError,
                     "encoder must return a tuple (object, integer)")
+        elif action[:2] == 'de':
+            raise oefmt(space.w_TypeError,
+                    "decoder must return a tuple (object, integer)")
+        else:
+            raise oefmt(space.w_TypeError,
+                    "%s must return a tuple (object, integer)", action)
     return space.getitem(w_res, space.newint(0))
 
 @unwrap_spec(errors='text')
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1226,14 +1226,20 @@
     return w_retval
 
 
-def decode_object(space, w_obj, encoding, errors):
+def decode_object(space, w_obj, encoding, errors='strict'):
+    assert errors is not None
     if encoding is None:
         encoding = getdefaultencoding(space)
-    if errors is None or errors == 'strict' or errors == 'surrogateescape':
+    if errors == 'surrogateescape':
+        s = space.charbuf_w(w_obj)
+        s, lgt, pos = unicodehelper.str_decode_utf8(s, errors, True,
+                    unicodehelper.decode_surrogateescape, True)
+        return space.newutf8(s, pos)
+    elif errors == 'strict':
         if encoding == 'ascii':
             s = space.charbuf_w(w_obj)
             unicodehelper.check_ascii_or_raise(space, s)
-            return space.newutf8(s, len(s))
+            return space.newtext(s, len(s))
         if encoding == 'utf-8' or encoding == 'utf8':
             s = space.charbuf_w(w_obj)
             lgt = unicodehelper.check_utf8_or_raise(space, s)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: cleanups: typos, test adjustments, encode/decode disambiguation

Reply via email to