Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94992:fcc25b6ffd38
Date: 2018-08-11 13:01 -0700
http://bitbucket.org/pypy/pypy/changeset/fcc25b6ffd38/
Log: cleanups: typos, test adjustments, encode/decode disambiguation
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -604,7 +604,7 @@
if self.num_kwds == 1:
if isinstance(self.kwd_name, unicode):
uname = unicode_encode_utf_8(self.kwd_name, len(self.kwd_name),
- 'strict', allow_surroagates=False)
+ 'strict', allow_surrogates=False)
else:
uname = self.kwd_name
msg = "got an unexpected keyword argument '%s'" % uname
diff --git a/pypy/interpreter/test/test_error.py b/pypy/interpreter/test/test_error.py
--- a/pypy/interpreter/test/test_error.py
+++ b/pypy/interpreter/test/test_error.py
@@ -19,7 +19,7 @@
assert strings == ("abc ", " def ", "")
assert issubclass(cls, OperationError)
inst = cls("w_type", strings, "hello", 42)
- assert inst._compute_value(space) == "abc hello def 42"
+ assert inst._compute_value(space) == ("abc hello def 42", 16)
cls2, strings2 = get_operrcls2('a %s b %d c')
assert cls2 is cls # caching
assert strings2 == ("a ", " b ", " c")
@@ -30,8 +30,7 @@
assert operr.w_type == "w_type"
assert operr._w_value is None
val = operr._compute_value(space)
- assert val == u"abc foo def 42"
- assert isinstance(val, unicode)
+ assert val == ("abc foo def 42", 14)
operr2 = oefmt("w_type2", "a %s b %d c", "bar", 43)
assert operr2.__class__ is operr.__class__
operr3 = oefmt("w_type2", "a %s b %s c", "bar", "4b")
@@ -49,49 +48,49 @@
operr = oefmt(space.w_AttributeError,
"'%T' object has no attribute '%s'",
space.wrap('foo'), 'foo')
- assert operr._compute_value(space) == "'str' object has no attribute 'foo'"
+ assert operr._compute_value(space) == ("'str' object has no attribute 'foo'", 35)
operr = oefmt("w_type",
"'%T' object has no attribute '%s'",
space.wrap('foo'), 'foo')
- assert operr._compute_value(space) == "'str' object has no attribute 'foo'"
+ assert operr._compute_value(space) == ("'str' object has no attribute 'foo'", 35)
def test_oefmt_N(space):
operr = oefmt(space.w_AttributeError,
"'%N' object has no attribute '%s'",
space.type(space.wrap('foo')), 'foo')
- assert operr._compute_value(space) == "'str' object has no attribute 'foo'"
+ assert operr._compute_value(space) == ("'str' object has no attribute 'foo'", 35)
operr = oefmt("w_type",
"'%N' object has no attribute '%s'",
space.type(space.wrap('foo')), 'foo')
- assert operr._compute_value(space) == "'str' object has no attribute 'foo'"
+ assert operr._compute_value(space) == ("'str' object has no attribute 'foo'", 35)
operr = oefmt(space.w_AttributeError,
"'%N' object has no attribute '%s'",
space.wrap('foo'), 'foo')
- assert operr._compute_value(space) == "'?' object has no attribute 'foo'"
+ assert operr._compute_value(space) == ("'?' object has no attribute 'foo'", 33)
operr = oefmt("w_type",
"'%N' object has no attribute '%s'",
space.wrap('foo'), 'foo')
- assert operr._compute_value(space) == "'?' object has no attribute 'foo'"
+ assert operr._compute_value(space) == ("'?' object has no attribute 'foo'", 33)
def test_oefmt_R(space):
operr = oefmt(space.w_ValueError,
"illegal newline value: %R", space.wrap('foo'))
- assert operr._compute_value(space) == "illegal newline value: 'foo'"
+ assert operr._compute_value(space) == ("illegal newline value: 'foo'", 28)
operr = oefmt(space.w_ValueError, "illegal newline value: %R",
space.wrap("'PyLadies'"))
- expected = "illegal newline value: \"'PyLadies'\""
+ expected = ("illegal newline value: \"'PyLadies'\"", 35)
assert operr._compute_value(space) == expected
def test_oefmt_unicode(space):
operr = oefmt("w_type", "abc %s", u"àèìòù")
val = operr._compute_value(space)
- assert val == u"abc àèìòù"
+ assert val == (u"abc àèìòù".encode('utf8'), 9)
def test_oefmt_utf8(space):
arg = u"àèìòù".encode('utf-8')
operr = oefmt("w_type", "abc %8", arg)
val = operr._compute_value(space)
- assert val == u"abc àèìòù"
+ assert val == (u"abc àèìòù".encode('utf8'), 9)
#
# if the arg is a byte string and we specify '%s', then we
# also get utf-8 encoding. This should be the common case
@@ -99,7 +98,7 @@
# sources of PyPy.
operr = oefmt("w_type", "abc %s", arg)
val = operr._compute_value(space)
- assert val == u"abc àèìòù"
+ assert val == (u"abc àèìòù".encode('utf8'), 9)
#
# if the byte string is not valid utf-8, then don't crash
arg = '\xe9'
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -503,14 +503,21 @@
message = "%s with '%s' codec failed" % (action, encoding)
return operr.try_set_from_cause(space, message)
-def _call_codec(space, w_decoder, w_obj, action, encoding, errors):
+def _call_codec(space, w_coder, w_obj, action, encoding, errors):
try:
- w_res = space.call_function(w_decoder, w_obj, space.newtext(errors))
+ w_res = space.call_function(w_coder, w_obj, space.newtext(errors))
except OperationError as operr:
raise _wrap_codec_error(space, operr, action, encoding)
if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) != 2):
- raise oefmt(space.w_TypeError,
+ if action[:2] == 'en':
+ raise oefmt(space.w_TypeError,
"encoder must return a tuple (object, integer)")
+ elif action[:2] == 'de':
+ raise oefmt(space.w_TypeError,
+ "decoder must return a tuple (object, integer)")
+ else:
+ raise oefmt(space.w_TypeError,
+ "%s must return a tuple (object, integer)", action)
return space.getitem(w_res, space.newint(0))
@unwrap_spec(errors='text')
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1226,14 +1226,20 @@
return w_retval
-def decode_object(space, w_obj, encoding, errors):
+def decode_object(space, w_obj, encoding, errors='strict'):
+ assert errors is not None
if encoding is None:
encoding = getdefaultencoding(space)
- if errors is None or errors == 'strict' or errors == 'surrogateescape':
+ if errors == 'surrogateescape':
+ s = space.charbuf_w(w_obj)
+ s, lgt, pos = unicodehelper.str_decode_utf8(s, errors, True,
+ unicodehelper.decode_surrogateescape, True)
+ return space.newutf8(s, pos)
+ elif errors == 'strict':
if encoding == 'ascii':
s = space.charbuf_w(w_obj)
unicodehelper.check_ascii_or_raise(space, s)
- return space.newutf8(s, len(s))
+ return space.newtext(s, len(s))
if encoding == 'utf-8' or encoding == 'utf8':
s = space.charbuf_w(w_obj)
lgt = unicodehelper.check_utf8_or_raise(space, s)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit