Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95987:d89df30bad0b
Date: 2019-02-12 23:10 +0200
http://bitbucket.org/pypy/pypy/changeset/d89df30bad0b/
Log: raise correct error
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1901,16 +1901,30 @@
utf8 = space.utf8_w(w_unistr)
lgt = space.len_w(w_unistr)
result = StringBuilder(lgt)
+ pos = 0
for uchr in rutf8.Utf8StringIterator(utf8):
if uchr > 127:
if unicodedb.isspace(uchr):
result.append(' ')
+ pos += 1
continue
try:
uchr = ord(u'0') + unicodedb.decimal(uchr)
except KeyError:
pass
- result.append(rutf8.unichr_as_utf8(r_uint(uchr), True))
+ try:
+ c = rutf8.unichr_as_utf8(r_uint(uchr))
+ except ValueError:
+ w_encoding = space.newtext('utf-8')
+ w_start = space.newint(pos)
+ w_end = space.newint(pos+1)
+ w_reason = space.newtext('surrogates not allowed')
+ raise OperationError(space.w_UnicodeEncodeError,
+ space.newtuple([w_encoding, w_unistr,
+ w_start, w_end,
+ w_reason]))
+ result.append(c)
+ pos += 1
return result.build()
_repr_function = rutf8.make_utf8_escape_function(
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit