Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95345:1b408df3f46d
Date: 2018-11-19 23:57 -0800
http://bitbucket.org/pypy/pypy/changeset/1b408df3f46d/
Log: fix logic, fix interface for _multibytecodec
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -74,7 +74,7 @@
w_replace, w_newpos = space.fixedview(w_res, 2)
if space.isinstance_w(w_replace, space.w_unicode):
rettype = 'u'
- elif encode and space.isinstance_w(w_replace, space.w_bytes):
+ elif not decode and space.isinstance_w(w_replace, space.w_bytes):
rettype = 'b'
else:
if decode:
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -154,9 +154,9 @@
replace = UNICODE_REPLACEMENT_CHARACTER
else:
assert errorcb
- replace, end = errorcb(errors, namecb, reason,
+ replace, end, rettype = errorcb(errors, namecb, reason,
stringdata, start, end)
- # 'replace' is RPython unicode here
+ # 'replace' is UTF8 encoded unicode, rettype is 'u'
lgt = rutf8.get_utf8_length(replace)
inbuf = rffi.utf82wcharp(replace, lgt)
try:
@@ -265,9 +265,13 @@
replace = "?"
else:
assert errorcb
- replace, end = errorcb(errors, namecb, reason,
+ replace, end, rettype = errorcb(errors, namecb, reason,
unicodedata, start, end)
+ if rettype == 'u':
+ codec = pypy_cjk_enc_getcodec(encodebuf)
+ replace = encode(codec, replace, end - start)
+ lgt = len(replace)
with rffi.scoped_nonmovingbuffer(replace) as inbuf:
- r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
+ r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, lgt, end)
if r == MBERR_NOMEMORY:
raise MemoryError
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit