[pypy-commit] pypy py3.6: hg merge default

2020-01-23 Thread arigo
Author: Armin Rigo 
Branch: py3.6
Changeset: r98577:be4941bb07ae
Date: 2020-01-23 13:08 +0100
http://bitbucket.org/pypy/pypy/changeset/be4941bb07ae/

Log: hg merge default

diff --git a/pypy/interpreter/test/test_argument.py 
b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -823,12 +823,12 @@
 # does not contain the warning about missing self
 assert exc.value.args[0] == "f0() takes 0 positional arguments but 1 
was given"
 
-@pytest.mark.pypy_only
 def test_error_message_module_function(self):
 import operator # use countOf because it's defined at applevel
 exc = raises(TypeError, lambda : operator.countOf(1, 2, 3))
-# does not contain the warning about missing self
-assert exc.value.args[0] == "countOf() takes 2 positional arguments 
but 3 were given"
+# does not contain the warning
+# 'Did you forget 'self' in the function definition?'
+assert 'self' not in str(exc.value)
 
 @pytest.mark.pypy_only
 def test_error_message_bound_method(self):
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy py3.6: hg merge default

2020-01-23 Thread arigo
Author: Armin Rigo 
Branch: py3.6
Changeset: r98578:73d20edb41e9
Date: 2020-01-23 13:20 +0100
http://bitbucket.org/pypy/pypy/changeset/73d20edb41e9/

Log: hg merge default

diff --git a/pypy/module/_multibytecodec/c_codecs.py 
b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -194,17 +194,23 @@
rffi.SSIZE_T)
 pypy_cjk_enc_getcodec = llexternal('pypy_cjk_enc_getcodec',
[ENCODEBUF_P], MULTIBYTECODEC_P)
+pypy_cjk_enc_copystate = llexternal('pypy_cjk_enc_copystate',
+[ENCODEBUF_P, ENCODEBUF_P], lltype.Void)
 MBENC_FLUSH = 1
 MBENC_RESET = 2
 
 def encode(codec, unicodedata, length, errors="strict", errorcb=None,
-   namecb=None):
+   namecb=None, copystate=lltype.nullptr(ENCODEBUF_P.TO)):
 encodebuf = pypy_cjk_enc_new(codec)
 if not encodebuf:
 raise MemoryError
+if copystate:
+pypy_cjk_enc_copystate(encodebuf, copystate)
 try:
 return encodeex(encodebuf, unicodedata, length, errors, errorcb, 
namecb)
 finally:
+if copystate:
+pypy_cjk_enc_copystate(copystate, encodebuf)
 pypy_cjk_enc_free(encodebuf)
 
 def encodeex(encodebuf, utf8data, length, errors="strict", errorcb=None,
@@ -257,22 +263,21 @@
 raise EncodeDecodeError(start, end, reason)
 elif errors == "ignore":
 replace = ""
+rettype = 'b'   # != 'u'
 elif errors == "replace":
-codec = pypy_cjk_enc_getcodec(encodebuf)
-try:
-replace = encode(codec, "?", 1)
-except EncodeDecodeError:
-replace = "?"
+replace = "?"# utf-8 unicode
+rettype = 'u'
 else:
 assert errorcb
 replace, end, rettype = errorcb(errors, namecb, reason,
 unicodedata, start, end)
-if rettype == 'u':
-codec = pypy_cjk_enc_getcodec(encodebuf)
-lgt = rutf8.check_utf8(replace, False)
-replace = encode(codec, replace, lgt)
-lgt = len(replace)
+if rettype == 'u':
+codec = pypy_cjk_enc_getcodec(encodebuf)
+lgt = rutf8.check_utf8(replace, False)
+replace = encode(codec, replace, lgt, copystate=encodebuf)
+#else:
+#   replace is meant to be a byte string already
 with rffi.scoped_nonmovingbuffer(replace) as inbuf:
-r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, lgt, end)
+r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
 if r == MBERR_NOMEMORY:
 raise MemoryError
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c 
b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
@@ -135,6 +135,11 @@
   return d;
 }
 
+void pypy_cjk_enc_copystate(struct pypy_cjk_enc_s *dst, struct pypy_cjk_enc_s 
*src)
+{
+dst->state = src->state;
+}
+
 Py_ssize_t pypy_cjk_enc_init(struct pypy_cjk_enc_s *d,
  Py_UNICODE *inbuf, Py_ssize_t inlen)
 {
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h 
b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
--- a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
@@ -146,6 +146,8 @@
   char *, pypymbc_ssize_t, 
pypymbc_ssize_t);
 RPY_EXTERN
 const MultibyteCodec *pypy_cjk_enc_getcodec(struct pypy_cjk_enc_s *);
+RPY_EXTERN
+void pypy_cjk_enc_copystate(struct pypy_cjk_enc_s *dst, struct pypy_cjk_enc_s 
*src);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py 
b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -126,3 +126,33 @@
   lambda e: (b'\xc3', e.end))
 result = "\uDDA1".encode("gbk", 
"test.test_encode_custom_error_handler_type")
 assert b'\xc3' in result
+
+def test_encode_replacement_with_state(self):
+import codecs
+s = u'\u4ee4\u477c\u4ee4'.encode("iso-2022-jp", errors="replace")
+assert s == b'\x1b$BNa\x1b(B?\x1b$BNa\x1b(B'
+
+def test_streaming_codec(self):
+test_0 = u'\uc5fc\u76d0\u5869\u9e7d\u477c\u4e3d/\u3012'
+test_1 = 
u'\u4ee4\u477c\u3080\u304b\u3057\u3080\u304b\u3057\u3042\u308b\u3068\u3053\u308d\u306b'
+test_2 = u' foo = "Quoted string \u4ee4\u477c" '
+
+ereplace = {'errors': 'replace'}
+exml = {'errors': 'xmlcharrefreplace'}
+for codec in ("iso-2022-jp", "iso-2022-jp-ext", "iso-2022-jp-1",
+  "iso-2022-jp-2", "iso-2022-jp-3", 

[pypy-commit] pypy default: Test really just what we want to, i.e. that the message does not contain the word 'self'

2020-01-23 Thread arigo
Author: Armin Rigo 
Branch: 
Changeset: r98575:76b34bb85fe5
Date: 2020-01-23 13:07 +0100
http://bitbucket.org/pypy/pypy/changeset/76b34bb85fe5/

Log: Test really just what we want to, i.e. that the message does not
contain the word 'self'

diff --git a/pypy/interpreter/test/test_argument.py 
b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -762,9 +762,9 @@
 def test_error_message_module_function(self):
 import operator # use repeat because it's defined at applevel
 exc = raises(TypeError, lambda : operator.repeat(1, 2, 3))
-# does not contain the warning about missing self
-# in particular it must not end with ' Did you forget 'self' in the 
function definition?'
-assert str(exc.value).endswith("takes exactly 2 arguments (3 given)")
+# does not contain the warning
+# 'Did you forget 'self' in the function definition?'
+assert 'self' not in str(exc.value)
 
 @pytest.mark.pypy_only
 def test_error_message_bound_method(self):
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy default: merge heads

2020-01-23 Thread arigo
Author: Armin Rigo 
Branch: 
Changeset: r98576:a7c7b4c7dcae
Date: 2020-01-23 13:08 +0100
http://bitbucket.org/pypy/pypy/changeset/a7c7b4c7dcae/

Log: merge heads

diff --git a/pypy/module/_multibytecodec/c_codecs.py 
b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -194,17 +194,23 @@
rffi.SSIZE_T)
 pypy_cjk_enc_getcodec = llexternal('pypy_cjk_enc_getcodec',
[ENCODEBUF_P], MULTIBYTECODEC_P)
+pypy_cjk_enc_copystate = llexternal('pypy_cjk_enc_copystate',
+[ENCODEBUF_P, ENCODEBUF_P], lltype.Void)
 MBENC_FLUSH = 1
 MBENC_RESET = 2
 
 def encode(codec, unicodedata, length, errors="strict", errorcb=None,
-   namecb=None):
+   namecb=None, copystate=lltype.nullptr(ENCODEBUF_P.TO)):
 encodebuf = pypy_cjk_enc_new(codec)
 if not encodebuf:
 raise MemoryError
+if copystate:
+pypy_cjk_enc_copystate(encodebuf, copystate)
 try:
 return encodeex(encodebuf, unicodedata, length, errors, errorcb, 
namecb)
 finally:
+if copystate:
+pypy_cjk_enc_copystate(copystate, encodebuf)
 pypy_cjk_enc_free(encodebuf)
 
 def encodeex(encodebuf, utf8data, length, errors="strict", errorcb=None,
@@ -258,18 +264,18 @@
 elif errors == "ignore":
 replace = ""
 elif errors == "replace":
-codec = pypy_cjk_enc_getcodec(encodebuf)
-try:
-replace = encode(codec, "?", 1)
-except EncodeDecodeError:
-replace = "?"
+replace = "?"# utf-8 unicode
 else:
 assert errorcb
-rets, end = errorcb(errors, namecb, reason,
+replace, end = errorcb(errors, namecb, reason,
 unicodedata, start, end)
+if len(replace) > 0:
 codec = pypy_cjk_enc_getcodec(encodebuf)
-lgt = rutf8.codepoints_in_utf8(rets)
-replace = encode(codec, rets, lgt, "strict", errorcb, namecb)
+lgt = rutf8.codepoints_in_utf8(replace)
+replace = encode(codec, replace, lgt, copystate=encodebuf)
+#else:
+#   replace is an empty utf-8 unicode, which we directly consider to
+#   encode as an empty byte string.
 with rffi.scoped_nonmovingbuffer(replace) as inbuf:
 r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
 if r == MBERR_NOMEMORY:
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c 
b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
@@ -135,6 +135,11 @@
   return d;
 }
 
+void pypy_cjk_enc_copystate(struct pypy_cjk_enc_s *dst, struct pypy_cjk_enc_s 
*src)
+{
+dst->state = src->state;
+}
+
 Py_ssize_t pypy_cjk_enc_init(struct pypy_cjk_enc_s *d,
  Py_UNICODE *inbuf, Py_ssize_t inlen)
 {
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h 
b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
--- a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
@@ -146,6 +146,8 @@
   char *, pypymbc_ssize_t, 
pypymbc_ssize_t);
 RPY_EXTERN
 const MultibyteCodec *pypy_cjk_enc_getcodec(struct pypy_cjk_enc_s *);
+RPY_EXTERN
+void pypy_cjk_enc_copystate(struct pypy_cjk_enc_s *dst, struct pypy_cjk_enc_s 
*src);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py 
b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -110,3 +110,33 @@
   lambda e: ('\xc3', e.end))
 raises(TypeError, u"\uDDA1".encode, "gbk",
"test.test_encode_custom_error_handler_type")
+
+def test_encode_replacement_with_state(self):
+import codecs
+s = u'\u4ee4\u477c\u4ee4'.encode("iso-2022-jp", errors="replace")
+assert s == '\x1b$BNa\x1b(B?\x1b$BNa\x1b(B'
+
+def test_streaming_codec(self):
+test_0 = u'\uc5fc\u76d0\u5869\u9e7d\u477c\u4e3d/\u3012'
+test_1 = 
u'\u4ee4\u477c\u3080\u304b\u3057\u3080\u304b\u3057\u3042\u308b\u3068\u3053\u308d\u306b'
+test_2 = u' foo = "Quoted string \u4ee4\u477c" '
+
+ereplace = {'errors': 'replace'}
+exml = {'errors': 'xmlcharrefreplace'}
+for codec in ("iso-2022-jp", "iso-2022-jp-ext", "iso-2022-jp-1",
+  "iso-2022-jp-2", "iso-2022-jp-3", "iso-2022-jp-2004",
+  "iso-2022-kr",
+ ):
+
+out_1 = test_1.encode(codec, **ereplace).decode(codec, **ereplace)
+assert 

[pypy-commit] pypy py3.6: fix issue with @pytest.mark.pypy_only

2020-01-23 Thread cfbolz
Author: Carl Friedrich Bolz-Tereick 
Branch: py3.6
Changeset: r98574:deaa30ceb571
Date: 2020-01-23 12:30 +0100
http://bitbucket.org/pypy/pypy/changeset/deaa30ceb571/

Log: fix issue with @pytest.mark.pypy_only

before it would actually crash

diff --git a/pypy/conftest.py b/pypy/conftest.py
--- a/pypy/conftest.py
+++ b/pypy/conftest.py
@@ -71,6 +71,8 @@
 if mode_A:
 from pypy.tool.pytest.apptest import PythonInterpreter
 config.applevel = PythonInterpreter(config.option.python)
+else:
+config.applevel = None
 
 def pytest_addoption(parser):
 group = parser.getgroup("pypy options")
@@ -205,8 +207,9 @@
 def pytest_runtest_setup(item):
 if isinstance(item, py.test.collect.Function):
 config = item.config
-if item.get_marker(name='pypy_only') and not config.applevel.is_pypy:
-pytest.skip('PyPy-specific test')
+if item.get_marker(name='pypy_only'):
+if config.applevel is not None and not config.applevel.is_pypy:
+pytest.skip('PyPy-specific test')
 appclass = item.getparent(py.test.Class)
 if appclass is not None:
 from pypy.tool.pytest.objspace import gettestobjspace
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy py3.7: support async generator expressions in normal functions

2020-01-23 Thread cfbolz
Author: Carl Friedrich Bolz-Tereick 
Branch: py3.7
Changeset: r98573:bde40d73c3de
Date: 2020-01-23 12:01 +0100
http://bitbucket.org/pypy/pypy/changeset/bde40d73c3de/

Log: support async generator expressions in normal functions

diff --git a/pypy/interpreter/astcompiler/codegen.py 
b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -1569,10 +1569,6 @@
 code, qualname = self.sub_scope(sub_scope, name, node, node.lineno)
 is_async_generator = self.symbols.find_scope(node).is_coroutine
 
-if is_async_generator and not is_async_function:
-self.error("asynchronous comprehension outside of "
-   "an asynchronous function", node)
-
 self.update_position(node.lineno)
 self._make_function(code, qualname=qualname)
 first_comp = node.get_generators()[0]
diff --git a/pypy/interpreter/test/apptest_coroutine.py 
b/pypy/interpreter/test/apptest_coroutine.py
--- a/pypy/interpreter/test/apptest_coroutine.py
+++ b/pypy/interpreter/test/apptest_coroutine.py
@@ -699,6 +699,20 @@
 
 assert run_async(run()) == ([], (1,))
 
+def test_async_genexpr_in_regular_function():
+async def arange(n):
+for i in range(n):
+yield i
+
+def make_arange(n):
+# This syntax is legal starting with Python 3.7
+return (i * 2 async for i in arange(n))
+
+async def run():
+return [i async for i in make_arange(10)]
+res = run_async(run())
+assert res[1] == [i * 2 for i in range(10)]
+
 # Helpers for test_async_gen_exception_11() below
 def sync_iterate(g):
 res = []
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy default: PyCodec_Encoder(), PyCodec_Decoder()

2020-01-23 Thread arigo
Author: Armin Rigo 
Branch: 
Changeset: r98572:fb95321122db
Date: 2020-01-23 11:51 +0100
http://bitbucket.org/pypy/pypy/changeset/fb95321122db/

Log: PyCodec_Encoder(), PyCodec_Decoder()

diff --git a/pypy/module/cpyext/codecs.py b/pypy/module/cpyext/codecs.py
--- a/pypy/module/cpyext/codecs.py
+++ b/pypy/module/cpyext/codecs.py
@@ -20,3 +20,12 @@
 else:
 return space.call_method(w_codec, "incrementaldecoder")
 
+@cpython_api([CONST_STRING], PyObject)
+def PyCodec_Encoder(space, encoding):
+w_codec = interp_codecs.lookup_codec(space, rffi.charp2str(encoding))
+return space.getitem(w_codec, space.newint(0))
+
+@cpython_api([CONST_STRING], PyObject)
+def PyCodec_Decoder(space, encoding):
+w_codec = interp_codecs.lookup_codec(space, rffi.charp2str(encoding))
+return space.getitem(w_codec, space.newint(1))
diff --git a/pypy/module/cpyext/test/test_codecs.py 
b/pypy/module/cpyext/test/test_codecs.py
--- a/pypy/module/cpyext/test/test_codecs.py
+++ b/pypy/module/cpyext/test/test_codecs.py
@@ -2,7 +2,8 @@
 from pypy.module.cpyext.test.test_api import BaseApiTest
 from rpython.rtyper.lltypesystem import rffi
 from pypy.module.cpyext.codecs import (
-PyCodec_IncrementalEncoder, PyCodec_IncrementalDecoder)
+PyCodec_IncrementalEncoder, PyCodec_IncrementalDecoder,
+PyCodec_Encoder, PyCodec_Decoder)
 
 class TestCodecs(BaseApiTest):
 def test_incremental(self, space):
@@ -13,3 +14,13 @@
 w_decoded = space.call_method(w_decoder, 'decode', w_encoded)
 assert space.utf8_w(w_decoded) == u'spm'.encode('utf8')
 rffi.free_charp(utf8)
+
+def test_encoder_decoder(self, space):
+utf8 = rffi.str2charp('utf-8')
+w_encoder = PyCodec_Encoder(space, utf8)
+w_decoder = PyCodec_Decoder(space, utf8)
+rffi.free_charp(utf8)
+space.appexec([w_encoder, w_decoder], """(encoder, decoder):
+assert encoder(u"\u1234") == ('\xe1\x88\xb4', 1)
+assert decoder("\xe1\x88\xb4") == (u'\u1234', 3)
+""")
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy default: Fix a corner case in multibytecodec: for stateful codecs, when encoding fails and we use replacement, the replacement string must be written in the output preserving the state.

2020-01-23 Thread arigo
Author: Armin Rigo 
Branch: 
Changeset: r98571:2ed84f7866b6
Date: 2020-01-23 11:37 +0100
http://bitbucket.org/pypy/pypy/changeset/2ed84f7866b6/

Log: Fix a corner case in multibytecodec: for stateful codecs, when
encoding fails and we use replacement, the replacement string must
be written in the output preserving the state.

diff --git a/pypy/module/_multibytecodec/c_codecs.py 
b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -194,17 +194,23 @@
rffi.SSIZE_T)
 pypy_cjk_enc_getcodec = llexternal('pypy_cjk_enc_getcodec',
[ENCODEBUF_P], MULTIBYTECODEC_P)
+pypy_cjk_enc_copystate = llexternal('pypy_cjk_enc_copystate',
+[ENCODEBUF_P, ENCODEBUF_P], lltype.Void)
 MBENC_FLUSH = 1
 MBENC_RESET = 2
 
 def encode(codec, unicodedata, length, errors="strict", errorcb=None,
-   namecb=None):
+   namecb=None, copystate=lltype.nullptr(ENCODEBUF_P.TO)):
 encodebuf = pypy_cjk_enc_new(codec)
 if not encodebuf:
 raise MemoryError
+if copystate:
+pypy_cjk_enc_copystate(encodebuf, copystate)
 try:
 return encodeex(encodebuf, unicodedata, length, errors, errorcb, 
namecb)
 finally:
+if copystate:
+pypy_cjk_enc_copystate(copystate, encodebuf)
 pypy_cjk_enc_free(encodebuf)
 
 def encodeex(encodebuf, utf8data, length, errors="strict", errorcb=None,
@@ -258,18 +264,18 @@
 elif errors == "ignore":
 replace = ""
 elif errors == "replace":
-codec = pypy_cjk_enc_getcodec(encodebuf)
-try:
-replace = encode(codec, "?", 1)
-except EncodeDecodeError:
-replace = "?"
+replace = "?"# utf-8 unicode
 else:
 assert errorcb
-rets, end = errorcb(errors, namecb, reason,
+replace, end = errorcb(errors, namecb, reason,
 unicodedata, start, end)
+if len(replace) > 0:
 codec = pypy_cjk_enc_getcodec(encodebuf)
-lgt = rutf8.codepoints_in_utf8(rets)
-replace = encode(codec, rets, lgt, "strict", errorcb, namecb)
+lgt = rutf8.codepoints_in_utf8(replace)
+replace = encode(codec, replace, lgt, copystate=encodebuf)
+#else:
+#   replace is an empty utf-8 unicode, which we directly consider to
+#   encode as an empty byte string.
 with rffi.scoped_nonmovingbuffer(replace) as inbuf:
 r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end)
 if r == MBERR_NOMEMORY:
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c 
b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
--- a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.c
@@ -135,6 +135,11 @@
   return d;
 }
 
+void pypy_cjk_enc_copystate(struct pypy_cjk_enc_s *dst, struct pypy_cjk_enc_s 
*src)
+{
+dst->state = src->state;
+}
+
 Py_ssize_t pypy_cjk_enc_init(struct pypy_cjk_enc_s *d,
  Py_UNICODE *inbuf, Py_ssize_t inlen)
 {
diff --git a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h 
b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
--- a/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
+++ b/pypy/module/_multibytecodec/src/cjkcodecs/multibytecodec.h
@@ -146,6 +146,8 @@
   char *, pypymbc_ssize_t, 
pypymbc_ssize_t);
 RPY_EXTERN
 const MultibyteCodec *pypy_cjk_enc_getcodec(struct pypy_cjk_enc_s *);
+RPY_EXTERN
+void pypy_cjk_enc_copystate(struct pypy_cjk_enc_s *dst, struct pypy_cjk_enc_s 
*src);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py 
b/pypy/module/_multibytecodec/test/test_app_codecs.py
--- a/pypy/module/_multibytecodec/test/test_app_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -110,3 +110,33 @@
   lambda e: ('\xc3', e.end))
 raises(TypeError, u"\uDDA1".encode, "gbk",
"test.test_encode_custom_error_handler_type")
+
+def test_encode_replacement_with_state(self):
+import codecs
+s = u'\u4ee4\u477c\u4ee4'.encode("iso-2022-jp", errors="replace")
+assert s == '\x1b$BNa\x1b(B?\x1b$BNa\x1b(B'
+
+def test_streaming_codec(self):
+test_0 = u'\uc5fc\u76d0\u5869\u9e7d\u477c\u4e3d/\u3012'
+test_1 = 
u'\u4ee4\u477c\u3080\u304b\u3057\u3080\u304b\u3057\u3042\u308b\u3068\u3053\u308d\u306b'
+test_2 = u' foo = "Quoted string \u4ee4\u477c" '
+
+ereplace = {'errors': 'replace'}
+exml = {'errors': 'xmlcharrefreplace'}
+for codec in ("iso-2022-jp", "iso-2022-jp-ext", "iso-2022-jp-1",
+  "iso-2022-jp-2", "iso-2022-jp-3", "iso-2022-jp-2004",
+