Author: Matti Picus <matti.pi...@gmail.com> Branch: unicode-utf8-py3 Changeset: r95586:2186945d3c34 Date: 2019-01-07 08:42 +0200 http://bitbucket.org/pypy/pypy/changeset/2186945d3c34/
Log: merge unicode-utf8 into branch diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -338,6 +338,13 @@ py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", require_writable=True) + def test_release(self): + ffi = FFI() + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... + ffi.release(p) # no effect + def test_memmove(self): ffi = FFI() p = ffi.new("short[]", [-1234, -2345, -3456, -4567, -5678]) diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1457,6 +1457,35 @@ import gc; gc.collect(); gc.collect(); gc.collect() assert seen == [3] + def test_release(self): + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... 
+ ffi.release(p) # no effect + + def test_release_new_allocator(self): + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] + def test_CData_CType(self): assert isinstance(ffi.cast("int", 0), ffi.CData) assert isinstance(ffi.new("int *"), ffi.CData) diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -530,6 +530,9 @@ def from_handle(self, x): return self._backend.from_handle(x) + def release(self, x): + self._backend.release(x) + def set_unicode(self, enabled_flag): """Windows: if 'enabled_flag' is True, enable the UNICODE and _UNICODE defines in C, and declare the types like TCHAR and LPTCSTR diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py --- a/lib_pypy/cffi/cparser.py +++ b/lib_pypy/cffi/cparser.py @@ -16,6 +16,13 @@ except ImportError: lock = None +def _workaround_for_static_import_finders(): + # Issue #392: packaging tools like cx_Freeze can not find these + # because pycparser uses exec dynamic import. This is an obscure + # workaround. This function is never called. 
+ import pycparser.yacctab + import pycparser.lextab + CDEF_SOURCE_STRING = "<cdef source string>" _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$", re.DOTALL | re.MULTILINE) diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -152,6 +152,22 @@ ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), ("??", "ascii", input, 6, 7)] +if HAS_HYPOTHESIS: + @given(strategies.text()) + def test_unicode_raw_escape(u): + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) + assert r == u.encode("raw-unicode-escape") + + @given(strategies.text()) + def test_unicode_escape(u): + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) + assert r == u.encode("unicode-escape") + + @given(strategies.text()) + def test_utf8_encode_ascii_2(u): + def eh(errors, encoding, reason, p, start, end): + return "?" * (end - start), end, 'b' + assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") def test_encode_decimal(space): assert uh.unicode_encode_decimal(u' 12, 34 ', None) == ' 12, 34 ' @@ -166,20 +182,3 @@ u'12\u1234'.encode('utf8'), 'xmlcharrefreplace', handler) assert result == '12ሴ' -if HAS_HYPOTHESIS: - @given(strategies.text()) - def test_utf8_encode_ascii_2(u): - def eh(errors, encoding, reason, p, start, end): - return "?" 
* (end - start), end, 'b' - assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") - - @given(strategies.text()) - def test_unicode_raw_escape(u): - r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) - assert r == u.encode("raw-unicode-escape") - - @given(strategies.text()) - def test_unicode_escape(u): - r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) - assert r == u.encode("unicode-escape") - diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py --- a/pypy/module/__pypy__/interp_builders.py +++ b/pypy/module/__pypy__/interp_builders.py @@ -63,8 +63,9 @@ def descr__new__(space, w_subtype, size=-1): return W_UnicodeBuilder(space, 3 * size) - @unwrap_spec(s='utf8') - def descr_append(self, space, s): + def descr_append(self, space, w_s): + w_unicode = W_UnicodeObject.convert_arg_to_w_unicode(space, w_s) + s = space.utf8_w(w_unicode) self.builder.append(s) @unwrap_spec(start=int, end=int) diff --git a/pypy/module/__pypy__/test/test_builders.py b/pypy/module/__pypy__/test/test_builders.py --- a/pypy/module/__pypy__/test/test_builders.py +++ b/pypy/module/__pypy__/test/test_builders.py @@ -45,3 +45,8 @@ assert len(b) == 16 assert s == b"abc123you and me" assert b.build() == s + + def test_encode(self): + from __pypy__.builders import UnicodeBuilder + b = UnicodeBuilder() + raises(UnicodeDecodeError, b.append, b'\xc0') diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -52,6 +52,7 @@ 'unpack': 'func.unpack', 'buffer': 'cbuffer.MiniBuffer', 'memmove': 'func.memmove', + 'release': 'func.release', 'get_errno': 'cerrno.get_errno', 'set_errno': 'cerrno.set_errno', diff --git a/pypy/module/_cffi_backend/cdataobj.py b/pypy/module/_cffi_backend/cdataobj.py --- a/pypy/module/_cffi_backend/cdataobj.py +++ 
b/pypy/module/_cffi_backend/cdataobj.py @@ -476,6 +476,18 @@ def get_structobj(self): return None + def enter_exit(self, exit_now): + raise oefmt(self.space.w_ValueError, + "only 'cdata' object from ffi.new(), ffi.gc() or ffi.from_buffer() " + "can be used with the 'with' keyword or ffi.release()") + + def descr_enter(self): + self.enter_exit(False) + return self + + def descr_exit(self, args_w): + self.enter_exit(True) + class W_CDataMem(W_CData): """This is used only by the results of cffi.cast('int', x) @@ -528,14 +540,33 @@ def get_structobj(self): return self + def enter_exit(self, exit_now): + from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray + if not isinstance(self.ctype, W_CTypePtrOrArray): + W_CData.enter_exit(self, exit_now) + elif exit_now: + self._do_exit() + + def _do_exit(self): + raise NotImplementedError + class W_CDataNewStd(W_CDataNewOwning): """Subclass using the standard allocator, lltype.malloc()/lltype.free()""" - _attrs_ = [] + _attrs_ = ['explicitly_freed'] + explicitly_freed = False @rgc.must_be_light_finalizer def __del__(self): - lltype.free(self._ptr, flavor='raw') + if not self.explicitly_freed: + lltype.free(self._ptr, flavor='raw') + + def _do_exit(self): + if not self.explicitly_freed: + rgc.add_memory_pressure(-self._sizeof(), self) + self.explicitly_freed = True + rgc.may_ignore_finalizer(self) + lltype.free(self._ptr, flavor='raw') class W_CDataNewNonStd(W_CDataNewOwning): @@ -543,7 +574,16 @@ _attrs_ = ['w_raw_cdata', 'w_free'] def _finalize_(self): - self.space.call_function(self.w_free, self.w_raw_cdata) + if self.w_free is not None: + self.space.call_function(self.w_free, self.w_raw_cdata) + + def _do_exit(self): + w_free = self.w_free + if w_free is not None: + rgc.add_memory_pressure(-self._sizeof(), self) + self.w_free = None + self.may_unregister_rpython_finalizer(self.space) + self.space.call_function(w_free, self.w_raw_cdata) class W_CDataPtrToStructOrUnion(W_CData): @@ -573,6 +613,12 @@ else: return None 
+ def enter_exit(self, exit_now): + if exit_now: + structobj = self.structobj + if isinstance(structobj, W_CDataNewOwning): + structobj._do_exit() + class W_CDataSliced(W_CData): """Subclass with an explicit length, for slices.""" @@ -627,6 +673,9 @@ return "buffer len %d from '%s' object" % ( self.length, self.space.type(self.w_keepalive).name) + def enter_exit(self, exit_now): + pass # for now, no effect on PyPy + class W_CDataGCP(W_CData): """For ffi.gc().""" @@ -640,6 +689,9 @@ self.register_finalizer(space) def _finalize_(self): + self.invoke_finalizer() + + def invoke_finalizer(self): w_destructor = self.w_destructor if w_destructor is not None: self.w_destructor = None @@ -649,6 +701,11 @@ self.w_destructor = None self.may_unregister_rpython_finalizer(self.space) + def enter_exit(self, exit_now): + if exit_now: + self.may_unregister_rpython_finalizer(self.space) + self.invoke_finalizer() + W_CData.typedef = TypeDef( '_cffi_backend.CData', @@ -678,5 +735,7 @@ __iter__ = interp2app(W_CData.iter), __weakref__ = make_weakref_descr(W_CData), __dir__ = interp2app(W_CData.dir), + __enter__ = interp2app(W_CData.descr_enter), + __exit__ = interp2app(W_CData.descr_exit), ) W_CData.typedef.acceptable_as_base_class = False diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -703,6 +703,16 @@ pass return w_res + @unwrap_spec(w_cdata=W_CData) + def descr_release(self, w_cdata): + """\ +Release now the resources held by a 'cdata' object from ffi.new(), +ffi.gc() or ffi.from_buffer(). The cdata object must not be used +afterwards. 
+ +'ffi.release(cdata)' is equivalent to 'cdata.__exit__()'.""" + w_cdata.enter_exit(True) + class W_InitOnceLock(W_Root): def __init__(self, space): @@ -777,6 +787,7 @@ new_allocator = interp2app(W_FFIObject.descr_new_allocator), new_handle = interp2app(W_FFIObject.descr_new_handle), offsetof = interp2app(W_FFIObject.descr_offsetof), + release = interp2app(W_FFIObject.descr_release), sizeof = interp2app(W_FFIObject.descr_sizeof), string = interp2app(W_FFIObject.descr_string), typeof = interp2app(W_FFIObject.descr_typeof), diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py --- a/pypy/module/_cffi_backend/func.py +++ b/pypy/module/_cffi_backend/func.py @@ -264,3 +264,7 @@ @unwrap_spec(w_cdata=cdataobj.W_CData, size=int) def gcp(space, w_cdata, w_destructor, size=0): return w_cdata.with_gc(w_destructor, size) + +@unwrap_spec(w_cdata=cdataobj.W_CData) +def release(space, w_cdata): + w_cdata.enter_exit(True) diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -4074,3 +4074,114 @@ assert_eq(cast(t5, 7.0), cast(t3, 7)) assert_lt(cast(t5, 3.1), 3.101) assert_gt(cast(t5, 3.1), 3) + +def test_explicit_release_new(): + # release() on a ffi.new() object has no effect on CPython, but + # really releases memory on PyPy. We can't test that effect + # though, because a released cdata is not marked. + BIntP = new_pointer_type(new_primitive_type("int")) + p = newp(BIntP) + p[0] = 42 + py.test.raises(IndexError, "p[1]") + release(p) + # here, reading p[0] might give garbage or segfault... 
+ release(p) # no effect + # + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + assert pstruct.p == cast(BIntP, 0) + release(pstruct) + # here, reading pstruct.p might give garbage or segfault... + release(pstruct) # no effect + +def test_explicit_release_new_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + with newp(BIntP) as p: + p[0] = 42 + assert p[0] == 42 + # here, reading p[0] might give garbage or segfault... + release(p) # no effect + +def test_explicit_release_badtype(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, release, p) + py.test.raises(ValueError, release, p) + BStruct = new_struct_type("struct foo") + BStructP = new_pointer_type(BStruct) + complete_struct_or_union(BStruct, [('p', BIntP, -1)]) + pstruct = newp(BStructP) + py.test.raises(ValueError, release, pstruct[0]) + +def test_explicit_release_badtype_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + p = cast(BIntP, 12345) + py.test.raises(ValueError, "with p: pass") + py.test.raises(ValueError, "with p: pass") + +def test_explicit_release_gc(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + assert seen == [] + release(p) + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_gc_contextmgr(): + BIntP = new_pointer_type(new_primitive_type("int")) + seen = [] + intp1 = newp(BIntP, 12345) + p1 = cast(BIntP, intp1) + p = gcp(p1, seen.append) + with p: + assert p[0] == 12345 + assert seen == [] + assert seen == [p1] + assert p1[0] == 12345 + assert p[0] == 12345 # true so far, but might change to raise RuntimeError + release(p) # no effect + +def 
test_explicit_release_from_buffer(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + assert p[2] == b"z" + release(p) + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_from_buffer_contextmgr(): + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + p = from_buffer(BCharA, a) + with p: + assert p[2] == b"z" + assert p[2] == b"z" # true so far, but might change to raise RuntimeError + release(p) # no effect + +def test_explicit_release_bytearray_on_cpython(): + if '__pypy__' in sys.builtin_module_names: + py.test.skip("pypy's bytearray are never locked") + a = bytearray(b"xyz") + BChar = new_primitive_type("char") + BCharP = new_pointer_type(BChar) + BCharA = new_array_type(BCharP, None) + a += b't' * 10 + p = from_buffer(BCharA, a) + py.test.raises(BufferError, "a += b'u' * 100") + release(p) + a += b'v' * 100 + release(p) # no effect + a += b'w' * 1000 + assert a == bytearray(b"xyz" + b't' * 10 + b'v' * 100 + b'w' * 1000) diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py --- a/pypy/module/_cffi_backend/test/test_recompiler.py +++ b/pypy/module/_cffi_backend/test/test_recompiler.py @@ -2108,3 +2108,36 @@ else: assert lib.__loader__ is None assert lib.__spec__ is None + + def test_release(self): + ffi, lib = self.prepare("", "test_release", "") + p = ffi.new("int[]", 123) + ffi.release(p) + # here, reading p[0] might give garbage or segfault... 
+ ffi.release(p) # no effect + + def test_release_new_allocator(self): + ffi, lib = self.prepare("struct ab { int a, b; };", + "test_release_new_allocator", + "struct ab { int a, b; };") + seen = [] + def myalloc(size): + seen.append(size) + return ffi.new("char[]", b"X" * size) + def myfree(raw): + seen.append(raw) + alloc2 = ffi.new_allocator(alloc=myalloc, free=myfree) + p = alloc2("int[]", 15) + assert seen == [15 * 4] + ffi.release(p) + assert seen == [15 * 4, p] + ffi.release(p) # no effect + assert seen == [15 * 4, p] + # + del seen[:] + p = alloc2("struct ab *") + assert seen == [2 * 4] + ffi.release(p) + assert seen == [2 * 4, p] + ffi.release(p) # no effect + assert seen == [2 * 4, p] diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -247,7 +247,6 @@ def xmlcharrefreplace_errors(space, w_exc): - check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): w_obj = space.getattr(w_exc, space.newtext('object')) @@ -276,7 +275,6 @@ def backslashreplace_errors(space, w_exc): - check_exception(space, w_exc) if (space.isinstance_w(w_exc, space.w_UnicodeEncodeError) or space.isinstance_w(w_exc, space.w_UnicodeTranslateError)): @@ -664,7 +662,6 @@ def wrap_encoder(space, w_arg, errors="strict"): # w_arg is a W_Unicode or W_Bytes? w_arg = space.convert_arg_to_w_unicode(w_arg, errors) - w_arg = space.convert_arg_to_w_unicode(w_arg) if errors is None: errors = 'strict' allow_surrogates = False @@ -684,8 +681,6 @@ @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): - - if errors is None: errors = 'strict' final = space.is_true(w_final) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit