[pypy-commit] pypy unicode-utf8: bytes.__mod__(unicode) must decode bytes as ascii
Author: Matti Picus Branch: unicode-utf8 Changeset: r95544:cc42e48c8a51 Date: 2018-12-26 08:20 +0200 http://bitbucket.org/pypy/pypy/changeset/cc42e48c8a51/ Log:bytes.__mod__(unicode) must decode bytes as ascii diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -11,6 +11,7 @@ from rpython.tool.sourcetools import func_with_new_name from pypy.interpreter.error import OperationError, oefmt +from pypy.interpreter.unicodehelper import check_ascii_or_raise class BaseStringFormatter(object): @@ -435,15 +436,7 @@ if not do_unicode: if got_unicode: # Make sure the format string is ascii encodable -try: -self.fmt.decode('ascii') -except UnicodeDecodeError as e: -raise OperationError(space.w_UnicodeDecodeError, -space.newtuple([space.newtext('ascii'), -space.newbytes(self.fmt), -space.newint(e.start), -space.newint(e.end), -space.newtext(e.message)])) +check_ascii_or_raise(space, self.fmt) raise NeedUnicodeFormattingError s = self.string_formatting(w_value) else: ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8: merge default into branch
Author: Matti Picus Branch: unicode-utf8 Changeset: r95547:aa0b6372c139 Date: 2019-01-01 08:45 +0200 http://bitbucket.org/pypy/pypy/changeset/aa0b6372c139/ Log:merge default into branch diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1231,9 +1231,8 @@ assert type(unicode(z)) is unicode assert unicode(z) == u'foobaz' # -# two completely corner cases where we differ from CPython: -#assert unicode(encoding='supposedly_the_encoding') == u'' -#assert unicode(errors='supposedly_the_error') == u'' +assert unicode(encoding='supposedly_the_encoding') == u'' +assert unicode(errors='supposedly_the_error') == u'' e = raises(TypeError, unicode, u'', 'supposedly_the_encoding') assert str(e.value) == 'decoding Unicode is not supported' e = raises(TypeError, unicode, u'', errors='supposedly_the_error') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -197,23 +197,20 @@ return unicodedb.islinebreak(ch) @staticmethod -@unwrap_spec(w_string=WrappedDefault("")) -def descr_new(space, w_unicodetype, w_string, w_encoding=None, +def descr_new(space, w_unicodetype, w_string=None, w_encoding=None, w_errors=None): -# NB. 
the default value of w_obj is really a *wrapped* empty string: -# there is gateway magic at work -w_obj = w_string - encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors) -if encoding is None and errors is None: -# this is very quick if w_obj is already a w_unicode -w_value = unicode_from_object(space, w_obj) +if w_string is None: +w_value = W_UnicodeObject.EMPTY +elif encoding is None and errors is None: +# this is very quick if w_string is already a w_unicode +w_value = unicode_from_object(space, w_string) else: -if space.isinstance_w(w_obj, space.w_unicode): +if space.isinstance_w(w_string, space.w_unicode): raise oefmt(space.w_TypeError, "decoding Unicode is not supported") -w_value = unicode_from_encoded_object(space, w_obj, +w_value = unicode_from_encoded_object(space, w_string, encoding, errors) if space.is_w(w_unicodetype, space.w_unicode): return w_value ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8: merge default into branch
Author: Matti Picus Branch: unicode-utf8 Changeset: r95541:c09e504c21c6 Date: 2018-12-25 21:01 +0200 http://bitbucket.org/pypy/pypy/changeset/c09e504c21c6/ Log:merge default into branch diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py --- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py +++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py @@ -327,6 +327,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [1, 20500, 3] +assert c == ffi.from_buffer(a, True) +assert c == ffi.from_buffer(a, require_writable=True) +# +p = ffi.from_buffer(b"abcd") +assert p[2] == b"c" +# +assert p == ffi.from_buffer(b"abcd", False) +py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) +py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(self): ffi = FFI() diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py --- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py +++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py @@ -244,6 +244,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [1, 20500, 3] +assert c == ffi.from_buffer(a, True) +assert c == ffi.from_buffer(a, require_writable=True) +# +p = ffi.from_buffer(b"abcd") +assert p[2] == b"c" +# +assert p == ffi.from_buffer(b"abcd", False) +py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) +py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_memmove(): ffi = _cffi1_backend.FFI() diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py --- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py +++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py @@ -1654,6 +1654,16 @@ assert ffi.typeof(c) is ffi.typeof("char[]") 
ffi.cast("unsigned short *", c)[1] += 500 assert list(a) == [1, 20500, 3] +assert c == ffi.from_buffer(a, True) +assert c == ffi.from_buffer(a, require_writable=True) +# +p = ffi.from_buffer(b"abcd") +assert p[2] == b"c" +# +assert p == ffi.from_buffer(b"abcd", False) +py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True) +py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", + require_writable=True) def test_all_primitives(self): assert set(PRIMITIVE_TO_INDEX) == set([ diff --git a/extra_tests/test_pyrepl/conftest.py b/extra_tests/test_pyrepl/conftest.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_pyrepl/conftest.py @@ -0,0 +1,8 @@ +import sys + +def pytest_ignore_collect(path): +if '__pypy__' not in sys.builtin_module_names: +try: +import pyrepl +except ImportError: +return True diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py --- a/lib_pypy/cffi/api.py +++ b/lib_pypy/cffi/api.py @@ -341,7 +341,7 @@ #""" #note that 'buffer' is a type, set on this instance by __init__ -def from_buffer(self, python_buffer): +def from_buffer(self, python_buffer, require_writable=False): """Return a <cdata 'char[]'> that points to the data of the given Python object, which must support the buffer interface. Note that this is not meant to be used on the built-in types @@ -349,7 +349,8 @@ but only on objects containing large quantities of raw data in some other format, like 'array.array' or numpy arrays. """ -return self._backend.from_buffer(self.BCharA, python_buffer) +return self._backend.from_buffer(self.BCharA, python_buffer, + require_writable) def memmove(self, dest, src, n): """ffi.memmove(dest, src, n) copies n bytes of memory from src to dest. 
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst --- a/pypy/doc/cpython_differences.rst +++ b/pypy/doc/cpython_differences.rst @@ -394,8 +394,10 @@ * some functions and attributes of the ``gc`` module behave in a slightly different way: for example, ``gc.enable`` and - ``gc.disable`` are supported, but instead of enabling and disabling - the GC, they just enable and disable the execution of finalizers. + ``gc.disable`` are supported, but "enabling and disabling the GC" has + a different meaning in PyPy than in CPython. These functions + actually enable and disable the major collections and the + exe
[pypy-commit] pypy unicode-utf8: 'abc'.encode(...) in cpython calls 'abc'.decode('ascii', 'strict').encode(...)
Author: Matti Picus Branch: unicode-utf8 Changeset: r95546:8704e00eb624 Date: 2019-01-01 08:44 +0200 http://bitbucket.org/pypy/pypy/changeset/8704e00eb624/ Log:'abc'.encode(...) in cpython calls 'abc'.decode('ascii', 'strict').encode(...) diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -465,6 +465,10 @@ raise oefmt(space.w_TypeError, "Cannot use string as modifiable buffer") +def descr_encode(self, space, w_encoding=None, w_errors=None): +w_uni = self.descr_decode(space, space.newtext('ascii'), space.newtext('strict')) +return space.call_method(w_uni, 'encode', w_encoding, w_errors) + def descr_getbuffer(self, space, w_flags): #from pypy.objspace.std.bufferobject import W_Buffer #return W_Buffer(StringBuffer(self._value)) @@ -869,7 +873,7 @@ center = interpindirect2app(W_AbstractBytesObject.descr_center), count = interpindirect2app(W_AbstractBytesObject.descr_count), decode = interpindirect2app(W_AbstractBytesObject.descr_decode), -encode = interpindirect2app(W_AbstractBytesObject.descr_encode), +encode = interpindirect2app(W_BytesObject.descr_encode), expandtabs = interpindirect2app(W_AbstractBytesObject.descr_expandtabs), find = interpindirect2app(W_AbstractBytesObject.descr_find), rfind = interpindirect2app(W_AbstractBytesObject.descr_rfind), diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -812,6 +812,11 @@ def test_encode(self): assert 'hello'.encode() == 'hello' assert type('hello'.encode()) is str +s = 'hello \xf8 world' +# CPython first decodes the bytes, then encodes +exc = raises(UnicodeDecodeError, s.encode, 'ascii') +assert str(exc.value) == ("'ascii' codec can't decode byte 0xf8" +" in position 6: ordinal not in range(128)") def test_hash(self): # check that we have the same hash as CPython 
for at least 31 bits ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8: str_w uses ascii encoding
Author: Matti Picus Branch: unicode-utf8 Changeset: r95543:b8815fb0c04d Date: 2018-12-25 22:32 +0200 http://bitbucket.org/pypy/pypy/changeset/b8815fb0c04d/ Log:str_w uses ascii encoding diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -87,7 +87,7 @@ return space.newint(uid) def str_w(self, space): -return space.text_w(encode_object(space, self, 'utf8', 'strict')) +return space.text_w(encode_object(space, self, 'ascii', 'strict')) def utf8_w(self, space): return self._utf8 ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8: raise for non-ascii.__mod__(unicode)
Author: Matti Picus Branch: unicode-utf8 Changeset: r95542:d9ad50294bd6 Date: 2018-12-25 22:32 +0200 http://bitbucket.org/pypy/pypy/changeset/d9ad50294bd6/ Log:raise for non-ascii.__mod__(unicode) diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -434,6 +434,16 @@ got_unicode = space.isinstance_w(w_value, space.w_unicode) if not do_unicode: if got_unicode: +# Make sure the format string is ascii encodable +try: +self.fmt.decode('ascii') +except UnicodeDecodeError as e: +raise OperationError(space.w_UnicodeDecodeError, +space.newtuple([space.newtext('ascii'), +space.newbytes(self.fmt), +space.newint(e.start), +space.newint(e.end), +space.newtext(e.message)])) raise NeedUnicodeFormattingError s = self.string_formatting(w_value) else: ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit
[pypy-commit] pypy unicode-utf8: disable seemingly invalid test, confirmation needed
Author: Matti Picus Branch: unicode-utf8 Changeset: r95545:ad4d65746c50 Date: 2018-12-31 13:40 +0200 http://bitbucket.org/pypy/pypy/changeset/ad4d65746c50/ Log:disable seemingly invalid test, confirmation needed diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -555,12 +555,13 @@ w(None)) raises(gateway.OperationError, space.call_function, w_app_g3_u, w(42)) -w_ascii = space.appexec([], """(): -import sys -return sys.getdefaultencoding() == 'ascii'""") -if space.is_true(w_ascii): -raises(gateway.OperationError, space.call_function, w_app_g3_u, - w("\x80")) +# XXX this part of the test seems wrong, why would "\x80" fail? +# w_ascii = space.appexec([], """(): +# import sys +# return sys.getdefaultencoding() == 'ascii'""") +# if space.is_true(w_ascii): +# raises(gateway.OperationError, space.call_function, w_app_g3_u, +#w("\x80")) def test_interp2app_unwrap_spec_unwrapper(self): space = self.space ___ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit