[pypy-commit] pypy unicode-utf8: bytes.__mod__(unicode) must decode bytes as ascii

2018-12-31 Thread mattip
Author: Matti Picus 
Branch: unicode-utf8
Changeset: r95544:cc42e48c8a51
Date: 2018-12-26 08:20 +0200
http://bitbucket.org/pypy/pypy/changeset/cc42e48c8a51/

Log:bytes.__mod__(unicode) must decode bytes as ascii

diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -11,6 +11,7 @@
 from rpython.tool.sourcetools import func_with_new_name
 
 from pypy.interpreter.error import OperationError, oefmt
+from pypy.interpreter.unicodehelper import check_ascii_or_raise
 
 
 class BaseStringFormatter(object):
@@ -435,15 +436,7 @@
 if not do_unicode:
 if got_unicode:
 # Make sure the format string is ascii encodable
-try:
-self.fmt.decode('ascii')
-except UnicodeDecodeError as e:
-raise OperationError(space.w_UnicodeDecodeError,
-space.newtuple([space.newtext('ascii'),
-space.newbytes(self.fmt),
-space.newint(e.start),
-space.newint(e.end),
-space.newtext(e.message)]))
+check_ascii_or_raise(space, self.fmt)
 raise NeedUnicodeFormattingError
 s = self.string_formatting(w_value)
 else:
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: merge default into branch

2018-12-31 Thread mattip
Author: Matti Picus 
Branch: unicode-utf8
Changeset: r95547:aa0b6372c139
Date: 2019-01-01 08:45 +0200
http://bitbucket.org/pypy/pypy/changeset/aa0b6372c139/

Log:merge default into branch

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1231,9 +1231,8 @@
 assert type(unicode(z)) is unicode
 assert unicode(z) == u'foobaz'
 #
-# two completely corner cases where we differ from CPython:
-#assert unicode(encoding='supposedly_the_encoding') == u''
-#assert unicode(errors='supposedly_the_error') == u''
+assert unicode(encoding='supposedly_the_encoding') == u''
+assert unicode(errors='supposedly_the_error') == u''
 e = raises(TypeError, unicode, u'', 'supposedly_the_encoding')
 assert str(e.value) == 'decoding Unicode is not supported'
 e = raises(TypeError, unicode, u'', errors='supposedly_the_error')
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -197,23 +197,20 @@
 return unicodedb.islinebreak(ch)
 
 @staticmethod
-@unwrap_spec(w_string=WrappedDefault(""))
-def descr_new(space, w_unicodetype, w_string, w_encoding=None,
+def descr_new(space, w_unicodetype, w_string=None, w_encoding=None,
   w_errors=None):
-# NB. the default value of w_obj is really a *wrapped* empty string:
-# there is gateway magic at work
-w_obj = w_string
-
 encoding, errors = _get_encoding_and_errors(space, w_encoding,
 w_errors)
-if encoding is None and errors is None:
-# this is very quick if w_obj is already a w_unicode
-w_value = unicode_from_object(space, w_obj)
+if w_string is None:
+w_value = W_UnicodeObject.EMPTY
+elif encoding is None and errors is None:
+# this is very quick if w_string is already a w_unicode
+w_value = unicode_from_object(space, w_string)
 else:
-if space.isinstance_w(w_obj, space.w_unicode):
+if space.isinstance_w(w_string, space.w_unicode):
 raise oefmt(space.w_TypeError,
 "decoding Unicode is not supported")
-w_value = unicode_from_encoded_object(space, w_obj,
+w_value = unicode_from_encoded_object(space, w_string,
   encoding, errors)
 if space.is_w(w_unicodetype, space.w_unicode):
 return w_value
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: merge default into branch

2018-12-31 Thread mattip
Author: Matti Picus 
Branch: unicode-utf8
Changeset: r95541:c09e504c21c6
Date: 2018-12-25 21:01 +0200
http://bitbucket.org/pypy/pypy/changeset/c09e504c21c6/

Log:merge default into branch

diff --git a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py
--- a/extra_tests/cffi_tests/cffi0/test_ffi_backend.py
+++ b/extra_tests/cffi_tests/cffi0/test_ffi_backend.py
@@ -327,6 +327,16 @@
 assert ffi.typeof(c) is ffi.typeof("char[]")
 ffi.cast("unsigned short *", c)[1] += 500
 assert list(a) == [1, 20500, 3]
+assert c == ffi.from_buffer(a, True)
+assert c == ffi.from_buffer(a, require_writable=True)
+#
+p = ffi.from_buffer(b"abcd")
+assert p[2] == b"c"
+#
+assert p == ffi.from_buffer(b"abcd", False)
+py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True)
+py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd",
+ require_writable=True)
 
 def test_memmove(self):
 ffi = FFI()
diff --git a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py
--- a/extra_tests/cffi_tests/cffi1/test_ffi_obj.py
+++ b/extra_tests/cffi_tests/cffi1/test_ffi_obj.py
@@ -244,6 +244,16 @@
 assert ffi.typeof(c) is ffi.typeof("char[]")
 ffi.cast("unsigned short *", c)[1] += 500
 assert list(a) == [1, 20500, 3]
+assert c == ffi.from_buffer(a, True)
+assert c == ffi.from_buffer(a, require_writable=True)
+#
+p = ffi.from_buffer(b"abcd")
+assert p[2] == b"c"
+#
+assert p == ffi.from_buffer(b"abcd", False)
+py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True)
+py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd",
+ require_writable=True)
 
 def test_memmove():
 ffi = _cffi1_backend.FFI()
diff --git a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py
--- a/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py
+++ b/extra_tests/cffi_tests/cffi1/test_new_ffi_1.py
@@ -1654,6 +1654,16 @@
 assert ffi.typeof(c) is ffi.typeof("char[]")
 ffi.cast("unsigned short *", c)[1] += 500
 assert list(a) == [1, 20500, 3]
+assert c == ffi.from_buffer(a, True)
+assert c == ffi.from_buffer(a, require_writable=True)
+#
+p = ffi.from_buffer(b"abcd")
+assert p[2] == b"c"
+#
+assert p == ffi.from_buffer(b"abcd", False)
+py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd", True)
+py.test.raises((TypeError, BufferError), ffi.from_buffer, b"abcd",
+ require_writable=True)
 
 def test_all_primitives(self):
 assert set(PRIMITIVE_TO_INDEX) == set([
diff --git a/extra_tests/test_pyrepl/conftest.py b/extra_tests/test_pyrepl/conftest.py
new file mode 100644
--- /dev/null
+++ b/extra_tests/test_pyrepl/conftest.py
@@ -0,0 +1,8 @@
+import sys
+
+def pytest_ignore_collect(path):
+if '__pypy__' not in sys.builtin_module_names:
+try:
+import pyrepl
+except ImportError:
+return True
diff --git a/lib_pypy/cffi/api.py b/lib_pypy/cffi/api.py
--- a/lib_pypy/cffi/api.py
+++ b/lib_pypy/cffi/api.py
@@ -341,7 +341,7 @@
#"""
#note that 'buffer' is a type, set on this instance by __init__
 
-def from_buffer(self, python_buffer):
+def from_buffer(self, python_buffer, require_writable=False):
 """Return a <cdata 'char[]'> that points to the data of the
 given Python object, which must support the buffer interface.
 Note that this is not meant to be used on the built-in types
@@ -349,7 +349,8 @@
 but only on objects containing large quantities of raw data
 in some other format, like 'array.array' or numpy arrays.
 """
-return self._backend.from_buffer(self.BCharA, python_buffer)
+return self._backend.from_buffer(self.BCharA, python_buffer,
+ require_writable)
 
 def memmove(self, dest, src, n):
 """ffi.memmove(dest, src, n) copies n bytes of memory from src to dest.
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -394,8 +394,10 @@
   
 * some functions and attributes of the ``gc`` module behave in a
   slightly different way: for example, ``gc.enable`` and
-  ``gc.disable`` are supported, but instead of enabling and disabling
-  the GC, they just enable and disable the execution of finalizers.
+  ``gc.disable`` are supported, but "enabling and disabling the GC" has
+  a different meaning in PyPy than in CPython.  These functions
+  actually enable and disable the major collections and the
+  exe

[pypy-commit] pypy unicode-utf8: 'abc'.encode(...) in cpython calls 'abc'.decode('ascii', 'strict').encode(...)

2018-12-31 Thread mattip
Author: Matti Picus 
Branch: unicode-utf8
Changeset: r95546:8704e00eb624
Date: 2019-01-01 08:44 +0200
http://bitbucket.org/pypy/pypy/changeset/8704e00eb624/

Log:'abc'.encode(...) in cpython calls 'abc'.decode('ascii',
'strict').encode(...)

diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -465,6 +465,10 @@
 raise oefmt(space.w_TypeError,
 "Cannot use string as modifiable buffer")
 
+def descr_encode(self, space, w_encoding=None, w_errors=None):
+w_uni = self.descr_decode(space, space.newtext('ascii'), space.newtext('strict'))
+return space.call_method(w_uni, 'encode', w_encoding, w_errors)
+
 def descr_getbuffer(self, space, w_flags):
 #from pypy.objspace.std.bufferobject import W_Buffer
 #return W_Buffer(StringBuffer(self._value))
@@ -869,7 +873,7 @@
 center = interpindirect2app(W_AbstractBytesObject.descr_center),
 count = interpindirect2app(W_AbstractBytesObject.descr_count),
 decode = interpindirect2app(W_AbstractBytesObject.descr_decode),
-encode = interpindirect2app(W_AbstractBytesObject.descr_encode),
+encode = interpindirect2app(W_BytesObject.descr_encode),
 expandtabs = interpindirect2app(W_AbstractBytesObject.descr_expandtabs),
 find = interpindirect2app(W_AbstractBytesObject.descr_find),
 rfind = interpindirect2app(W_AbstractBytesObject.descr_rfind),
diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py
--- a/pypy/objspace/std/test/test_bytesobject.py
+++ b/pypy/objspace/std/test/test_bytesobject.py
@@ -812,6 +812,11 @@
 def test_encode(self):
 assert 'hello'.encode() == 'hello'
 assert type('hello'.encode()) is str
+s = 'hello \xf8 world'
+# CPython first decodes the bytes, then encodes
+exc = raises(UnicodeDecodeError, s.encode, 'ascii')
+assert str(exc.value) == ("'ascii' codec can't decode byte 0xf8"
+" in position 6: ordinal not in range(128)")
 
 def test_hash(self):
 # check that we have the same hash as CPython for at least 31 bits
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: str_w uses ascii encoding

2018-12-31 Thread mattip
Author: Matti Picus 
Branch: unicode-utf8
Changeset: r95543:b8815fb0c04d
Date: 2018-12-25 22:32 +0200
http://bitbucket.org/pypy/pypy/changeset/b8815fb0c04d/

Log:str_w uses ascii encoding

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -87,7 +87,7 @@
 return space.newint(uid)
 
 def str_w(self, space):
-return space.text_w(encode_object(space, self, 'utf8', 'strict'))
+return space.text_w(encode_object(space, self, 'ascii', 'strict'))
 
 def utf8_w(self, space):
 return self._utf8
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: raise for non-ascii.__mod__(unicode)

2018-12-31 Thread mattip
Author: Matti Picus 
Branch: unicode-utf8
Changeset: r95542:d9ad50294bd6
Date: 2018-12-25 22:32 +0200
http://bitbucket.org/pypy/pypy/changeset/d9ad50294bd6/

Log:raise for non-ascii.__mod__(unicode)

diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -434,6 +434,16 @@
 got_unicode = space.isinstance_w(w_value, space.w_unicode)
 if not do_unicode:
 if got_unicode:
+# Make sure the format string is ascii encodable
+try:
+self.fmt.decode('ascii')
+except UnicodeDecodeError as e:
+raise OperationError(space.w_UnicodeDecodeError,
+space.newtuple([space.newtext('ascii'),
+space.newbytes(self.fmt),
+space.newint(e.start),
+space.newint(e.end),
+space.newtext(e.message)]))
 raise NeedUnicodeFormattingError
 s = self.string_formatting(w_value)
 else:
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit


[pypy-commit] pypy unicode-utf8: disable seemingly invalid test, confirmation needed

2018-12-31 Thread mattip
Author: Matti Picus 
Branch: unicode-utf8
Changeset: r95545:ad4d65746c50
Date: 2018-12-31 13:40 +0200
http://bitbucket.org/pypy/pypy/changeset/ad4d65746c50/

Log:disable seemingly invalid test, confirmation needed

diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py
--- a/pypy/interpreter/test/test_gateway.py
+++ b/pypy/interpreter/test/test_gateway.py
@@ -555,12 +555,13 @@
w(None))
 raises(gateway.OperationError, space.call_function, w_app_g3_u,
w(42))
-w_ascii = space.appexec([], """():
-import sys
-return sys.getdefaultencoding() == 'ascii'""")
-if space.is_true(w_ascii):
-raises(gateway.OperationError, space.call_function, w_app_g3_u,
-   w("\x80"))
+# XXX this part of the test seems wrong, why would "\x80" fail?
+# w_ascii = space.appexec([], """():
+# import sys
+# return sys.getdefaultencoding() == 'ascii'""")
+# if space.is_true(w_ascii):
+# raises(gateway.OperationError, space.call_function, w_app_g3_u,
+#w("\x80"))
 
 def test_interp2app_unwrap_spec_unwrapper(self):
 space = self.space
___
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit