Author: Richard Plangger <planri...@gmail.com>
Branch: py3.5
Changeset: r90414:ee44cdf8b435
Date: 2017-02-28 11:36 +0100
http://bitbucket.org/pypy/pypy/changeset/ee44cdf8b435/

Log:    merge py3.5-text-utf8

diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py
--- a/pypy/module/imp/importing.py
+++ b/pypy/module/imp/importing.py
@@ -67,7 +67,7 @@
 lib_pypy = os.path.join(os.path.dirname(__file__),
                         '..', '..', '..', 'lib_pypy')
 
-@unwrap_spec(modulename='text0', level=int)
+@unwrap_spec(modulename='fsencode', level=int)
 def importhook(space, modulename, w_globals=None, w_locals=None, w_fromlist=None, level=0):
     # A minimal version, that can only import builtin and lib_pypy modules!
     assert w_locals is w_globals
diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py
--- a/pypy/module/imp/interp_imp.py
+++ b/pypy/module/imp/interp_imp.py
@@ -5,6 +5,7 @@
 from pypy.interpreter.pycode import PyCode
 from pypy.module._io.interp_iobase import W_IOBase
 from pypy.interpreter.streamutil import wrap_streamerror
+from pypy.interpreter.error import OperationError
 
 
 def extension_suffixes(space):
@@ -72,7 +73,11 @@
     return None
 
 def is_builtin(space, w_name):
-    name = space.text0_w(w_name)
+    try:
+        name = space.text0_w(w_name)
+    except OperationError:
+        return space.newint(0)
+
     if name not in space.builtin_modules:
         return space.newint(0)
     if space.finditem(space.sys.get('modules'), w_name) is not None:
diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py
--- a/pypy/module/imp/test/test_import.py
+++ b/pypy/module/imp/test/test_import.py
@@ -255,6 +255,10 @@
     def test_import_keywords(self):
         __import__(name='sys', level=0)
 
+    def test_import_nonutf8_encodable(self):
+        exc = raises(ImportError, __import__, '\ud800')
+        assert exc.value.args[0].startswith("No module named ")
+
     def test_import_by_filename(self):
         import pkg.a
         filename = pkg.a.__file__
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -35,8 +35,7 @@
         w_uni = space.wrap(u'abcd')
         assert space.text_w(w_uni) == 'abcd'
         w_uni = space.wrap(unichr(0xd921) + unichr(0xdddd))
-        assert space.text_w(w_uni) == '\xed\xa4\xa1\xed\xb7\x9d'
-        #                             ^^^ and not the 4-bytes combined character
+        raises(UnicodeEncodeError, space.text_w, w_uni)
 
 
 class AppTestUnicodeStringStdOnly:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -8,7 +8,7 @@
 from rpython.rlib.runicode import (
     make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
     unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii,
-    unicode_encode_utf8sp)
+    unicode_encode_utf8_forbid_surrogates, SurrogateError)
 from rpython.rlib import jit
 
 from pypy.interpreter import unicodehelper
@@ -81,8 +81,16 @@
         return self._value
 
     def text_w(self, space):
-        identifier = jit.conditional_call_elidable(
-                            self._utf8, g_encode_utf8, self._value)
+        try:
+            identifier = jit.conditional_call_elidable(
+                                self._utf8, g_encode_utf8, self._value)
+        except SurrogateError as e:
+            raise OperationError(space.w_UnicodeEncodeError,
+                    space.newtuple([space.newtext('utf-8'),
+                                    self,
+                                    space.newint(e.index-1),
+                                    space.newint(e.index),
+                                    space.newtext("surrogates not allowed")]))
         if not jit.isconstant(self):
             self._utf8 = identifier
         return identifier
@@ -1257,7 +1265,7 @@
 @jit.elidable
 def g_encode_utf8(value):
     """This is a global function because of jit.conditional_call_value"""
-    return unicode_encode_utf8sp(value, len(value))
+    return unicode_encode_utf8_forbid_surrogates(value, len(value))
 
 _repr_function, _ = make_unicode_escape_function(
     pass_printable=True, unicode_output=True, quotes=True, prefix='')
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -433,7 +433,9 @@
     return result.build()
 
 class SurrogateError(Exception):
-    pass
+    def __init__(self, char, index):
+        self.char = char
+        self.index = index
 
 def unicode_encode_utf8_forbid_surrogates(s, size):
     # Strict surrogate-forbidding utf-8 encoding.  Any surrogate character
@@ -454,7 +456,7 @@
             result.append(chr((0x80 | (ch & 0x3f))))
         elif ch < 0x10000:
             if 0xD800 <= ch <= 0xDFFF:
-                raise SurrogateError
+                raise SurrogateError(ch, pos)
             # Encode UCS2 Unicode ordinals
             result.append((chr((0xe0 | (ch >> 12)))))
             result.append((chr((0x80 | ((ch >> 6) & 0x3f)))))
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to