[pypy-commit] pypy unicode-utf8-py3: repurpose realunicode_w to differentiate between bytes and str

mattip Sat, 07 Jul 2018 21:18:01 -0700

Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r94823:a50ac22defed
Date: 2018-07-02 14:40 -0500
http://bitbucket.org/pypy/pypy/changeset/a50ac22defed/


Log:    repurpose realunicode_w to differentiate between bytes and str

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1716,7 +1716,7 @@
         return w_obj.convert_to_w_unicode(self)
 
     def realunicode_w(self, w_obj):
-        return w_obj.utf8_w(self).decode('utf8')
+        return w_obj.realunicode_w(self)
 
     def utf8_0_w(self, w_obj):
         "Like utf8_w, but rejects strings with NUL bytes."
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -110,8 +110,8 @@
         # instead
         from pypy.module._codecs.locale import (
             unicode_encode_locale_surrogateescape)
-        uni = space.utf8_w(w_uni)
-        if b'\x00' in uni:
+        uni = space.realunicode_w(w_uni)
+        if u'\x00' in uni:
             raise oefmt(space.w_ValueError, "embedded null character")
         bytes = unicode_encode_locale_surrogateescape(
             uni, errorhandler=encode_error_handler(space))
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -73,7 +73,7 @@
                             break
                 raise
             self.line_num += 1
-            line = space.utf8_w(w_line)
+            line = space.realunicode_w(w_line)
             for c in line:
                 if c == b'\0':
                     raise self.error(u"line contains NULL byte")
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -218,6 +218,7 @@
     def newutf8(self, x, l):
         return w_some_obj()
 
+    @specialize.argtype(1)
     def newtext(self, x):
         return w_some_obj()
     newtext_or_none = newtext
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -418,6 +418,10 @@
     def bytes_w(self, space):
         return self._value
 
+    def realunicode_w(self, space):
+        raise oefmt(space.w_TypeError,
+                    "unicode object expected, received bytes instead")
+
     def utf8_w(self, space):
         # Use the default encoding.                                            
 
         encoding = getdefaultencoding(space)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -54,6 +54,10 @@
         """representation for debugging purposes"""
         return "%s(%r)" % (self.__class__.__name__, self._utf8)
 
+    def unwrap(self, space):
+        # for testing
+        return self.realunicode_w(space)
+
     def is_w(self, space, w_other):
         if not isinstance(w_other, W_UnicodeObject):
             return False
@@ -87,20 +91,8 @@
     def utf8_w(self, space):
         return self._utf8
 
-    def text_w(self, space):
-        try:
-            identifier = jit.conditional_call_elidable(
-                                self._utf8, g_encode_utf8, self._length)
-        except SurrogateError as e:
-            raise OperationError(space.w_UnicodeEncodeError,
-                    space.newtuple([space.newtext('utf-8'),
-                                    self,
-                                    space.newint(e.index-1),
-                                    space.newint(e.index),
-                                    space.newtext("surrogates not allowed")]))
-        if not jit.isconstant(self):
-            self._utf8 = identifier
-        return identifier
+    def realunicode_w(self, space):
+        return self._utf8.decode('utf8')
 
     def listview_utf8(self):
         assert self.is_ascii()
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: repurpose realunicode_w to differentiate between bytes and str

Reply via email to