Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r94823:a50ac22defed
Date: 2018-07-02 14:40 -0500
http://bitbucket.org/pypy/pypy/changeset/a50ac22defed/
Log: repurpose realunicode_w to differentiate between bytes and str
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1716,7 +1716,7 @@
return w_obj.convert_to_w_unicode(self)
def realunicode_w(self, w_obj):
- return w_obj.utf8_w(self).decode('utf8')
+ return w_obj.realunicode_w(self)
def utf8_0_w(self, w_obj):
"Like utf8_w, but rejects strings with NUL bytes."
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -110,8 +110,8 @@
# instead
from pypy.module._codecs.locale import (
unicode_encode_locale_surrogateescape)
- uni = space.utf8_w(w_uni)
- if b'\x00' in uni:
+ uni = space.realunicode_w(w_uni)
+ if u'\x00' in uni:
raise oefmt(space.w_ValueError, "embedded null character")
bytes = unicode_encode_locale_surrogateescape(
uni, errorhandler=encode_error_handler(space))
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -73,7 +73,7 @@
break
raise
self.line_num += 1
- line = space.utf8_w(w_line)
+ line = space.realunicode_w(w_line)
for c in line:
if c == b'\0':
raise self.error(u"line contains NULL byte")
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -218,6 +218,7 @@
def newutf8(self, x, l):
return w_some_obj()
+ @specialize.argtype(1)
def newtext(self, x):
return w_some_obj()
newtext_or_none = newtext
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -418,6 +418,10 @@
def bytes_w(self, space):
return self._value
+ def realunicode_w(self, space):
+ raise oefmt(space.w_TypeError,
+ "unicode object expected, received bytes instead")
+
def utf8_w(self, space):
# Use the default encoding.
encoding = getdefaultencoding(space)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -54,6 +54,10 @@
"""representation for debugging purposes"""
return "%s(%r)" % (self.__class__.__name__, self._utf8)
+ def unwrap(self, space):
+ # for testing
+ return self.realunicode_w(space)
+
def is_w(self, space, w_other):
if not isinstance(w_other, W_UnicodeObject):
return False
@@ -87,20 +91,8 @@
def utf8_w(self, space):
return self._utf8
- def text_w(self, space):
- try:
- identifier = jit.conditional_call_elidable(
- self._utf8, g_encode_utf8, self._length)
- except SurrogateError as e:
- raise OperationError(space.w_UnicodeEncodeError,
- space.newtuple([space.newtext('utf-8'),
- self,
- space.newint(e.index-1),
- space.newint(e.index),
- space.newtext("surrogates not allowed")]))
- if not jit.isconstant(self):
- self._utf8 = identifier
- return identifier
+ def realunicode_w(self, space):
+ return self._utf8.decode('utf8')
def listview_utf8(self):
assert self.is_ascii()
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit