Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r94823:a50ac22defed
Date: 2018-07-02 14:40 -0500
http://bitbucket.org/pypy/pypy/changeset/a50ac22defed/

Log: repurpose realunicode_w to differentiate between bytes and str

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1716,7 +1716,7 @@
         return w_obj.convert_to_w_unicode(self)
 
     def realunicode_w(self, w_obj):
-        return w_obj.utf8_w(self).decode('utf8')
+        return w_obj.realunicode_w(self)
 
     def utf8_0_w(self, w_obj):
         "Like utf8_w, but rejects strings with NUL bytes."
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -110,8 +110,8 @@
     # instead
     from pypy.module._codecs.locale import (
         unicode_encode_locale_surrogateescape)
-    uni = space.utf8_w(w_uni)
-    if b'\x00' in uni:
+    uni = space.realunicode_w(w_uni)
+    if u'\x00' in uni:
         raise oefmt(space.w_ValueError, "embedded null character")
     bytes = unicode_encode_locale_surrogateescape(
         uni, errorhandler=encode_error_handler(space))
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -73,7 +73,7 @@
                             break
                 raise
             self.line_num += 1
-            line = space.utf8_w(w_line)
+            line = space.realunicode_w(w_line)
             for c in line:
                 if c == b'\0':
                     raise self.error(u"line contains NULL byte")
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -218,6 +218,7 @@
     def newutf8(self, x, l):
         return w_some_obj()
 
+    @specialize.argtype(1)
     def newtext(self, x):
         return w_some_obj()
     newtext_or_none = newtext
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -418,6 +418,10 @@
     def bytes_w(self, space):
         return self._value
 
+    def realunicode_w(self, space):
+        raise oefmt(space.w_TypeError,
+                    "unicode object expected, received bytes instead")
+
     def utf8_w(self, space):
         # Use the default encoding.
         encoding = getdefaultencoding(space)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -54,6 +54,10 @@
         """representation for debugging purposes"""
         return "%s(%r)" % (self.__class__.__name__, self._utf8)
 
+    def unwrap(self, space):
+        # for testing
+        return self.realunicode_w(space)
+
     def is_w(self, space, w_other):
         if not isinstance(w_other, W_UnicodeObject):
             return False
@@ -87,20 +91,8 @@
     def utf8_w(self, space):
         return self._utf8
 
-    def text_w(self, space):
-        try:
-            identifier = jit.conditional_call_elidable(
-                self._utf8, g_encode_utf8, self._length)
-        except SurrogateError as e:
-            raise OperationError(space.w_UnicodeEncodeError,
-                    space.newtuple([space.newtext('utf-8'),
-                                    self,
-                                    space.newint(e.index-1),
-                                    space.newint(e.index),
-                                    space.newtext("surrogates not allowed")]))
-        if not jit.isconstant(self):
-            self._utf8 = identifier
-        return identifier
+    def realunicode_w(self, space):
+        return self._utf8.decode('utf8')
 
     def listview_utf8(self):
         assert self.is_ascii()
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit