Author: fijal
Branch: unicode-utf8
Changeset: r90254:29c62a133d25
Date: 2017-02-21 13:04 +0100
http://bitbucket.org/pypy/pypy/changeset/29c62a133d25/
Log: fix the nomenclature and put a few stopgaps in places we know need
fixing
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -261,7 +261,7 @@
def str_w(self, space):
self._typed_unwrap_error(space, "string")
- def unicode_w(self, space):
+ def utf8_w(self, space):
self._typed_unwrap_error(space, "unicode")
def bytearray_list_of_chars_w(self, space):
@@ -1673,8 +1673,8 @@
raise oefmt(self.w_TypeError, "argument must be a string")
return self.bytes_w(w_obj)
- def unicode_w(self, w_obj):
- return w_obj.unicode_w(self)
+ def utf8_w(self, w_obj):
+ return w_obj.utf8_w(self)
def unicode0_w(self, w_obj):
"Like unicode_w, but rejects strings with NUL bytes."
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -16,7 +16,7 @@
from pypy.objspace.std.formatting import mod_format
from pypy.objspace.std.stringmethods import StringMethods
from pypy.objspace.std.unicodeobject import (
- decode_object, unicode_from_encoded_object,
+ decode_object, utf8_from_encoded_object,
getdefaultencoding)
from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -59,9 +59,9 @@
return True
if self.user_overridden_class or w_other.user_overridden_class:
return False
- s1 = space.unicode_w(self)
- s2 = space.unicode_w(w_other)
- if len(s2) > 1:
+ s1 = space.utf8_w(self)
+ s2 = space.utf8_w(w_other)
+ if len(s2) > 2:
return s1 is s2
else: # strings of len <= 1 are unique-ified
return s1 == s2
@@ -135,11 +135,10 @@
return w_other._utf8
if space.isinstance_w(w_other, space.w_bytes):
return utf8_from_string(space, w_other)._utf8
- yyy
if strict:
raise oefmt(space.w_TypeError,
"%s arg must be None, unicode or str", strict)
- return unicode_from_encoded_object(
+ return utf8_from_encoded_object(
space, w_other, None, "strict")._value
def _chr(self, char):
@@ -509,6 +508,7 @@
if encoding == 'ascii':
# XXX error handling
s = space.charbuf_w(w_obj)
+ xxx
try:
u = fast_str_decode_ascii(s)
except ValueError:
@@ -517,11 +517,13 @@
s, len(s), None, final=True, errorhandler=eh)[0]
return space.newunicode(u)
if encoding == 'utf-8':
+ yyy
s = space.charbuf_w(w_obj)
eh = unicodehelper.decode_error_handler(space)
return space.newunicode(str_decode_utf_8(
s, len(s), None, final=True, errorhandler=eh,
allow_surrogates=True)[0])
+ xxx
w_codecs = space.getbuiltinmodule("_codecs")
w_decode = space.getattr(w_codecs, space.newtext("decode"))
if errors is None:
@@ -532,7 +534,7 @@
return w_retval
-def unicode_from_encoded_object(space, w_obj, encoding, errors):
+def utf8_from_encoded_object(space, w_obj, encoding, errors):
# explicitly block bytearray on 2.7
from .bytearrayobject import W_BytearrayObject
if isinstance(w_obj, W_BytearrayObject):
@@ -580,7 +582,7 @@
check_ascii(s)
except AsciiCheckError:
# raising UnicodeDecodeError is messy, "please crash for me"
- return unicode_from_encoded_object(space, w_bytes, "ascii", "strict")
+ return utf8_from_encoded_object(space, w_bytes, "ascii", "strict")
return W_UnicodeObject(s)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit