[pypy-commit] pypy unicode-utf8: fix the nomenclature and put a few stop gaps in places we know need fixing

fijal Tue, 21 Feb 2017 04:10:54 -0800

Author: fijal
Branch: unicode-utf8
Changeset: r90254:29c62a133d25
Date: 2017-02-21 13:04 +0100
http://bitbucket.org/pypy/pypy/changeset/29c62a133d25/


Log:    fix the nomenclature and put a few stop gaps in places we know need
        fixing

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -261,7 +261,7 @@
     def str_w(self, space):
         self._typed_unwrap_error(space, "string")
 
-    def unicode_w(self, space):
+    def utf8_w(self, space):
         self._typed_unwrap_error(space, "unicode")
 
     def bytearray_list_of_chars_w(self, space):
@@ -1673,8 +1673,8 @@
             raise oefmt(self.w_TypeError, "argument must be a string")
         return self.bytes_w(w_obj)
 
-    def unicode_w(self, w_obj):
-        return w_obj.unicode_w(self)
+    def utf8_w(self, w_obj):
+        return w_obj.utf8_w(self)
 
     def unicode0_w(self, w_obj):
         "Like unicode_w, but rejects strings with NUL bytes."
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -16,7 +16,7 @@
 from pypy.objspace.std.formatting import mod_format
 from pypy.objspace.std.stringmethods import StringMethods
 from pypy.objspace.std.unicodeobject import (
-    decode_object, unicode_from_encoded_object,
+    decode_object, utf8_from_encoded_object,
     getdefaultencoding)
 from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -59,9 +59,9 @@
             return True
         if self.user_overridden_class or w_other.user_overridden_class:
             return False
-        s1 = space.unicode_w(self)
-        s2 = space.unicode_w(w_other)
-        if len(s2) > 1:
+        s1 = space.utf8_w(self)
+        s2 = space.utf8_w(w_other)
+        if len(s2) > 2:
             return s1 is s2
         else:            # strings of len <= 1 are unique-ified
             return s1 == s2
@@ -135,11 +135,10 @@
             return w_other._utf8
         if space.isinstance_w(w_other, space.w_bytes):
             return utf8_from_string(space, w_other)._utf8
-        yyy
         if strict:
             raise oefmt(space.w_TypeError,
                 "%s arg must be None, unicode or str", strict)
-        return unicode_from_encoded_object(
+        return utf8_from_encoded_object(
             space, w_other, None, "strict")._value
 
     def _chr(self, char):
@@ -509,6 +508,7 @@
         if encoding == 'ascii':
             # XXX error handling
             s = space.charbuf_w(w_obj)
+            xxx
             try:
                 u = fast_str_decode_ascii(s)
             except ValueError:
@@ -517,11 +517,13 @@
                     s, len(s), None, final=True, errorhandler=eh)[0]
             return space.newunicode(u)
         if encoding == 'utf-8':
+            yyy
             s = space.charbuf_w(w_obj)
             eh = unicodehelper.decode_error_handler(space)
             return space.newunicode(str_decode_utf_8(
                     s, len(s), None, final=True, errorhandler=eh,
                     allow_surrogates=True)[0])
+    xxx
     w_codecs = space.getbuiltinmodule("_codecs")
     w_decode = space.getattr(w_codecs, space.newtext("decode"))
     if errors is None:
@@ -532,7 +534,7 @@
     return w_retval
 
 
-def unicode_from_encoded_object(space, w_obj, encoding, errors):
+def utf8_from_encoded_object(space, w_obj, encoding, errors):
     # explicitly block bytearray on 2.7
     from .bytearrayobject import W_BytearrayObject
     if isinstance(w_obj, W_BytearrayObject):
@@ -580,7 +582,7 @@
         check_ascii(s)
     except AsciiCheckError:
         # raising UnicodeDecodeError is messy, "please crash for me"
-        return unicode_from_encoded_object(space, w_bytes, "ascii", "strict")
+        return utf8_from_encoded_object(space, w_bytes, "ascii", "strict")
     return W_UnicodeObject(s)
 
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: fix the nomenclature and put a few stop gaps in places we know need fixing

Reply via email to