[pypy-commit] pypy unicode-utf8-py3: try making space.newtext accept only utf8

mattip Sun, 05 Aug 2018 12:18:01 -0700

Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r94953:1b6dfea3eef5
Date: 2018-08-05 12:16 -0700
http://bitbucket.org/pypy/pypy/changeset/1b6dfea3eef5/


Log:    try making space.newtext accept only utf8

diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -105,9 +105,9 @@
             w_id = space.rshift(w_id, w_4)
         return ''.join(addrstring)
 
-    def getrepr(self, space, info, moreinfo=u''):
-        addrstring = unicode(self.getaddrstring(space))
-        return space.newtext(u"<%s at 0x%s%s>" % (info, addrstring, moreinfo))
+    def getrepr(self, space, info, moreinfo=''):
+        addrstring = self.getaddrstring(space)
+        return space.newtext("<%s at 0x%s%s>" % (info, addrstring, moreinfo))
 
     def getslotvalue(self, index):
         raise NotImplementedError
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -42,10 +42,8 @@
 
     def descr__repr__(self, space):
         addrstring = self.getaddrstring(space)
-        return space.newtext(u"<%s object %s at 0x%s>" %
-                          (unicode(self.KIND),
-                           self.get_qualname(),
-                           unicode(addrstring)))
+        return space.newtext("<%s object %s at 0x%s>" %
+                          (self.KIND, self.get_qualname(), addrstring))
 
     def descr_send(self, w_arg):
         """send(arg) -> send 'arg' into generator/coroutine,
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -1628,7 +1628,7 @@
         if (oparg & consts.FVS_MASK) == consts.FVS_HAVE_SPEC:
             w_spec = self.popvalue()
         else:
-            w_spec = space.newtext(u'')
+            w_spec = space.newtext('')
         w_value = self.popvalue()
         #
         conversion = oparg & consts.FVC_MASK
@@ -1649,9 +1649,9 @@
         lst = []
         for i in range(itemcount-1, -1, -1):
             w_item = self.peekvalue(i)
-            lst.append(space.realunicode_w(w_item))
+            lst.append(space.utf8_w(w_item))
         self.dropvalues(itemcount)
-        w_res = space.newtext(u''.join(lst))
+        w_res = space.newtext(''.join(lst))
         self.pushvalue(w_res)
 
     def _revdb_load_var(self, oparg):
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -74,8 +74,8 @@
                               force_ignore=False)[0]
     elif _MACOSX:
         bytes = space.bytes_w(w_string)
-        uni = runicode.str_decode_utf_8_impl(
-            bytes, len(bytes), 'surrogateescape', final=True,
+        uni = str_decode_utf8(
+            bytes, 'surrogateescape', final=True,
             errorhandler=state.decode_error_handler,
             allow_surrogates=False)[0]
     elif space.sys.filesystemencoding is None or state.codec_need_encodings:
@@ -296,15 +296,13 @@
 
 if sys.platform == 'win32':
     def utf8_encode_mbcs(s, slen, errors, errorhandler):
-        from rpython.rlib import runicode
         s = s.decode('utf-8')
-        res = runicode.unicode_encode_mbcs(s, slen, errors, errorhandler)
+        res = unicode_encode_mbcs(s, slen, errors, errorhandler)
         return res
         
     def str_decode_mbcs(s, errors, final, errorhandler):
-        from rpython.rlib import runicode
         slen = len(s)
-        res, size = runicode.str_decode_mbcs(s, slen, final=final, errors=errors,
+        res, size = str_decode_mbcs(s, slen, final=final, errors=errors,
                                            errorhandler=errorhandler)
         return res.encode('utf8'), len(res)
 
diff --git a/pypy/module/__pypy__/interp_stderrprinter.py b/pypy/module/__pypy__/interp_stderrprinter.py
--- a/pypy/module/__pypy__/interp_stderrprinter.py
+++ b/pypy/module/__pypy__/interp_stderrprinter.py
@@ -16,8 +16,8 @@
         self.fd = fd
 
     def descr_repr(self, space):
-        addrstring = unicode(self.getaddrstring(space))
-        return space.newtext(u"<StdErrPrinter(fd=%d) object at 0x%s>" %
+        addrstring = self.getaddrstring(space)
+        return space.newtext("<StdErrPrinter(fd=%d) object at 0x%s>" %
                                 (self.fd, addrstring))
 
     def descr_noop(self, space):
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -3,7 +3,7 @@
 from rpython.rlib.objectmodel import we_are_translated, not_rpython
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib import runicode
-from rpython.rlib.runicode import ( raw_unicode_escape_helper_unicode)
+from rpython.rlib.runicode import raw_unicode_escape_helper_unicode
 from rpython.rlib import rutf8
 
 from pypy.interpreter.error import OperationError, oefmt
diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py
--- a/pypy/module/_io/interp_stringio.py
+++ b/pypy/module/_io/interp_stringio.py
@@ -219,11 +219,13 @@
                 self.w_decoder, "decode", w_obj, space.w_True)
         else:
             w_decoded = w_obj
-        if self.writenl:
+        writenl = self.writenl
+        if writenl is not None:
             w_decoded = space.call_method(
                 w_decoded, "replace",
-                space.newtext("\n"), space.newutf8(self.writenl,
-                    get_utf8_length(self.writenl)))
+                space.newtext("\n"),
+                space.newutf8(writenl, get_utf8_length(writenl)),
+            )
         string = space.utf8_w(w_decoded)
         if string:
             self.buf.write(string)
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -1,7 +1,7 @@
 import sys
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib.objectmodel import specialize, always_inline, r_dict
-from rpython.rlib import rfloat, runicode, rutf8
+from rpython.rlib import rfloat, rutf8
 from rpython.rtyper.lltypesystem import lltype, rffi
 from pypy.interpreter.error import oefmt, OperationError
 from rpython.rlib.rarithmetic import r_uint
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -451,7 +451,7 @@
                 return space.newbytes(func(add_arg, argdesc, ll_type))
             elif c == 'u':
                 return space.newutf8(rutf8.unichr_as_utf8(
-                    ord(func(add_arg, argdesc, ll_type))), 1)
+                    r_uint(ord(func(add_arg, argdesc, ll_type)))), 1)
             elif c == 'f' or c == 'd' or c == 'g':
                 return space.newfloat(float(func(add_arg, argdesc, ll_type)))
             else:
@@ -615,6 +615,8 @@
 def wcharp2rawunicode(space, address, maxlength=-1):
     if maxlength == -1:
         return wcharp2unicode(space, address)
+    elif maxlength < 0:
+        maxlength = 0
     s = rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, address), maxlength)
     return space.newutf8(s, maxlength)
 
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -123,12 +123,12 @@
         if flags != 0:
             flag_items.append('0x%x' % flags)
         if len(flag_items) == 0:
-            usep = u''
-            uflags = u''
+            usep = ''
+            uflags = ''
         else:
-            usep = u', '
-            uflags = u'|'.join([item.decode('latin-1') for item in flag_items])
-        return space.newtext(u're.compile(%s%s%s)' % (u, usep, uflags))
+            usep = ', '
+            uflags = '|'.join(flag_items)
+        return space.newtext('re.compile(%s%s%s)' % (u, usep, uflags))
 
     def fget_groupindex(self, space):
         w_groupindex = self.w_groupindex
@@ -424,7 +424,7 @@
                 return space.newtext(unicodebuilder.build()), n
         else:
             if space.isinstance_w(w_string, space.w_unicode):
-                w_emptystr = space.newtext(u'')
+                w_emptystr = space.newtext('')
             else:
                 w_emptystr = space.newbytes('')
             w_item = space.call_method(w_emptystr, 'join',
@@ -528,10 +528,10 @@
         ctx = self.ctx
         start, end = ctx.match_start, ctx.match_end
         w_s = slice_w(space, ctx, start, end, space.w_None)
-        u = space.realunicode_w(space.repr(w_s))
+        u = space.utf8_w(space.repr(w_s))
         if len(u) > 50:
             u = u[:50]
-        return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
+        return space.newtext('<_sre.SRE_Match object; span=(%d, %d), match=%s>' %
                           (start, end, u))
 
     def cannot_copy_w(self):
diff --git a/pypy/module/_weakref/interp__weakref.py b/pypy/module/_weakref/interp__weakref.py
--- a/pypy/module/_weakref/interp__weakref.py
+++ b/pypy/module/_weakref/interp__weakref.py
@@ -178,7 +178,7 @@
     def descr__repr__(self, space):
         w_obj = self.dereference()
         if w_obj is None:
-            state = u'; dead'
+            state = '; dead'
         else:
             typename = space.type(w_obj).getname(space)
             objname = w_obj.getname(space)
@@ -186,7 +186,7 @@
                 state = "; to '%s' (%s)" % (typename, objname)
             else:
                 state = "; to '%s'" % (typename,)
-        return self.getrepr(space, unicode(self.typedef.name), state)
+        return self.getrepr(space, self.typedef.name, state)
 
 
 class W_Weakref(W_WeakrefBase):
diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py
--- a/pypy/module/_winreg/interp_winreg.py
+++ b/pypy/module/_winreg/interp_winreg.py
@@ -33,7 +33,7 @@
         return space.newint(self.as_int())
 
     def descr_repr(self, space):
-        return space.newtext(u"<PyHKEY:0x%x>" % (self.as_int(),))
+        return space.newtext("<PyHKEY:0x%x>" % (self.as_int(),))
 
     def descr_int(self, space):
         return space.newint(self.as_int())
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -935,7 +935,7 @@
   the file descriptor must refer to a directory.
   If this functionality is unavailable, using it raises NotImplementedError."""
     if space.is_none(w_path):
-        w_path = space.newtext(u".")
+        w_path = space.newtext(".")
     if space.isinstance_w(w_path, space.w_bytes):
         # XXX CPython doesn't follow this path either if w_path is,
         # for example, a memoryview or another buffer type
diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py
--- a/pypy/module/posix/interp_scandir.py
+++ b/pypy/module/posix/interp_scandir.py
@@ -14,7 +14,7 @@
 def scandir(space, w_path=None):
     "scandir(path='.') -> iterator of DirEntry objects for given path"
     if space.is_none(w_path):
-        w_path = space.newtext(u".")
+        w_path = space.newtext(".")
 
     if not _WIN32:
         if space.isinstance_w(w_path, space.w_bytes):
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -5,7 +5,7 @@
 import string
 
 from pypy.interpreter.error import OperationError, oefmt
-from rpython.rlib import rstring, runicode, rlocale, rfloat, jit, rutf8
+from rpython.rlib import rstring, rlocale, rfloat, jit, rutf8
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rfloat import formatd
 from rpython.rlib.rarithmetic import r_uint, intmask
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8-py3: try making space.newtext accept only utf8

Reply via email to