Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r94953:1b6dfea3eef5
Date: 2018-08-05 12:16 -0700
http://bitbucket.org/pypy/pypy/changeset/1b6dfea3eef5/
Log: try making space.newtext accept only utf8 diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -105,9 +105,9 @@ w_id = space.rshift(w_id, w_4) return ''.join(addrstring) - def getrepr(self, space, info, moreinfo=u''): - addrstring = unicode(self.getaddrstring(space)) - return space.newtext(u"<%s at 0x%s%s>" % (info, addrstring, moreinfo)) + def getrepr(self, space, info, moreinfo=''): + addrstring = self.getaddrstring(space) + return space.newtext("<%s at 0x%s%s>" % (info, addrstring, moreinfo)) def getslotvalue(self, index): raise NotImplementedError diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py --- a/pypy/interpreter/generator.py +++ b/pypy/interpreter/generator.py @@ -42,10 +42,8 @@ def descr__repr__(self, space): addrstring = self.getaddrstring(space) - return space.newtext(u"<%s object %s at 0x%s>" % - (unicode(self.KIND), - self.get_qualname(), - unicode(addrstring))) + return space.newtext("<%s object %s at 0x%s>" % + (self.KIND, self.get_qualname(), addrstring)) def descr_send(self, w_arg): """send(arg) -> send 'arg' into generator/coroutine, diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -1628,7 +1628,7 @@ if (oparg & consts.FVS_MASK) == consts.FVS_HAVE_SPEC: w_spec = self.popvalue() else: - w_spec = space.newtext(u'') + w_spec = space.newtext('') w_value = self.popvalue() # conversion = oparg & consts.FVC_MASK @@ -1649,9 +1649,9 @@ lst = [] for i in range(itemcount-1, -1, -1): w_item = self.peekvalue(i) - lst.append(space.realunicode_w(w_item)) + lst.append(space.utf8_w(w_item)) self.dropvalues(itemcount) - w_res = space.newtext(u''.join(lst)) + w_res = space.newtext(''.join(lst)) self.pushvalue(w_res) def _revdb_load_var(self, oparg): diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py 
--- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -74,8 +74,8 @@ force_ignore=False)[0] elif _MACOSX: bytes = space.bytes_w(w_string) - uni = runicode.str_decode_utf_8_impl( - bytes, len(bytes), 'surrogateescape', final=True, + uni = str_decode_utf8( + bytes, 'surrogateescape', final=True, errorhandler=state.decode_error_handler, allow_surrogates=False)[0] elif space.sys.filesystemencoding is None or state.codec_need_encodings: @@ -296,15 +296,13 @@ if sys.platform == 'win32': def utf8_encode_mbcs(s, slen, errors, errorhandler): - from rpython.rlib import runicode s = s.decode('utf-8') - res = runicode.unicode_encode_mbcs(s, slen, errors, errorhandler) + res = unicode_encode_mbcs(s, slen, errors, errorhandler) return res def str_decode_mbcs(s, errors, final, errorhandler): - from rpython.rlib import runicode slen = len(s) - res, size = runicode.str_decode_mbcs(s, slen, final=final, errors=errors, + res, size = str_decode_mbcs(s, slen, final=final, errors=errors, errorhandler=errorhandler) return res.encode('utf8'), len(res) diff --git a/pypy/module/__pypy__/interp_stderrprinter.py b/pypy/module/__pypy__/interp_stderrprinter.py --- a/pypy/module/__pypy__/interp_stderrprinter.py +++ b/pypy/module/__pypy__/interp_stderrprinter.py @@ -16,8 +16,8 @@ self.fd = fd def descr_repr(self, space): - addrstring = unicode(self.getaddrstring(space)) - return space.newtext(u"<StdErrPrinter(fd=%d) object at 0x%s>" % + addrstring = self.getaddrstring(space) + return space.newtext("<StdErrPrinter(fd=%d) object at 0x%s>" % (self.fd, addrstring)) def descr_noop(self, space): diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -3,7 +3,7 @@ from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import StringBuilder, UnicodeBuilder from rpython.rlib import runicode -from rpython.rlib.runicode 
import ( raw_unicode_escape_helper_unicode) +from rpython.rlib.runicode import raw_unicode_escape_helper_unicode from rpython.rlib import rutf8 from pypy.interpreter.error import OperationError, oefmt diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -219,11 +219,13 @@ self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: + writenl = self.writenl + if writenl is not None: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.newutf8(self.writenl, - get_utf8_length(self.writenl))) + space.newtext("\n"), + space.newutf8(writenl, get_utf8_length(writenl)), + ) string = space.utf8_w(w_decoded) if string: self.buf.write(string) diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -1,7 +1,7 @@ import sys from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize, always_inline, r_dict -from rpython.rlib import rfloat, runicode, rutf8 +from rpython.rlib import rfloat, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi from pypy.interpreter.error import oefmt, OperationError from rpython.rlib.rarithmetic import r_uint diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -451,7 +451,7 @@ return space.newbytes(func(add_arg, argdesc, ll_type)) elif c == 'u': return space.newutf8(rutf8.unichr_as_utf8( - ord(func(add_arg, argdesc, ll_type))), 1) + r_uint(ord(func(add_arg, argdesc, ll_type)))), 1) elif c == 'f' or c == 'd' or c == 'g': return space.newfloat(float(func(add_arg, argdesc, ll_type))) else: @@ -615,6 +615,8 @@ def wcharp2rawunicode(space, address, maxlength=-1): if maxlength == -1: 
return wcharp2unicode(space, address) + elif maxlength < 0: + maxlength = 0 s = rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, address), maxlength) return space.newutf8(s, maxlength) diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -123,12 +123,12 @@ if flags != 0: flag_items.append('0x%x' % flags) if len(flag_items) == 0: - usep = u'' - uflags = u'' + usep = '' + uflags = '' else: - usep = u', ' - uflags = u'|'.join([item.decode('latin-1') for item in flag_items]) - return space.newtext(u're.compile(%s%s%s)' % (u, usep, uflags)) + usep = ', ' + uflags = '|'.join(flag_items) + return space.newtext('re.compile(%s%s%s)' % (u, usep, uflags)) def fget_groupindex(self, space): w_groupindex = self.w_groupindex @@ -424,7 +424,7 @@ return space.newtext(unicodebuilder.build()), n else: if space.isinstance_w(w_string, space.w_unicode): - w_emptystr = space.newtext(u'') + w_emptystr = space.newtext('') else: w_emptystr = space.newbytes('') w_item = space.call_method(w_emptystr, 'join', @@ -528,10 +528,10 @@ ctx = self.ctx start, end = ctx.match_start, ctx.match_end w_s = slice_w(space, ctx, start, end, space.w_None) - u = space.realunicode_w(space.repr(w_s)) + u = space.utf8_w(space.repr(w_s)) if len(u) > 50: u = u[:50] - return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' % + return space.newtext('<_sre.SRE_Match object; span=(%d, %d), match=%s>' % (start, end, u)) def cannot_copy_w(self): diff --git a/pypy/module/_weakref/interp__weakref.py b/pypy/module/_weakref/interp__weakref.py --- a/pypy/module/_weakref/interp__weakref.py +++ b/pypy/module/_weakref/interp__weakref.py @@ -178,7 +178,7 @@ def descr__repr__(self, space): w_obj = self.dereference() if w_obj is None: - state = u'; dead' + state = '; dead' else: typename = space.type(w_obj).getname(space) objname = w_obj.getname(space) @@ -186,7 +186,7 @@ state = "; to '%s' (%s)" % (typename, objname) 
else: state = "; to '%s'" % (typename,) - return self.getrepr(space, unicode(self.typedef.name), state) + return self.getrepr(space, self.typedef.name, state) class W_Weakref(W_WeakrefBase): diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py --- a/pypy/module/_winreg/interp_winreg.py +++ b/pypy/module/_winreg/interp_winreg.py @@ -33,7 +33,7 @@ return space.newint(self.as_int()) def descr_repr(self, space): - return space.newtext(u"<PyHKEY:0x%x>" % (self.as_int(),)) + return space.newtext("<PyHKEY:0x%x>" % (self.as_int(),)) def descr_int(self, space): return space.newint(self.as_int()) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -935,7 +935,7 @@ the file descriptor must refer to a directory. If this functionality is unavailable, using it raises NotImplementedError.""" if space.is_none(w_path): - w_path = space.newtext(u".") + w_path = space.newtext(".") if space.isinstance_w(w_path, space.w_bytes): # XXX CPython doesn't follow this path either if w_path is, # for example, a memoryview or another buffer type diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -14,7 +14,7 @@ def scandir(space, w_path=None): "scandir(path='.') -> iterator of DirEntry objects for given path" if space.is_none(w_path): - w_path = space.newtext(u".") + w_path = space.newtext(".") if not _WIN32: if space.isinstance_w(w_path, space.w_bytes): diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -5,7 +5,7 @@ import string from pypy.interpreter.error import OperationError, oefmt -from rpython.rlib import rstring, runicode, rlocale, rfloat, jit, rutf8 +from rpython.rlib import rstring, rlocale, rfloat, jit, rutf8 from 
rpython.rlib.objectmodel import specialize from rpython.rlib.rfloat import formatd from rpython.rlib.rarithmetic import r_uint, intmask _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit