Author: Matti Picus <matti.pi...@gmail.com> Branch: unicode-utf8-py3 Changeset: r94761:4c4b3a83fd29 Date: 2018-06-13 21:00 -0700 http://bitbucket.org/pypy/pypy/changeset/4c4b3a83fd29/
Log: newunicode -> newtext, newtext now accepts utf8-encoded bytes or unicode diff --git a/pypy/interpreter/astcompiler/fstring.py b/pypy/interpreter/astcompiler/fstring.py --- a/pypy/interpreter/astcompiler/fstring.py +++ b/pypy/interpreter/astcompiler/fstring.py @@ -23,7 +23,7 @@ def f_constant_string(astbuilder, joined_pieces, u, atom_node): space = astbuilder.space - add_constant_string(astbuilder, joined_pieces, space.newunicode(u), + add_constant_string(astbuilder, joined_pieces, space.newtext(u), atom_node) def f_string_compile(astbuilder, source, atom_node): diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -107,7 +107,7 @@ def getrepr(self, space, info, moreinfo=u''): addrstring = unicode(self.getaddrstring(space)) - return space.newunicode(u"<%s at 0x%s%s>" % (info, addrstring, moreinfo)) + return space.newtext(u"<%s at 0x%s%s>" % (info, addrstring, moreinfo)) def getslotvalue(self, index): raise NotImplementedError diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -307,7 +307,7 @@ w_value = self._w_value if w_value is None: value = self._compute_value(space) - self._w_value = w_value = space.newunicode(value) + self._w_value = w_value = space.newtext(value) return w_value def _compute_value(self, space): @@ -626,7 +626,7 @@ msg = u'Windows Error %d' % winerror w_errno = space.w_None w_winerror = space.newint(winerror) - w_msg = space.newunicode(msg) + w_msg = space.newtext(msg) else: errno = e.errno if errno == EINTR: @@ -640,7 +640,7 @@ msg = u'error %d' % errno w_errno = space.newint(errno) w_winerror = space.w_None - w_msg = space.newunicode(msg) + w_msg = space.newtext(msg) if w_filename is None: w_filename = space.w_None @@ -672,7 +672,7 @@ def exception_from_errno(space, w_type, errno): msg = strerror(errno) w_error = space.call_function(w_type, space.newint(errno), - space.newunicode(msg)) + space.newtext(msg)) return OperationError(w_type, w_error) def exception_from_saved_errno(space, w_type): diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -313,7 +313,7 @@ tup_base = [] tup_state = [ space.newtext(self.name), - space.newunicode(self.qualname), + space.newtext(self.qualname), w_doc, self.code, w_func_globals, @@ -430,7 +430,7 @@ "__name__ must be set to a string object") def fget_func_qualname(self, space): - return space.newunicode(self.qualname) + return space.newtext(self.qualname) def fset_func_qualname(self, space, w_name): try: @@ -556,7 +556,7 @@ name = u'?' objrepr = space.unicode_w(space.repr(self.w_instance)) s = u'<bound method %s of %s>' % (name, objrepr) - return space.newunicode(s) + return space.newtext(s) def descr_method_getattribute(self, w_attr): space = self.space @@ -598,7 +598,7 @@ else: w_builtins = space.getbuiltinmodule('builtins') new_inst = space.getattr(w_builtins, space.newtext('getattr')) - tup = [w_instance, space.newunicode(w_function.getname(space))] + tup = [w_instance, space.newtext(w_function.getname(space))] return space.newtuple([new_inst, space.newtuple(tup)]) @@ -699,7 +699,7 @@ return self.space.newtext('<built-in function %s>' % (self.name,)) def descr__reduce__(self, space): - return space.newunicode(self.qualname) + return space.newtext(self.qualname) def is_builtin_code(w_func): from pypy.interpreter.gateway import BuiltinCode diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -1122,7 +1122,7 @@ kw_defs_w = [] for name, w_def in sorted(alldefs_w.items()): assert name in sig.kwonlyargnames - w_name = space.newunicode(name.decode('utf-8')) + w_name = space.newtext(name.decode('utf-8')) kw_defs_w.append((w_name, w_def)) return defs_w, kw_defs_w diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py --- a/pypy/interpreter/generator.py +++ b/pypy/interpreter/generator.py @@ -42,7 +42,7 @@ def descr__repr__(self, space): addrstring = self.getaddrstring(space) - return space.newunicode(u"<%s object %s at 0x%s>" % + return space.newtext(u"<%s object %s at 0x%s>" % (unicode(self.KIND), self.get_qualname(), unicode(addrstring))) @@ -215,7 +215,7 @@ e2.record_context(space, space.getexecutioncontext()) raise e2 else: - space.warn(space.newunicode(u"generator '%s' raised StopIteration" + space.warn(space.newtext(u"generator '%s' raised StopIteration" % self.get_qualname()), space.w_PendingDeprecationWarning) @@ -306,7 +306,7 @@ "__name__ must be set to a string object") def descr__qualname__(self, space): - return space.newunicode(self.get_qualname()) + return space.newtext(self.get_qualname()) def descr_set__qualname__(self, space, w_name): try: @@ -398,7 +398,7 @@ self.frame.last_instr == -1: space = self.space msg = u"coroutine '%s' was never awaited" % self.get_qualname() - space.warn(space.newunicode(msg), space.w_RuntimeWarning) + space.warn(space.newtext(msg), space.w_RuntimeWarning) GeneratorOrCoroutine._finalize_(self) diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py +++ b/pypy/interpreter/pycode.py @@ -454,6 +454,6 @@ # co_name should be an identifier name = self.co_name.decode('utf-8') fn = space.unicode_w(self.w_filename) - return space.newunicode(u'<code object %s at 0x%s, file "%s", line %d>' % ( + return space.newtext(u'<code object %s at 0x%s, file "%s", line %d>' % ( name, unicode(self.getaddrstring(space)), fn, -1 if self.co_firstlineno == 0 else self.co_firstlineno)) diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py --- a/pypy/interpreter/pyopcode.py +++ b/pypy/interpreter/pyopcode.py @@ -1081,7 +1081,7 @@ try: w_pkgname = space.getattr( w_module, space.newtext('__name__')) - w_fullname = space.newunicode(u'%s.%s' % + w_fullname = space.newtext(u'%s.%s' % (space.unicode_w(w_pkgname), space.unicode_w(w_name))) return space.getitem(space.sys.get('modules'), w_fullname) except OperationError: @@ -1626,7 +1626,7 @@ if (oparg & consts.FVS_MASK) == consts.FVS_HAVE_SPEC: w_spec = self.popvalue() else: - w_spec = space.newunicode(u'') + w_spec = space.newtext(u'') w_value = self.popvalue() # conversion = oparg & consts.FVC_MASK @@ -1649,7 +1649,7 @@ w_item = self.peekvalue(i) lst.append(space.unicode_w(w_item)) self.dropvalues(itemcount) - w_res = space.newunicode(u''.join(lst)) + w_res = space.newtext(u''.join(lst)) self.pushvalue(w_res) def _revdb_load_var(self, oparg): diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -42,7 +42,7 @@ if len(self.text) != offset: text, _ = str_decode_utf_8(self.text, len(self.text), 'replace') - w_text = space.newunicode(text) + w_text = space.newtext(text) return space.newtuple([ space.newtext(self.msg), space.newtuple([ diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -115,7 +115,7 @@ return W_FString(substr, rawmode) else: v = unicodehelper.decode_utf8(space, substr) - return space.newunicode(v) + return space.newtext(v) v = PyString_DecodeEscape(space, substr, 'strict', encoding) return space.newbytes(v) diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py --- a/pypy/interpreter/test/test_argument.py +++ b/pypy/interpreter/test/test_argument.py @@ -95,7 +95,6 @@ def wrap(self, obj): return obj newtext = wrap - newunicode = wrap def text_w(self, s): return self.unicode_w(s).encode('utf-8') diff --git a/pypy/interpreter/test/test_error.py b/pypy/interpreter/test/test_error.py --- a/pypy/interpreter/test/test_error.py +++ b/pypy/interpreter/test/test_error.py @@ -135,7 +135,7 @@ w_None = None def wrap(self, obj): return [obj] - newint = newtext = newunicode = newfilename = wrap + newint = newtext = newfilename = wrap def call_function(self, exc, w_errno, w_msg, w_filename=None, *args): return (exc, w_errno, w_msg, w_filename) space = FakeSpace() diff --git a/pypy/interpreter/test/test_fsencode.py b/pypy/interpreter/test/test_fsencode.py --- a/pypy/interpreter/test/test_fsencode.py +++ b/pypy/interpreter/test/test_fsencode.py @@ -70,7 +70,7 @@ strs.append(self.special_char) for st in strs: # check roundtrip - w_st = space.newunicode(st) + w_st = space.newtext(st) w_enc = space.fsencode(w_st) w_st2 = space.fsdecode(w_enc) assert space.eq_w(w_st, w_st2) @@ -81,7 +81,7 @@ def test_null_byte(self): space = self.space - w_u = space.newunicode(u'abc\x00def') + w_u = space.newtext(u'abc\x00def') # this can behave in two different ways depending on how # much initialized the space is: space.fsencode() can raise # ValueError directly, or return a wrapped bytes with the 0 @@ -94,7 +94,7 @@ if self.special_char: strs.append(self.special_char) for st in strs: - w_st = space.newunicode(st) + w_st = space.newtext(st) w_enc = space.fsencode(w_st) space.appexec([w_st, w_enc], """(u, s): import __pypy__ diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -87,7 +87,7 @@ return space.call_method(w_string, 'decode', getfilesystemencoding(space), space.newtext('surrogateescape')) - return space.newunicode(uni) + return space.newtext(uni) def fsencode(space, w_uni): from pypy.module._codecs import interp_codecs diff --git a/pypy/module/__builtin__/descriptor.py b/pypy/module/__builtin__/descriptor.py --- a/pypy/module/__builtin__/descriptor.py +++ b/pypy/module/__builtin__/descriptor.py @@ -37,7 +37,7 @@ starttype_name = self.w_starttype.getname(space) else: starttype_name = u'NULL' - return space.newunicode(u"<super: <class '%s'>, %s>" % ( + return space.newtext(u"<super: <class '%s'>, %s>" % ( starttype_name, objtype_name)) def get(self, space, w_obj, w_type=None): diff --git a/pypy/module/__pypy__/interp_stderrprinter.py b/pypy/module/__pypy__/interp_stderrprinter.py --- a/pypy/module/__pypy__/interp_stderrprinter.py +++ b/pypy/module/__pypy__/interp_stderrprinter.py @@ -17,7 +17,7 @@ def descr_repr(self, space): addrstring = unicode(self.getaddrstring(space)) - return space.newunicode(u"<StdErrPrinter(fd=%d) object at 0x%s>" % + return space.newtext(u"<StdErrPrinter(fd=%d) object at 0x%s>" % (self.fd, addrstring)) def descr_noop(self, space): diff --git a/pypy/module/_cffi_backend/cerrno.py b/pypy/module/_cffi_backend/cerrno.py --- a/pypy/module/_cffi_backend/cerrno.py +++ b/pypy/module/_cffi_backend/cerrno.py @@ -27,4 +27,4 @@ if code == -1: code = GetLastError_alt_saved() message = FormatErrorW(code) - return space.newtuple([space.newint(code), space.newunicode(message)]) + return space.newtuple([space.newint(code), space.newtext(message)]) diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -298,7 +298,7 @@ oc = ord(obj[pos]) raw_unicode_escape_helper_unicode(builder, oc) pos += 1 - return space.newtuple([space.newunicode(builder.build()), w_end]) + return space.newtuple([space.newtext(builder.build()), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError): obj = space.bytes_w(space.getattr(w_exc, space.newtext('object'))) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) @@ -310,7 +310,7 @@ oc = ord(obj[pos]) raw_unicode_escape_helper_unicode(builder, oc) pos += 1 - return space.newtuple([space.newunicode(builder.build()), w_end]) + return space.newtuple([space.newtext(builder.build()), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -456,7 +456,7 @@ ch = 0 if ch == 0: raise OperationError(space.type(w_exc), w_exc) - return space.newtuple([space.newunicode(unichr(ch)), + return space.newtuple([space.newtext(unichr(ch)), space.newint(start + bytelength)]) else: raise oefmt(space.w_TypeError, @@ -495,7 +495,7 @@ if not consumed: # codec complained about ASCII byte. raise OperationError(space.type(w_exc), w_exc) - return space.newtuple([space.newunicode(replace), + return space.newtuple([space.newtext(replace), space.newint(start + consumed)]) else: raise oefmt(space.w_TypeError, @@ -746,7 +746,7 @@ string, len(string), errors, final, state.decode_error_handler, force_ignore=False) - return space.newtuple([space.newunicode(result), space.newint(consumed)]) + return space.newtuple([space.newtext(result), space.newint(consumed)]) # utf-8 functions are not regular, because we have to pass # "allow_surrogates=False" @@ -1014,7 +1014,7 @@ result, consumed = runicode.str_decode_raw_unicode_escape( string, len(string), errors, final, state.decode_error_handler) - return space.newtuple([space.newunicode(result), space.newint(consumed)]) + return space.newtuple([space.newtext(result), space.newint(consumed)]) # ____________________________________________________________ # Unicode-internal diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py --- a/pypy/module/_csv/interp_csv.py +++ b/pypy/module/_csv/interp_csv.py @@ -156,12 +156,12 @@ def _get_escapechar(space, dialect): if dialect.escapechar == u'\0': return space.w_None - return space.newunicode(dialect.escapechar) + return space.newtext(dialect.escapechar) def _get_quotechar(space, dialect): if dialect.quotechar == u'\0': return space.w_None - return space.newunicode(dialect.quotechar) + return space.newtext(dialect.quotechar) W_Dialect.typedef = TypeDef( @@ -169,12 +169,12 @@ __new__ = interp2app(W_Dialect___new__), delimiter = interp_attrproperty('delimiter', W_Dialect, - wrapfn='newunicode'), + wrapfn='newtext'), doublequote = interp_attrproperty('doublequote', W_Dialect, wrapfn='newbool'), escapechar = GetSetProperty(_get_escapechar, cls=W_Dialect), lineterminator = interp_attrproperty('lineterminator', W_Dialect, - wrapfn='newunicode'), + wrapfn='newtext'), quotechar = GetSetProperty(_get_quotechar, cls=W_Dialect), quoting = interp_attrproperty('quoting', W_Dialect, wrapfn='newint'), diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py --- a/pypy/module/_csv/interp_reader.py +++ b/pypy/module/_csv/interp_reader.py @@ -31,7 +31,7 @@ msg = u'line %d: %s' % (self.line_num, msg) w_module = space.getbuiltinmodule('_csv') w_error = space.getattr(w_module, space.newtext('Error')) - raise OperationError(w_error, space.newunicode(msg)) + raise OperationError(w_error, space.newtext(msg)) def add_char(self, field_builder, c): assert field_builder is not None @@ -44,9 +44,9 @@ field = field_builder.build() if self.numeric_field: self.numeric_field = False - w_obj = space.call_function(space.w_float, space.newunicode(field)) + w_obj = space.call_function(space.w_float, space.newtext(field)) else: - w_obj = space.newunicode(field) + w_obj = space.newtext(field) self.fields_w.append(w_obj) def next_w(self): diff --git a/pypy/module/_csv/interp_writer.py b/pypy/module/_csv/interp_writer.py --- a/pypy/module/_csv/interp_writer.py +++ b/pypy/module/_csv/interp_writer.py @@ -115,7 +115,7 @@ rec.append(dialect.lineterminator) line = rec.build() - return space.call_function(self.w_filewrite, space.newunicode(line)) + return space.call_function(self.w_filewrite, space.newtext(line)) def writerows(self, w_seqseq): """Construct and write a series of sequences to a csv file. diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -732,7 +732,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self.decoded.get_chars(-1)) + w_result = space.newtext(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -771,7 +771,7 @@ self._check_closed(space) self._writeflush(space) limit = convert_size(space, w_limit) - return space.newunicode(self._readline(space, limit)) + return space.newtext(self._readline(space, limit)) def _readline(self, space, limit): # This is a separate function so that readline_w() can be jitted. diff --git a/pypy/module/_lsprof/interp_lsprof.py b/pypy/module/_lsprof/interp_lsprof.py --- a/pypy/module/_lsprof/interp_lsprof.py +++ b/pypy/module/_lsprof/interp_lsprof.py @@ -253,7 +253,7 @@ s = create_spec_for_object(space, self.w_type) else: s = create_spec_for_method(space, self.w_func, self.w_type) - self.w_string = space.newunicode(s) + self.w_string = space.newtext(s) return self.w_string W_DelayedBuiltinStr.typedef = TypeDef( diff --git a/pypy/module/_multiprocessing/interp_win32_py3.py b/pypy/module/_multiprocessing/interp_win32_py3.py --- a/pypy/module/_multiprocessing/interp_win32_py3.py +++ b/pypy/module/_multiprocessing/interp_win32_py3.py @@ -9,7 +9,7 @@ message = rwin32.FormatErrorW(errno) w_errcode = space.newint(errno) return OperationError(space.w_WindowsError, - space.newtuple([w_errcode, space.newunicode(message), + space.newtuple([w_errcode, space.newtext(message), space.w_None, w_errcode])) @unwrap_spec(handle=int) diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -630,7 +630,7 @@ if _MS_WINDOWS: @unwrap_spec(code=int) def FormatError(space, code): - return space.newunicode(rwin32.FormatErrorW(code)) + return space.newtext(rwin32.FormatErrorW(code)) @unwrap_spec(hresult=int) def check_HRESULT(space, hresult): diff --git a/pypy/module/_socket/interp_socket.py b/pypy/module/_socket/interp_socket.py --- a/pypy/module/_socket/interp_socket.py +++ b/pypy/module/_socket/interp_socket.py @@ -235,7 +235,7 @@ try: msg = (u"unclosed %s" % space.unicode_w(space.repr(self))) - space.warn(space.newunicode(msg), space.w_ResourceWarning) + space.warn(space.newtext(msg), space.w_ResourceWarning) except OperationError as e: # Spurious errors can appear at shutdown if e.match(space, space.w_Warning): @@ -863,9 +863,9 @@ if eintr_retry: return # only return None if eintr_retry==True w_exception = space.call_function(w_exception_class, space.newint(e.errno), - space.newunicode(message)) + space.newtext(message)) else: - w_exception = space.call_function(w_exception_class, space.newunicode(message)) + w_exception = space.call_function(w_exception_class, space.newtext(message)) raise OperationError(w_exception_class, w_exception) def explicit_socket_error(space, msg): diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -134,7 +134,7 @@ else: usep = u', ' uflags = u'|'.join([item.decode('latin-1') for item in flag_items]) - return space.newunicode(u're.compile(%s%s%s)' % (u, usep, uflags)) + return space.newtext(u're.compile(%s%s%s)' % (u, usep, uflags)) def fget_groupindex(self, space): w_groupindex = self.w_groupindex @@ -568,7 +568,7 @@ u = space.unicode_w(space.repr(w_s)) if len(u) > 50: u = u[:50] - return space.newunicode(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' % + return space.newtext(u'<_sre.SRE_Match object; span=(%d, %d), match=%s>' % (start, end, u)) def cannot_copy_w(self): diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -250,7 +250,7 @@ message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno, space.unicode_w(w_name), space.unicode_w(w_text)) - space.call_method(w_stderr, "write", space.newunicode(message)) + space.call_method(w_stderr, "write", space.newtext(message)) # Print " source_line\n" if not w_sourceline: @@ -277,7 +277,7 @@ if c not in u' \t\014': message = u" %s\n" % (line[i:],) break - space.call_method(w_stderr, "write", space.newunicode(message)) + space.call_method(w_stderr, "write", space.newtext(message)) def do_warn(space, w_message, w_category, stacklevel): context_w = setup_context(space, stacklevel) diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py --- a/pypy/module/_winreg/interp_winreg.py +++ b/pypy/module/_winreg/interp_winreg.py @@ -11,7 +11,7 @@ message = rwin32.FormatErrorW(errcode) w_errcode = space.newint(errcode) raise OperationError(space.w_WindowsError, - space.newtuple([w_errcode, space.newunicode(message), + space.newtuple([w_errcode, space.newtext(message), space.w_None, w_errcode])) class W_HKEY(W_Root): @@ -33,7 +33,7 @@ return space.newint(self.as_int()) def descr_repr(self, space): - return space.newunicode(u"<PyHKEY:0x%x>" % (self.as_int(),)) + return space.newtext(u"<PyHKEY:0x%x>" % (self.as_int(),)) def descr_int(self, space): return space.newint(self.as_int()) @@ -271,7 +271,7 @@ raiseWindowsError(space, ret, 'RegQueryValue') length = intmask(bufsize_p[0] - 1) / 2 wide_buf = rffi.cast(rffi.CWCHARP, buf) - return space.newunicode(rffi.wcharp2unicoden(wide_buf, length)) + return space.newtext(rffi.wcharp2unicoden(wide_buf, length)) def convert_to_regdata(space, w_value, typ): ''' @@ -378,7 +378,7 @@ if buf[buflen - 1] == '\x00': buflen -= 1 s = rffi.wcharp2unicoden(buf, buflen) - w_s = space.newunicode(s) + w_s = space.newtext(s) return w_s elif typ == rwinreg.REG_MULTI_SZ: @@ -396,7 +396,7 @@ if len(s) == 0: break s = u''.join(s) - l.append(space.newunicode(s)) + l.append(space.newtext(s)) i += 1 return space.newlist(l) @@ -645,7 +645,7 @@ length = intmask(retDataSize[0]) return space.newtuple([ - space.newunicode(rffi.wcharp2unicode(valuebuf)), + space.newtext(rffi.wcharp2unicode(valuebuf)), convert_from_regdata(space, databuf, length, retType[0]), space.newint(intmask(retType[0])), @@ -678,7 +678,7 @@ lltype.nullptr(rwin32.PFILETIME.TO)) if ret != 0: raiseWindowsError(space, ret, 'RegEnumKeyEx') - return space.newunicode(rffi.wcharp2unicode(rffi.cast(rffi.CWCHARP, buf))) + return space.newtext(rffi.wcharp2unicode(rffi.cast(rffi.CWCHARP, buf))) def QueryInfoKey(space, w_hkey): """tuple = QueryInfoKey(key) - Returns information about a key. diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py --- a/pypy/module/array/interp_array.py +++ b/pypy/module/array/interp_array.py @@ -755,7 +755,7 @@ elif self.typecode == "u": r = space.repr(self.descr_tounicode(space)) s = u"array('u', %s)" % space.unicode_w(r) - return space.newunicode(s) + return space.newtext(s) else: r = space.repr(self.descr_tolist(space)) s = "array('%s', %s)" % (self.typecode, space.text_w(r)) @@ -1141,7 +1141,7 @@ raise oefmt(space.w_ValueError, "array contains a unicode character out of " "range(0x110000)") - return space.newunicode(item) + return space.newtext(item) assert 0, "unreachable" # interface diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -1710,7 +1710,7 @@ msg = u"function %s not found in library %s" % ( look_for.decode('utf-8'), space.unicode_w(space.newfilename(path))) w_path = space.newfilename(path) - raise_import_error(space, space.newunicode(msg), w_name, w_path) + raise_import_error(space, space.newtext(msg), w_name, w_path) def get_init_name(space, w_name): name_u = space.unicode_w(w_name) @@ -1720,7 +1720,7 @@ return 'PyInit_%s' % (basename,) except UnicodeEncodeError: basename = space.bytes_w(encode_object( - space, space.newunicode(basename_u), 'punycode', None)) + space, space.newtext(basename_u), 'punycode', None)) basename = basename.replace('-', '_') return 'PyInitU_%s' % (basename,) diff --git a/pypy/module/cpyext/pyerrors.py b/pypy/module/cpyext/pyerrors.py --- a/pypy/module/cpyext/pyerrors.py +++ b/pypy/module/cpyext/pyerrors.py @@ -215,12 +215,12 @@ if w_value: w_error = space.call_function(w_type, space.newint(errno), - space.newunicode(msg), + space.newtext(msg), w_value) else: w_error = space.call_function(w_type, space.newint(errno), - space.newunicode(msg)) + space.newtext(msg)) raise OperationError(w_type, w_error) @cpython_api([], rffi.INT_real, error=-1) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -920,7 +920,7 @@ if decimal >= 0: ch = unichr(ord('0') + decimal) result.append(ch) - return space.newunicode(result.build()) + return space.newtext(result.build()) @cpython_api([PyObject, PyObject], rffi.INT_real, error=-2) def PyUnicode_Compare(space, w_left, w_right): @@ -1064,4 +1064,4 @@ if end > length: end = length result = usrc[start:end] - return space.newunicode(result) + return space.newtext(result) diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -155,7 +155,7 @@ else: args_repr = u"()" clsname = self.getclass(space).getname(space) - return space.newunicode(clsname + args_repr) + return space.newtext(clsname + args_repr) def __repr__(self): """representation for debugging purposes""" @@ -599,26 +599,26 @@ # If available, winerror has the priority over errno if self.w_filename: if self.w_filename2: - return space.newunicode(u"[WinError %s] %s: %s -> %s" % ( + return space.newtext(u"[WinError %s] %s: %s -> %s" % ( winerror, strerror, space.unicode_w(space.repr(self.w_filename)), space.unicode_w(space.repr(self.w_filename2)))) - return space.newunicode(u"[WinError %s] %s: %s" % ( + return space.newtext(u"[WinError %s] %s: %s" % ( winerror, strerror, space.unicode_w(space.repr(self.w_filename)))) - return space.newunicode(u"[WinError %s] %s" % ( + return space.newtext(u"[WinError %s] %s" % ( winerror, strerror)) if self.w_filename: if self.w_filename2: - return space.newunicode(u"[Errno %s] %s: %s -> %s" % ( + return space.newtext(u"[Errno %s] %s: %s -> %s" % ( errno, strerror, space.unicode_w(space.repr(self.w_filename)), space.unicode_w(space.repr(self.w_filename2)))) - return space.newunicode(u"[Errno %s] %s: %s" % ( + return space.newtext(u"[Errno %s] %s: %s" % ( errno, strerror, space.unicode_w(space.repr(self.w_filename)))) if self.w_errno and self.w_strerror: - return space.newunicode(u"[Errno %s] %s" % ( + return space.newtext(u"[Errno %s] %s" % ( errno, strerror)) return W_BaseException.descr_str(self, space) @@ -787,7 +787,7 @@ args_w = [self.args_w[0], w_tuple] args_repr = space.unicode_w(space.repr(space.newtuple(args_w))) clsname = self.getclass(space).getname(space) - return space.newunicode(clsname + args_repr) + return space.newtext(clsname + args_repr) else: return W_Exception.descr_repr(self, space) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -838,7 +838,7 @@ def strerror(space, code): """Translate an error code to a message string.""" try: - return space.newunicode(_strerror(code)) + return space.newtext(_strerror(code)) except ValueError: raise oefmt(space.w_ValueError, "strerror() argument out of range") @@ -885,7 +885,7 @@ # started through main() instead of wmain() rwin32._wgetenv(u"") for key, value in rwin32._wenviron_items(): - space.setitem(w_env, space.newunicode(key), space.newunicode(value)) + space.setitem(w_env, space.newtext(key), space.newunicode(value)) @unwrap_spec(name=unicode, value=unicode) def putenv(space, name, value): @@ -935,7 +935,7 @@ the file descriptor must refer to a directory. If this functionality is unavailable, using it raises NotImplementedError.""" if space.is_none(w_path): - w_path = space.newunicode(u".") + w_path = space.newtext(u".") if space.isinstance_w(w_path, space.w_bytes): # XXX CPython doesn't follow this path either if w_path is, # for example, a memoryview or another buffer type @@ -968,7 +968,7 @@ result_w = [None] * len_result for i in range(len_result): if _WIN32: - result_w[i] = space.newunicode(result[i]) + result_w[i] = space.newtext(result[i]) else: result_w[i] = space.newfilename(result[i]) return space.newlist(result_w) @@ -2266,7 +2266,7 @@ space.newtext(e.msg)) except OSError as e: raise wrap_oserror2(space, e, w_path, eintr_retry=False) - return space.newunicode(result) + return space.newtext(result) def chflags(): diff --git a/pypy/module/posix/interp_scandir.py b/pypy/module/posix/interp_scandir.py --- a/pypy/module/posix/interp_scandir.py +++ b/pypy/module/posix/interp_scandir.py @@ -14,7 +14,7 @@ def scandir(space, w_path=None): "scandir(path='.') -> iterator of DirEntry objects for given path" if space.is_none(w_path): - w_path = space.newunicode(u".") + w_path = space.newtext(u".") if not _WIN32: if space.isinstance_w(w_path, space.w_bytes): @@ -45,7 +45,7 @@ else: if len(path_prefix) > 0 and path_prefix[-1] not in (u'\\', u'/', u':'): path_prefix += u'\\' - w_path_prefix = space.newunicode(path_prefix) + w_path_prefix = space.newtext(path_prefix) if rposix.HAVE_FSTATAT: dirfd = rposix.c_dirfd(dirp) else: @@ -153,12 +153,12 @@ if not scandir_iterator.result_is_bytes: w_name = self.space.fsdecode(w_name) else: - w_name = self.space.newunicode(name) + w_name = self.space.newtext(name) self.w_name = w_name def descr_repr(self, space): u = space.unicode_w(space.repr(self.w_name)) - return space.newunicode(u"<DirEntry %s>" % u) + return space.newtext(u"<DirEntry %s>" % u) def fget_name(self, space): return self.w_name diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py --- a/pypy/module/pypyjit/test_pypy_c/test_string.py +++ b/pypy/module/pypyjit/test_pypy_c/test_string.py @@ -111,7 +111,7 @@ i19 = int_sub(i6, i87) i23 = unicodegetitem(ConstPtr(ptr92), i19) - p25 = newunicode(1) + p25 = newtext(1) unicodesetitem(p25, 0, i23) p97 = call_r(ConstClass(_rpy_unicode_to_decimal_w), p25, descr=<Callr . r EF=5>) guard_no_exception(descr=...) diff --git a/pypy/module/select/interp_select.py b/pypy/module/select/interp_select.py --- a/pypy/module/select/interp_select.py +++ b/pypy/module/select/interp_select.py @@ -83,7 +83,7 @@ message = e.get_msg_unicode() raise OperationError(space.w_OSError, space.newtuple([space.newint(e.errno), - space.newunicode(message)])) + space.newtext(message)])) finally: self.running = False break @@ -154,7 +154,7 @@ if err != errno.EINTR: msg = _c.socket_strerror_unicode(err) raise OperationError(space.w_OSError, space.newtuple([ - space.newint(err), space.newunicode(msg)])) + space.newint(err), space.newtext(msg)])) # got EINTR, automatic retry space.getexecutioncontext().checksignals() if timeout > 0.0: diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py --- a/pypy/module/time/interp_time.py +++ b/pypy/module/time/interp_time.py @@ -459,8 +459,8 @@ _set_module_object(space, "timezone", space.newint(timezone)) _set_module_object(space, 'daylight', space.newint(daylight)) - tzname_w = [space.newunicode(tzname[0].decode('latin-1')), - space.newunicode(tzname[1].decode('latin-1'))] + tzname_w = [space.newtext(tzname[0].decode('latin-1')), + space.newtext(tzname[1].decode('latin-1'))] _set_module_object(space, 'tzname', space.newtuple(tzname_w)) _set_module_object(space, 'altzone', space.newint(altzone)) @@ -556,7 +556,7 @@ # CPython calls PyUnicode_DecodeLocale here should we do the same? tm_zone = decode_utf8(space, rffi.charp2str(t.c_tm_zone), allow_surrogates=True) - extra = [space.newunicode(tm_zone), + extra = [space.newtext(tm_zone), space.newint(rffi.getintfield(t, 'c_tm_gmtoff'))] w_time_tuple = space.newtuple(time_tuple + extra) else: @@ -579,7 +579,7 @@ lltype.free(t_ref, flavor='raw') if not pbuf: raise OperationError(space.w_ValueError, - space.newunicode(_get_error_msg())) + space.newtext(_get_error_msg())) return pbuf tup_w = space.fixedview(w_tup) @@ -745,7 +745,7 @@ if not p: raise OperationError(space.w_ValueError, - space.newunicode(_get_error_msg())) + space.newtext(_get_error_msg())) return _tm_to_tuple(space, p) def localtime(space, w_seconds=None): @@ -763,7 +763,7 @@ if not p: raise OperationError(space.w_OSError, - space.newunicode(_get_error_msg())) + space.newtext(_get_error_msg())) return _tm_to_tuple(space, p) def mktime(space, w_tup): diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1437,7 +1437,7 @@ typename = space.type(self).getname(space) w_seq = space.call_function(space.w_list, self) seq_repr = space.unicode_w(space.repr(w_seq)) - return space.newunicode(u"%s(%s)" % (typename, seq_repr)) + return space.newtext(u"%s(%s)" % (typename, seq_repr)) def descr_len(self, space): return space.len(self.w_dict) diff --git a/pypy/objspace/std/dictproxyobject.py b/pypy/objspace/std/dictproxyobject.py --- a/pypy/objspace/std/dictproxyobject.py +++ b/pypy/objspace/std/dictproxyobject.py @@ -44,7 +44,7 @@ return space.str(self.w_mapping) def descr_repr(self, space): - return space.newunicode(u"mappingproxy(%s)" % + return space.newtext(u"mappingproxy(%s)" % (space.unicode_w(space.repr(self.w_mapping)),)) @unwrap_spec(w_default=WrappedDefault(None)) diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -371,9 +371,9 @@ m.atom_str(TYPE_STRING, x.co_code) _marshal_tuple(space, x.co_consts_w, m) _marshal_tuple(space, x.co_names_w, m) # list of w_unicodes - co_varnames_w = [space.newunicode(_decode_utf8(space, s)) for s in x.co_varnames] - co_freevars_w = [space.newunicode(_decode_utf8(space, s)) for s in x.co_freevars] - co_cellvars_w = [space.newunicode(_decode_utf8(space, s)) for s in x.co_cellvars] + co_varnames_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_varnames] + co_freevars_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_freevars] + co_cellvars_w = [space.newtext(_decode_utf8(space, s)) for s in x.co_cellvars] _marshal_tuple(space, co_varnames_w, m) # more lists, now of w_unicodes _marshal_tuple(space, co_freevars_w, m) _marshal_tuple(space, co_cellvars_w, m) @@ -453,7 +453,7 @@ @unmarshaller(TYPE_UNICODE) def unmarshal_unicode(space, u, tc): uc = _decode_utf8(space, u.get_str()) - return space.newunicode(uc) + return space.newtext(uc) @unmarshaller(TYPE_INTERNED) def unmarshal_interned(space, u, tc): @@ -466,7 +466,7 @@ else: lng = u.get_lng() s = u.get(lng) - w_u = u.space.newunicode(s.decode('latin-1')) + w_u = u.space.newtext(s.decode('latin-1')) if interned: w_u = u.space.new_interned_w_str(w_u) return w_u diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -219,7 +219,7 @@ if index == -1: kwarg = name[:i] if self.is_unicode: - w_kwarg = space.newunicode(kwarg) + w_kwarg = space.newtext(kwarg) else: w_kwarg = space.newbytes(kwarg) w_arg = space.getitem(self.w_kwargs, w_kwarg) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -382,13 +382,18 @@ return W_MemoryView(view) def newbytes(self, s): - assert isinstance(s, str) + assert isinstance(s, bytes) return W_BytesObject(s) def newbytearray(self, l): return W_BytearrayObject(l) + @specialize.argtype(1) def newtext(self, s): + if isinstance(s, str): + s, lgt, chk = str_decode_utf8(s, "string", True, None, + allow_surrogates=True) + return W_UnicodeObject(s, lgt) lgt = rutf8.check_utf8(s, True) return W_UnicodeObject(s, lgt) @@ -399,7 +404,6 @@ def newutf8(self, utf8s, length): assert utf8s is not None - assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length) def newfilename(self, s): diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -1304,10 +1304,6 @@ return obj.decode('ascii') return obj - def newunicode(self, u): - assert isinstance(u, unicode) - return u - def newtext(self, string): assert isinstance(string, str) return string.decode('utf-8') diff --git a/pypy/objspace/std/test/test_stdobjspace.py b/pypy/objspace/std/test/test_stdobjspace.py --- a/pypy/objspace/std/test/test_stdobjspace.py +++ b/pypy/objspace/std/test/test_stdobjspace.py @@ -14,7 +14,7 @@ def test_utf8(self): assert self.space.isinstance_w(self.space.newtext("abc"), self.space.w_unicode) - assert self.space.eq_w(self.space.newtext("üöä"), self.space.newunicode(u"üöä")) + assert self.space.eq_w(self.space.newtext("üöä"), self.space.newtext(u"üöä")) def test_str_w_non_str(self): raises(OperationError,self.space.str_w,self.space.wrap(None)) diff --git a/pypy/objspace/std/tupleobject.py b/pypy/objspace/std/tupleobject.py --- a/pypy/objspace/std/tupleobject.py +++ b/pypy/objspace/std/tupleobject.py @@ -102,11 +102,11 @@ def descr_repr(self, space): items = self.tolist() if len(items) == 1: - return space.newunicode( + return space.newtext( u"(" + space.unicode_w(space.repr(items[0])) + u",)") tmp = u", ".join([space.unicode_w(space.repr(item)) for item in items]) - return space.newunicode(u"(" + tmp + u")") + return space.newtext(u"(" + tmp + u")") def descr_hash(self, space): raise NotImplementedError diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -725,7 +725,7 @@ else: mod = space.unicode_w(w_mod) if mod is not None and mod != u'builtins': - return space.newunicode(u"<class '%s.%s'>" % (mod, self.getqualname(space))) + return space.newtext(u"<class '%s.%s'>" % (mod, self.getqualname(space))) else: return space.newtext("<class '%s'>" % (self.name,)) @@ -846,7 +846,7 @@ def descr_get__name__(space, w_type): w_type = _check(space, w_type) - return space.newunicode(w_type.getname(space)) + return space.newtext(w_type.getname(space)) def descr_set__name__(space, w_type, w_value): w_type = _check(space, w_type) @@ -863,7 +863,7 @@ def descr_get__qualname__(space, w_type): w_type = _check(space, w_type) - return space.newunicode(w_type.getqualname(space)) + return space.newtext(w_type.getqualname(space)) def descr_set__qualname__(space, w_type, w_value): w_type = _check(space, w_type) @@ -1453,7 +1453,7 @@ cycle.reverse() names = [cls.getname(space) for cls in cycle] # Can't use oefmt() here, since names is a list of unicodes - raise OperationError(space.w_TypeError, space.newunicode( + raise OperationError(space.w_TypeError, space.newtext( u"cycle among base classes: " + u' < '.join(names))) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -34,7 +34,7 @@ @enforceargs(utf8str=str) def __init__(self, utf8str, length): - assert isinstance(utf8str, str) + assert isinstance(utf8str, bytes) assert length >= 0 self._utf8 = utf8str self._length = length @@ -283,7 +283,7 @@ if space.is_w(space.type(self), space.w_unicode): return self # Subtype -- return genuine unicode string with the same value. - return space.newunicode(space.unicode_w(self)) + return space.newtext(space.unicode_w(self)) def descr_hash(self, space): x = compute_hash(self._utf8) @@ -350,7 +350,7 @@ arg = __args__.keywords[i].decode('utf-8') except UnicodeDecodeError: continue # uh, just skip that - space.setitem(w_kwds, space.newunicode(arg), + space.setitem(w_kwds, space.newtext(arg), __args__.keywords_w[i]) def descr_format(self, space, __args__): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit