Author: fijal
Branch: unicode-utf8
Changeset: r90376:85fee86ba1f7
Date: 2017-02-27 12:11 +0100
http://bitbucket.org/pypy/pypy/changeset/85fee86ba1f7/

Log: (fijal, argio) whack until we run into a serious problem diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py --- a/pypy/module/__pypy__/__init__.py +++ b/pypy/module/__pypy__/__init__.py @@ -10,7 +10,7 @@ interpleveldefs = { "StringBuilder": "interp_builders.W_StringBuilder", - "UnicodeBuilder": "interp_builders.W_UnicodeBuilder", + #"UnicodeBuilder": "interp_builders.W_UnicodeBuilder", } class TimeModule(MixedModule): diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py --- a/pypy/module/__pypy__/interp_builders.py +++ b/pypy/module/__pypy__/interp_builders.py @@ -64,4 +64,4 @@ return W_Builder W_StringBuilder = create_builder("StringBuilder", str, StringBuilder, "newbytes") -W_UnicodeBuilder = create_builder("UnicodeBuilder", unicode, UnicodeBuilder, "newunicode") +#W_UnicodeBuilder = create_builder("UnicodeBuilder", unicode, UnicodeBuilder, "newunicode") diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -486,6 +486,7 @@ @unwrap_spec(data='bufferstr', errors='str_or_None', byteorder=int, w_final=WrappedDefault(False)) def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=None): + assert False, "fix in the future" if errors is None: errors = 'strict' final = space.is_true(w_final) @@ -507,6 +508,7 @@ @unwrap_spec(data='bufferstr', errors='str_or_None', byteorder=int, w_final=WrappedDefault(False)) def utf_32_ex_decode(space, data, errors='strict', byteorder=0, w_final=None): + assert False, "fix in the future" final = space.is_true(w_final) state = space.fromcache(CodecState) if byteorder == 0: diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -47,8 +47,8 @@ return NonConstant("foobar") identifier_w = bytes_w = str_w - def unicode_w(self, 
space): - return NonConstant(u"foobar") + def utf8_w(self, space): + return NonConstant("foobar") def int_w(self, space, allow_conversion=True): return NonConstant(-42) @@ -208,7 +208,7 @@ def newbytes(self, x): return w_some_obj() - def newunicode(self, x): + def newutf8(self, x, l): return w_some_obj() newtext = newbytes diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -17,7 +17,7 @@ from pypy.objspace.std.stringmethods import StringMethods from pypy.objspace.std.unicodeobject import ( decode_object, unicode_from_encoded_object, - getdefaultencoding) + getdefaultencoding, unicode_from_string) from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT @@ -53,17 +53,7 @@ return space.newint(uid) def convert_to_w_unicode(self, space): - # Use the default encoding. - encoding = getdefaultencoding(space) - if encoding == 'ascii': - try: - rutf8.check_ascii(self._value) - return space.newutf8(self._value, len(self._value)) - except rutf8.AsciiCheckError: - xxx - else: - xxx - return space.unicode_w(decode_object(space, self, encoding, None)) + return unicode_from_string(space, self) def descr_add(self, space, w_other): """x.__add__(y) <==> x+y""" diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -1032,7 +1032,7 @@ unilist = space.listview_unicode(w_iterable) if unilist is not None: - xxx + assert False, "disabled" w_list.strategy = strategy = space.fromcache(UnicodeListStrategy) # need to copy because intlist can share with w_iterable w_list.lstorage = strategy.erase(unilist[:]) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -203,6 +203,7 @@ return unichr(unicodedb.totitle(ord(ch))) def _newlist_unwrapped(self, space, lst): + 
assert False, "should not be called" return space.newlist_unicode(lst) @staticmethod diff --git a/rpython/annotator/listdef.py b/rpython/annotator/listdef.py --- a/rpython/annotator/listdef.py +++ b/rpython/annotator/listdef.py @@ -107,6 +107,9 @@ self.bookkeeper.annotator.reflowfromposition(position_key) def generalize(self, s_other_value): + if hasattr(self.s_value, 'can_be_None') and not self.s_value.can_be_None and getattr(s_other_value, 'can_be_None', False): + import pdb + pdb.set_trace() s_new_value = unionof(self.s_value, s_other_value) updated = s_new_value != self.s_value if updated: diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -671,7 +671,7 @@ return getbookkeeper().newlist(s_item) def method_rsplit(self, patt, max=-1): - s_item = self.basestringclass(no_nul=self.no_nul) + s_item = self.basestringclass(no_nul=self.no_nul, can_be_None=False) return getbookkeeper().newlist(s_item) def method_replace(self, s1, s2): @@ -696,7 +696,7 @@ if not s_enc.is_constant(): raise AnnotatorError("Non-constant encoding not supported") enc = s_enc.const - if enc not in ('ascii', 'latin-1', 'utf-8'): + if enc not in ('ascii', 'latin-1', 'utf-8', 'utf8'): raise AnnotatorError("Encoding %s not supported for unicode" % (enc,)) return SomeString(no_nul=self.no_nul) method_encode.can_only_throw = [] @@ -729,7 +729,7 @@ if not s_enc.is_constant(): raise AnnotatorError("Non-constant encoding not supported") enc = s_enc.const - if enc not in ('ascii', 'latin-1', 'utf-8'): + if enc not in ('ascii', 'latin-1', 'utf-8', 'utf8'): raise AnnotatorError("Encoding %s not supported for strings" % (enc,)) return SomeUnicodeString(no_nul=self.no_nul) method_decode.can_only_throw = [UnicodeDecodeError] diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -346,8 +346,6 @@ # # See also unicode_encode_utf8sp(). 
# - if errorhandler is None: - errorhandler = default_unicode_error_encode # NB. a bit messy because rtyper/rstr.py also calls the same # function. Make sure we annotate for the args it passes, too if NonConstant(False): @@ -361,6 +359,9 @@ def unicode_encode_utf_8_impl(s, size, errors, errorhandler, allow_surrogates=False): + # XXX hack + if errorhandler is None: + errorhandler = default_unicode_error_encode assert(size >= 0) result = StringBuilder(size) pos = 0 diff --git a/rpython/rlib/streamio.py b/rpython/rlib/streamio.py --- a/rpython/rlib/streamio.py +++ b/rpython/rlib/streamio.py @@ -708,7 +708,9 @@ assert stop >= 0 chunks.append(self.buf[:stop]) break - chunks.append(self.buf) + buf = self.buf + assert buf is not None + chunks.append(buf) return ''.join(chunks) def readline(self): diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py --- a/rpython/rtyper/rstr.py +++ b/rpython/rtyper/rstr.py @@ -335,7 +335,7 @@ return hop.gendirectcall(self.ll.ll_str2unicode, v_self) elif encoding == 'latin-1': return hop.gendirectcall(self.ll_decode_latin1, v_self) - elif encoding == 'utf-8': + elif encoding == 'utf-8' or encoding == 'utf8': return hop.gendirectcall(self.ll_decode_utf8, v_self) else: raise TyperError("encoding %s not implemented" % (encoding, )) @@ -408,7 +408,7 @@ return hop.gendirectcall(self.ll_str, v_self) elif encoding == "latin-1": return hop.gendirectcall(self.ll_encode_latin1, v_self) - elif encoding == 'utf-8': + elif encoding == 'utf-8' or encoding == 'utf8': return hop.gendirectcall(self.ll_encode_utf8, v_self) else: raise TyperError("encoding %s not implemented" % (encoding, )) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit