Author: fijal
Branch: unicode-utf8
Changeset: r90354:6af99b57c74f
Date: 2017-02-25 16:18 +0100
http://bitbucket.org/pypy/pypy/changeset/6af99b57c74f/
Log: get enough plumbing to start running more tests diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -159,6 +159,7 @@ def visit_utf8(self, el, app_sig): self.checked_space_method(el, app_sig) + self.orig_arg() # iterate def visit_nonnegint(self, el, app_sig): self.checked_space_method(el, app_sig) @@ -607,6 +608,16 @@ "the name of an argument of the following " "function" % (name,)) + if kw_spec: + filtered = [] + i = 0 + while i < len(unwrap_spec): + elem = unwrap_spec[i] + filtered.append(elem) + if elem == 'utf8': + i += 1 + i += 1 + unwrap_spec = filtered return unwrap_spec diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -74,3 +74,8 @@ uni, len(uni), "strict", errorhandler=raise_unicode_exception_encode, allow_surrogates=True) + +def utf8_encode_ascii(utf8, utf8len, errors, errorhandler): + if len(utf8) == utf8len: + return utf8 + xxx diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -371,15 +371,16 @@ from rpython.rlib import runicode def make_encoder_wrapper(name): - rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) - assert hasattr(runicode, rname) - @unwrap_spec(uni='utf8', errors='str_or_None') + rname = "utf8_encode_%s" % (name.replace("_encode", ""), ) + @unwrap_spec(utf8='utf8', errors='str_or_None') def wrap_encoder(space, utf8, utf8len, errors="strict"): + from pypy.interpreter import unicodehelper + if errors is None: errors = 'strict' state = space.fromcache(CodecState) - func = getattr(runicode, rname) - result = func(utf8, len(utf8), utf8len, + func = getattr(unicodehelper, rname) + result = func(utf8, utf8len, errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), 
space.newint(utf8len)]) wrap_encoder.func_name = rname @@ -438,10 +439,11 @@ # utf-8 functions are not regular, because we have to pass # "allow_surrogates=True" -@unwrap_spec(uni=unicode, errors='str_or_None') -def utf_8_encode(space, uni, errors="strict"): +@unwrap_spec(utf8='utf8', errors='str_or_None') +def utf_8_encode(space, utf8, utf8len, errors="strict"): if errors is None: errors = 'strict' + xxx state = space.fromcache(CodecState) # NB. can't call unicode_encode_utf_8() directly because that's # an @elidable function nowadays. Instead, we need the _impl(). @@ -605,8 +607,9 @@ final, state.decode_error_handler, mapping) return space.newtuple([space.newunicode(result), space.newint(consumed)]) -@unwrap_spec(uni=unicode, errors='str_or_None') -def charmap_encode(space, uni, errors="strict", w_mapping=None): +@unwrap_spec(utf8='utf8', errors='str_or_None') +def charmap_encode(space, utf8, utf8len, errors="strict", w_mapping=None): + xxx if errors is None: errors = 'strict' if space.is_none(w_mapping): @@ -621,9 +624,10 @@ return space.newtuple([space.newbytes(result), space.newint(len(uni))]) -@unwrap_spec(chars=unicode) -def charmap_build(space, chars): +@unwrap_spec(chars='utf8') +def charmap_build(space, chars, charslen): # XXX CPython sometimes uses a three-level trie + xxx w_charmap = space.newdict() for num in range(len(chars)): elem = chars[num] diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -4,7 +4,7 @@ compute_hash, compute_unique_id, import_from_mixin, enforceargs, newlist_hint) from rpython.rlib.buffer import StringBuffer -from rpython.rlib.rstring import StringBuilder, split, rsplit +from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder from rpython.rlib.runicode import ( make_unicode_escape_function, str_decode_ascii, str_decode_utf_8, unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii) 
@@ -111,7 +111,7 @@ return space.newint(ord(self._value[0])) def _new(self, value): - return W_UnicodeObject(value.encode('utf8', len(value))) + return W_UnicodeObject(value.encode('utf8'), len(value)) def _new_from_list(self, value): xxx @@ -153,7 +153,7 @@ assert len(char) == 1 return char[0] - _builder = StringBuilder + _builder = UnicodeBuilder def _isupper(self, ch): return unicodedb.isupper(ord(ch)) @@ -411,7 +411,8 @@ def descr_islower(self, space): cased = False - for uchar in self._value: + val = self._val(space) + for uchar in val: if (unicodedb.isupper(ord(uchar)) or unicodedb.istitle(ord(uchar))): return space.w_False @@ -421,7 +422,7 @@ def descr_isupper(self, space): cased = False - for uchar in self._value: + for uchar in self._val(space): if (unicodedb.islower(ord(uchar)) or unicodedb.istitle(ord(uchar))): return space.w_False @@ -466,7 +467,7 @@ lgt += w_u._length prealloc_size += len(unwrapped[i]) - sb = self._builder(prealloc_size) + sb = StringBuilder(prealloc_size) for i in range(size): if value and i != 0: sb.append(value) @@ -508,7 +509,7 @@ if num_zeros <= 0: # cannot return self, in case it is a subclass of str return W_UnicodeObject(selfval, self._len()) - builder = self._builder(num_zeros + len(selfval)) + builder = StringBuilder(num_zeros + len(selfval)) if len(selfval) > 0 and (selfval[0] == '+' or selfval[0] == '-'): # copy sign to first position builder.append(selfval[0]) @@ -569,9 +570,6 @@ return W_UnicodeObject(centered, self._len() + d) - def descr_title(self, space): - return - def wrapunicode(space, uni): return W_UnicodeObject(uni) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit