Author: fijal
Branch: unicode-utf8
Changeset: r90373:77af71423e68
Date: 2017-02-26 23:41 +0100
http://bitbucket.org/pypy/pypy/changeset/77af71423e68/
Log: fixes

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -604,6 +604,8 @@

 @unwrap_spec(string='bufferstr', errors='str_or_None')
 def charmap_decode(space, string, errors="strict", w_mapping=None):
+    from pypy.interpreter.unicodehelper import DecodeWrapper
+
     if errors is None:
         errors = 'strict'
     if len(string) == 0:
@@ -618,12 +620,13 @@
     state = space.fromcache(CodecState)
     result, consumed = runicode.str_decode_charmap(
         string, len(string), errors,
-        final, state.decode_error_handler, mapping)
+        final, DecodeWrapper(state.decode_error_handler).handle, mapping)
     return space.newtuple([space.newunicode(result), space.newint(consumed)])

 @unwrap_spec(utf8='utf8', errors='str_or_None')
 def charmap_encode(space, utf8, utf8len, errors="strict", w_mapping=None):
-    xxx
+    from pypy.interpreter.unicodehelper import EncodeWrapper
+
     if errors is None:
         errors = 'strict'
     if space.is_none(w_mapping):
@@ -632,20 +635,24 @@
         mapping = Charmap_Encode(space, w_mapping)

     state = space.fromcache(CodecState)
+    uni = utf8.decode('utf8')
     result = runicode.unicode_encode_charmap(
         uni, len(uni), errors,
-        state.encode_error_handler, mapping)
+        EncodeWrapper(state.encode_error_handler).handle, mapping)
     return space.newtuple([space.newbytes(result), space.newint(len(uni))])


 @unwrap_spec(chars='utf8')
 def charmap_build(space, chars, charslen):
     # XXX CPython sometimes uses a three-level trie
-    xxx
     w_charmap = space.newdict()
-    for num in range(len(chars)):
-        elem = chars[num]
-        space.setitem(w_charmap, space.newint(ord(elem)), space.newint(num))
+    pos = 0
+    num = 0
+    while num < charslen:
+        w_char = space.newint(rutf8.codepoint_at_pos(chars, pos))
+        space.setitem(w_charmap, w_char, space.newint(num))
+        pos = rutf8.next_codepoint_pos(chars, pos)
+        num += 1
     return w_charmap

 # ____________________________________________________________
@@ -690,6 +697,8 @@

 @unwrap_spec(errors='str_or_None')
 def unicode_internal_decode(space, w_string, errors="strict"):
+    from pypy.interpreter.unicodehelper import DecodeWrapper
+
     if errors is None:
         errors = 'strict'
     # special case for this codec: unicodes are returned as is
@@ -705,7 +714,7 @@
     state = space.fromcache(CodecState)
     result, consumed = runicode.str_decode_unicode_internal(
         string, len(string), errors,
-        final, state.decode_error_handler)
+        final, DecodeWrapper(state.decode_error_handler).handle)
     return space.newtuple([space.newunicode(result), space.newint(consumed)])

 # ____________________________________________________________
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1166,8 +1166,8 @@
         c = mapping.get(ch, ERROR_CHAR)
         if c == ERROR_CHAR:
             r, pos = errorhandler(errors, "charmap",
-                               "character maps to <undefined>",
-                               s, pos, pos + 1)
+                                  "character maps to <undefined>",
+                                  s, pos, pos + 1)
             result.append(r)
             continue
         result.append(c)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit