Author: Antonio Cuni <anto.c...@gmail.com>
Branch:
Changeset: r57055:aad4c9d57f01
Date: 2012-08-31 15:07 +0200
http://bitbucket.org/pypy/pypy/changeset/aad4c9d57f01/
Log: do the same with str_decode_utf_8 diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py --- a/pypy/rpython/lltypesystem/rstr.py +++ b/pypy/rpython/lltypesystem/rstr.py @@ -132,11 +132,9 @@ CACHE = CONST_STR_CACHE def __init__(self, *args): - from pypy.rlib.runicode import str_decode_utf_8 AbstractStringRepr.__init__(self, *args) self.ll = LLHelpers self.malloc = mallocstr - self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8, 'rstr_decode_utf_8') def ll_decode_latin1(self, value): lgt = len(value.chars) @@ -145,13 +143,6 @@ s.chars[i] = cast_primitive(UniChar, value.chars[i]) return s - def ll_decode_utf8(self, llvalue): - from pypy.rpython.annlowlevel import hlstr, llunicode - value = hlstr(llvalue) - assert value is not None - univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict') - return llunicode(univalue) - class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr): lowleveltype = Ptr(UNICODE) basetype = basestring @@ -159,7 +150,6 @@ CACHE = CONST_UNICODE_CACHE def __init__(self, *args): - from pypy.rlib.runicode import unicode_encode_utf_8 AbstractUnicodeRepr.__init__(self, *args) self.ll = LLHelpers self.malloc = mallocunicode @@ -285,7 +275,7 @@ class LLHelpers(AbstractLLHelpers): - from pypy.rpython.annlowlevel import llstr + from pypy.rpython.annlowlevel import llstr, llunicode @jit.elidable def ll_str_mul(s, times): diff --git a/pypy/rpython/ootypesystem/rstr.py b/pypy/rpython/ootypesystem/rstr.py --- a/pypy/rpython/ootypesystem/rstr.py +++ b/pypy/rpython/ootypesystem/rstr.py @@ -60,14 +60,6 @@ sb.ll_append_char(cast_primitive(UniChar, c)) return sb.ll_build() - def ll_decode_utf8(self, llvalue): - from pypy.rpython.annlowlevel import hlstr, oounicode - from pypy.rlib.runicode import str_decode_utf_8 - value = hlstr(llvalue) - assert value is not None - univalue, _ = str_decode_utf_8(value, len(value), 'strict') - return oounicode(univalue) - class UnicodeRepr(BaseOOStringRepr, AbstractUnicodeRepr): 
lowleveltype = ootype.Unicode @@ -126,7 +118,7 @@ class LLHelpers(AbstractLLHelpers): - from pypy.rpython.annlowlevel import oostr as llstr + from pypy.rpython.annlowlevel import oostr as llstr, oounicode as llunicode def ll_chr2str(ch): return ootype.oostring(ch, -1) diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py --- a/pypy/rpython/rstr.py +++ b/pypy/rpython/rstr.py @@ -3,6 +3,7 @@ from pypy.tool.sourcetools import func_with_new_name from pypy.annotation import model as annmodel from pypy.rlib import jit +from pypy.rlib.nonconst import NonConstant from pypy.rpython.error import TyperError from pypy.rpython.rmodel import IntegerRepr, IteratorRepr from pypy.rpython.rmodel import inputconst, Repr @@ -12,7 +13,22 @@ cast_primitive, typeOf class AbstractStringRepr(Repr): - pass + + def __init__(self, *args): + from pypy.rlib.runicode import str_decode_utf_8, raise_unicode_exception_decode + Repr.__init__(self, *args) + self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8, + 'rstr_decode_utf_8') + self.rraise_unicode_exception_decode = func_with_new_name( + raise_unicode_exception_decode, 'rraise_unicode_exception_decode') + + @jit.elidable + def ll_decode_utf8(self, llvalue): + from pypy.rpython.annlowlevel import hlstr + value = hlstr(llvalue) + assert value is not None + univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict') + return self.ll.llunicode(univalue) class AbstractCharRepr(AbstractStringRepr): pass diff --git a/pypy/rpython/test/test_runicode.py b/pypy/rpython/test/test_runicode.py --- a/pypy/rpython/test/test_runicode.py +++ b/pypy/rpython/test/test_runicode.py @@ -158,11 +158,19 @@ def test_utf_8_decoding_annotation(self): from pypy.rlib.runicode import str_decode_utf_8 + def errorhandler(errors, encoding, msg, s, + startingpos, endingpos): + raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg) + strings = [u'àèì'.encode('utf-8'), u'ìòéà'.encode('utf-8')] def f(n): x = strings[n] + if n: + errors = 'strict' + 
else: + errors = 'foo' # the annotation of y is SomeUnicodeString(can_be_None=False) - y, _ = str_decode_utf_8(x, len(x), 'strict') + y, _ = str_decode_utf_8(x, len(x), errors, errorhandler) return x.decode('utf-8') + y assert self.ll_to_string(self.interpret(f, [1])) == f(1) _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit