Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r90215:bde34ec1885f
Date: 2017-02-20 09:10 +0100
http://bitbucket.org/pypy/pypy/changeset/bde34ec1885f/
Log: Pff, whack more at the hack for calling str_encode_utf_8 from ''.encode('utf-8') diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py --- a/rpython/annotator/unaryop.py +++ b/rpython/annotator/unaryop.py @@ -698,8 +698,18 @@ enc = s_enc.const if enc not in ('ascii', 'latin-1', 'utf-8'): raise AnnotatorError("Encoding %s not supported for unicode" % (enc,)) + if enc == 'utf-8': + from rpython.rlib import runicode + bookkeeper = getbookkeeper() + s_func = bookkeeper.immutablevalue( + runicode.unicode_encode_utf_8_elidable) + s_errors = bookkeeper.immutablevalue('strict') + s_errorhandler = bookkeeper.immutablevalue(None) + s_allow_surr = bookkeeper.immutablevalue(True) + args = [self, self.len(), s_errors, s_errorhandler, s_allow_surr] + bookkeeper.emulate_pbc_call(bookkeeper.position_key, s_func, args) return SomeString(no_nul=self.no_nul) - method_encode.can_only_throw = [] + method_encode.can_only_throw = [UnicodeEncodeError] class __extend__(SomeString): @@ -731,6 +741,19 @@ enc = s_enc.const if enc not in ('ascii', 'latin-1', 'utf-8'): raise AnnotatorError("Encoding %s not supported for strings" % (enc,)) + if enc == 'utf-8': + from rpython.rlib import runicode + bookkeeper = getbookkeeper() + s_func = bookkeeper.immutablevalue( + runicode.str_decode_utf_8_elidable) + s_errors = bookkeeper.immutablevalue('strict') + s_final = bookkeeper.immutablevalue(True) + s_errorhandler = bookkeeper.immutablevalue( + runicode.default_unicode_error_decode) + s_allow_surr = bookkeeper.immutablevalue(True) + args = [self, self.len(), s_errors, s_final, s_errorhandler, + s_allow_surr] + bookkeeper.emulate_pbc_call(bookkeeper.position_key, s_func, args) return SomeUnicodeString(no_nul=self.no_nul) method_decode.can_only_throw = [UnicodeDecodeError] diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -6,7 +6,6 @@ from rpython.tool.sourcetools import func_with_new_name from 
rpython.rtyper.lltypesystem import lltype, rffi from rpython.rlib import jit -from rpython.rlib.nonconst import NonConstant if rffi.sizeof(lltype.UniChar) == 4: @@ -107,9 +106,6 @@ return u'', None, endingpos raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg) -def ll_unicode_error_decode(errors, encoding, msg, s, startingpos, endingpos): - raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg) - # ____________________________________________________________ # utf-8 @@ -131,15 +127,6 @@ errorhandler=None, allow_surrogates=allow_surrogate_by_default): if errorhandler is None: errorhandler = default_unicode_error_decode - # NB. a bit messy because rtyper/rstr.py also calls the same - # function. Make sure we annotate for the args it passes, too - if NonConstant(False): - s = NonConstant('?????') - size = NonConstant(12345) - errors = NonConstant('strict') - final = NonConstant(True) - errorhandler = ll_unicode_error_decode - allow_surrogates = NonConstant(True) return str_decode_utf_8_elidable(s, size, errors, final, errorhandler, allow_surrogates=allow_surrogates) @@ -348,14 +335,6 @@ # if errorhandler is None: errorhandler = default_unicode_error_encode - # NB. a bit messy because rtyper/rstr.py also calls the same - # function. 
Make sure we annotate for the args it passes, too - if NonConstant(False): - s = NonConstant(u'?????') - size = NonConstant(12345) - errors = NonConstant('strict') - # no errorhandler needed for rtyper/rstr.py - allow_surrogates = NonConstant(True) return unicode_encode_utf_8_elidable(s, size, errors, errorhandler, allow_surrogates=allow_surrogates) diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py --- a/rpython/rtyper/rstr.py +++ b/rpython/rtyper/rstr.py @@ -13,17 +13,14 @@ class AbstractStringRepr(Repr): - def __init__(self, *args): - Repr.__init__(self, *args) - self.rstr_decode_utf_8 = None - @jit.elidable def ll_decode_utf8(self, llvalue): from rpython.rtyper.annlowlevel import hlstr from rpython.rlib import runicode value = hlstr(llvalue) assert value is not None - errorhandler = runicode.ll_unicode_error_decode + errorhandler = runicode.default_unicode_error_decode + # NB. keep the arguments in sync with annotator/unaryop.py u, pos = runicode.str_decode_utf_8_elidable( value, len(value), 'strict', True, errorhandler, True) # XXX maybe the whole ''.decode('utf-8') should be not RPython. @@ -374,10 +371,6 @@ class AbstractUnicodeRepr(AbstractStringRepr): - def __init__(self, *args): - AbstractStringRepr.__init__(self, *args) - self.runicode_encode_utf_8 = None - def rtype_method_upper(self, hop): raise TyperError("Cannot do toupper on unicode string") @@ -390,6 +383,7 @@ from rpython.rlib import runicode s = hlunicode(ll_s) assert s is not None + # NB. keep the arguments in sync with annotator/unaryop.py bytes = runicode.unicode_encode_utf_8_elidable( s, len(s), 'strict', None, True) return self.ll.llstr(bytes) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit