Author: Armin Rigo <[email protected]>
Branch:
Changeset: r90215:bde34ec1885f
Date: 2017-02-20 09:10 +0100
http://bitbucket.org/pypy/pypy/changeset/bde34ec1885f/
Log: Pff, whack more at the hack for calling str_encode_utf_8 from
''.encode('utf-8')
diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -698,8 +698,18 @@
enc = s_enc.const
if enc not in ('ascii', 'latin-1', 'utf-8'):
raise AnnotatorError("Encoding %s not supported for unicode" %
(enc,))
+ if enc == 'utf-8':
+ from rpython.rlib import runicode
+ bookkeeper = getbookkeeper()
+ s_func = bookkeeper.immutablevalue(
+ runicode.unicode_encode_utf_8_elidable)
+ s_errors = bookkeeper.immutablevalue('strict')
+ s_errorhandler = bookkeeper.immutablevalue(None)
+ s_allow_surr = bookkeeper.immutablevalue(True)
+ args = [self, self.len(), s_errors, s_errorhandler, s_allow_surr]
+ bookkeeper.emulate_pbc_call(bookkeeper.position_key, s_func, args)
return SomeString(no_nul=self.no_nul)
- method_encode.can_only_throw = []
+ method_encode.can_only_throw = [UnicodeEncodeError]
class __extend__(SomeString):
@@ -731,6 +741,19 @@
enc = s_enc.const
if enc not in ('ascii', 'latin-1', 'utf-8'):
raise AnnotatorError("Encoding %s not supported for strings" %
(enc,))
+ if enc == 'utf-8':
+ from rpython.rlib import runicode
+ bookkeeper = getbookkeeper()
+ s_func = bookkeeper.immutablevalue(
+ runicode.str_decode_utf_8_elidable)
+ s_errors = bookkeeper.immutablevalue('strict')
+ s_final = bookkeeper.immutablevalue(True)
+ s_errorhandler = bookkeeper.immutablevalue(
+ runicode.default_unicode_error_decode)
+ s_allow_surr = bookkeeper.immutablevalue(True)
+ args = [self, self.len(), s_errors, s_final, s_errorhandler,
+ s_allow_surr]
+ bookkeeper.emulate_pbc_call(bookkeeper.position_key, s_func, args)
return SomeUnicodeString(no_nul=self.no_nul)
method_decode.can_only_throw = [UnicodeDecodeError]
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -6,7 +6,6 @@
from rpython.tool.sourcetools import func_with_new_name
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.rlib import jit
-from rpython.rlib.nonconst import NonConstant
if rffi.sizeof(lltype.UniChar) == 4:
@@ -107,9 +106,6 @@
return u'', None, endingpos
raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
-def ll_unicode_error_decode(errors, encoding, msg, s, startingpos, endingpos):
- raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg)
-
# ____________________________________________________________
# utf-8
@@ -131,15 +127,6 @@
errorhandler=None,
allow_surrogates=allow_surrogate_by_default):
if errorhandler is None:
errorhandler = default_unicode_error_decode
- # NB. a bit messy because rtyper/rstr.py also calls the same
- # function. Make sure we annotate for the args it passes, too
- if NonConstant(False):
- s = NonConstant('?????')
- size = NonConstant(12345)
- errors = NonConstant('strict')
- final = NonConstant(True)
- errorhandler = ll_unicode_error_decode
- allow_surrogates = NonConstant(True)
return str_decode_utf_8_elidable(s, size, errors, final, errorhandler,
allow_surrogates=allow_surrogates)
@@ -348,14 +335,6 @@
#
if errorhandler is None:
errorhandler = default_unicode_error_encode
- # NB. a bit messy because rtyper/rstr.py also calls the same
- # function. Make sure we annotate for the args it passes, too
- if NonConstant(False):
- s = NonConstant(u'?????')
- size = NonConstant(12345)
- errors = NonConstant('strict')
- # no errorhandler needed for rtyper/rstr.py
- allow_surrogates = NonConstant(True)
return unicode_encode_utf_8_elidable(s, size, errors, errorhandler,
allow_surrogates=allow_surrogates)
diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py
--- a/rpython/rtyper/rstr.py
+++ b/rpython/rtyper/rstr.py
@@ -13,17 +13,14 @@
class AbstractStringRepr(Repr):
- def __init__(self, *args):
- Repr.__init__(self, *args)
- self.rstr_decode_utf_8 = None
-
@jit.elidable
def ll_decode_utf8(self, llvalue):
from rpython.rtyper.annlowlevel import hlstr
from rpython.rlib import runicode
value = hlstr(llvalue)
assert value is not None
- errorhandler = runicode.ll_unicode_error_decode
+ errorhandler = runicode.default_unicode_error_decode
+ # NB. keep the arguments in sync with annotator/unaryop.py
u, pos = runicode.str_decode_utf_8_elidable(
value, len(value), 'strict', True, errorhandler, True)
# XXX maybe the whole ''.decode('utf-8') should be not RPython.
@@ -374,10 +371,6 @@
class AbstractUnicodeRepr(AbstractStringRepr):
- def __init__(self, *args):
- AbstractStringRepr.__init__(self, *args)
- self.runicode_encode_utf_8 = None
-
def rtype_method_upper(self, hop):
raise TyperError("Cannot do toupper on unicode string")
@@ -390,6 +383,7 @@
from rpython.rlib import runicode
s = hlunicode(ll_s)
assert s is not None
+ # NB. keep the arguments in sync with annotator/unaryop.py
bytes = runicode.unicode_encode_utf_8_elidable(
s, len(s), 'strict', None, True)
return self.ll.llstr(bytes)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit