[pypy-commit] pypy default: Pff, whack more at the hack for calling str_encode_utf_8 from ''.encode('utf-8')

arigo Mon, 20 Feb 2017 00:13:20 -0800

Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r90215:bde34ec1885f
Date: 2017-02-20 09:10 +0100
http://bitbucket.org/pypy/pypy/changeset/bde34ec1885f/


Log:    Pff, whack more at the hack for calling str_encode_utf_8 from
        ''.encode('utf-8')

diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -698,8 +698,18 @@
         enc = s_enc.const
         if enc not in ('ascii', 'latin-1', 'utf-8'):
             raise AnnotatorError("Encoding %s not supported for unicode" % (enc,))
+        if enc == 'utf-8':
+            from rpython.rlib import runicode
+            bookkeeper = getbookkeeper()
+            s_func = bookkeeper.immutablevalue(
+                             runicode.unicode_encode_utf_8_elidable)
+            s_errors = bookkeeper.immutablevalue('strict')
+            s_errorhandler = bookkeeper.immutablevalue(None)
+            s_allow_surr = bookkeeper.immutablevalue(True)
+            args = [self, self.len(), s_errors, s_errorhandler, s_allow_surr]
+            bookkeeper.emulate_pbc_call(bookkeeper.position_key, s_func, args)
         return SomeString(no_nul=self.no_nul)
-    method_encode.can_only_throw = []
+    method_encode.can_only_throw = [UnicodeEncodeError]
 
 
 class __extend__(SomeString):
@@ -731,6 +741,19 @@
         enc = s_enc.const
         if enc not in ('ascii', 'latin-1', 'utf-8'):
             raise AnnotatorError("Encoding %s not supported for strings" % (enc,))
+        if enc == 'utf-8':
+            from rpython.rlib import runicode
+            bookkeeper = getbookkeeper()
+            s_func = bookkeeper.immutablevalue(
+                            runicode.str_decode_utf_8_elidable)
+            s_errors = bookkeeper.immutablevalue('strict')
+            s_final = bookkeeper.immutablevalue(True)
+            s_errorhandler = bookkeeper.immutablevalue(
+                                    runicode.default_unicode_error_decode)
+            s_allow_surr = bookkeeper.immutablevalue(True)
+            args = [self, self.len(), s_errors, s_final, s_errorhandler,
+                    s_allow_surr]
+            bookkeeper.emulate_pbc_call(bookkeeper.position_key, s_func, args)
         return SomeUnicodeString(no_nul=self.no_nul)
     method_decode.can_only_throw = [UnicodeDecodeError]
 
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -6,7 +6,6 @@
 from rpython.tool.sourcetools import func_with_new_name
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rlib import jit
-from rpython.rlib.nonconst import NonConstant
 
 
 if rffi.sizeof(lltype.UniChar) == 4:
@@ -107,9 +106,6 @@
         return u'', None, endingpos
     raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
 
-def ll_unicode_error_decode(errors, encoding, msg, s, startingpos, endingpos):
-    raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg)
-
 # ____________________________________________________________
 # utf-8
 
@@ -131,15 +127,6 @@
                      errorhandler=None, allow_surrogates=allow_surrogate_by_default):
     if errorhandler is None:
         errorhandler = default_unicode_error_decode
-    # NB. a bit messy because rtyper/rstr.py also calls the same
-    # function.  Make sure we annotate for the args it passes, too
-    if NonConstant(False):
-        s = NonConstant('?????')
-        size = NonConstant(12345)
-        errors = NonConstant('strict')
-        final = NonConstant(True)
-        errorhandler = ll_unicode_error_decode
-        allow_surrogates = NonConstant(True)
     return str_decode_utf_8_elidable(s, size, errors, final, errorhandler,
                                      allow_surrogates=allow_surrogates)
 
@@ -348,14 +335,6 @@
     #
     if errorhandler is None:
         errorhandler = default_unicode_error_encode
-    # NB. a bit messy because rtyper/rstr.py also calls the same
-    # function.  Make sure we annotate for the args it passes, too
-    if NonConstant(False):
-        s = NonConstant(u'?????')
-        size = NonConstant(12345)
-        errors = NonConstant('strict')
-        # no errorhandler needed for rtyper/rstr.py
-        allow_surrogates = NonConstant(True)
     return unicode_encode_utf_8_elidable(s, size, errors, errorhandler,
                                          allow_surrogates=allow_surrogates)
 
diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py
--- a/rpython/rtyper/rstr.py
+++ b/rpython/rtyper/rstr.py
@@ -13,17 +13,14 @@
 
 class AbstractStringRepr(Repr):
 
-    def __init__(self, *args):
-        Repr.__init__(self, *args)
-        self.rstr_decode_utf_8 = None
-
     @jit.elidable
     def ll_decode_utf8(self, llvalue):
         from rpython.rtyper.annlowlevel import hlstr
         from rpython.rlib import runicode
         value = hlstr(llvalue)
         assert value is not None
-        errorhandler = runicode.ll_unicode_error_decode
+        errorhandler = runicode.default_unicode_error_decode
+        # NB. keep the arguments in sync with annotator/unaryop.py
         u, pos = runicode.str_decode_utf_8_elidable(
             value, len(value), 'strict', True, errorhandler, True)
         # XXX maybe the whole ''.decode('utf-8') should be not RPython.
@@ -374,10 +371,6 @@
 
 class AbstractUnicodeRepr(AbstractStringRepr):
 
-    def __init__(self, *args):
-        AbstractStringRepr.__init__(self, *args)
-        self.runicode_encode_utf_8 = None
-
     def rtype_method_upper(self, hop):
         raise TyperError("Cannot do toupper on unicode string")
 
@@ -390,6 +383,7 @@
         from rpython.rlib import runicode
         s = hlunicode(ll_s)
         assert s is not None
+        # NB. keep the arguments in sync with annotator/unaryop.py
         bytes = runicode.unicode_encode_utf_8_elidable(
             s, len(s), 'strict', None, True)
         return self.ll.llstr(bytes)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: Pff, whack more at the hack for calling str_encode_utf_8 from ''.encode('utf-8')

Reply via email to