Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r86862:74b4b27aaa7b
Date: 2016-09-04 14:49 +0200
http://bitbucket.org/pypy/pypy/changeset/74b4b27aaa7b/

Log:    Another attempt at fixing the original problem

diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1,5 +1,5 @@
 import sys
-from rpython.rlib.objectmodel import specialize, we_are_translated
+from rpython.rlib.objectmodel import specialize, we_are_translated, enforceargs
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib.rarithmetic import r_uint, intmask, widen
 from rpython.rlib.unicodedata import unicodedb
@@ -145,17 +145,21 @@
 _invalid_byte_3_of_4 = _invalid_cont_byte
 _invalid_byte_4_of_4 = _invalid_cont_byte
 
+@enforceargs(allow_surrogates=bool)
 def _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
     return (ordch2>>6 != 0x2 or    # 0b10
             (ordch1 == 0xe0 and ordch2 < 0xa0)
             # surrogates shouldn't be valid UTF-8!
-            or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f))
+            or (ordch1 == 0xed and ordch2 > 0x9f and not allow_surrogates))
 
 def _invalid_byte_2_of_4(ordch1, ordch2):
     return (ordch2>>6 != 0x2 or    # 0b10
             (ordch1 == 0xf0 and ordch2 < 0x90) or
             (ordch1 == 0xf4 and ordch2 > 0x8f))
 
+# note: this specialize() is here for rtyper/rstr.py, which calls this
+# function too but with its own fixed errorhandler
+@specialize.arg_or_var(4)
 def str_decode_utf_8_impl(s, size, errors, final, errorhandler,
                           allow_surrogates, result):
     if size == 0:
@@ -328,6 +332,9 @@
     return unicode_encode_utf_8_impl(s, size, errors, errorhandler,
                                      allow_surrogates=allow_surrogates)
 
+# note: this specialize() is here for rtyper/rstr.py, which calls this
+# function too but with its own fixed errorhandler
+@specialize.arg_or_var(3)
 def unicode_encode_utf_8_impl(s, size, errors, errorhandler,
                               allow_surrogates=False):
     assert(size >= 0)
diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py
--- a/rpython/rlib/test/test_runicode.py
+++ b/rpython/rlib/test/test_runicode.py
@@ -55,7 +55,7 @@
                 s = s.encode(encoding)
         except LookupError as e:
             py.test.skip(e)
-        result, consumed = decoder(s, len(s), True)
+        result, consumed = decoder(s, len(s), 'strict', final=True)
         assert consumed == len(s)
         self.typeequals(trueresult, result)
 
@@ -69,7 +69,7 @@
                 s = s.decode(encoding)
         except LookupError as e:
             py.test.skip(e)
-        result = encoder(s, len(s), True)
+        result = encoder(s, len(s), 'strict')
         self.typeequals(trueresult, result)
 
     def checkencodeerror(self, s, encoding, start, stop):
diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py
--- a/rpython/rtyper/rstr.py
+++ b/rpython/rtyper/rstr.py
@@ -35,7 +35,8 @@
             allow_surrogates=False, result=result)
         return self.ll.llunicode(result.build())
 
-    def ll_raise_unicode_exception_decode(self, errors, encoding, msg, s,
+    @staticmethod
+    def ll_raise_unicode_exception_decode(errors, encoding, msg, s,
                                        startingpos, endingpos):
         raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg)
 
@@ -411,7 +412,8 @@
             allow_surrogates=False)
         return self.ll.llstr(bytes)
 
-    def ll_raise_unicode_exception_encode(self, errors, encoding, msg, u,
+    @staticmethod
+    def ll_raise_unicode_exception_encode(errors, encoding, msg, u,
                                           startingpos, endingpos):
         raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
 
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to