[pypy-commit] pypy default: do the same with str_decode_utf_8

antocuni Fri, 31 Aug 2012 07:30:07 -0700

Author: Antonio Cuni <[email protected]>
Branch: 
Changeset: r57055:aad4c9d57f01
Date: 2012-08-31 15:07 +0200
http://bitbucket.org/pypy/pypy/changeset/aad4c9d57f01/


Log:    do the same with str_decode_utf_8

diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -132,11 +132,9 @@
     CACHE = CONST_STR_CACHE
 
     def __init__(self, *args):
-        from pypy.rlib.runicode import str_decode_utf_8
         AbstractStringRepr.__init__(self, *args)
         self.ll = LLHelpers
         self.malloc = mallocstr
-        self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8, 'rstr_decode_utf_8')
 
     def ll_decode_latin1(self, value):
         lgt = len(value.chars)
@@ -145,13 +143,6 @@
             s.chars[i] = cast_primitive(UniChar, value.chars[i])
         return s
 
-    def ll_decode_utf8(self, llvalue):
-        from pypy.rpython.annlowlevel import hlstr, llunicode
-        value = hlstr(llvalue)
-        assert value is not None
-        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict')
-        return llunicode(univalue)
-
 class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
     lowleveltype = Ptr(UNICODE)
     basetype = basestring
@@ -159,7 +150,6 @@
     CACHE = CONST_UNICODE_CACHE
 
     def __init__(self, *args):
-        from pypy.rlib.runicode import unicode_encode_utf_8
         AbstractUnicodeRepr.__init__(self, *args)
         self.ll = LLHelpers
         self.malloc = mallocunicode
@@ -285,7 +275,7 @@
 
 
 class LLHelpers(AbstractLLHelpers):
-    from pypy.rpython.annlowlevel import llstr
+    from pypy.rpython.annlowlevel import llstr, llunicode
 
     @jit.elidable
     def ll_str_mul(s, times):
diff --git a/pypy/rpython/ootypesystem/rstr.py b/pypy/rpython/ootypesystem/rstr.py
--- a/pypy/rpython/ootypesystem/rstr.py
+++ b/pypy/rpython/ootypesystem/rstr.py
@@ -60,14 +60,6 @@
             sb.ll_append_char(cast_primitive(UniChar, c))
         return sb.ll_build()
 
-    def ll_decode_utf8(self, llvalue):
-        from pypy.rpython.annlowlevel import hlstr, oounicode
-        from pypy.rlib.runicode import str_decode_utf_8
-        value = hlstr(llvalue)
-        assert value is not None
-        univalue, _ = str_decode_utf_8(value, len(value), 'strict')
-        return oounicode(univalue)
-
 
 class UnicodeRepr(BaseOOStringRepr, AbstractUnicodeRepr):
     lowleveltype = ootype.Unicode
@@ -126,7 +118,7 @@
 
 class LLHelpers(AbstractLLHelpers):
 
-    from pypy.rpython.annlowlevel import oostr as llstr
+    from pypy.rpython.annlowlevel import oostr as llstr, oounicode as llunicode
 
     def ll_chr2str(ch):
         return ootype.oostring(ch, -1)
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -3,6 +3,7 @@
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.annotation import model as annmodel
 from pypy.rlib import jit
+from pypy.rlib.nonconst import NonConstant
 from pypy.rpython.error import TyperError
 from pypy.rpython.rmodel import IntegerRepr, IteratorRepr
 from pypy.rpython.rmodel import inputconst, Repr
@@ -12,7 +13,22 @@
      cast_primitive, typeOf
 
 class AbstractStringRepr(Repr):
-    pass
+
+    def __init__(self, *args):
+        from pypy.rlib.runicode import str_decode_utf_8, raise_unicode_exception_decode
+        Repr.__init__(self, *args)
+        self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8,
+                                                    'rstr_decode_utf_8')
+        self.rraise_unicode_exception_decode = func_with_new_name(
+            raise_unicode_exception_decode, 'rraise_unicode_exception_decode')
+        
+    @jit.elidable
+    def ll_decode_utf8(self, llvalue):
+        from pypy.rpython.annlowlevel import hlstr
+        value = hlstr(llvalue)
+        assert value is not None
+        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict')
+        return self.ll.llunicode(univalue)
 
 class AbstractCharRepr(AbstractStringRepr):
     pass
diff --git a/pypy/rpython/test/test_runicode.py b/pypy/rpython/test/test_runicode.py
--- a/pypy/rpython/test/test_runicode.py
+++ b/pypy/rpython/test/test_runicode.py
@@ -158,11 +158,19 @@
 
     def test_utf_8_decoding_annotation(self):
         from pypy.rlib.runicode import str_decode_utf_8
+        def errorhandler(errors, encoding, msg, s,
+                         startingpos, endingpos):
+            raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg)
+        
         strings = [u'àèì'.encode('utf-8'), u'ìòéà'.encode('utf-8')]
         def f(n):
             x = strings[n]
+            if n:
+                errors = 'strict'
+            else:
+                errors = 'foo'
             # the annotation of y is SomeUnicodeString(can_be_None=False)
-            y, _ = str_decode_utf_8(x, len(x), 'strict')
+            y, _ = str_decode_utf_8(x, len(x), errors, errorhandler)
             return x.decode('utf-8') + y
 
         assert self.ll_to_string(self.interpret(f, [1])) == f(1)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: do the same with str_decode_utf_8

Reply via email to