Author: Antonio Cuni <[email protected]>
Branch:
Changeset: r57055:aad4c9d57f01
Date: 2012-08-31 15:07 +0200
http://bitbucket.org/pypy/pypy/changeset/aad4c9d57f01/
Log: do the same with str_decode_utf_8
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -132,11 +132,9 @@
CACHE = CONST_STR_CACHE
def __init__(self, *args):
- from pypy.rlib.runicode import str_decode_utf_8
AbstractStringRepr.__init__(self, *args)
self.ll = LLHelpers
self.malloc = mallocstr
- self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8, 'rstr_decode_utf_8')
def ll_decode_latin1(self, value):
lgt = len(value.chars)
@@ -145,13 +143,6 @@
s.chars[i] = cast_primitive(UniChar, value.chars[i])
return s
- def ll_decode_utf8(self, llvalue):
- from pypy.rpython.annlowlevel import hlstr, llunicode
- value = hlstr(llvalue)
- assert value is not None
- univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict')
- return llunicode(univalue)
-
class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
lowleveltype = Ptr(UNICODE)
basetype = basestring
@@ -159,7 +150,6 @@
CACHE = CONST_UNICODE_CACHE
def __init__(self, *args):
- from pypy.rlib.runicode import unicode_encode_utf_8
AbstractUnicodeRepr.__init__(self, *args)
self.ll = LLHelpers
self.malloc = mallocunicode
@@ -285,7 +275,7 @@
class LLHelpers(AbstractLLHelpers):
- from pypy.rpython.annlowlevel import llstr
+ from pypy.rpython.annlowlevel import llstr, llunicode
@jit.elidable
def ll_str_mul(s, times):
diff --git a/pypy/rpython/ootypesystem/rstr.py b/pypy/rpython/ootypesystem/rstr.py
--- a/pypy/rpython/ootypesystem/rstr.py
+++ b/pypy/rpython/ootypesystem/rstr.py
@@ -60,14 +60,6 @@
sb.ll_append_char(cast_primitive(UniChar, c))
return sb.ll_build()
- def ll_decode_utf8(self, llvalue):
- from pypy.rpython.annlowlevel import hlstr, oounicode
- from pypy.rlib.runicode import str_decode_utf_8
- value = hlstr(llvalue)
- assert value is not None
- univalue, _ = str_decode_utf_8(value, len(value), 'strict')
- return oounicode(univalue)
-
class UnicodeRepr(BaseOOStringRepr, AbstractUnicodeRepr):
lowleveltype = ootype.Unicode
@@ -126,7 +118,7 @@
class LLHelpers(AbstractLLHelpers):
- from pypy.rpython.annlowlevel import oostr as llstr
+ from pypy.rpython.annlowlevel import oostr as llstr, oounicode as llunicode
def ll_chr2str(ch):
return ootype.oostring(ch, -1)
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -3,6 +3,7 @@
from pypy.tool.sourcetools import func_with_new_name
from pypy.annotation import model as annmodel
from pypy.rlib import jit
+from pypy.rlib.nonconst import NonConstant
from pypy.rpython.error import TyperError
from pypy.rpython.rmodel import IntegerRepr, IteratorRepr
from pypy.rpython.rmodel import inputconst, Repr
@@ -12,7 +13,22 @@
cast_primitive, typeOf
class AbstractStringRepr(Repr):
- pass
+
+ def __init__(self, *args):
+ from pypy.rlib.runicode import str_decode_utf_8, raise_unicode_exception_decode
+ Repr.__init__(self, *args)
+ self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8,
+ 'rstr_decode_utf_8')
+ self.rraise_unicode_exception_decode = func_with_new_name(
+ raise_unicode_exception_decode, 'rraise_unicode_exception_decode')
+
+ @jit.elidable
+ def ll_decode_utf8(self, llvalue):
+ from pypy.rpython.annlowlevel import hlstr
+ value = hlstr(llvalue)
+ assert value is not None
+ univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict')
+ return self.ll.llunicode(univalue)
class AbstractCharRepr(AbstractStringRepr):
pass
diff --git a/pypy/rpython/test/test_runicode.py b/pypy/rpython/test/test_runicode.py
--- a/pypy/rpython/test/test_runicode.py
+++ b/pypy/rpython/test/test_runicode.py
@@ -158,11 +158,19 @@
def test_utf_8_decoding_annotation(self):
from pypy.rlib.runicode import str_decode_utf_8
+ def errorhandler(errors, encoding, msg, s,
+ startingpos, endingpos):
+ raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg)
+
strings = [u'àèì'.encode('utf-8'),
u'ìòéà'.encode('utf-8')]
def f(n):
x = strings[n]
+ if n:
+ errors = 'strict'
+ else:
+ errors = 'foo'
# the annotation of y is SomeUnicodeString(can_be_None=False)
- y, _ = str_decode_utf_8(x, len(x), 'strict')
+ y, _ = str_decode_utf_8(x, len(x), errors, errorhandler)
return x.decode('utf-8') + y
assert self.ll_to_string(self.interpret(f, [1])) == f(1)
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit