Author: Hakan Ardo <ha...@debian.org> Branch: Changeset: r56200:942fa7ae1145 Date: 2012-07-18 22:40 +0200 http://bitbucket.org/pypy/pypy/changeset/942fa7ae1145/
Log: merge diff --git a/pypy/annotation/binaryop.py b/pypy/annotation/binaryop.py --- a/pypy/annotation/binaryop.py +++ b/pypy/annotation/binaryop.py @@ -7,7 +7,7 @@ from pypy.tool.pairtype import pair, pairtype from pypy.annotation.model import SomeObject, SomeInteger, SomeBool, s_Bool from pypy.annotation.model import SomeString, SomeChar, SomeList, SomeDict -from pypy.annotation.model import SomeUnicodeCodePoint +from pypy.annotation.model import SomeUnicodeCodePoint, SomeStringOrUnicode from pypy.annotation.model import SomeTuple, SomeImpossibleValue, s_ImpossibleValue from pypy.annotation.model import SomeInstance, SomeBuiltin, SomeIterator from pypy.annotation.model import SomePBC, SomeFloat, s_None @@ -470,30 +470,37 @@ "string formatting mixing strings and unicode not supported") -class __extend__(pairtype(SomeString, SomeTuple)): - def mod((str, s_tuple)): +class __extend__(pairtype(SomeString, SomeTuple), + pairtype(SomeUnicodeString, SomeTuple)): + def mod((s_string, s_tuple)): + is_string = isinstance(s_string, SomeString) + is_unicode = isinstance(s_string, SomeUnicodeString) + assert is_string or is_unicode for s_item in s_tuple.items: - if isinstance(s_item, (SomeUnicodeCodePoint, SomeUnicodeString)): + if (is_unicode and isinstance(s_item, (SomeChar, SomeString)) or + is_string and isinstance(s_item, (SomeUnicodeCodePoint, + SomeUnicodeString))): raise NotImplementedError( "string formatting mixing strings and unicode not supported") - getbookkeeper().count('strformat', str, s_tuple) - no_nul = str.no_nul + getbookkeeper().count('strformat', s_string, s_tuple) + no_nul = s_string.no_nul for s_item in s_tuple.items: if isinstance(s_item, SomeFloat): pass # or s_item is a subclass, like SomeInteger - elif isinstance(s_item, SomeString) and s_item.no_nul: + elif isinstance(s_item, SomeStringOrUnicode) and s_item.no_nul: pass else: no_nul = False break - return SomeString(no_nul=no_nul) + return s_string.__class__(no_nul=no_nul) -class __extend__(pairtype(SomeString, SomeObject)): +class __extend__(pairtype(SomeString, SomeObject), + pairtype(SomeUnicodeString, SomeObject)): - def mod((str, args)): - getbookkeeper().count('strformat', str, args) - return SomeString() + def mod((s_string, args)): + getbookkeeper().count('strformat', s_string, args) + return s_string.__class__() class __extend__(pairtype(SomeFloat, SomeFloat)): diff --git a/pypy/annotation/test/test_annrpython.py b/pypy/annotation/test/test_annrpython.py --- a/pypy/annotation/test/test_annrpython.py +++ b/pypy/annotation/test/test_annrpython.py @@ -3389,6 +3389,22 @@ s = a.build_types(f, [str]) assert isinstance(s, annmodel.SomeString) + def test_unicodeformatting(self): + def f(x): + return u'%s' % x + + a = self.RPythonAnnotator() + s = a.build_types(f, [unicode]) + assert isinstance(s, annmodel.SomeUnicodeString) + + def test_unicodeformatting_tuple(self): + def f(x): + return u'%s' % (x,) + + a = self.RPythonAnnotator() + s = a.build_types(f, [unicode]) + assert isinstance(s, annmodel.SomeUnicodeString) + def test_negative_slice(self): def f(s, e): diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst --- a/pypy/doc/coding-guide.rst +++ b/pypy/doc/coding-guide.rst @@ -255,7 +255,12 @@ code if the translator can prove that they are non-negative. When slicing a string it is necessary to prove that the slice start and stop indexes are non-negative. There is no implicit str-to-unicode cast - anywhere. + anywhere. Simple string formatting using the ``%`` operator works, as long + as the format string is known at translation time; the only supported + formatting specifiers are ``%s``, ``%d``, ``%x``, ``%o``, ``%f``, plus + ``%r`` but only for user-defined instances. Modifiers such as conversion + flags, precision, length etc. are not supported. Moreover, it is forbidden + to mix unicode and strings when formatting. **tuples** diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -14,5 +14,11 @@ .. branch: nupypy-axis-arg-check Check that axis arg is valid in _numpypy +.. branch: iterator-in-rpython +.. branch: numpypy_count_nonzero +.. branch: even-more-jit-hooks + + .. "uninteresting" branches that we should just ignore for the whatsnew: .. branch: slightly-shorter-c +.. branch: better-enforceargs diff --git a/pypy/objspace/std/strutil.py b/pypy/objspace/std/strutil.py --- a/pypy/objspace/std/strutil.py +++ b/pypy/objspace/std/strutil.py @@ -185,4 +185,4 @@ try: return rstring_to_float(s) except ValueError: - raise ParseStringError("invalid literal for float()") + raise ParseStringError("invalid literal for float(): '%s'" % s) diff --git a/pypy/objspace/std/test/test_floatobject.py b/pypy/objspace/std/test/test_floatobject.py --- a/pypy/objspace/std/test/test_floatobject.py +++ b/pypy/objspace/std/test/test_floatobject.py @@ -441,6 +441,13 @@ b = A(5).real assert type(b) is float + def test_invalid_literal_message(self): + try: + float('abcdef') + except ValueError, e: + assert 'abcdef' in e.message + else: + assert False, 'did not raise' class AppTestFloatHex: def w_identical(self, x, y): diff --git a/pypy/rlib/objectmodel.py b/pypy/rlib/objectmodel.py --- a/pypy/rlib/objectmodel.py +++ b/pypy/rlib/objectmodel.py @@ -3,9 +3,11 @@ RPython-compliant way. """ +import py import sys import types import math +import inspect # specialize is a decorator factory for attaching _annspecialcase_ # attributes to functions: for example @@ -106,15 +108,68 @@ specialize = _Specialize() -def enforceargs(*args): +def enforceargs(*types, **kwds): """ Decorate a function with forcing of RPython-level types on arguments. None means no enforcing. - XXX shouldn't we also add asserts in function body? + When not translated, the type of the actual arguments are checked against + the enforced types every time the function is called. You can disable the + typechecking by passing ``typecheck=False`` to @enforceargs. """ + typecheck = kwds.pop('typecheck', True) + if kwds: + raise TypeError, 'got an unexpected keyword argument: %s' % kwds.keys() + if not typecheck: + def decorator(f): + f._annenforceargs_ = types + return f + return decorator + # + from pypy.annotation.signature import annotationoftype + from pypy.annotation.model import SomeObject def decorator(f): - f._annenforceargs_ = args - return f + def get_annotation(t): + if isinstance(t, SomeObject): + return t + return annotationoftype(t) + def typecheck(*args): + for i, (expected_type, arg) in enumerate(zip(types, args)): + if expected_type is None: + continue + s_expected = get_annotation(expected_type) + s_argtype = get_annotation(type(arg)) + if not s_expected.contains(s_argtype): + msg = "%s argument number %d must be of type %s" % ( + f.func_name, i+1, expected_type) + raise TypeError, msg + # + # we cannot simply wrap the function using *args, **kwds, because it's + # not RPython. Instead, we generate a function with exactly the same + # argument list + argspec = inspect.getargspec(f) + assert len(argspec.args) == len(types), ( + 'not enough types provided: expected %d, got %d' % + (len(types), len(argspec.args))) + assert not argspec.varargs, '*args not supported by enforceargs' + assert not argspec.keywords, '**kwargs not supported by enforceargs' + # + arglist = ', '.join(argspec.args) + src = py.code.Source(""" + def {name}({arglist}): + if not we_are_translated(): + typecheck({arglist}) + return {name}_original({arglist}) + """.format(name=f.func_name, arglist=arglist)) + # + mydict = {f.func_name + '_original': f, + 'typecheck': typecheck, + 'we_are_translated': we_are_translated} + exec src.compile() in mydict + result = mydict[f.func_name] + result.func_defaults = f.func_defaults + result.func_dict.update(f.func_dict) + result._annenforceargs_ = types + return result return decorator # ____________________________________________________________ diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py --- a/pypy/rlib/rgc.py +++ b/pypy/rlib/rgc.py @@ -138,8 +138,8 @@ return hop.genop(opname, vlist, resulttype = hop.r_result.lowleveltype) @jit.oopspec('list.ll_arraycopy(source, dest, source_start, dest_start, length)') +@enforceargs(None, None, int, int, int) @specialize.ll() -@enforceargs(None, None, int, int, int) def ll_arraycopy(source, dest, source_start, dest_start, length): from pypy.rpython.lltypesystem.lloperation import llop from pypy.rlib.objectmodel import keepalive_until_here diff --git a/pypy/rlib/test/test_objectmodel.py b/pypy/rlib/test/test_objectmodel.py --- a/pypy/rlib/test/test_objectmodel.py +++ b/pypy/rlib/test/test_objectmodel.py @@ -420,9 +420,45 @@ def test_enforceargs_decorator(): @enforceargs(int, str, None) def f(a, b, c): - pass + return a, b, c + f.foo = 'foo' + assert f._annenforceargs_ == (int, str, None) + assert f.func_name == 'f' + assert f.foo == 'foo' + assert f(1, 'hello', 42) == (1, 'hello', 42) + exc = py.test.raises(TypeError, "f(1, 2, 3)") + assert exc.value.message == "f argument number 2 must be of type <type 'str'>" + py.test.raises(TypeError, "f('hello', 'world', 3)") + +def test_enforceargs_defaults(): + @enforceargs(int, int) + def f(a, b=40): + return a+b + assert f(2) == 42 + +def test_enforceargs_int_float_promotion(): + @enforceargs(float) + def f(x): + return x + # in RPython there is an implicit int->float promotion + assert f(42) == 42 + +def test_enforceargs_no_typecheck(): + @enforceargs(int, str, None, typecheck=False) + def f(a, b, c): + return a, b, c assert f._annenforceargs_ == (int, str, None) + assert f(1, 2, 3) == (1, 2, 3) # no typecheck + +def test_enforceargs_translates(): + from pypy.rpython.lltypesystem import lltype + @enforceargs(int, float) + def f(a, b): + return a, b + graph = getgraph(f, [int, int]) + TYPES = [v.concretetype for v in graph.getargs()] + assert TYPES == [lltype.Signed, lltype.Float] def getgraph(f, argtypes): from pypy.translator.translator import TranslationContext, graphof diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py --- a/pypy/rpython/lltypesystem/rstr.py +++ b/pypy/rpython/lltypesystem/rstr.py @@ -1,5 +1,6 @@ from weakref import WeakValueDictionary from pypy.tool.pairtype import pairtype +from pypy.annotation import model as annmodel from pypy.rpython.error import TyperError from pypy.rlib.objectmodel import malloc_zero_filled, we_are_translated from pypy.rlib.objectmodel import _hash_string, enforceargs @@ -169,6 +170,13 @@ return result @jit.elidable + def ll_unicode(self, s): + if s: + return s + else: + return self.convert_const(u'None') + + @jit.elidable def ll_encode_latin1(self, s): length = len(s.chars) result = mallocstr(length) @@ -962,13 +970,18 @@ def do_stringformat(cls, hop, sourcevarsrepr): s_str = hop.args_s[0] assert s_str.is_constant() + is_unicode = isinstance(s_str, annmodel.SomeUnicodeString) + if is_unicode: + TEMPBUF = TEMP_UNICODE + else: + TEMPBUF = TEMP s = s_str.const things = cls.parse_fmt_string(s) size = inputconst(Signed, len(things)) # could be unsigned? - cTEMP = inputconst(Void, TEMP) + cTEMP = inputconst(Void, TEMPBUF) cflags = inputconst(Void, {'flavor': 'gc'}) vtemp = hop.genop("malloc_varsize", [cTEMP, cflags, size], - resulttype=Ptr(TEMP)) + resulttype=Ptr(TEMPBUF)) argsiter = iter(sourcevarsrepr) @@ -979,7 +992,13 @@ vitem, r_arg = argsiter.next() if not hasattr(r_arg, 'll_str'): raise TyperError("ll_str unsupported for: %r" % r_arg) - if code == 's' or (code == 'r' and isinstance(r_arg, InstanceRepr)): + if code == 's': + if is_unicode: + # only UniCharRepr and UnicodeRepr has it so far + vchunk = hop.gendirectcall(r_arg.ll_unicode, vitem) + else: + vchunk = hop.gendirectcall(r_arg.ll_str, vitem) + elif code == 'r' and isinstance(r_arg, InstanceRepr): vchunk = hop.gendirectcall(r_arg.ll_str, vitem) elif code == 'd': assert isinstance(r_arg, IntegerRepr) @@ -999,9 +1018,17 @@ else: raise TyperError, "%%%s is not RPython" % (code, ) else: - from pypy.rpython.lltypesystem.rstr import string_repr - vchunk = inputconst(string_repr, thing) + from pypy.rpython.lltypesystem.rstr import string_repr, unicode_repr + if is_unicode: + vchunk = inputconst(unicode_repr, thing) + else: + vchunk = inputconst(string_repr, thing) i = inputconst(Signed, i) + if is_unicode and vchunk.concretetype != Ptr(UNICODE): + # if we are here, one of the ll_str.* functions returned some + # STR, so we convert it to unicode. It's a bit suboptimal + # because we do one extra copy. + vchunk = hop.gendirectcall(cls.ll_str2unicode, vchunk) hop.genop('setarrayitem', [vtemp, i, vchunk]) hop.exception_cannot_occur() # to ignore the ZeroDivisionError of '%' @@ -1009,6 +1036,7 @@ do_stringformat = classmethod(do_stringformat) TEMP = GcArray(Ptr(STR)) +TEMP_UNICODE = GcArray(Ptr(UNICODE)) # ____________________________________________________________ diff --git a/pypy/rpython/ootypesystem/rstr.py b/pypy/rpython/ootypesystem/rstr.py --- a/pypy/rpython/ootypesystem/rstr.py +++ b/pypy/rpython/ootypesystem/rstr.py @@ -1,4 +1,5 @@ from pypy.tool.pairtype import pairtype +from pypy.annotation import model as annmodel from pypy.rlib.rarithmetic import ovfcheck from pypy.rpython.error import TyperError from pypy.rpython.rstr import AbstractStringRepr,AbstractCharRepr,\ @@ -79,6 +80,12 @@ sb.ll_append_char(cast_primitive(Char, c)) return sb.ll_build() + def ll_unicode(self, s): + if s: + return s + else: + return self.convert_const(u'None') + def ll_encode_latin1(self, value): sb = ootype.new(ootype.StringBuilder) length = value.ll_strlen() @@ -312,6 +319,7 @@ string_repr = hop.rtyper.type_system.rstr.string_repr s_str = hop.args_s[0] assert s_str.is_constant() + is_unicode = isinstance(s_str, annmodel.SomeUnicodeString) s = s_str.const c_append = hop.inputconst(ootype.Void, 'll_append') @@ -320,8 +328,15 @@ c8 = hop.inputconst(ootype.Signed, 8) c10 = hop.inputconst(ootype.Signed, 10) c16 = hop.inputconst(ootype.Signed, 16) - c_StringBuilder = hop.inputconst(ootype.Void, ootype.StringBuilder) - v_buf = hop.genop("new", [c_StringBuilder], resulttype=ootype.StringBuilder) + if is_unicode: + StringBuilder = ootype.UnicodeBuilder + RESULT = ootype.Unicode + else: + StringBuilder = ootype.StringBuilder + RESULT = ootype.String + + c_StringBuilder = hop.inputconst(ootype.Void, StringBuilder) + v_buf = hop.genop("new", [c_StringBuilder], resulttype=StringBuilder) things = cls.parse_fmt_string(s) argsiter = iter(sourcevarsrepr) @@ -331,7 +346,12 @@ vitem, r_arg = argsiter.next() if not hasattr(r_arg, 'll_str'): raise TyperError("ll_str unsupported for: %r" % r_arg) - if code == 's' or (code == 'r' and isinstance(r_arg, InstanceRepr)): + if code == 's': + if is_unicode: + vchunk = hop.gendirectcall(r_arg.ll_unicode, vitem) + else: + vchunk = hop.gendirectcall(r_arg.ll_str, vitem) + elif code == 'r' and isinstance(r_arg, InstanceRepr): vchunk = hop.gendirectcall(r_arg.ll_str, vitem) elif code == 'd': assert isinstance(r_arg, IntegerRepr) @@ -348,13 +368,19 @@ else: raise TyperError, "%%%s is not RPython" % (code, ) else: - vchunk = hop.inputconst(string_repr, thing) - #i = inputconst(Signed, i) - #hop.genop('setarrayitem', [vtemp, i, vchunk]) + if is_unicode: + vchunk = hop.inputconst(unicode_repr, thing) + else: + vchunk = hop.inputconst(string_repr, thing) + if is_unicode and vchunk.concretetype != ootype.Unicode: + # if we are here, one of the ll_str.* functions returned some + # STR, so we convert it to unicode. It's a bit suboptimal + # because we do one extra copy. + vchunk = hop.gendirectcall(cls.ll_str2unicode, vchunk) hop.genop('oosend', [c_append, v_buf, vchunk], resulttype=ootype.Void) hop.exception_cannot_occur() # to ignore the ZeroDivisionError of '%' - return hop.genop('oosend', [c_build, v_buf], resulttype=ootype.String) + return hop.genop('oosend', [c_build, v_buf], resulttype=RESULT) do_stringformat = classmethod(do_stringformat) diff --git a/pypy/rpython/rpbc.py b/pypy/rpython/rpbc.py --- a/pypy/rpython/rpbc.py +++ b/pypy/rpython/rpbc.py @@ -11,7 +11,7 @@ mangle, inputdesc, warning, impossible_repr from pypy.rpython import rclass from pypy.rpython import robject -from pypy.rpython.annlowlevel import llstr +from pypy.rpython.annlowlevel import llstr, llunicode from pypy.rpython import callparse diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py --- a/pypy/rpython/rstr.py +++ b/pypy/rpython/rstr.py @@ -483,6 +483,8 @@ # xxx suboptimal, maybe return str(unicode(ch)) + def ll_unicode(self, ch): + return unicode(ch) class __extend__(AbstractCharRepr, AbstractUniCharRepr): diff --git a/pypy/rpython/test/test_runicode.py b/pypy/rpython/test/test_runicode.py --- a/pypy/rpython/test/test_runicode.py +++ b/pypy/rpython/test/test_runicode.py @@ -1,3 +1,4 @@ +# -*- encoding: utf-8 -*- from pypy.rpython.lltypesystem.lltype import malloc from pypy.rpython.lltypesystem.rstr import LLHelpers, UNICODE @@ -194,7 +195,16 @@ assert self.interpret(fn, [u'(']) == False assert self.interpret(fn, [u'\u1058']) == False assert self.interpret(fn, [u'X']) == True - + + def test_strformat_unicode_arg(self): + const = self.const + def percentS(s): + return const("before %s after") % (s,) + # + res = self.interpret(percentS, [const(u'à')]) + assert self.ll_to_string(res) == const(u'before à after') + # + def unsupported(self): py.test.skip("not supported") @@ -202,12 +212,6 @@ test_upper = unsupported test_lower = unsupported test_splitlines = unsupported - test_strformat = unsupported - test_strformat_instance = unsupported - test_strformat_nontuple = unsupported - test_percentformat_instance = unsupported - test_percentformat_tuple = unsupported - test_percentformat_list = unsupported test_int = unsupported test_int_valueerror = unsupported test_float = unsupported _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit