Why not? We only have wide builds, don't we?
On Sun, Jan 22, 2012 at 9:57 PM, amauryfa <[email protected]> wrote: > Author: Amaury Forgeot d'Arc <[email protected]> > Branch: merge-2.7.2 > Changeset: r51662:693b08144e00 > Date: 2012-01-22 20:24 +0100 > http://bitbucket.org/pypy/pypy/changeset/693b08144e00/ > > Log: Implement CPython issue5057: do not const-fold a unicode.__getitem__ > operation which returns a non-BMP character, this produces .pyc > files which depends on the unicode width > > diff --git a/pypy/interpreter/astcompiler/optimize.py > b/pypy/interpreter/astcompiler/optimize.py > --- a/pypy/interpreter/astcompiler/optimize.py > +++ b/pypy/interpreter/astcompiler/optimize.py > @@ -5,6 +5,7 @@ > from pypy.tool import stdlib_opcode as ops > from pypy.interpreter.error import OperationError > from pypy.rlib.unroll import unrolling_iterable > +from pypy.rlib.runicode import MAXUNICODE > > > def optimize_ast(space, tree, compile_info): > @@ -289,8 +290,30 @@ > w_idx = subs.slice.as_constant() > if w_idx is not None: > try: > - return ast.Const(self.space.getitem(w_obj, w_idx), > subs.lineno, subs.col_offset) > + w_const = self.space.getitem(w_obj, w_idx) > except OperationError: > - # Let exceptions propgate at runtime. > - pass > + # Let exceptions propagate at runtime. > + return subs > + > + # CPython issue5057: if v is unicode, there might > + # be differences between wide and narrow builds in > + # cases like u'\U00012345'[0]. > + # Wide builds will return a non-BMP char, whereas > + # narrow builds will return a surrogate. In both > + # the cases skip the optimization in order to > + # produce compatible pycs. 
> + if (self.space.isinstance_w(w_obj, self.space.w_unicode) > + and > + self.space.isinstance_w(w_const, > self.space.w_unicode)): > + unistr = self.space.unicode_w(w_const) > + if len(unistr) == 1: > + ch = ord(unistr[0]) > + else: > + ch = 0 > + if (ch > 0xFFFF or > + (MAXUNICODE == 0xFFFF and 0xD800 <= ch <= > OxDFFFF)): > + return subs > + > + return ast.Const(w_const, subs.lineno, subs.col_offset) > + > return subs > diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py > b/pypy/interpreter/astcompiler/test/test_compiler.py > --- a/pypy/interpreter/astcompiler/test/test_compiler.py > +++ b/pypy/interpreter/astcompiler/test/test_compiler.py > @@ -838,6 +838,30 @@ > # Just checking this doesn't crash out > self.count_instructions(source) > > + def test_const_fold_unicode_subscr(self): > + source = """def f(): > + return u"abc"[0] > + """ > + counts = self.count_instructions(source) > + assert counts == {ops.LOAD_CONST: 1, ops.RETURN_VALUE: 1} > + > + # getitem outside of the BMP should not be optimized > + source = """def f(): > + return u"\U00012345"[0] > + """ > + counts = self.count_instructions(source) > + assert counts == {ops.LOAD_CONST: 2, ops.BINARY_SUBSCR: 1, > + ops.RETURN_VALUE: 1} > + > + # getslice is not yet optimized. > + # Still, check a case which yields the empty string. > + source = """def f(): > + return u"abc"[:0] > + """ > + counts = self.count_instructions(source) > + assert counts == {ops.LOAD_CONST: 2, ops.SLICE+2: 1, > + ops.RETURN_VALUE: 1} > + > def test_remove_dead_code(self): > source = """def f(x): > return 5 > _______________________________________________ > pypy-commit mailing list > [email protected] > http://mail.python.org/mailman/listinfo/pypy-commit _______________________________________________ pypy-dev mailing list [email protected] http://mail.python.org/mailman/listinfo/pypy-dev
