Author: Amaury Forgeot d'Arc <amaur...@gmail.com> Branch: merge-2.7.2 Changeset: r51662:693b08144e00 Date: 2012-01-22 20:24 +0100 http://bitbucket.org/pypy/pypy/changeset/693b08144e00/
Log: Implement CPython issue5057: do not const-fold a unicode.__getitem__ operation that returns a non-BMP character, since this would produce .pyc files that depend on the unicode width diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py --- a/pypy/interpreter/astcompiler/optimize.py +++ b/pypy/interpreter/astcompiler/optimize.py @@ -5,6 +5,7 @@ from pypy.tool import stdlib_opcode as ops from pypy.interpreter.error import OperationError from pypy.rlib.unroll import unrolling_iterable +from pypy.rlib.runicode import MAXUNICODE def optimize_ast(space, tree, compile_info): @@ -289,8 +290,30 @@ w_idx = subs.slice.as_constant() if w_idx is not None: try: - return ast.Const(self.space.getitem(w_obj, w_idx), subs.lineno, subs.col_offset) + w_const = self.space.getitem(w_obj, w_idx) except OperationError: - # Let exceptions propgate at runtime. - pass + # Let exceptions propagate at runtime. + return subs + + # CPython issue5057: if v is unicode, there might + # be differences between wide and narrow builds in + # cases like u'\U00012345'[0]. + # Wide builds will return a non-BMP char, whereas + # narrow builds will return a surrogate. In both + # the cases skip the optimization in order to + # produce compatible pycs. 
+ if (self.space.isinstance_w(w_obj, self.space.w_unicode) + and + self.space.isinstance_w(w_const, self.space.w_unicode)): + unistr = self.space.unicode_w(w_const) + if len(unistr) == 1: + ch = ord(unistr[0]) + else: + ch = 0 + if (ch > 0xFFFF or + (MAXUNICODE == 0xFFFF and 0xD800 <= ch <= 0xDFFF)): + return subs + + return ast.Const(w_const, subs.lineno, subs.col_offset) + return subs diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -838,6 +838,30 @@ # Just checking this doesn't crash out self.count_instructions(source) + def test_const_fold_unicode_subscr(self): + source = """def f(): + return u"abc"[0] + """ + counts = self.count_instructions(source) + assert counts == {ops.LOAD_CONST: 1, ops.RETURN_VALUE: 1} + + # getitem outside of the BMP should not be optimized + source = """def f(): + return u"\U00012345"[0] + """ + counts = self.count_instructions(source) + assert counts == {ops.LOAD_CONST: 2, ops.BINARY_SUBSCR: 1, + ops.RETURN_VALUE: 1} + + # getslice is not yet optimized. + # Still, check a case which yields the empty string. + source = """def f(): + return u"abc"[:0] + """ + counts = self.count_instructions(source) + assert counts == {ops.LOAD_CONST: 2, ops.SLICE+2: 1, + ops.RETURN_VALUE: 1} + def test_remove_dead_code(self): source = """def f(x): return 5 _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit