Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r60646:1b48d48dc26c
Date: 2013-01-28 17:08 -0800
http://bitbucket.org/pypy/pypy/changeset/1b48d48dc26c/
Log: cpython issue3297: fix parsing of surrogates w/ wide builds
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from pypy.interpreter.error import OperationError
from pypy.interpreter import unicodehelper
from rpython.rlib.rstring import StringBuilder
@@ -58,7 +59,10 @@
# latin-1; So multibyte sequences must be escaped.
lis = [] # using a list to assemble the value
end = q
- # Worst case: "\XX" may become "\u005c\uHHLL" (12 bytes)
+ # Worst case:
+ # "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+ # "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes),
+ # or ~1:6
while ps < end:
if s[ps] == '\\':
lis.append(s[ps])
@@ -70,13 +74,15 @@
# instead.
lis.append("u005c")
if ord(s[ps]) & 0x80: # XXX inefficient
- w, ps = decode_utf8(space, s, ps, end, "utf-16-be")
+ w, ps = decode_utf8(space, s, ps, end, "utf-32-be")
rn = len(w)
- assert rn % 2 == 0
- for i in range(0, rn, 2):
- lis.append('\\u')
+ assert rn % 4 == 0
+ for i in range(0, rn, 4):
+ lis.append('\\U')
lis.append(hexbyte(ord(w[i])))
lis.append(hexbyte(ord(w[i+1])))
+ lis.append(hexbyte(ord(w[i+2])))
+ lis.append(hexbyte(ord(w[i+3])))
else:
lis.append(s[ps])
ps += 1
diff --git a/pypy/interpreter/test/test_exec.py b/pypy/interpreter/test/test_exec.py
--- a/pypy/interpreter/test/test_exec.py
+++ b/pypy/interpreter/test/test_exec.py
@@ -199,3 +199,11 @@
x = ns['x']
assert len(x) == 6
assert ord(x[0]) == 0x0439
+
+ def test_issue3297(self):
+ c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
+ d = {}
+ exec(c, d)
+ assert d['a'] == d['b']
+ assert len(d['a']) == len(d['b'])
+ assert ascii(d['a']) == ascii(d['b'])
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit