Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r69876:d3304f165e53
Date: 2014-03-11 16:44 -0700
http://bitbucket.org/pypy/pypy/changeset/d3304f165e53/
Log: Re-add support for u'' literals (PEP 414). This doesn't land until
CPython 3.3, but it may allow some code targeting 3.3 to run on our
current 3.2 target. It also eases syncing test cases from the
default branch.
diff --git a/pypy/interpreter/pyparser/dfa_generated.py
b/pypy/interpreter/pyparser/dfa_generated.py
--- a/pypy/interpreter/pyparser/dfa_generated.py
+++ b/pypy/interpreter/pyparser/dfa_generated.py
@@ -27,7 +27,7 @@
'I': 1, 'J': 1, 'K': 1, 'L': 1,
'M': 1, 'N': 1, 'O': 1, 'P': 1,
'Q': 1, 'R': 3, 'S': 1, 'T': 1,
- 'U': 1, 'V': 1, 'W': 1, 'X': 1,
+ 'U': 3, 'V': 1, 'W': 1, 'X': 1,
'Y': 1, 'Z': 1, '[': 14, '\\': 18,
']': 14, '^': 13, '_': 1, '`': 14,
'a': 1, 'b': 2, 'c': 1, 'd': 1,
@@ -35,7 +35,7 @@
'i': 1, 'j': 1, 'k': 1, 'l': 1,
'm': 1, 'n': 1, 'o': 1, 'p': 1,
'q': 1, 'r': 3, 's': 1, 't': 1,
- 'u': 1, 'v': 1, 'w': 1, 'x': 1,
+ 'u': 3, 'v': 1, 'w': 1, 'x': 1,
'y': 1, 'z': 1, '{': 14, '|': 13,
'}': 14, '~': 14, '\x80': 1},
# 1
diff --git a/pypy/interpreter/pyparser/gendfa.py
b/pypy/interpreter/pyparser/gendfa.py
--- a/pypy/interpreter/pyparser/gendfa.py
+++ b/pypy/interpreter/pyparser/gendfa.py
@@ -149,9 +149,11 @@
funny = group(states, operator, bracket, special)
# ____________________________________________________________
def makeStrPrefix ():
- return chain(states,
- maybe(states, groupStr(states, "bB")),
- maybe(states, groupStr(states, "rR")))
+ return group(states,
+ chain(states,
+ maybe(states, groupStr(states, "bB")),
+ maybe(states, groupStr(states, "rR"))),
+ maybe(states, groupStr(states, "uU")))
# ____________________________________________________________
contStr = group(states,
chain(states,
diff --git a/pypy/interpreter/pyparser/parsestring.py
b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -21,13 +21,18 @@
quote = s[ps]
rawmode = False
unicode_literal = True
+ saw_u = False
# string decoration handling
if quote == 'b' or quote == 'B':
ps += 1
quote = s[ps]
unicode_literal = False
- if quote == 'r' or quote == 'R':
+ elif quote == 'u' or quote == 'U':
+ ps += 1
+ quote = s[ps]
+ saw_u = True
+ if not saw_u and quote == 'r' or quote == 'R':
ps += 1
quote = s[ps]
rawmode = True
diff --git a/pypy/interpreter/pyparser/pytokenize.py
b/pypy/interpreter/pyparser/pytokenize.py
--- a/pypy/interpreter/pyparser/pytokenize.py
+++ b/pypy/interpreter/pyparser/pytokenize.py
@@ -25,6 +25,8 @@
'"' : doubleDFA,
'r' : None,
'R' : None,
+ "u" : None,
+ "U" : None,
'b' : None,
'B' : None}
@@ -33,6 +35,8 @@
prefix = uniPrefix + rawPrefix
endDFAs[prefix + "'''"] = single3DFA
endDFAs[prefix + '"""'] = double3DFA
+endDFAs["u'''"] = single3DFA
+endDFAs['U"""'] = double3DFA
whiteSpaceStatesAccepts = [True]
whiteSpaceStates = [{'\t': 0, ' ': 0, '\x0c': 0}]
@@ -44,6 +48,7 @@
triple_quoted = {}
for t in ("'''", '"""',
"r'''", 'r"""', "R'''", 'R"""',
+ "u'''", 'u"""', "U'''", 'U"""',
"b'''", 'b"""', "B'''", 'B"""',
"br'''", 'br"""', "Br'''", 'Br"""',
"bR'''", 'bR"""', "BR'''", 'BR"""'):
@@ -51,6 +56,7 @@
single_quoted = {}
for t in ("'", '"',
"r'", 'r"', "R'", 'R"',
+ "u'", 'u"', "U'", 'U"',
"b'", 'b"', "B'", 'B"',
"br'", 'br"', "Br'", 'Br"',
"bR'", 'bR"', "BR'", 'BR"'):
diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py
b/pypy/interpreter/pyparser/test/test_parsestring.py
--- a/pypy/interpreter/pyparser/test/test_parsestring.py
+++ b/pypy/interpreter/pyparser/test/test_parsestring.py
@@ -62,6 +62,19 @@
ret = space.unwrap(w_ret)
assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'")
+ def test_unicode_pep414(self):
+ space = self.space
+ for s in [u'hello world', u'hello\n world']:
+ self.parse_and_compare(repr(s), unicode(s))
+
+ self.parse_and_compare("u'''hello\\x42 world'''",
+ u'hello\x42 world')
+ self.parse_and_compare("u'''hello\\u0842 world'''",
+ u'hello\u0842 world')
+
+ space.raises_w(space.w_ValueError,
+ parsestring.parsestr, space, None, "ur'foo'")
+
def test_unicode_literals(self):
space = self.space
w_ret = parsestring.parsestr(space, None, repr("hello"))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit