Author: Philip Jenvey <[email protected]>
Branch: py3k
Changeset: r69876:d3304f165e53
Date: 2014-03-11 16:44 -0700
http://bitbucket.org/pypy/pypy/changeset/d3304f165e53/

Log:    Re-add support for u'' literals (PEP 414). This doesn't land until
        CPython 3.3, but it may allow some code targeting 3.3 to run on our
        current 3.2 target. It also eases syncing test cases from the
        default branch.

diff --git a/pypy/interpreter/pyparser/dfa_generated.py b/pypy/interpreter/pyparser/dfa_generated.py
--- a/pypy/interpreter/pyparser/dfa_generated.py
+++ b/pypy/interpreter/pyparser/dfa_generated.py
@@ -27,7 +27,7 @@
      'I': 1, 'J': 1, 'K': 1, 'L': 1,
      'M': 1, 'N': 1, 'O': 1, 'P': 1,
      'Q': 1, 'R': 3, 'S': 1, 'T': 1,
-     'U': 1, 'V': 1, 'W': 1, 'X': 1,
+     'U': 3, 'V': 1, 'W': 1, 'X': 1,
      'Y': 1, 'Z': 1, '[': 14, '\\': 18,
      ']': 14, '^': 13, '_': 1, '`': 14,
      'a': 1, 'b': 2, 'c': 1, 'd': 1,
@@ -35,7 +35,7 @@
      'i': 1, 'j': 1, 'k': 1, 'l': 1,
      'm': 1, 'n': 1, 'o': 1, 'p': 1,
      'q': 1, 'r': 3, 's': 1, 't': 1,
-     'u': 1, 'v': 1, 'w': 1, 'x': 1,
+     'u': 3, 'v': 1, 'w': 1, 'x': 1,
      'y': 1, 'z': 1, '{': 14, '|': 13,
      '}': 14, '~': 14, '\x80': 1},
     # 1
diff --git a/pypy/interpreter/pyparser/gendfa.py b/pypy/interpreter/pyparser/gendfa.py
--- a/pypy/interpreter/pyparser/gendfa.py
+++ b/pypy/interpreter/pyparser/gendfa.py
@@ -149,9 +149,11 @@
     funny = group(states, operator, bracket, special)
     # ____________________________________________________________
     def makeStrPrefix ():
-        return chain(states,
-                     maybe(states, groupStr(states, "bB")),
-                     maybe(states, groupStr(states, "rR")))
+        return group(states,
+                     chain(states,
+                           maybe(states, groupStr(states, "bB")),
+                           maybe(states, groupStr(states, "rR"))),
+                     maybe(states, groupStr(states, "uU")))
     # ____________________________________________________________
     contStr = group(states,
                     chain(states,
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -21,13 +21,18 @@
     quote = s[ps]
     rawmode = False
     unicode_literal = True
+    saw_u = False
 
     # string decoration handling
     if quote == 'b' or quote == 'B':
         ps += 1
         quote = s[ps]
         unicode_literal = False
-    if quote == 'r' or quote == 'R':
+    elif quote == 'u' or quote == 'U':
+        ps += 1
+        quote = s[ps]
+        saw_u = True
+    if not saw_u and quote == 'r' or quote == 'R':
         ps += 1
         quote = s[ps]
         rawmode = True
diff --git a/pypy/interpreter/pyparser/pytokenize.py b/pypy/interpreter/pyparser/pytokenize.py
--- a/pypy/interpreter/pyparser/pytokenize.py
+++ b/pypy/interpreter/pyparser/pytokenize.py
@@ -25,6 +25,8 @@
            '"' : doubleDFA,
            'r' : None,
            'R' : None,
+           "u" : None,
+           "U" : None,
            'b' : None,
            'B' : None}
 
@@ -33,6 +35,8 @@
         prefix = uniPrefix + rawPrefix
         endDFAs[prefix + "'''"] = single3DFA
         endDFAs[prefix + '"""'] = double3DFA
+endDFAs["u'''"] = single3DFA
+endDFAs['U"""'] = double3DFA
 
 whiteSpaceStatesAccepts = [True]
 whiteSpaceStates = [{'\t': 0, ' ': 0, '\x0c': 0}]
@@ -44,6 +48,7 @@
 triple_quoted = {}
 for t in ("'''", '"""',
           "r'''", 'r"""', "R'''", 'R"""',
+          "u'''", 'u"""', "U'''", 'U"""',
           "b'''", 'b"""', "B'''", 'B"""',
           "br'''", 'br"""', "Br'''", 'Br"""',
           "bR'''", 'bR"""', "BR'''", 'BR"""'):
@@ -51,6 +56,7 @@
 single_quoted = {}
 for t in ("'", '"',
           "r'", 'r"', "R'", 'R"',
+          "u'", 'u"', "U'", 'U"',
           "b'", 'b"', "B'", 'B"',
           "br'", 'br"', "Br'", 'Br"',
           "bR'", 'bR"', "BR'", 'BR"'):
diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py
--- a/pypy/interpreter/pyparser/test/test_parsestring.py
+++ b/pypy/interpreter/pyparser/test/test_parsestring.py
@@ -62,6 +62,19 @@
         ret = space.unwrap(w_ret)
         assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'")
 
+    def test_unicode_pep414(self):
+        space = self.space
+        for s in [u'hello world', u'hello\n world']:
+            self.parse_and_compare(repr(s), unicode(s))
+
+        self.parse_and_compare("u'''hello\\x42 world'''",
+                               u'hello\x42 world')
+        self.parse_and_compare("u'''hello\\u0842 world'''",
+                               u'hello\u0842 world')
+
+        space.raises_w(space.w_ValueError,
+                       parsestring.parsestr, space, None, "ur'foo'")
+
     def test_unicode_literals(self):
         space = self.space
         w_ret = parsestring.parsestr(space, None, repr("hello"))
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to