[pypy-commit] pypy py3k: The \u is no longer an escape sequence in raw unicode strings:

amauryfa Mon, 03 Sep 2012 13:39:35 -0700

Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3k
Changeset: r57111:2851a02c2eba
Date: 2012-09-02 23:18 +0200
http://bitbucket.org/pypy/pypy/changeset/2851a02c2eba/


Log:    \u is no longer an escape sequence in raw unicode strings: r'\u'
        is a string of two characters.

diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -48,7 +48,7 @@
                                         'unmatched triple quotes in literal')
         q -= 2
 
-    if unicode_literal: # XXX Py_UnicodeFlag is ignored for now
+    if unicode_literal and not rawmode: # XXX Py_UnicodeFlag is ignored for now
         if encoding is None or encoding == "iso-8859-1":
             # 'unicode_escape' expects latin-1 bytes, string is ready.
             buf = s
@@ -87,29 +87,19 @@
             bufq = len(buf)
         assert 0 <= bufp <= bufq
         substr = buf[bufp:bufq]
-        if rawmode:
-            v = unicodehelper.PyUnicode_DecodeRawUnicodeEscape(space, substr)
-        else:
-            v = unicodehelper.PyUnicode_DecodeUnicodeEscape(space, substr)
+        v = unicodehelper.PyUnicode_DecodeUnicodeEscape(space, substr)
         return space.wrap(v)
 
-    need_encoding = (encoding is not None and
-                     encoding != "utf-8" and encoding != "utf8" and
-                     encoding != "iso-8859-1")
     assert 0 <= ps <= q
     substr = s[ps : q]
-    if rawmode or '\\' not in s[ps:]:
-        if need_encoding:
-            w_u = space.wrap(unicodehelper.PyUnicode_DecodeUTF8(space, substr))
-            w_v = unicodehelper.PyUnicode_AsEncodedString(space, w_u, space.wrap(encoding))
-            return w_v
+    if rawmode or '\\' not in substr:
+        if not unicode_literal:
+            return space.wrapbytes(substr)
         else:
-            return space.wrapbytes(substr)
+            v = unicodehelper.PyUnicode_DecodeUTF8(space, substr)
+            return space.wrap(v)
 
-    enc = None
-    if need_encoding:
-         enc = encoding
-    v = PyString_DecodeEscape(space, substr, enc)
+    v = PyString_DecodeEscape(space, substr, encoding)
     return space.wrapbytes(v)
 
 def hexbyte(val):
diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py
--- a/pypy/interpreter/pyparser/test/test_parsestring.py
+++ b/pypy/interpreter/pyparser/test/test_parsestring.py
@@ -62,6 +62,11 @@
         w_ret = parsestring.parsestr(space, None, "r'hi'")
         assert space.isinstance_w(w_ret, space.w_unicode)
 
+    def test_raw_unicode_literals(self):
+        space = self.space
+        w_ret = parsestring.parsestr(space, None, "r'\u'")
+        assert space.int_w(space.len(w_ret)) == 2
+
     def test_bytes(self):
         space = self.space
         b = "b'hello'"
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3k: The \u is no longer an escape sequence in raw unicode strings:

Reply via email to