[pypy-commit] pypy py3k: Issue1262: str.__repr__ now passes all printable characters.

amauryfa Mon, 24 Sep 2012 14:47:26 -0700

Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3k
Changeset: r57533:a2b494570133
Date: 2012-09-23 22:38 +0200
http://bitbucket.org/pypy/pypy/changeset/a2b494570133/


Log:    Issue1262: str.__repr__ now passes all printable characters. Patch
        by arielby.

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -556,9 +556,8 @@
         raises(UnicodeError, b"\xc2".decode, "utf-8")
         assert b'\xe1\x80'.decode('utf-8', 'replace') == "\ufffd"
 
-    def test_repr_bug(self):
-        # we need to implement PEP 3138 for this to work
-        # http://www.python.org/dev/peps/pep-3138/
+    def test_repr_printable(self):
+        # PEP 3138: __repr__ respects printable characters.
         x = '\u027d'
         y = "'\u027d'"
         assert (repr(x) == y)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -12,7 +12,8 @@
 from pypy.rlib.objectmodel import compute_hash, specialize
 from pypy.rlib.objectmodel import compute_unique_id
 from pypy.rlib.rstring import UnicodeBuilder
-from pypy.rlib.runicode import unicode_encode_unicode_escape
+from pypy.rlib.runicode import (
+    unicode_encode_unicode_escape, unicode_escape_nonprintable)
 from pypy.module.unicodedata import unicodedb
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib import jit
@@ -857,7 +858,7 @@
 def repr__Unicode(space, w_unicode):
     chars = w_unicode._value
     size = len(chars)
-    s = unicode_encode_unicode_escape(chars, size, "strict", quotes=True)
+    s = unicode_escape_nonprintable(chars, size, "strict", quotes=True)
     return space.wrap(s)
 
 def mod__Unicode_ANY(space, w_format, w_values):
diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -1210,28 +1210,37 @@
 
     return builder.build(), pos
 
-def make_unicode_escape_function():
+def make_unicode_escape_function(for_repr=False):
     # Python3 has two similar escape functions: One to implement
     # encode('unicode_escape') and which outputs bytes, and unicode.__repr__
     # which outputs unicode.  They cannot share RPython code, so we generate
     # them with the template below.
-    # Python2 does not really need this, but it reduces diffs between branches.
+
+    if for_repr:
+        STRING_BUILDER = UnicodeBuilder
+        STR = unicode
+        CHR = UNICHR
+    else:
+        STRING_BUILDER = StringBuilder
+        STR = str
+        CHR = chr
+
     def unicode_escape(s, size, errors, errorhandler=None, quotes=False):
         # errorhandler is not used: this function cannot cause Unicode errors
-        result = StringBuilder(size)
+        result = STRING_BUILDER(size)
 
         if quotes:
             if s.find(u'\'') != -1 and s.find(u'\"') == -1:
                 quote = ord('\"')
-                result.append('"')
+                result.append(STR('"'))
             else:
                 quote = ord('\'')
-                result.append('\'')
+                result.append(STR('\''))
         else:
             quote = 0
 
             if size == 0:
-                return ''
+                return STR('')
 
         pos = 0
         while pos < size:
@@ -1240,8 +1249,8 @@
 
             # Escape quotes
             if quotes and (oc == quote or ch == '\\'):
-                result.append('\\')
-                result.append(chr(oc))
+                result.append(STR('\\'))
+                result.append(CHR(oc))
                 pos += 1
                 continue
 
@@ -1256,7 +1265,7 @@
 
                 if 0xDC00 <= oc2 <= 0xDFFF:
                     ucs = (((oc & 0x03FF) << 10) | (oc2 & 0x03FF)) + 0x00010000
-                    raw_unicode_escape_helper(result, ucs)
+                    char_escape_helper(result, ucs)
                     pos += 1
                     continue
                 # Fall through: isolated surrogates are copied as-is
@@ -1264,48 +1273,54 @@
 
             # Map special whitespace to '\t', \n', '\r'
             if ch == '\t':
-                result.append('\\t')
+                result.append(STR('\\t'))
             elif ch == '\n':
-                result.append('\\n')
+                result.append(STR('\\n'))
             elif ch == '\r':
-                result.append('\\r')
+                result.append(STR('\\r'))
             elif ch == '\\':
-                result.append('\\\\')
+                result.append(STR('\\\\'))
 
             # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
-            elif oc < 32 or oc >= 0x7F:
-                raw_unicode_escape_helper(result, oc)
+            elif for_repr and not unicodedb.isprintable(oc):
+                char_escape_helper(result, oc)
+            elif not for_repr and (oc < 32 or oc >= 0x7F):
+                char_escape_helper(result, oc)
 
             # Copy everything else as-is
             else:
-                result.append(chr(oc))
+                result.append(CHR(oc))
             pos += 1
 
         if quotes:
-            result.append(chr(quote))
+            result.append(CHR(quote))
         return result.build()
 
     def char_escape_helper(result, char):
         num = hex(char)
+        if STR is unicode:
+            num = num.decode('ascii')
         if char >= 0x10000:
-            result.append("\\U")
+            result.append(STR("\\U"))
             zeros = 8
         elif char >= 0x100:
-            result.append("\\u")
+            result.append(STR("\\u"))
             zeros = 4
         else:
-            result.append("\\x")
+            result.append(STR("\\x"))
             zeros = 2
         lnum = len(num)
         nb = zeros + 2 - lnum # num starts with '0x'
         if nb > 0:
-            result.append_multiple_char('0', nb)
+            result.append_multiple_char(STR('0'), nb)
         result.append_slice(num, 2, lnum)
 
     return unicode_escape, char_escape_helper
 
 (unicode_encode_unicode_escape, raw_unicode_escape_helper
  ) = make_unicode_escape_function()
+(unicode_escape_nonprintable, _
+ ) = make_unicode_escape_function(for_repr=True)
 
 # ____________________________________________________________
 # Raw unicode escape
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy py3k: Issue1262: str.__repr__ now passes all printable characters.

Reply via email to

[pypy-commit] pypy py3k: Issue1262: str.__repr__ now passes all printable characters.