Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95441:e0b40146105a
Date: 2018-12-02 14:08 -0800
http://bitbucket.org/pypy/pypy/changeset/e0b40146105a/

Log:    test, fix formatting '%.2s' for unicode

diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -335,7 +335,8 @@
 
         @specialize.arg(2)
         def std_wp(self, r, is_string=False):
-            length = len(r)
+            # r is utf8-encoded unicode
+            length = rutf8.codepoints_in_utf8(r)
             if do_unicode and is_string:
                 # convert string to unicode using the default encoding
                 r = self.space.utf8_w(self.space.newbytes(r))
@@ -346,6 +347,10 @@
                 return
             if prec >= 0 and prec < length:
                 length = prec   # ignore the end of the string if too long
+            if do_unicode:
+                # XXX could use W_UnicodeObject.descr_getslice, but that would
+                # require a refactor to use the w_val, not r
+                length = rutf8._pos_at_index(r, length)
             result = self.result
             padding = self.width - length
             if padding < 0:
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1002,7 +1002,8 @@
     def test_formatting_uchr(self):
         assert '%c' % '\U00021483' == '\U00021483'
 
-    def test_formatting_unicode__str__(self):
+    def test_formatting_unicode__str__0(self):
+        assert '%.2s' % "a\xe9\u20ac" == 'a\xe9'
         class A:
             def __init__(self, num):
                 self.num = num
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to