Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95441:e0b40146105a
Date: 2018-12-02 14:08 -0800
http://bitbucket.org/pypy/pypy/changeset/e0b40146105a/
Log: test, fix formatting '%.2s' for unicode
diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py
--- a/pypy/objspace/std/formatting.py
+++ b/pypy/objspace/std/formatting.py
@@ -335,7 +335,8 @@
@specialize.arg(2)
def std_wp(self, r, is_string=False):
- length = len(r)
+ # r is utf8-encoded unicode
+ length = rutf8.codepoints_in_utf8(r)
if do_unicode and is_string:
# convert string to unicode using the default encoding
r = self.space.utf8_w(self.space.newbytes(r))
@@ -346,6 +347,10 @@
return
if prec >= 0 and prec < length:
length = prec # ignore the end of the string if too long
+ if do_unicode:
+ # XXX could use W_UnicodeObject.descr_getslice, but that would
+ # require a refactor to use the w_val, not r
+ length = rutf8._pos_at_index(r, length)
result = self.result
padding = self.width - length
if padding < 0:
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -1002,7 +1002,8 @@
def test_formatting_uchr(self):
assert '%c' % '\U00021483' == '\U00021483'
- def test_formatting_unicode__str__(self):
+ def test_formatting_unicode__str__0(self):
+ assert '%.2s' % "a\xe9\u20ac" == 'a\xe9'
class A:
def __init__(self, num):
self.num = num
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit