Author: Matti Picus <matti.pi...@gmail.com>
Branch: unicode-utf8-py3
Changeset: r95678:488e3a462f8d
Date: 2019-01-20 11:35 +0200
http://bitbucket.org/pypy/pypy/changeset/488e3a462f8d/

Log:    Add a test and a fix for capitalize() when lowercasing a character yields more than one codepoint

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -407,6 +407,8 @@
         # check with Ll chars with no upper - nothing changes here
         assert ('\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
                 '\u019b\u1d00\u1d86\u0221\u1fb7')
+        # cpython issue 17252 for i_dot
+        assert u'h\u0130'.capitalize() == u'H\u0069\u0307'
 
     def test_changed_in_unicodedata_version_8(self):
         assert u'\u025C'.upper() == u'\uA7AB'
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -855,8 +855,9 @@
         for c in codes:
             builder.append_code(c)
         for ch in it:
-            ch = unicodedb.tolower(ch)
-            builder.append_code(ch)
+            ch = unicodedb.tolower_full(ch)
+            for ch1 in ch:
+                builder.append_code(ch1)
         return self.from_utf8builder(builder)
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(u' '))
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to