[pypy-commit] pypy unicode-utf8: Tests and fixes for 'allow_surrogates=True' in various unicode methods

arigo Thu, 23 Nov 2017 01:34:46 -0800

Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r93135:16bfad77e3d5
Date: 2017-11-23 10:33 +0100
http://bitbucket.org/pypy/pypy/changeset/16bfad77e3d5/


Log:    Tests and fixes for 'allow_surrogates=True' in various unicode
        methods

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -299,6 +299,7 @@
         assert u"Brown Fox".title() == u"Brown Fox"
         assert u"bro!wn fox".title() == u"Bro!Wn Fox"
         assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox"
+        assert u'\ud800'.title() == u'\ud800'
 
     def test_istitle(self):
         assert u"".istitle() == False
@@ -328,10 +329,12 @@
         assert u'A'.lower() == u'a'
         assert u'\u0105'.lower() == u'\u0105'
         assert u'\u0104'.lower() == u'\u0105'
+        assert u'\ud800'.lower() == u'\ud800'
         assert u'a'.upper() == u'A'
         assert u'A'.upper() == u'A'
         assert u'\u0105'.upper() == u'\u0104'
         assert u'\u0104'.upper() == u'\u0104'
+        assert u'\ud800'.upper() == u'\ud800'
 
     def test_capitalize(self):
         assert u"brown fox".capitalize() == u"Brown fox"
@@ -354,6 +357,8 @@
         # check with Ll chars with no upper - nothing changes here
         assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
                 u'\u019b\u1d00\u1d86\u0221\u1fb7')
+        assert u'\ud800'.capitalize() == u'\ud800'
+        assert u'xx\ud800'.capitalize() == u'Xx\ud800'
 
     def test_rjust(self):
         s = u"abc"
@@ -844,6 +849,7 @@
 
     def test_swapcase(self):
         assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf'
+        assert u'\ud800'.swapcase() == u'\ud800'
 
     def test_buffer(self):
         buf = buffer(u'XY')
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -357,7 +357,7 @@
                 ch = unicodedb.toupper(ch)
             if ch >= 0x80:
                 flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
-            rutf8.unichr_as_utf8_append(builder, ch)
+            rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True)
         return W_UnicodeObject(builder.build(), self._length, flag)
 
     def descr_title(self, space):
@@ -382,7 +382,7 @@
                 ch = unicodedb.tolower(ch)
             if ch >= 0x80:
                 flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
-            rutf8.unichr_as_utf8_append(builder, ch)
+            rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True)
             previous_is_cased = unicodedb.iscased(ch)
         return builder.build(), flag
 
@@ -541,7 +541,7 @@
             lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos))
             if lower >= 0x80:
                 flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
-            rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates?
+            rutf8.unichr_as_utf8_append(builder, lower, allow_surrogates=True)
             pos = rutf8.next_codepoint_pos(self._utf8, pos)
         return W_UnicodeObject(builder.build(), self._len(), flag)
 
@@ -721,7 +721,7 @@
             if uchar >= 0x80:
                 flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
             i = rutf8.next_codepoint_pos(value, i)
-            rutf8.unichr_as_utf8_append(builder, uchar)
+            rutf8.unichr_as_utf8_append(builder, uchar, allow_surrogates=True)
         return W_UnicodeObject(builder.build(), self._length, flag)
 
     @unwrap_spec(width=int)
@@ -831,14 +831,14 @@
         uchar = rutf8.codepoint_at_pos(value, 0)
         i = rutf8.next_codepoint_pos(value, 0)
         ch = unicodedb.toupper(uchar)
-        rutf8.unichr_as_utf8_append(builder, ch)
+        rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True)
         if ch >= 0x80:
             flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
         while i < len(value):
             uchar = rutf8.codepoint_at_pos(value, i)
             i = rutf8.next_codepoint_pos(value, i)
             ch = unicodedb.tolower(uchar)
-            rutf8.unichr_as_utf8_append(builder, ch)
+            rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True)
             if ch >= 0x80:
                 flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
         return W_UnicodeObject(builder.build(), self._len(), flag)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: Tests and fixes for 'allow_surrogates=True' in various unicode methods

Reply via email to