Author: Armin Rigo <[email protected]>
Branch: unicode-utf8
Changeset: r93135:16bfad77e3d5
Date: 2017-11-23 10:33 +0100
http://bitbucket.org/pypy/pypy/changeset/16bfad77e3d5/
Log: Tests and fixes for 'allow_surrogates=True' in various unicode
methods
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -299,6 +299,7 @@
assert u"Brown Fox".title() == u"Brown Fox"
assert u"bro!wn fox".title() == u"Bro!Wn Fox"
assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox"
+ assert u'\ud800'.title() == u'\ud800'
def test_istitle(self):
assert u"".istitle() == False
@@ -328,10 +329,12 @@
assert u'A'.lower() == u'a'
assert u'\u0105'.lower() == u'\u0105'
assert u'\u0104'.lower() == u'\u0105'
+ assert u'\ud800'.lower() == u'\ud800'
assert u'a'.upper() == u'A'
assert u'A'.upper() == u'A'
assert u'\u0105'.upper() == u'\u0104'
assert u'\u0104'.upper() == u'\u0104'
+ assert u'\ud800'.upper() == u'\ud800'
def test_capitalize(self):
assert u"brown fox".capitalize() == u"Brown fox"
@@ -354,6 +357,8 @@
# check with Ll chars with no upper - nothing changes here
assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
u'\u019b\u1d00\u1d86\u0221\u1fb7')
+ assert u'\ud800'.capitalize() == u'\ud800'
+ assert u'xx\ud800'.capitalize() == u'Xx\ud800'
def test_rjust(self):
s = u"abc"
@@ -844,6 +849,7 @@
def test_swapcase(self):
assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf'
+ assert u'\ud800'.swapcase() == u'\ud800'
def test_buffer(self):
buf = buffer(u'XY')
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -357,7 +357,7 @@
ch = unicodedb.toupper(ch)
if ch >= 0x80:
flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
- rutf8.unichr_as_utf8_append(builder, ch)
+ rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True)
return W_UnicodeObject(builder.build(), self._length, flag)
def descr_title(self, space):
@@ -382,7 +382,7 @@
ch = unicodedb.tolower(ch)
if ch >= 0x80:
flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
- rutf8.unichr_as_utf8_append(builder, ch)
+ rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True)
previous_is_cased = unicodedb.iscased(ch)
return builder.build(), flag
@@ -541,7 +541,7 @@
lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos))
if lower >= 0x80:
flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
- rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates?
+ rutf8.unichr_as_utf8_append(builder, lower, allow_surrogates=True)
pos = rutf8.next_codepoint_pos(self._utf8, pos)
return W_UnicodeObject(builder.build(), self._len(), flag)
@@ -721,7 +721,7 @@
if uchar >= 0x80:
flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
i = rutf8.next_codepoint_pos(value, i)
- rutf8.unichr_as_utf8_append(builder, uchar)
+ rutf8.unichr_as_utf8_append(builder, uchar, allow_surrogates=True)
return W_UnicodeObject(builder.build(), self._length, flag)
@unwrap_spec(width=int)
@@ -831,14 +831,14 @@
uchar = rutf8.codepoint_at_pos(value, 0)
i = rutf8.next_codepoint_pos(value, 0)
ch = unicodedb.toupper(uchar)
- rutf8.unichr_as_utf8_append(builder, ch)
+ rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True)
if ch >= 0x80:
flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
while i < len(value):
uchar = rutf8.codepoint_at_pos(value, i)
i = rutf8.next_codepoint_pos(value, i)
ch = unicodedb.tolower(uchar)
- rutf8.unichr_as_utf8_append(builder, ch)
+ rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True)
if ch >= 0x80:
flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR)
return W_UnicodeObject(builder.build(), self._len(), flag)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit