Author: Maciej Fijalkowski <[email protected]>
Branch:
Changeset: r60237:320d0abbca64
Date: 2013-01-20 16:19 +0200
http://bitbucket.org/pypy/pypy/changeset/320d0abbca64/
Log: add a missing file
diff --git a/pypy/module/unicodedata/test_unicodedata.py
b/pypy/module/unicodedata/test_unicodedata.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/unicodedata/test_unicodedata.py
@@ -0,0 +1,103 @@
+
+class AppTestUnicodeData:
+ spaceconfig = dict(usemodules=('unicodedata',))
+
+ def test_hangul_syllables(self):
+ import unicodedata
+ # Test all leading, vowel and trailing jamo
+ # but not every combination of them.
+ for code, name in ((0xAC00, 'HANGUL SYLLABLE GA'),
+ (0xAE69, 'HANGUL SYLLABLE GGAEG'),
+ (0xB0D2, 'HANGUL SYLLABLE NYAGG'),
+ (0xB33B, 'HANGUL SYLLABLE DYAEGS'),
+ (0xB5A4, 'HANGUL SYLLABLE DDEON'),
+ (0xB80D, 'HANGUL SYLLABLE RENJ'),
+ (0xBA76, 'HANGUL SYLLABLE MYEONH'),
+ (0xBCDF, 'HANGUL SYLLABLE BYED'),
+ (0xBF48, 'HANGUL SYLLABLE BBOL'),
+ (0xC1B1, 'HANGUL SYLLABLE SWALG'),
+ (0xC41A, 'HANGUL SYLLABLE SSWAELM'),
+ (0xC683, 'HANGUL SYLLABLE OELB'),
+ (0xC8EC, 'HANGUL SYLLABLE JYOLS'),
+ (0xCB55, 'HANGUL SYLLABLE JJULT'),
+ (0xCDBE, 'HANGUL SYLLABLE CWEOLP'),
+ (0xD027, 'HANGUL SYLLABLE KWELH'),
+ (0xD290, 'HANGUL SYLLABLE TWIM'),
+ (0xD4F9, 'HANGUL SYLLABLE PYUB'),
+ (0xD762, 'HANGUL SYLLABLE HEUBS'),
+ (0xAE27, 'HANGUL SYLLABLE GYIS'),
+ (0xB090, 'HANGUL SYLLABLE GGISS'),
+ (0xB0AD, 'HANGUL SYLLABLE NANG'),
+ (0xB316, 'HANGUL SYLLABLE DAEJ'),
+ (0xB57F, 'HANGUL SYLLABLE DDYAC'),
+ (0xB7E8, 'HANGUL SYLLABLE RYAEK'),
+ (0xBA51, 'HANGUL SYLLABLE MEOT'),
+ (0xBCBA, 'HANGUL SYLLABLE BEP'),
+ (0xBF23, 'HANGUL SYLLABLE BBYEOH'),
+ (0xD7A3, 'HANGUL SYLLABLE HIH')):
+ assert unicodedata.name(unichr(code)) == name
+ assert unicodedata.lookup(name) == unichr(code)
+ # Test outside the range
+ raises(ValueError, unicodedata.name, unichr(0xAC00 - 1))
+ raises(ValueError, unicodedata.name, unichr(0xD7A3 + 1))
+
+ def test_cjk(self):
+ import sys
+ import unicodedata
+ cases = ((0x3400, 0x4DB5),
+ (0x4E00, 0x9FA5))
+ if unicodedata.unidata_version >= "5": # don't know the exact limit
+ cases = ((0x3400, 0x4DB5),
+ (0x4E00, 0x9FCB),
+ (0x20000, 0x2A6D6),
+ (0x2A700, 0x2B734))
+ elif unicodedata.unidata_version >= "4.1":
+ cases = ((0x3400, 0x4DB5),
+ (0x4E00, 0x9FBB),
+ (0x20000, 0x2A6D6))
+ for first, last in cases:
+ # Test at and inside the boundary
+ for i in (first, first + 1, last - 1, last):
+ charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
+ char = ('\\U%08X' % i).decode('unicode-escape')
+ assert unicodedata.name(char) == charname
+ assert unicodedata.lookup(charname) == char
+ # Test outside the boundary
+ for i in first - 1, last + 1:
+ charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
+ char = ('\\U%08X' % i).decode('unicode-escape')
+ try:
+ unicodedata.name(char)
+ except ValueError, e:
+ assert e.message == 'no such name'
+ raises(KeyError, unicodedata.lookup, charname)
+
+ def test_bug_1704793(self): # from CPython
+ import unicodedata
+ assert unicodedata.lookup("GOTHIC LETTER FAIHU") == u'\U00010346'
+
+ def test_normalize(self):
+ import unicodedata
+ raises(TypeError, unicodedata.normalize, 'x')
+
+ def test_normalize_wide(self):
+ import sys, unicodedata
+ if sys.maxunicode < 0x10ffff:
+ skip("requires a 'wide' python build.")
+ assert unicodedata.normalize('NFC', u'\U000110a5\U000110ba') ==
u'\U000110ab'
+
+ def test_linebreaks(self):
+ linebreaks = (0x0a, 0x0b, 0x0c, 0x0d, 0x85,
+ 0x1c, 0x1d, 0x1e, 0x2028, 0x2029)
+ for i in linebreaks:
+ for j in range(-2, 3):
+ lines = (unichr(i + j) + u'A').splitlines()
+ if i + j in linebreaks:
+ assert len(lines) == 2
+ else:
+ assert len(lines) == 1
+
+ def test_mirrored(self):
+ import unicodedata
+ # For no reason, unicodedata.mirrored() returns an int, not a bool
+ assert repr(unicodedata.mirrored(u' ')) == '0'
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit