Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r94866:fb05c07c73c5
Date: 2018-07-14 09:46 +0200
http://bitbucket.org/pypy/pypy/changeset/fb05c07c73c5/

Log: Issue #2857

     Fix .casefold() in some cases, for py3.5

diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py
--- a/rpython/rlib/unicodedata/generate_unicodedb.py
+++ b/rpython/rlib/unicodedata/generate_unicodedb.py
@@ -913,8 +913,17 @@
 
     casefolds = {}
     for code, char in table.enum_chars():
-        if char.casefolding and char.casefolding != [char.lower]:
-            casefolds[code] = char.casefolding
+        full_casefold = char.casefolding
+        if full_casefold is None:
+            full_casefold = [code]
+        full_lower = char.lower
+        if full_lower is None:
+            full_lower = code
+        # if we don't write anything into the file, then the RPython
+        # program would compute the result 'full_lower' instead.
+        # Is that the right answer?
+        if full_casefold != [full_lower]:
+            casefolds[code] = full_casefold
     writeDict(outfile, '_casefolds', casefolds, base_mod)
     print >> outfile, '''
 
diff --git a/rpython/rlib/unicodedata/test/test_unicodedata.py b/rpython/rlib/unicodedata/test/test_unicodedata.py
--- a/rpython/rlib/unicodedata/test/test_unicodedata.py
+++ b/rpython/rlib/unicodedata/test/test_unicodedata.py
@@ -148,3 +148,15 @@
     def test_changed_in_version_8(self):
         assert unicodedb_6_2_0.toupper_full(0x025C) == [0x025C]
         assert unicodedb_8_0_0.toupper_full(0x025C) == [0xA7AB]
+
+    def test_casefold(self):
+        # returns None when we have no special casefolding rule,
+        # which means that tolower_full() should be used instead
+        assert unicodedb_8_0_0.casefold_lookup(0x1000) == None
+        assert unicodedb_8_0_0.casefold_lookup(0x0061) == None
+        assert unicodedb_8_0_0.casefold_lookup(0x0041) == None
+        # a case where casefold() != lower()
+        assert unicodedb_8_0_0.casefold_lookup(0x00DF) == [ord('s'), ord('s')]
+        # returns the argument itself, and not None, in rare cases
+        # where tolower_full() would return something different
+        assert unicodedb_8_0_0.casefold_lookup(0x13A0) == [0x13A0]
diff --git a/rpython/rlib/unicodedata/unicodedb_8_0_0.py
b/rpython/rlib/unicodedata/unicodedb_8_0_0.py --- a/rpython/rlib/unicodedata/unicodedb_8_0_0.py +++ b/rpython/rlib/unicodedata/unicodedb_8_0_0.py @@ -21307,6 +21307,92 @@ return code _casefolds = { +5024: [5024], +5025: [5025], +5026: [5026], +5027: [5027], +5028: [5028], +5029: [5029], +5030: [5030], +5031: [5031], +5032: [5032], +5033: [5033], +5034: [5034], +5035: [5035], +5036: [5036], +5037: [5037], +5038: [5038], +5039: [5039], +5040: [5040], +5041: [5041], +5042: [5042], +5043: [5043], +5044: [5044], +5045: [5045], +5046: [5046], +5047: [5047], +5048: [5048], +5049: [5049], +5050: [5050], +5051: [5051], +5052: [5052], +5053: [5053], +5054: [5054], +5055: [5055], +5056: [5056], +5057: [5057], +5058: [5058], +5059: [5059], +5060: [5060], +5061: [5061], +5062: [5062], +5063: [5063], +5064: [5064], +5065: [5065], +5066: [5066], +5067: [5067], +5068: [5068], +5069: [5069], +5070: [5070], +5071: [5071], +5072: [5072], +5073: [5073], +5074: [5074], +5075: [5075], +5076: [5076], +5077: [5077], +5078: [5078], +5079: [5079], +5080: [5080], +5081: [5081], +5082: [5082], +5083: [5083], +5084: [5084], +5085: [5085], +5086: [5086], +5087: [5087], +5088: [5088], +5089: [5089], +5090: [5090], +5091: [5091], +5092: [5092], +5093: [5093], +5094: [5094], +5095: [5095], +5096: [5096], +5097: [5097], +5098: [5098], +5099: [5099], +5100: [5100], +5101: [5101], +5102: [5102], +5103: [5103], +5104: [5104], +5105: [5105], +5106: [5106], +5107: [5107], +5108: [5108], +5109: [5109], 5112: [5104], 5113: [5105], 5114: [5106], diff --git a/rpython/rlib/unicodedata/unicodedb_9_0_0.py b/rpython/rlib/unicodedata/unicodedb_9_0_0.py --- a/rpython/rlib/unicodedata/unicodedb_9_0_0.py +++ b/rpython/rlib/unicodedata/unicodedb_9_0_0.py @@ -24430,6 +24430,92 @@ return code _casefolds = { +5024: [5024], +5025: [5025], +5026: [5026], +5027: [5027], +5028: [5028], +5029: [5029], +5030: [5030], +5031: [5031], +5032: [5032], +5033: [5033], +5034: [5034], +5035: [5035], +5036: [5036], +5037: [5037], 
+5038: [5038], +5039: [5039], +5040: [5040], +5041: [5041], +5042: [5042], +5043: [5043], +5044: [5044], +5045: [5045], +5046: [5046], +5047: [5047], +5048: [5048], +5049: [5049], +5050: [5050], +5051: [5051], +5052: [5052], +5053: [5053], +5054: [5054], +5055: [5055], +5056: [5056], +5057: [5057], +5058: [5058], +5059: [5059], +5060: [5060], +5061: [5061], +5062: [5062], +5063: [5063], +5064: [5064], +5065: [5065], +5066: [5066], +5067: [5067], +5068: [5068], +5069: [5069], +5070: [5070], +5071: [5071], +5072: [5072], +5073: [5073], +5074: [5074], +5075: [5075], +5076: [5076], +5077: [5077], +5078: [5078], +5079: [5079], +5080: [5080], +5081: [5081], +5082: [5082], +5083: [5083], +5084: [5084], +5085: [5085], +5086: [5086], +5087: [5087], +5088: [5088], +5089: [5089], +5090: [5090], +5091: [5091], +5092: [5092], +5093: [5093], +5094: [5094], +5095: [5095], +5096: [5096], +5097: [5097], +5098: [5098], +5099: [5099], +5100: [5100], +5101: [5101], +5102: [5102], +5103: [5103], +5104: [5104], +5105: [5105], +5106: [5106], +5107: [5107], +5108: [5108], +5109: [5109], 5112: [5104], 5113: [5105], 5114: [5106], _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit