Author: Amaury Forgeot d'Arc <amaur...@gmail.com>
Branch: py3k
Changeset: r60256:4b63836b7e97
Date: 2013-01-20 20:27 +0100
http://bitbucket.org/pypy/pypy/changeset/4b63836b7e97/

Log:    hg merge default

diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -9,6 +9,7 @@
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.runicode import MAXUNICODE
 from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_3_2_0
+from rpython.rlib.runicode import code_to_unichr, ORD
 import sys
 
 
@@ -30,25 +31,6 @@
 # The functions below are subtly different from the ones in runicode.py.
 # When PyPy implements Python 3 they should be merged.
 
-def UNICHR(c):
-    if c <= sys.maxunicode and c <= MAXUNICODE:
-        return unichr(c)
-    else:
-        c -= 0x10000
-        return (unichr(0xD800 + (c >> 10)) +
-                unichr(0xDC00 + (c & 0x03FF)))
-
-def ORD(u):
-    assert isinstance(u, unicode)
-    if len(u) == 1:
-        return ord(u[0])
-    elif len(u) == 2:
-        ch1 = ord(u[0])
-        ch2 = ord(u[1])
-        if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
-            return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
-    raise ValueError
-
 if MAXUNICODE > 0xFFFF:
     # Target is wide build
     def unichr_to_code_w(space, w_unichr):
@@ -69,12 +51,6 @@
                     'need a single Unicode character as parameter'))
             return space.int_w(space.ord(w_unichr))
 
-    def code_to_unichr(code):
-        if not we_are_translated() and sys.maxunicode == 0xFFFF:
-            # Host CPython is narrow build, generate surrogates
-            return UNICHR(code)
-        else:
-            return unichr(code)
 else:
     # Target is narrow build
     def unichr_to_code_w(space, w_unichr):
@@ -97,10 +73,6 @@
                 raise OperationError(space.w_TypeError, space.wrap(
                     'need a single Unicode character as parameter'))
 
-    def code_to_unichr(code):
-        # generate surrogates for large codes
-        return UNICHR(code)
-
 
 class UCD(Wrappable):
     def __init__(self, unicodedb):
diff --git a/pypy/module/unicodedata/test_unicodedata.py b/pypy/module/unicodedata/test_unicodedata.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/unicodedata/test_unicodedata.py
@@ -0,0 +1,103 @@
+
+class AppTestUnicodeData:
+    spaceconfig = dict(usemodules=('unicodedata',))
+
+    def test_hangul_syllables(self):
+        import unicodedata
+        # Test all leading, vowel and trailing jamo
+        # but not every combination of them.
+        for code, name in ((0xAC00, 'HANGUL SYLLABLE GA'),
+                           (0xAE69, 'HANGUL SYLLABLE GGAEG'),
+                           (0xB0D2, 'HANGUL SYLLABLE NYAGG'),
+                           (0xB33B, 'HANGUL SYLLABLE DYAEGS'),
+                           (0xB5A4, 'HANGUL SYLLABLE DDEON'),
+                           (0xB80D, 'HANGUL SYLLABLE RENJ'),
+                           (0xBA76, 'HANGUL SYLLABLE MYEONH'),
+                           (0xBCDF, 'HANGUL SYLLABLE BYED'),
+                           (0xBF48, 'HANGUL SYLLABLE BBOL'),
+                           (0xC1B1, 'HANGUL SYLLABLE SWALG'),
+                           (0xC41A, 'HANGUL SYLLABLE SSWAELM'),
+                           (0xC683, 'HANGUL SYLLABLE OELB'),
+                           (0xC8EC, 'HANGUL SYLLABLE JYOLS'),
+                           (0xCB55, 'HANGUL SYLLABLE JJULT'),
+                           (0xCDBE, 'HANGUL SYLLABLE CWEOLP'),
+                           (0xD027, 'HANGUL SYLLABLE KWELH'),
+                           (0xD290, 'HANGUL SYLLABLE TWIM'),
+                           (0xD4F9, 'HANGUL SYLLABLE PYUB'),
+                           (0xD762, 'HANGUL SYLLABLE HEUBS'),
+                           (0xAE27, 'HANGUL SYLLABLE GYIS'),
+                           (0xB090, 'HANGUL SYLLABLE GGISS'),
+                           (0xB0AD, 'HANGUL SYLLABLE NANG'),
+                           (0xB316, 'HANGUL SYLLABLE DAEJ'),
+                           (0xB57F, 'HANGUL SYLLABLE DDYAC'),
+                           (0xB7E8, 'HANGUL SYLLABLE RYAEK'),
+                           (0xBA51, 'HANGUL SYLLABLE MEOT'),
+                           (0xBCBA, 'HANGUL SYLLABLE BEP'),
+                           (0xBF23, 'HANGUL SYLLABLE BBYEOH'),
+                           (0xD7A3, 'HANGUL SYLLABLE HIH')):
+            assert unicodedata.name(chr(code)) == name
+            assert unicodedata.lookup(name) == chr(code)
+        # Test outside the range
+        raises(ValueError, unicodedata.name, chr(0xAC00 - 1))
+        raises(ValueError, unicodedata.name, chr(0xD7A3 + 1))
+
+    def test_cjk(self):
+        import sys
+        import unicodedata
+        cases = ((0x3400, 0x4DB5),
+                 (0x4E00, 0x9FA5))
+        if unicodedata.unidata_version >= "5":    # don't know the exact limit
+            cases = ((0x3400, 0x4DB5),
+                     (0x4E00, 0x9FCB),
+                     (0x20000, 0x2A6D6),
+                     (0x2A700, 0x2B734))
+        elif unicodedata.unidata_version >= "4.1":
+            cases = ((0x3400, 0x4DB5),
+                     (0x4E00, 0x9FBB),
+                     (0x20000, 0x2A6D6))
+        for first, last in cases:
+            # Test at and inside the boundary
+            for i in (first, first + 1, last - 1, last):
+                charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
+                char = chr(i)
+                assert unicodedata.name(char) == charname
+                assert unicodedata.lookup(charname) == char
+            # Test outside the boundary
+            for i in first - 1, last + 1:
+                charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
+                char = chr(i)
+                try:
+                    unicodedata.name(char)
+                except ValueError as e:
+                    assert e.message == 'no such name'
+                raises(KeyError, unicodedata.lookup, charname)
+
+    def test_bug_1704793(self): # from CPython
+        import unicodedata
+        assert unicodedata.lookup("GOTHIC LETTER FAIHU") == '\U00010346'
+
+    def test_normalize(self):
+        import unicodedata
+        raises(TypeError, unicodedata.normalize, 'x')
+
+    def test_normalize_wide(self):
+        import sys, unicodedata
+        if sys.maxunicode < 0x10ffff:
+            skip("requires a 'wide' python build.")
+        assert unicodedata.normalize('NFC', '\U000110a5\U000110ba') == '\U000110ab'
+
+    def test_linebreaks(self):
+        linebreaks = (0x0a, 0x0b, 0x0c, 0x0d, 0x85,
+                      0x1c, 0x1d, 0x1e, 0x2028, 0x2029)
+        for i in linebreaks:
+            for j in range(-2, 3):
+                lines = (chr(i + j) + 'A').splitlines()
+                if i + j in linebreaks:
+                    assert len(lines) == 2
+                else:
+                    assert len(lines) == 1
+
+    def test_mirrored(self):
+        import unicodedata
+        # For no reason, unicodedata.mirrored() returns an int, not a bool
+        assert repr(unicodedata.mirrored(' ')) == '0'
diff --git a/rpython/bin/rpython b/rpython/bin/rpython
--- a/rpython/bin/rpython
+++ b/rpython/bin/rpython
@@ -7,7 +7,8 @@
 run with --help for more information
 """
 
-import sys
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
 from rpython.translator.goal.translate import main
 
 # no implicit targets
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1,15 +1,16 @@
 import sys
-from rpython.rlib.bitmanipulation import splitter
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.objectmodel import we_are_translated, specialize, enforceargs
+from rpython.rlib.objectmodel import specialize, we_are_translated
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib.rarithmetic import r_uint, intmask
 from rpython.rlib.unicodedata import unicodedb
+from rpython.rtyper.lltypesystem import lltype, rffi
+
 
 if rffi.sizeof(lltype.UniChar) == 4:
     MAXUNICODE = 0x10ffff
 else:
     MAXUNICODE = 0xffff
+
 BYTEORDER = sys.byteorder
 
 if MAXUNICODE > sys.maxunicode:
@@ -45,6 +46,27 @@
     UNICHR = unichr
     ORD = ord
 
+if MAXUNICODE > 0xFFFF:
+    def code_to_unichr(code):
+        if not we_are_translated() and sys.maxunicode == 0xFFFF:
+            # Host CPython is narrow build, generate surrogates
+            return UNICHR(code)
+        else:
+            return unichr(code)
+else:
+    def code_to_unichr(code):
+        # generate surrogates for large codes
+        return UNICHR(code)
+
+def _STORECHAR(result, CH, byteorder):
+    hi = chr(((CH) >> 8) & 0xff)
+    lo = chr((CH) & 0xff)
+    if byteorder == 'little':
+        result.append(lo)
+        result.append(hi)
+    else:
+        result.append(hi)
+        result.append(lo)
 
 def default_unicode_error_decode(errors, encoding, msg, s,
                                  startingpos, endingpos):
@@ -446,16 +468,6 @@
             result.append(r)
     return result.build(), pos, bo
 
-def _STORECHAR(result, CH, byteorder):
-    hi = chr(((CH) >> 8) & 0xff)
-    lo = chr((CH) & 0xff)
-    if byteorder == 'little':
-        result.append(lo)
-        result.append(hi)
-    else:
-        result.append(hi)
-        result.append(lo)
-
 def unicode_encode_utf_16_helper(s, size, errors,
                                  errorhandler=None,
                                  byteorder='little'):
diff --git a/pypy/module/unicodedata/test_interp_ucd.py b/rpython/rlib/unicodedata/test/test_ucd.py
rename from pypy/module/unicodedata/test_interp_ucd.py
rename to rpython/rlib/unicodedata/test/test_ucd.py
--- a/pypy/module/unicodedata/test_interp_ucd.py
+++ b/rpython/rlib/unicodedata/test/test_ucd.py
@@ -1,6 +1,6 @@
 from rpython.rtyper.test.tool import BaseRtypingTest, LLRtypeMixin
 from rpython.rlib.unicodedata import unicodedb_5_2_0
-from pypy.module.unicodedata.interp_ucd import code_to_unichr
+from rpython.rlib.unicodedata.ucd import code_to_unichr
 
 class TestTranslated(BaseRtypingTest, LLRtypeMixin):
 
diff --git a/rpython/rlib/unicodedata/test/test_unicodedata.py b/rpython/rlib/unicodedata/test/test_unicodedata.py
--- a/rpython/rlib/unicodedata/test/test_unicodedata.py
+++ b/rpython/rlib/unicodedata/test/test_unicodedata.py
@@ -1,109 +1,6 @@
 import py
 from rpython.rlib.unicodedata import unicodedb_3_2_0, unicodedb_5_2_0
 
-class AppTestUnicodeData:
-    spaceconfig = dict(usemodules=('unicodedata',))
-
-    def test_hangul_syllables(self):
-        import unicodedata
-        # Test all leading, vowel and trailing jamo
-        # but not every combination of them.
-        for code, name in ((0xAC00, 'HANGUL SYLLABLE GA'),
-                           (0xAE69, 'HANGUL SYLLABLE GGAEG'),
-                           (0xB0D2, 'HANGUL SYLLABLE NYAGG'),
-                           (0xB33B, 'HANGUL SYLLABLE DYAEGS'),
-                           (0xB5A4, 'HANGUL SYLLABLE DDEON'),
-                           (0xB80D, 'HANGUL SYLLABLE RENJ'),
-                           (0xBA76, 'HANGUL SYLLABLE MYEONH'),
-                           (0xBCDF, 'HANGUL SYLLABLE BYED'),
-                           (0xBF48, 'HANGUL SYLLABLE BBOL'),
-                           (0xC1B1, 'HANGUL SYLLABLE SWALG'),
-                           (0xC41A, 'HANGUL SYLLABLE SSWAELM'),
-                           (0xC683, 'HANGUL SYLLABLE OELB'),
-                           (0xC8EC, 'HANGUL SYLLABLE JYOLS'),
-                           (0xCB55, 'HANGUL SYLLABLE JJULT'),
-                           (0xCDBE, 'HANGUL SYLLABLE CWEOLP'),
-                           (0xD027, 'HANGUL SYLLABLE KWELH'),
-                           (0xD290, 'HANGUL SYLLABLE TWIM'),
-                           (0xD4F9, 'HANGUL SYLLABLE PYUB'),
-                           (0xD762, 'HANGUL SYLLABLE HEUBS'),
-                           (0xAE27, 'HANGUL SYLLABLE GYIS'),
-                           (0xB090, 'HANGUL SYLLABLE GGISS'),
-                           (0xB0AD, 'HANGUL SYLLABLE NANG'),
-                           (0xB316, 'HANGUL SYLLABLE DAEJ'),
-                           (0xB57F, 'HANGUL SYLLABLE DDYAC'),
-                           (0xB7E8, 'HANGUL SYLLABLE RYAEK'),
-                           (0xBA51, 'HANGUL SYLLABLE MEOT'),
-                           (0xBCBA, 'HANGUL SYLLABLE BEP'),
-                           (0xBF23, 'HANGUL SYLLABLE BBYEOH'),
-                           (0xD7A3, 'HANGUL SYLLABLE HIH')):
-            assert unicodedata.name(chr(code)) == name
-            assert unicodedata.lookup(name) == chr(code)
-        # Test outside the range
-        py.test.raises(ValueError, unicodedata.name, chr(0xAC00 - 1))
-        py.test.raises(ValueError, unicodedata.name, chr(0xD7A3 + 1))
-
-    def test_cjk(self):
-        import sys
-        import unicodedata
-        cases = ((0x3400, 0x4DB5),
-                 (0x4E00, 0x9FA5))
-        if unicodedata.unidata_version >= "5":    # don't know the exact limit
-            cases = ((0x3400, 0x4DB5),
-                     (0x4E00, 0x9FCB),
-                     (0x20000, 0x2A6D6),
-                     (0x2A700, 0x2B734))
-        elif unicodedata.unidata_version >= "4.1":
-            cases = ((0x3400, 0x4DB5),
-                     (0x4E00, 0x9FBB),
-                     (0x20000, 0x2A6D6))
-        for first, last in cases:
-            # Test at and inside the boundary
-            for i in (first, first + 1, last - 1, last):
-                charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
-                char = chr(i)
-                assert unicodedata.name(char) == charname
-                assert unicodedata.lookup(charname) == char
-            # Test outside the boundary
-            for i in first - 1, last + 1:
-                charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
-                char = chr(i)
-                try:
-                    unicodedata.name(char)
-                except ValueError as e:
-                    assert e.message == 'no such name'
-                py.test.raises(KeyError, unicodedata.lookup, charname)
-
-    def test_bug_1704793(self): # from CPython
-        import unicodedata
-        assert unicodedata.lookup("GOTHIC LETTER FAIHU") == '\U00010346'
-
-    def test_normalize(self):
-        import unicodedata
-        py.test.raises(TypeError, unicodedata.normalize, 'x')
-
-    def test_normalize_wide(self):
-        import sys, unicodedata
-        if sys.maxunicode < 0x10ffff:
-            skip("requires a 'wide' python build.")
-        assert unicodedata.normalize('NFC', '\U000110a5\U000110ba') == '\U000110ab'
-
-    def test_linebreaks(self):
-        linebreaks = (0x0a, 0x0b, 0x0c, 0x0d, 0x85,
-                      0x1c, 0x1d, 0x1e, 0x2028, 0x2029)
-        for i in linebreaks:
-            for j in range(-2, 3):
-                lines = (chr(i + j) + 'A').splitlines()
-                if i + j in linebreaks:
-                    assert len(lines) == 2
-                else:
-                    assert len(lines) == 1
-
-    def test_mirrored(self):
-        import unicodedata
-        # For no reason, unicodedata.mirrored() returns an int, not a bool
-        assert repr(unicodedata.mirrored(' ')) == '0'
-
 class TestUnicodeData(object):
     def setup_class(cls):
         import random, unicodedata
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to