Author: Maciej Fijalkowski <fij...@gmail.com>
Branch: 
Changeset: r60235:8b3fd7dd838c
Date: 2013-01-20 16:13 +0200
http://bitbucket.org/pypy/pypy/changeset/8b3fd7dd838c/

Log:    shuffle stuff around

diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -9,6 +9,7 @@
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.runicode import MAXUNICODE
 from rpython.rlib.unicodedata import unicodedb_5_2_0, unicodedb_3_2_0
+from rpython.rlib.unicodedata.ucd import code_to_unichr, ORD
 import sys
 
 
@@ -30,25 +31,6 @@
 # The functions below are subtly different from the ones in runicode.py.
 # When PyPy implements Python 3 they should be merged.
 
-def UNICHR(c):
-    if c <= sys.maxunicode and c <= MAXUNICODE:
-        return unichr(c)
-    else:
-        c -= 0x10000
-        return (unichr(0xD800 + (c >> 10)) +
-                unichr(0xDC00 + (c & 0x03FF)))
-
-def ORD(u):
-    assert isinstance(u, unicode)
-    if len(u) == 1:
-        return ord(u[0])
-    elif len(u) == 2:
-        ch1 = ord(u[0])
-        ch2 = ord(u[1])
-        if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
-            return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
-    raise ValueError
-
 if MAXUNICODE > 0xFFFF:
     # Target is wide build
     def unichr_to_code_w(space, w_unichr):
@@ -69,12 +51,6 @@
                     'need a single Unicode character as parameter'))
             return space.int_w(space.ord(w_unichr))
 
-    def code_to_unichr(code):
-        if not we_are_translated() and sys.maxunicode == 0xFFFF:
-            # Host CPython is narrow build, generate surrogates
-            return UNICHR(code)
-        else:
-            return unichr(code)
 else:
     # Target is narrow build
     def unichr_to_code_w(space, w_unichr):
@@ -97,10 +73,6 @@
                 raise OperationError(space.w_TypeError, space.wrap(
                     'need a single Unicode character as parameter'))
 
-    def code_to_unichr(code):
-        # generate surrogates for large codes
-        return UNICHR(code)
-
 
 class UCD(Wrappable):
     def __init__(self, unicodedb):
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1,49 +1,9 @@
 import sys
-from rpython.rlib.bitmanipulation import splitter
-from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib.objectmodel import we_are_translated, specialize, enforceargs
+from rpython.rlib.objectmodel import specialize, enforceargs
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib.rarithmetic import r_uint, intmask
 from rpython.rlib.unicodedata import unicodedb
-
-if rffi.sizeof(lltype.UniChar) == 4:
-    MAXUNICODE = 0x10ffff
-else:
-    MAXUNICODE = 0xffff
-BYTEORDER = sys.byteorder
-
-if MAXUNICODE > sys.maxunicode:
-    # A version of unichr which allows codes outside the BMP
-    # even on narrow unicode builds.
-    # It will be used when interpreting code on top of a UCS2 CPython,
-    # when sizeof(wchar_t) == 4.
-    # Note that Python3 uses a similar implementation.
-    def UNICHR(c):
-        assert not we_are_translated()
-        if c <= sys.maxunicode or c > MAXUNICODE:
-            return unichr(c)
-        else:
-            c -= 0x10000
-            return (unichr(0xD800 + (c >> 10)) +
-                    unichr(0xDC00 + (c & 0x03FF)))
-    UNICHR._flowspace_rewrite_directly_as_ = unichr
-    # ^^^ NB.: for translation, it's essential to use this hack instead
-    # of calling unichr() from UNICHR(), because unichr() detects if there
-    # is a "try:except ValueError" immediately around it.
-
-    def ORD(u):
-        assert not we_are_translated()
-        if isinstance(u, unicode) and len(u) == 2:
-            ch1 = ord(u[0])
-            ch2 = ord(u[1])
-            if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
-                return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
-        return ord(u)
-    ORD._flowspace_rewrite_directly_as_ = ord
-
-else:
-    UNICHR = unichr
-    ORD = ord
+from rpython.rlib.unicodedata.ucd import MAXUNICODE, UNICHR, BYTEORDER
 
 
 def default_unicode_error_decode(errors, encoding, msg, s,
@@ -446,16 +406,6 @@
             result.append(r)
     return result.build(), pos, bo
 
-def _STORECHAR(result, CH, byteorder):
-    hi = chr(((CH) >> 8) & 0xff)
-    lo = chr((CH) & 0xff)
-    if byteorder == 'little':
-        result.append(lo)
-        result.append(hi)
-    else:
-        result.append(hi)
-        result.append(lo)
-
 def unicode_encode_utf_16_helper(s, size, errors,
                                  errorhandler=None,
                                  byteorder='little'):
diff --git a/pypy/module/unicodedata/test_interp_ucd.py b/rpython/rlib/unicodedata/test/test_ucd.py
rename from pypy/module/unicodedata/test_interp_ucd.py
rename to rpython/rlib/unicodedata/test/test_ucd.py
--- a/pypy/module/unicodedata/test_interp_ucd.py
+++ b/rpython/rlib/unicodedata/test/test_ucd.py
@@ -1,6 +1,6 @@
 from rpython.rtyper.test.tool import BaseRtypingTest, LLRtypeMixin
 from rpython.rlib.unicodedata import unicodedb_5_2_0
-from pypy.module.unicodedata.interp_ucd import code_to_unichr
+from rpython.rlib.unicodedata.ucd import code_to_unichr
 
 class TestTranslated(BaseRtypingTest, LLRtypeMixin):
 
diff --git a/rpython/rlib/unicodedata/test/test_unicodedata.py b/rpython/rlib/unicodedata/test/test_unicodedata.py
--- a/rpython/rlib/unicodedata/test/test_unicodedata.py
+++ b/rpython/rlib/unicodedata/test/test_unicodedata.py
@@ -1,109 +1,6 @@
 import py
 from rpython.rlib.unicodedata import unicodedb_3_2_0, unicodedb_5_2_0
 
-class AppTestUnicodeData:
-    spaceconfig = dict(usemodules=('unicodedata',))
-
-    def test_hangul_syllables(self):
-        import unicodedata
-        # Test all leading, vowel and trailing jamo
-        # but not every combination of them.
-        for code, name in ((0xAC00, 'HANGUL SYLLABLE GA'),
-                           (0xAE69, 'HANGUL SYLLABLE GGAEG'),
-                           (0xB0D2, 'HANGUL SYLLABLE NYAGG'),
-                           (0xB33B, 'HANGUL SYLLABLE DYAEGS'),
-                           (0xB5A4, 'HANGUL SYLLABLE DDEON'),
-                           (0xB80D, 'HANGUL SYLLABLE RENJ'),
-                           (0xBA76, 'HANGUL SYLLABLE MYEONH'),
-                           (0xBCDF, 'HANGUL SYLLABLE BYED'),
-                           (0xBF48, 'HANGUL SYLLABLE BBOL'),
-                           (0xC1B1, 'HANGUL SYLLABLE SWALG'),
-                           (0xC41A, 'HANGUL SYLLABLE SSWAELM'),
-                           (0xC683, 'HANGUL SYLLABLE OELB'),
-                           (0xC8EC, 'HANGUL SYLLABLE JYOLS'),
-                           (0xCB55, 'HANGUL SYLLABLE JJULT'),
-                           (0xCDBE, 'HANGUL SYLLABLE CWEOLP'),
-                           (0xD027, 'HANGUL SYLLABLE KWELH'),
-                           (0xD290, 'HANGUL SYLLABLE TWIM'),
-                           (0xD4F9, 'HANGUL SYLLABLE PYUB'),
-                           (0xD762, 'HANGUL SYLLABLE HEUBS'),
-                           (0xAE27, 'HANGUL SYLLABLE GYIS'),
-                           (0xB090, 'HANGUL SYLLABLE GGISS'),
-                           (0xB0AD, 'HANGUL SYLLABLE NANG'),
-                           (0xB316, 'HANGUL SYLLABLE DAEJ'),
-                           (0xB57F, 'HANGUL SYLLABLE DDYAC'),
-                           (0xB7E8, 'HANGUL SYLLABLE RYAEK'),
-                           (0xBA51, 'HANGUL SYLLABLE MEOT'),
-                           (0xBCBA, 'HANGUL SYLLABLE BEP'),
-                           (0xBF23, 'HANGUL SYLLABLE BBYEOH'),
-                           (0xD7A3, 'HANGUL SYLLABLE HIH')):
-            assert unicodedata.name(unichr(code)) == name
-            assert unicodedata.lookup(name) == unichr(code)
-        # Test outside the range
-        py.test.raises(ValueError, unicodedata.name, unichr(0xAC00 - 1))
-        py.test.raises(ValueError, unicodedata.name, unichr(0xD7A3 + 1))
-
-    def test_cjk(self):
-        import sys
-        import unicodedata
-        cases = ((0x3400, 0x4DB5),
-                 (0x4E00, 0x9FA5))
-        if unicodedata.unidata_version >= "5":    # don't know the exact limit
-            cases = ((0x3400, 0x4DB5),
-                     (0x4E00, 0x9FCB),
-                     (0x20000, 0x2A6D6),
-                     (0x2A700, 0x2B734))
-        elif unicodedata.unidata_version >= "4.1":
-            cases = ((0x3400, 0x4DB5),
-                     (0x4E00, 0x9FBB),
-                     (0x20000, 0x2A6D6))
-        for first, last in cases:
-            # Test at and inside the boundary
-            for i in (first, first + 1, last - 1, last):
-                charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
-                char = ('\\U%08X' % i).decode('unicode-escape')
-                assert unicodedata.name(char) == charname
-                assert unicodedata.lookup(charname) == char
-            # Test outside the boundary
-            for i in first - 1, last + 1:
-                charname = 'CJK UNIFIED IDEOGRAPH-%X'%i
-                char = ('\\U%08X' % i).decode('unicode-escape')
-                try:
-                    unicodedata.name(char)
-                except ValueError, e:
-                    assert e.message == 'no such name'
-                py.test.raises(KeyError, unicodedata.lookup, charname)
-
-    def test_bug_1704793(self): # from CPython
-        import unicodedata
-        assert unicodedata.lookup("GOTHIC LETTER FAIHU") == u'\U00010346'
-
-    def test_normalize(self):
-        import unicodedata
-        py.test.raises(TypeError, unicodedata.normalize, 'x')
-
-    def test_normalize_wide(self):
-        import sys, unicodedata
-        if sys.maxunicode < 0x10ffff:
-            skip("requires a 'wide' python build.")
-        assert unicodedata.normalize('NFC', u'\U000110a5\U000110ba') == u'\U000110ab'
-
-    def test_linebreaks(self):
-        linebreaks = (0x0a, 0x0b, 0x0c, 0x0d, 0x85,
-                      0x1c, 0x1d, 0x1e, 0x2028, 0x2029)
-        for i in linebreaks:
-            for j in range(-2, 3):
-                lines = (unichr(i + j) + u'A').splitlines()
-                if i + j in linebreaks:
-                    assert len(lines) == 2
-                else:
-                    assert len(lines) == 1
-
-    def test_mirrored(self):
-        import unicodedata
-        # For no reason, unicodedata.mirrored() returns an int, not a bool
-        assert repr(unicodedata.mirrored(u' ')) == '0'
-
 class TestUnicodeData(object):
     def setup_class(cls):
         import random, unicodedata
diff --git a/rpython/rlib/unicodedata/ucd.py b/rpython/rlib/unicodedata/ucd.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/ucd.py
@@ -0,0 +1,87 @@
+
+import sys
+from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rlib.objectmodel import we_are_translated
+
+
+if rffi.sizeof(lltype.UniChar) == 4:
+    MAXUNICODE = 0x10ffff
+else:
+    MAXUNICODE = 0xffff
+    
+BYTEORDER = sys.byteorder
+
+if MAXUNICODE > sys.maxunicode:
+    # A version of unichr which allows codes outside the BMP
+    # even on narrow unicode builds.
+    # It will be used when interpreting code on top of a UCS2 CPython,
+    # when sizeof(wchar_t) == 4.
+    # Note that Python3 uses a similar implementation.
+    def UNICHR(c):
+        assert not we_are_translated()
+        if c <= sys.maxunicode or c > MAXUNICODE:
+            return unichr(c)
+        else:
+            c -= 0x10000
+            return (unichr(0xD800 + (c >> 10)) +
+                    unichr(0xDC00 + (c & 0x03FF)))
+    UNICHR._flowspace_rewrite_directly_as_ = unichr
+    # ^^^ NB.: for translation, it's essential to use this hack instead
+    # of calling unichr() from UNICHR(), because unichr() detects if there
+    # is a "try:except ValueError" immediately around it.
+
+    def ORD(u):
+        assert not we_are_translated()
+        if isinstance(u, unicode) and len(u) == 2:
+            ch1 = ord(u[0])
+            ch2 = ord(u[1])
+            if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+                return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
+        return ord(u)
+    ORD._flowspace_rewrite_directly_as_ = ord
+
+else:
+    UNICHR = unichr
+    ORD = ord
+
+if MAXUNICODE > 0xFFFF:
+    def code_to_unichr(code):
+        if not we_are_translated() and sys.maxunicode == 0xFFFF:
+            # Host CPython is narrow build, generate surrogates
+            return UNICHR(code)
+        else:
+            return unichr(code)
+else:
+    def code_to_unichr(code):
+        # generate surrogates for large codes
+        return UNICHR(code)    
+
+
+def UNICHR(c):
+    if c <= sys.maxunicode and c <= MAXUNICODE:
+        return unichr(c)
+    else:
+        c -= 0x10000
+        return (unichr(0xD800 + (c >> 10)) +
+                unichr(0xDC00 + (c & 0x03FF)))
+
+def ORD(u):
+    assert isinstance(u, unicode)
+    if len(u) == 1:
+        return ord(u[0])
+    elif len(u) == 2:
+        ch1 = ord(u[0])
+        ch2 = ord(u[1])
+        if 0xD800 <= ch1 <= 0xDBFF and 0xDC00 <= ch2 <= 0xDFFF:
+            return (((ch1 - 0xD800) << 10) | (ch2 - 0xDC00)) + 0x10000
+    raise ValueError
+
+def _STORECHAR(result, CH, byteorder):
+    hi = chr(((CH) >> 8) & 0xff)
+    lo = chr((CH) & 0xff)
+    if byteorder == 'little':
+        result.append(lo)
+        result.append(hi)
+    else:
+        result.append(hi)
+        result.append(lo)
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to