http://www.mediawiki.org/wiki/Special:Code/MediaWiki/56291

Revision: 56291
Author:   philip
Date:     2009-09-14 05:43:56 +0000 (Mon, 14 Sep 2009)

Log Message:
-----------
Update the Chinese conversion tables.

Modified Paths:
--------------
    trunk/phase3/includes/ZhConversion.php
    trunk/phase3/includes/zhtable/Makefile.py
    trunk/phase3/includes/zhtable/simpphrases.manual
    trunk/phase3/includes/zhtable/toHK.manual
    trunk/phase3/includes/zhtable/toSimp.manual
    trunk/phase3/includes/zhtable/toTrad.manual
    trunk/phase3/includes/zhtable/tradphrases.manual
    trunk/phase3/includes/zhtable/tradphrases_exclude.manual

Modified: trunk/phase3/includes/ZhConversion.php
===================================================================
--- trunk/phase3/includes/ZhConversion.php      2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/ZhConversion.php      2009-09-14 05:43:56 UTC (rev 56291)
@@ -2885,7 +2885,6 @@
 '并发现' => '並發現',
 '并发表' => '並發表',
 '中国国际信托投资公司' => '中國國際信托投資公司',
-'中国烟草总公司' => '中國烟草總公司',
 '中型钟' => '中型鐘',
 '中型钟表面' => '中型鐘表面',
 '中型钟表' => '中型鐘錶',
@@ -8510,6 +8509,7 @@
 '零天后' => '零天後',
 '零只' => '零隻',
 '零余' => '零餘',
+'电子表格' => '電子表格',
 '电子表' => '電子錶',
 '电子钟' => '電子鐘',
 '电子钟表' => '電子鐘錶',
@@ -8602,7 +8602,7 @@
 '显示表' => '顯示錶',
 '显示钟' => '顯示鐘',
 '显示钟表' => '顯示鐘錶',
-'显着标志' => '顯著標志',
+'显著标志' => '顯著標志',
 '风干' => '風乾',
 '风土志' => '風土誌',
 '风卷残云' => '風捲殘雲',
@@ -13082,8 +13082,6 @@
 '想著稱' => '想著称',
 '想著者' => '想著者',
 '想著述' => '想著述',
-'成效顯著' => '成效显著',
-'成績顯著' => '成绩显著',
 '戰著' => '战着',
 '戰著書' => '战著书',
 '戰著作' => '战著作',
@@ -13339,7 +13337,6 @@
 '撼著稱' => '撼著称',
 '撼著者' => '撼著者',
 '撼著述' => '撼著述',
-'效果顯著' => '效果显著',
 '敞著' => '敞着',
 '敞著書' => '敞著书',
 '敞著作' => '敞著作',
@@ -13422,12 +13419,7 @@
 '映著述' => '映著述',
 '昭著' => '昭著',
 '顯著' => '显著',
-'顯著地' => '显著地',
-'顯著地位' => '显著地位',
-'顯著性' => '显著性',
-'顯著成績' => '显著成绩',
-'顯著效果' => '显著效果',
-'顯著特點' => '显著特点',
+'显著' => '显著',
 '晃著' => '晃着',
 '晃著書' => '晃著书',
 '晃著作' => '晃著作',
@@ -13535,7 +13527,6 @@
 '梳著者' => '梳著者',
 '梳著述' => '梳著述',
 '樊於期' => '樊於期',
-'比較顯著' => '比较显著',
 '氆氌' => '氆氌',
 '求著' => '求着',
 '求著書' => '求著书',
@@ -13583,6 +13574,7 @@
 '流著稱' => '流著称',
 '流著者' => '流著者',
 '流著述' => '流著述',
+'流露著' => '流露着',
 '浮著' => '浮着',
 '浮著書' => '浮著书',
 '浮著作' => '浮著作',
@@ -13735,7 +13727,6 @@
 '疑著稱' => '疑著称',
 '疑著者' => '疑著者',
 '疑著述' => '疑著述',
-'療效顯著' => '疗效显著',
 '癥瘕' => '癥瘕',
 '皺著' => '皱着',
 '皺著書' => '皱著书',
@@ -14503,6 +14494,13 @@
 '雅著者' => '雅著者',
 '雅著述' => '雅著述',
 '雍乾' => '雍乾',
+'靠著' => '靠着',
+'靠著作' => '靠著作',
+'靠著名' => '靠著名',
+'靠著錄' => '靠著录',
+'靠著稱' => '靠著称',
+'靠著者' => '靠著者',
+'靠著述' => '靠著述',
 '頂著' => '顶着',
 '頂著書' => '顶著书',
 '頂著作' => '顶著作',
@@ -16042,6 +16040,7 @@
 '流著者' => '流著者',
 '流著述' => '流著述',
 '流著錄' => '流著錄',
+'流露著' => '流露着',
 '浮著' => '浮着',
 '浮著作' => '浮著作',
 '浮著名' => '浮著名',
@@ -16106,6 +16105,7 @@
 '潤著者' => '潤著者',
 '潤著述' => '潤著述',
 '潤著錄' => '潤著錄',
+'菸' => '煙',
 '照著' => '照着',
 '照著作' => '照著作',
 '照著名' => '照著名',
@@ -16981,6 +16981,15 @@
 '雜著述' => '雜著述',
 '雜著錄' => '雜著錄',
 '冰淇淋' => '雪糕',
+'靠著' => '靠着',
+'靠著作' => '靠著作',
+'靠著名' => '靠著名',
+'靠著稱' => '靠著稱',
+'靠著称' => '靠著稱',
+'靠著者' => '靠著者',
+'靠著述' => '靠著述',
+'靠著錄' => '靠著錄',
+'靠著录' => '靠著錄',
 '響著' => '響着',
 '響著作' => '響著作',
 '響著名' => '響著名',

Modified: trunk/phase3/includes/zhtable/Makefile.py
===================================================================
--- trunk/phase3/includes/zhtable/Makefile.py   2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/Makefile.py   2009-09-14 05:43:56 UTC (rev 56291)
@@ -1,8 +1,24 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
 # @author Philip
-# You should run this script UNDER python 3000.
 import tarfile, zipfile
-import os, re, shutil, urllib.request
+import os, re, shutil, sys, platform
 
+pyversion = platform.python_version()
+if pyversion[:3] in ['2.5', '2.6', '2.7']:
+    import urllib as urllib_request
+    import codecs
+    uniopen = codecs.open
+    def unichr2(i):
+        if sys.maxunicode >= 0x10000 or i < 0x10000:
+            return unichr(i)
+        else:
+            return unichr(0xD7C0+(i>>10)) + unichr(0xDC00+(i&0x3FF))
+elif pyversion[:2] == '3.':
+    import urllib.request as urllib_request
+    uniopen = open
+    unichr2 = chr
+
 # DEFINE
 SF_MIRROR = 'easynews'
 SCIM_TABLES_VER = '0.5.9'
@@ -15,13 +31,19 @@
         print( 'File %s up to date.' % dest )
         return
     print( 'Downloading from [%s] ...' % url )
-    urllib.request.urlretrieve( url, dest )
+    urllib_request.urlretrieve( url, dest )
     print( 'Download complete.\n' )
     return
 
 def GetFileFromZip( path ):
     print( 'Extracting files from %s ...' % path )
-    zipfile.ZipFile(path).extractall()
+    if pyversion[:3] == '2.5':
+        text = zipfile.ZipFile(path).read('Unihan.txt')
+        uhfile = uniopen('Unihan.txt', 'w')
+        uhfile.write(text)
+        uhfile.close()
+    else:
+        zipfile.ZipFile(path).extractall()
     return
 
 def GetFileFromTar( path, member, rename ):
@@ -34,25 +56,25 @@
 
 def ReadBIG5File( dest ):
     print( 'Reading and decoding %s ...' % dest )
-    f1 = open( dest, 'r', encoding='big5hkscs', errors='replace' )
+    f1 = uniopen( dest, 'r', encoding='big5hkscs', errors='replace' )
     text = f1.read()
     text = text.replace( '\ufffd', '\n' )
     f1.close()
-    f2 = open( dest, 'w', encoding='utf8' )
+    f2 = uniopen( dest, 'w', encoding='utf8' )
     f2.write(text)
     f2.close()
     return text
 
 def ReadFile( dest ):
     print( 'Reading and decoding %s ...' % dest )
-    f = open( dest, 'r', encoding='utf8' )
+    f = uniopen( dest, 'r', encoding='utf8' )
     ret = f.read()
     f.close()
     return ret
 
 def ReadUnihanFile( dest ):
     print( 'Reading and decoding %s ...' % dest )
-    f = open( dest, 'r', encoding='utf8' )
+    f = uniopen( dest, 'r', encoding='utf8' )
     t2s_code = []
     s2t_code = []
     while True:
@@ -82,7 +104,7 @@
 
 def ConvertToChar( code ):
     code = code.split('<')[0]
-    return chr( int( code[2:], 16 ) )
+    return unichr2( int( code[2:], 16 ) )
 
 def GetDefaultTable( code_table ):
     char_table = {}
@@ -101,8 +123,8 @@
         elem = elem.strip('|')
         if elem:
             temp2 = elem.split( '|', 1 )
-            from_char = chr( int( temp2[0][2:7], 16 ) )
-            to_chars = [chr( int( code[2:7], 16 ) ) for code in temp2[1].split('|')]
+            from_char = unichr2( int( temp2[0][2:7], 16 ) )
+            to_chars = [unichr2( int( code[2:7], 16 ) ) for code in temp2[1].split('|')]
             char_table[from_char] = to_chars
     return char_table
 
@@ -222,7 +244,9 @@
 def CustomRules( dest ):
     text = ReadFile( dest )
     temp = text.split()
-    ret = {temp[i]: temp[i + 1] for i in range( 0, len( temp ), 2 )}
+    ret = dict()
+    for i in range( 0, len( temp ), 2 ):
+        ret[temp[i]] = temp[i + 1]
     return ret
 
 def GetPHPArray( table ):
@@ -429,7 +453,7 @@
     php += GetPHPArray( toSG )
     php += '\n);'
     
-    f = open( 'ZhConversion.php', 'w', encoding = 'utf8' )
+    f = uniopen( 'ZhConversion.php', 'w', encoding = 'utf8' )
     print ('Writing ZhConversion.php ... ')
     f.write( php )
     f.close()

Modified: trunk/phase3/includes/zhtable/simpphrases.manual
===================================================================
--- trunk/phase3/includes/zhtable/simpphrases.manual    2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/simpphrases.manual    2009-09-14 05:43:56 UTC (rev 56291)
@@ -2179,6 +2179,14 @@
 传著者
 传著述
 标志着
+流露着
+靠着
+靠著作
+靠著名
+靠著录
+靠著称
+靠著者
+靠著述
 於乎
 於戏
 魏徵

Modified: trunk/phase3/includes/zhtable/toHK.manual
===================================================================
--- trunk/phase3/includes/zhtable/toHK.manual   2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/toHK.manual   2009-09-14 05:43:56 UTC (rev 56291)
@@ -2141,10 +2141,20 @@
 殺著稱    殺著稱
 殺著錄    殺著錄
 殺著書    殺著書
+標誌著    標誌着
 幹著     幹着
 干着     幹着
 干着急    干着急
-標誌著    標誌着
+流露著    流露着
+靠著     靠着
+靠著作    靠著作
+靠著名    靠著名
+靠著錄    靠著錄
+靠著录    靠著錄
+靠著稱    靠著稱
+靠著称    靠著稱
+靠著者    靠著者
+靠著述    靠著述
 新著龍虎門  新著龍虎門
 榴莲     榴槤
 榴蓮     榴槤
@@ -2163,4 +2173,5 @@
 醯醋     醯醋
 醯醢     醯醢
 醯壶     醯壺
-醯壺     醯壺
\ No newline at end of file
+醯壺     醯壺
+菸      煙
\ No newline at end of file

Modified: trunk/phase3/includes/zhtable/toSimp.manual
===================================================================
--- trunk/phase3/includes/zhtable/toSimp.manual 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/toSimp.manual 2009-09-14 05:43:56 UTC (rev 56291)
@@ -132,4 +132,6 @@
 苧烯     苧烯
 李乾顺    李乾顺
 幹著     干着
-氾濫     泛滥
\ No newline at end of file
+氾濫     泛滥
+显著     显著
+顯著     显著
\ No newline at end of file

Modified: trunk/phase3/includes/zhtable/toTrad.manual
===================================================================
--- trunk/phase3/includes/zhtable/toTrad.manual 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/toTrad.manual 2009-09-14 05:43:56 UTC (rev 56291)
@@ -83,4 +83,5 @@
 古書云    古書云
 古語云    古語云
 經有云    經有云
-語有云    語有云
\ No newline at end of file
+語有云    語有云
+显著标志   顯著標志
\ No newline at end of file

Modified: trunk/phase3/includes/zhtable/tradphrases.manual
===================================================================
--- trunk/phase3/includes/zhtable/tradphrases.manual    2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/tradphrases.manual    2009-09-14 05:43:56 UTC (rev 56291)
@@ -342,7 +342,6 @@
 雞絲麵
 面朝著
 面臨著
-顯著標志
 颳著
 髮絲
 斷髮
@@ -3498,4 +3497,5 @@
 億個
 兆個
 零個
-云:# 不作“雲:”
\ No newline at end of file
+云:# 不作“雲:”
+電子表格
\ No newline at end of file

Modified: trunk/phase3/includes/zhtable/tradphrases_exclude.manual
===================================================================
--- trunk/phase3/includes/zhtable/tradphrases_exclude.manual    2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/tradphrases_exclude.manual    2009-09-14 05:43:56 UTC (rev 56291)
@@ -309,4 +309,5 @@
 穀風
 復讎
 避暑山庄
-遊牧
\ No newline at end of file
+遊牧
+烟草
\ No newline at end of file



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to