http://www.mediawiki.org/wiki/Special:Code/MediaWiki/56291
Revision: 56291
Author: philip
Date: 2009-09-14 05:43:56 +0000 (Mon, 14 Sep 2009)
Log Message:
-----------
Update the Chinese conversion tables.
Modified Paths:
--------------
trunk/phase3/includes/ZhConversion.php
trunk/phase3/includes/zhtable/Makefile.py
trunk/phase3/includes/zhtable/simpphrases.manual
trunk/phase3/includes/zhtable/toHK.manual
trunk/phase3/includes/zhtable/toSimp.manual
trunk/phase3/includes/zhtable/toTrad.manual
trunk/phase3/includes/zhtable/tradphrases.manual
trunk/phase3/includes/zhtable/tradphrases_exclude.manual
Modified: trunk/phase3/includes/ZhConversion.php
===================================================================
--- trunk/phase3/includes/ZhConversion.php 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/ZhConversion.php 2009-09-14 05:43:56 UTC (rev 56291)
@@ -2885,7 +2885,6 @@
'并发现' => '並發現',
'并发表' => '並發表',
'中国国际信托投资公司' => '中國國際信托投資公司',
-'中国烟草总公司' => '中國烟草總公司',
'中型钟' => '中型鐘',
'中型钟表面' => '中型鐘表面',
'中型钟表' => '中型鐘錶',
@@ -8510,6 +8509,7 @@
'零天后' => '零天後',
'零只' => '零隻',
'零余' => '零餘',
+'电子表格' => '電子表格',
'电子表' => '電子錶',
'电子钟' => '電子鐘',
'电子钟表' => '電子鐘錶',
@@ -8602,7 +8602,7 @@
'显示表' => '顯示錶',
'显示钟' => '顯示鐘',
'显示钟表' => '顯示鐘錶',
-'显着标志' => '顯著標志',
+'显著标志' => '顯著標志',
'风干' => '風乾',
'风土志' => '風土誌',
'风卷残云' => '風捲殘雲',
@@ -13082,8 +13082,6 @@
'想著稱' => '想著称',
'想著者' => '想著者',
'想著述' => '想著述',
-'成效顯著' => '成效显著',
-'成績顯著' => '成绩显著',
'戰著' => '战着',
'戰著書' => '战著书',
'戰著作' => '战著作',
@@ -13339,7 +13337,6 @@
'撼著稱' => '撼著称',
'撼著者' => '撼著者',
'撼著述' => '撼著述',
-'效果顯著' => '效果显著',
'敞著' => '敞着',
'敞著書' => '敞著书',
'敞著作' => '敞著作',
@@ -13422,12 +13419,7 @@
'映著述' => '映著述',
'昭著' => '昭著',
'顯著' => '显著',
-'顯著地' => '显著地',
-'顯著地位' => '显著地位',
-'顯著性' => '显著性',
-'顯著成績' => '显著成绩',
-'顯著效果' => '显著效果',
-'顯著特點' => '显著特点',
+'显著' => '显著',
'晃著' => '晃着',
'晃著書' => '晃著书',
'晃著作' => '晃著作',
@@ -13535,7 +13527,6 @@
'梳著者' => '梳著者',
'梳著述' => '梳著述',
'樊於期' => '樊於期',
-'比較顯著' => '比较显著',
'氆氌' => '氆氌',
'求著' => '求着',
'求著書' => '求著书',
@@ -13583,6 +13574,7 @@
'流著稱' => '流著称',
'流著者' => '流著者',
'流著述' => '流著述',
+'流露著' => '流露着',
'浮著' => '浮着',
'浮著書' => '浮著书',
'浮著作' => '浮著作',
@@ -13735,7 +13727,6 @@
'疑著稱' => '疑著称',
'疑著者' => '疑著者',
'疑著述' => '疑著述',
-'療效顯著' => '疗效显著',
'癥瘕' => '癥瘕',
'皺著' => '皱着',
'皺著書' => '皱著书',
@@ -14503,6 +14494,13 @@
'雅著者' => '雅著者',
'雅著述' => '雅著述',
'雍乾' => '雍乾',
+'靠著' => '靠着',
+'靠著作' => '靠著作',
+'靠著名' => '靠著名',
+'靠著錄' => '靠著录',
+'靠著稱' => '靠著称',
+'靠著者' => '靠著者',
+'靠著述' => '靠著述',
'頂著' => '顶着',
'頂著書' => '顶著书',
'頂著作' => '顶著作',
@@ -16042,6 +16040,7 @@
'流著者' => '流著者',
'流著述' => '流著述',
'流著錄' => '流著錄',
+'流露著' => '流露着',
'浮著' => '浮着',
'浮著作' => '浮著作',
'浮著名' => '浮著名',
@@ -16106,6 +16105,7 @@
'潤著者' => '潤著者',
'潤著述' => '潤著述',
'潤著錄' => '潤著錄',
+'菸' => '煙',
'照著' => '照着',
'照著作' => '照著作',
'照著名' => '照著名',
@@ -16981,6 +16981,15 @@
'雜著述' => '雜著述',
'雜著錄' => '雜著錄',
'冰淇淋' => '雪糕',
+'靠著' => '靠着',
+'靠著作' => '靠著作',
+'靠著名' => '靠著名',
+'靠著稱' => '靠著稱',
+'靠著称' => '靠著稱',
+'靠著者' => '靠著者',
+'靠著述' => '靠著述',
+'靠著錄' => '靠著錄',
+'靠著录' => '靠著錄',
'響著' => '響着',
'響著作' => '響著作',
'響著名' => '響著名',
Modified: trunk/phase3/includes/zhtable/Makefile.py
===================================================================
--- trunk/phase3/includes/zhtable/Makefile.py 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/Makefile.py 2009-09-14 05:43:56 UTC (rev 56291)
@@ -1,8 +1,24 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
# @author Philip
-# You should run this script UNDER python 3000.
import tarfile, zipfile
-import os, re, shutil, urllib.request
+import os, re, shutil, sys, platform
+pyversion = platform.python_version()
+if pyversion[:3] in ['2.5', '2.6', '2.7']:
+ import urllib as urllib_request
+ import codecs
+ uniopen = codecs.open
+ def unichr2(i):
+ if sys.maxunicode >= 0x10000 or i < 0x10000:
+ return unichr(i)
+ else:
+ return unichr(0xD7C0+(i>>10)) + unichr(0xDC00+(i&0x3FF))
+elif pyversion[:2] == '3.':
+ import urllib.request as urllib_request
+ uniopen = open
+ unichr2 = chr
+
# DEFINE
SF_MIRROR = 'easynews'
SCIM_TABLES_VER = '0.5.9'
@@ -15,13 +31,19 @@
print( 'File %s up to date.' % dest )
return
print( 'Downloading from [%s] ...' % url )
- urllib.request.urlretrieve( url, dest )
+ urllib_request.urlretrieve( url, dest )
print( 'Download complete.\n' )
return
def GetFileFromZip( path ):
print( 'Extracting files from %s ...' % path )
- zipfile.ZipFile(path).extractall()
+ if pyversion[:3] == '2.5':
+ text = zipfile.ZipFile(path).read('Unihan.txt')
+ uhfile = uniopen('Unihan.txt', 'w')
+ uhfile.write(text)
+ uhfile.close()
+ else:
+ zipfile.ZipFile(path).extractall()
return
def GetFileFromTar( path, member, rename ):
@@ -34,25 +56,25 @@
def ReadBIG5File( dest ):
print( 'Reading and decoding %s ...' % dest )
- f1 = open( dest, 'r', encoding='big5hkscs', errors='replace' )
+ f1 = uniopen( dest, 'r', encoding='big5hkscs', errors='replace' )
text = f1.read()
text = text.replace( '\ufffd', '\n' )
f1.close()
- f2 = open( dest, 'w', encoding='utf8' )
+ f2 = uniopen( dest, 'w', encoding='utf8' )
f2.write(text)
f2.close()
return text
def ReadFile( dest ):
print( 'Reading and decoding %s ...' % dest )
- f = open( dest, 'r', encoding='utf8' )
+ f = uniopen( dest, 'r', encoding='utf8' )
ret = f.read()
f.close()
return ret
def ReadUnihanFile( dest ):
print( 'Reading and decoding %s ...' % dest )
- f = open( dest, 'r', encoding='utf8' )
+ f = uniopen( dest, 'r', encoding='utf8' )
t2s_code = []
s2t_code = []
while True:
@@ -82,7 +104,7 @@
def ConvertToChar( code ):
code = code.split('<')[0]
- return chr( int( code[2:], 16 ) )
+ return unichr2( int( code[2:], 16 ) )
def GetDefaultTable( code_table ):
char_table = {}
@@ -101,8 +123,8 @@
elem = elem.strip('|')
if elem:
temp2 = elem.split( '|', 1 )
- from_char = chr( int( temp2[0][2:7], 16 ) )
- to_chars = [chr( int( code[2:7], 16 ) ) for code in temp2[1].split('|')]
+ from_char = unichr2( int( temp2[0][2:7], 16 ) )
+ to_chars = [unichr2( int( code[2:7], 16 ) ) for code in temp2[1].split('|')]
char_table[from_char] = to_chars
return char_table
@@ -222,7 +244,9 @@
def CustomRules( dest ):
text = ReadFile( dest )
temp = text.split()
- ret = {temp[i]: temp[i + 1] for i in range( 0, len( temp ), 2 )}
+ ret = dict()
+ for i in range( 0, len( temp ), 2 ):
+ ret[temp[i]] = temp[i + 1]
return ret
def GetPHPArray( table ):
@@ -429,7 +453,7 @@
php += GetPHPArray( toSG )
php += '\n);'
- f = open( 'ZhConversion.php', 'w', encoding = 'utf8' )
+ f = uniopen( 'ZhConversion.php', 'w', encoding = 'utf8' )
print ('Writing ZhConversion.php ... ')
f.write( php )
f.close()
Modified: trunk/phase3/includes/zhtable/simpphrases.manual
===================================================================
--- trunk/phase3/includes/zhtable/simpphrases.manual 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/simpphrases.manual 2009-09-14 05:43:56 UTC (rev 56291)
@@ -2179,6 +2179,14 @@
传著者
传著述
标志着
+流露着
+靠着
+靠著作
+靠著名
+靠著录
+靠著称
+靠著者
+靠著述
於乎
於戏
魏徵
Modified: trunk/phase3/includes/zhtable/toHK.manual
===================================================================
--- trunk/phase3/includes/zhtable/toHK.manual 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/toHK.manual 2009-09-14 05:43:56 UTC (rev 56291)
@@ -2141,10 +2141,20 @@
殺著稱 殺著稱
殺著錄 殺著錄
殺著書 殺著書
+標誌著 標誌着
幹著 幹着
干着 幹着
干着急 干着急
-標誌著 標誌着
+流露著 流露着
+靠著 靠着
+靠著作 靠著作
+靠著名 靠著名
+靠著錄 靠著錄
+靠著录 靠著錄
+靠著稱 靠著稱
+靠著称 靠著稱
+靠著者 靠著者
+靠著述 靠著述
新著龍虎門 新著龍虎門
榴莲 榴槤
榴蓮 榴槤
@@ -2163,4 +2173,5 @@
醯醋 醯醋
醯醢 醯醢
醯壶 醯壺
-醯壺 醯壺
\ No newline at end of file
+醯壺 醯壺
+菸 煙
\ No newline at end of file
Modified: trunk/phase3/includes/zhtable/toSimp.manual
===================================================================
--- trunk/phase3/includes/zhtable/toSimp.manual 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/toSimp.manual 2009-09-14 05:43:56 UTC (rev 56291)
@@ -132,4 +132,6 @@
苧烯 苧烯
李乾顺 李乾顺
幹著 干着
-氾濫 泛滥
\ No newline at end of file
+氾濫 泛滥
+显著 显著
+顯著 显著
\ No newline at end of file
Modified: trunk/phase3/includes/zhtable/toTrad.manual
===================================================================
--- trunk/phase3/includes/zhtable/toTrad.manual 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/toTrad.manual 2009-09-14 05:43:56 UTC (rev 56291)
@@ -83,4 +83,5 @@
古書云 古書云
古語云 古語云
經有云 經有云
-語有云 語有云
\ No newline at end of file
+語有云 語有云
+显著标志 顯著標志
\ No newline at end of file
Modified: trunk/phase3/includes/zhtable/tradphrases.manual
===================================================================
--- trunk/phase3/includes/zhtable/tradphrases.manual 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/tradphrases.manual 2009-09-14 05:43:56 UTC (rev 56291)
@@ -342,7 +342,6 @@
雞絲麵
面朝著
面臨著
-顯著標志
颳著
髮絲
斷髮
@@ -3498,4 +3497,5 @@
億個
兆個
零個
-云:# 不作“雲:”
\ No newline at end of file
+云:# 不作“雲:”
+電子表格
\ No newline at end of file
Modified: trunk/phase3/includes/zhtable/tradphrases_exclude.manual
===================================================================
--- trunk/phase3/includes/zhtable/tradphrases_exclude.manual 2009-09-14 02:25:35 UTC (rev 56290)
+++ trunk/phase3/includes/zhtable/tradphrases_exclude.manual 2009-09-14 05:43:56 UTC (rev 56291)
@@ -309,4 +309,5 @@
穀風
復讎
避暑山庄
-遊牧
\ No newline at end of file
+遊牧
+烟草
\ No newline at end of file
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs