Support for Chinese charsets was recently added to Vade-Mecum, and this required patching the helper/CharsetMapping.py file of the Python distiller, since it lacks support for the "GBK" charset (Simplified Chinese).
I've uploaded the patched file along with the latest release of VM at sf.net, but I think this change should go into the PyPlucker distribution. I've attached the (simplistic) patch, but I think it would be better if someone maintaining PyPlucker regenerated the mapping themselves, since doing so is very simple (thanks to the PyPlucker developers!). The current IANA registry of charsets can be found here: http://www.iana.org/assignments/character-sets Thanks in advance!
--- /mnt/C/Program Files/Plucker/parser/python/PyPlucker/helper/CharsetMapping.py.orig 2002-05-18 10:28:24.000000000 +0400
+++ /mnt/C/src/vm/CharsetMapping.py 2006-04-03 18:03:06.000000000 +0400
@@ -14,6 +14,9 @@
Copyright 2001 by Dirk Heiser <[EMAIL PROTECTED]>
Distributable under the GNU General Public License Version 2 or newer.
+
+Updated to the IANA registry as of 28 Jan 2005
+by Konstantin Khomoutov <[EMAIL PROTECTED]>
"""
import getopt, os, sys, re, string
@@ -302,6 +304,7 @@
'csisolatin4' : 7,
'ecma-cyrillic' : 77,
'iso-ir-111' : 77,
+ 'koi8-e' : 77,
'csiso111ecmacyrillic' : 77,
'csa_z243.4-1985-1' : 78,
'iso-ir-121' : 78,
@@ -721,6 +724,18 @@
'cp01149' : 2100,
'ebcdic-is-871+euro' : 2100,
'big5-hkscs' : 2101,
+ 'ibm1047' : 2102,
+ 'ibm-1047' : 2102,
+ 'ptcp154' : 2103,
+ 'csptcp154' : 2103,
+ 'pt154' : 2103,
+ 'cp154' : 2103,
+ 'cyrillic-asian' : 2103,
+ 'amiga-1251' : 2104,
+ 'ami1251' : 2104,
+ 'amiga1251' : 2104,
+ 'ami-1251' : 2104,
+ 'koi7-switched' : 2105,
'unicode-1-1' : 1010,
'csunicode11' : 1010,
'scsu' : 1011,
@@ -728,6 +743,13 @@
'utf-16be' : 1013,
'utf-16le' : 1014,
'utf-16' : 1015,
+ 'cesu-8' : 1016,
+ 'cscesu-8' : 1016,
+ 'utf-32' : 1017,
+ 'utf-32be' : 1018,
+ 'utf-32le' : 1019,
+ 'bocu-1' : 1020,
+ 'csbocu-1' : 1020,
'unicode-1-1-utf-7' : 103,
'csunicode11utf7' : 103,
'utf-8' : 106,
@@ -741,7 +763,21 @@
'l8' : 110,
'iso-8859-15' : 111,
'iso_8859-15' : 111,
+ 'latin-9' : 111,
'iso-8859-16' : 112,
+ 'iso-ir-226' : 112,
+ 'iso_8859-16:2001' : 112,
+ 'iso_8859-16' : 112,
+ 'latin10' : 112,
+ 'l10' : 112,
+ 'gbk' : 113,
+ 'cp936' : 113,
+ 'ms936' : 113,
+ 'windows-936' : 113,
+ 'gb18030' : 114,
+ 'osd_ebcdic_df04_15' : 115,
+ 'osd_ebcdic_df03_irv' : 116,
+ 'osd_ebcdic_df04_1' : 117,
'jis_encoding' : 16,
'csjisencoding' : 16,
'shift_jis' : 17,
@@ -760,7 +796,7 @@
'iso-unicode-ibm-1261' : 1005,
'csunicodeibm1261' : 1005,
'iso-unicode-ibm-1268' : 1006,
- 'csunidoceibm1268' : 1006,
+ 'csunicodeibm1268' : 1006,
'iso-unicode-ibm-1276' : 1007,
'csunicodeibm1276' : 1007,
'iso-unicode-ibm-1264' : 1008,
@@ -1004,18 +1042,32 @@
2099 : 'IBM01148',
2100 : 'IBM01149',
2101 : 'Big5-HKSCS',
+ 2102 : 'IBM1047',
+ 2103 : 'PTCP154',
+ 2104 : 'Amiga-1251',
+ 2105 : 'KOI7-switched',
1010 : 'UNICODE-1-1',
1011 : 'SCSU',
1012 : 'UTF-7',
1013 : 'UTF-16BE',
1014 : 'UTF-16LE',
1015 : 'UTF-16',
+ 1016 : 'CESU-8',
+ 1017 : 'UTF-32',
+ 1018 : 'UTF-32BE',
+ 1019 : 'UTF-32LE',
+ 1020 : 'BOCU-1',
103 : 'UNICODE-1-1-UTF-7',
106 : 'UTF-8',
109 : 'ISO-8859-13',
110 : 'ISO-8859-14',
111 : 'ISO-8859-15',
112 : 'ISO-8859-16',
+ 113 : 'GBK',
+ 114 : 'GB18030',
+ 115 : 'OSD_EBCDIC_DF04_15',
+ 116 : 'OSD_EBCDIC_DF03_IRV',
+ 117 : 'OSD_EBCDIC_DF04_1',
16 : 'JIS_Encoding',
17 : 'Shift_JIS',
18 : 'EUC-JP',
