This patch is mainly a large extension to the approximate() method.
It is certainly not complete yet, though.

There are also a couple of minor changes, and a few comments that
should be looked at.

Andrew Dunbar.

-- 
http://linguaphile.sourceforge.net
Index: src/af/xap/xp/xap_EncodingManager.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.cpp,v
retrieving revision 1.33
diff -u -r1.33 xap_EncodingManager.cpp
--- src/af/xap/xp/xap_EncodingManager.cpp       2001/05/25 05:52:12     1.33
+++ src/af/xap/xp/xap_EncodingManager.cpp       2001/06/03 07:18:20
@@ -81,6 +81,8 @@
 
 char XAP_EncodingManager::fallbackChar(UT_UCSChar c) const 
 { 
+       // TODO shouldn't we return U+FFFD "REPLACEMENT CHARACTER"
+       // TODO or U+25A0 "BLACK SQUARE" for Unicode?
     return '?'; 
 }
 
@@ -90,12 +92,878 @@
 {
        if (max_length==0)
                return 0;
-       if (max_length==1)
+       if (max_length>=3)
        {
                switch (c)
                {
-                       case 0x201d:
-                       case 0x201c:
+                       case 0x00A9:    // COPYRIGHT SIGN
+                       case 0x24B8:    // CIRCLED LATIN CAPITAL LETTER C
+                               strcpy(out,"(C)"); return 3;
+                       case 0x00AE:    // REGISTERED SIGN
+                       case 0x24C7:    // CIRCLED LATIN CAPITAL LETTER R
+                               strcpy(out,"(R)"); return 3;
+                       case 0xFB03:    // LATIN SMALL LIGATURE FFI
+                               strcpy(out,"ffi"); return 3;
+                       case 0xFB04:    // LATIN SMALL LIGATURE FFL
+                               strcpy(out,"ffl"); return 3;
+               }
+       }       
+       if (max_length>=2)
+       {
+               switch (c)
+               {
+                       case 0x00C6:    // LATIN CAPITAL LETTER AE
+                       case 0x01E2:    // LATIN CAPITAL LETTER AE WITH MACRON
+                       case 0x01FC:    // LATIN CAPITAL LETTER AE WITH ACUTE
+                               strcpy(out,"AE"); return 2;
+                       case 0x00DF:    // LATIN SMALL LETTER SHARP S
+                               strcpy(out,"ss"); return 2;
+                       case 0x00E6:    // LATIN SMALL LETTER AE
+                       case 0x01E3:    // LATIN SMALL LETTER AE WITH MACRON
+                       case 0x01FD:    // LATIN SMALL LETTER AE WITH ACUTE
+                               strcpy(out,"ae"); return 2;
+                       case 0x0132:    // LATIN CAPITAL LIGATURE IJ
+                               strcpy(out,"IJ"); return 2;
+                       case 0x0133:    // LATIN SMALL LIGATURE IJ
+                               strcpy(out,"ij"); return 2;
+                       case 0x0152:    // LATIN CAPITAL LIGATURE OE
+                               strcpy(out,"OE"); return 2;
+                       case 0x0153:    // LATIN SMALL LIGATURE OE
+                               strcpy(out,"oe"); return 2;
+                       case 0xFB00:    // LATIN SMALL LIGATURE FF
+                               strcpy(out,"ff"); return 2;
+                       case 0xFB01:    // LATIN SMALL LIGATURE FI
+                               strcpy(out,"fi"); return 2;
+                       case 0xFB02:    // LATIN SMALL LIGATURE FL
+                               strcpy(out,"fl"); return 2;
+                       case 0xFB05:    // LATIN SMALL LIGATURE LONG S T
+                       case 0xFB06:    // LATIN SMALL LIGATURE ST
+                               strcpy(out,"st"); return 2;
+               }
+       }       
+       if (max_length>=1)
+       {
+               switch (c)
+               {
+                       case 0x00C0:    // LATIN CAPITAL LETTER A WITH GRAVE
+                       case 0x00C1:    // LATIN CAPITAL LETTER A WITH ACUTE
+                       case 0x00C2:    // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+                       case 0x00C3:    // LATIN CAPITAL LETTER A WITH TILDE
+                       case 0x00C4:    // LATIN CAPITAL LETTER A WITH DIAERESIS
+                       case 0x00C5:    // LATIN CAPITAL LETTER A WITH RING ABOVE
+                       case 0x0100:    // LATIN CAPITAL LETTER A WITH MACRON
+                       case 0x0102:    // LATIN CAPITAL LETTER A WITH BREVE
+                       case 0x0104:    // LATIN CAPITAL LETTER A WITH OGONEK
+                       case 0x01CD:    // LATIN CAPITAL LETTER A WITH CARON
+                       case 0x01DE:    // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+                       case 0x01E0:    // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+                       case 0x01FA:    // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+                       case 0x0200:    // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+                       case 0x0202:    // LATIN CAPITAL LETTER A WITH INVERTED BREVE
+                       case 0x1E00:    // LATIN CAPITAL LETTER A WITH RING BELOW
+                       case 0x1EA0:    // LATIN CAPITAL LETTER A WITH DOT BELOW
+                       case 0x1EA2:    // LATIN CAPITAL LETTER A WITH HOOK ABOVE
+                       case 0x1EA4:    // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+                       case 0x1EA6:    // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+                       case 0x1EA8:    // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+                       case 0x1EAA:    // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+                       case 0x1EAC:    // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+                       case 0x1EAE:    // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+                       case 0x1EB0:    // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+                       case 0x1EB2:    // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+                       case 0x1EB4:    // LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+                       case 0x1EB6:    // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+                       case 0xFF21:    // FULLWIDTH LATIN CAPITAL LETTER A
+                               *out = 'A'; return 1;
+                       case 0x0181:    // LATIN CAPITAL LETTER B WITH HOOK
+                       case 0x0182:    // LATIN CAPITAL LETTER B WITH TOPBAR
+                       case 0x1E02:    // LATIN CAPITAL LETTER B WITH DOT ABOVE
+                       case 0x1E04:    // LATIN CAPITAL LETTER B WITH DOT BELOW
+                       case 0x1E06:    // LATIN CAPITAL LETTER B WITH LINE BELOW
+                       case 0xFF22:    // FULLWIDTH LATIN CAPITAL LETTER B
+                               *out = 'B'; return 1;
+                       case 0x00C7:    // LATIN CAPITAL LETTER C WITH CEDILLA
+                       case 0x0106:    // LATIN CAPITAL LETTER C WITH ACUTE
+                       case 0x0108:    // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+                       case 0x010A:    // LATIN CAPITAL LETTER C WITH DOT ABOVE
+                       case 0x010C:    // LATIN CAPITAL LETTER C WITH CARON
+                       case 0x0187:    // LATIN CAPITAL LETTER C WITH HOOK
+                       case 0x1E08:    // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+                       case 0xFF23:    // FULLWIDTH LATIN CAPITAL LETTER C
+                               *out = 'C'; return 1;
+                       case 0x010E:    // LATIN CAPITAL LETTER D WITH CARON
+                       case 0x0110:    // LATIN CAPITAL LETTER D WITH STROKE
+                       case 0x018A:    // LATIN CAPITAL LETTER D WITH HOOK
+                       case 0x018B:    // LATIN CAPITAL LETTER D WITH TOPBAR
+                       case 0x1E0A:    // LATIN CAPITAL LETTER D WITH DOT ABOVE
+                       case 0x1E0C:    // LATIN CAPITAL LETTER D WITH DOT BELOW
+                       case 0x1E0E:    // LATIN CAPITAL LETTER D WITH LINE BELOW
+                       case 0x1E10:    // LATIN CAPITAL LETTER D WITH CEDILLA
+                       case 0x1E12:    // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+                       case 0xFF24:    // FULLWIDTH LATIN CAPITAL LETTER D
+                               *out = 'D'; return 1;
+                       case 0x00C8:    // LATIN CAPITAL LETTER E WITH GRAVE
+                       case 0x00C9:    // LATIN CAPITAL LETTER E WITH ACUTE
+                       case 0x00CA:    // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+                       case 0x00CB:    // LATIN CAPITAL LETTER E WITH DIAERESIS
+                       case 0x0112:    // LATIN CAPITAL LETTER E WITH MACRON
+                       case 0x0114:    // LATIN CAPITAL LETTER E WITH BREVE
+                       case 0x0116:    // LATIN CAPITAL LETTER E WITH DOT ABOVE
+                       case 0x0118:    // LATIN CAPITAL LETTER E WITH OGONEK
+                       case 0x011A:    // LATIN CAPITAL LETTER E WITH CARON
+                       case 0x0204:    // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+                       case 0x0206:    // LATIN CAPITAL LETTER E WITH INVERTED BREVE
+                       case 0x1E14:    // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+                       case 0x1E16:    // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+                       case 0x1E18:    // LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+                       case 0x1E1A:    // LATIN CAPITAL LETTER E WITH TILDE BELOW
+                       case 0x1E1C:    // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+                       case 0x1EB8:    // LATIN CAPITAL LETTER E WITH DOT BELOW
+                       case 0x1EBA:    // LATIN CAPITAL LETTER E WITH HOOK ABOVE
+                       case 0x1EBC:    // LATIN CAPITAL LETTER E WITH TILDE
+                       case 0x1EBE:    // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+                       case 0x1EC0:    // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+                       case 0x1EC2:    // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+                       case 0x1EC4:    // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+                       case 0x1EC6:    // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+                       case 0xFF25:    // FULLWIDTH LATIN CAPITAL LETTER E
+                               *out = 'E'; return 1;
+                       case 0x0191:    // LATIN CAPITAL LETTER F WITH HOOK
+                       case 0x1E1E:    // LATIN CAPITAL LETTER F WITH DOT ABOVE
+                       case 0xFF26:    // FULLWIDTH LATIN CAPITAL LETTER F
+                               *out = 'F'; return 1;
+                       case 0x011C:    // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+                       case 0x011E:    // LATIN CAPITAL LETTER G WITH BREVE
+                       case 0x0120:    // LATIN CAPITAL LETTER G WITH DOT ABOVE
+                       case 0x0122:    // LATIN CAPITAL LETTER G WITH CEDILLA
+                       case 0x0193:    // LATIN CAPITAL LETTER G WITH HOOK
+                       case 0x01E4:    // LATIN CAPITAL LETTER G WITH STROKE
+                       case 0x01E6:    // LATIN CAPITAL LETTER G WITH CARON
+                       case 0x01F4:    // LATIN CAPITAL LETTER G WITH ACUTE
+                       case 0x1E20:    // LATIN CAPITAL LETTER G WITH MACRON
+                       case 0xFF27:    // FULLWIDTH LATIN CAPITAL LETTER G
+                               *out = 'G'; return 1;
+                       case 0x0124:    // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+                       case 0x0126:    // LATIN CAPITAL LETTER H WITH STROKE
+                       case 0x1E22:    // LATIN CAPITAL LETTER H WITH DOT ABOVE
+                       case 0x1E24:    // LATIN CAPITAL LETTER H WITH DOT BELOW
+                       case 0x1E26:    // LATIN CAPITAL LETTER H WITH DIAERESIS
+                       case 0x1E28:    // LATIN CAPITAL LETTER H WITH CEDILLA
+                       case 0x1E2A:    // LATIN CAPITAL LETTER H WITH BREVE BELOW
+                       case 0xFF28:    // FULLWIDTH LATIN CAPITAL LETTER H
+                               *out = 'H'; return 1;
+                       case 0x00CC:    // LATIN CAPITAL LETTER I WITH GRAVE
+                       case 0x00CD:    // LATIN CAPITAL LETTER I WITH ACUTE
+                       case 0x00CE:    // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+                       case 0x00CF:    // LATIN CAPITAL LETTER I WITH DIAERESIS
+                       case 0x0128:    // LATIN CAPITAL LETTER I WITH TILDE
+                       case 0x012A:    // LATIN CAPITAL LETTER I WITH MACRON
+                       case 0x012C:    // LATIN CAPITAL LETTER I WITH BREVE
+                       case 0x012E:    // LATIN CAPITAL LETTER I WITH OGONEK
+                       case 0x0130:    // LATIN CAPITAL LETTER I WITH DOT ABOVE
+                       case 0x0197:    // LATIN CAPITAL LETTER I WITH STROKE
+                       case 0x01CF:    // LATIN CAPITAL LETTER I WITH CARON
+                       case 0x0208:    // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+                       case 0x020A:    // LATIN CAPITAL LETTER I WITH INVERTED BREVE
+                       case 0x1E2C:    // LATIN CAPITAL LETTER I WITH TILDE BELOW
+                       case 0x1E2E:    // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+                       case 0x1EC8:    // LATIN CAPITAL LETTER I WITH HOOK ABOVE
+                       case 0x1ECA:    // LATIN CAPITAL LETTER I WITH DOT BELOW
+                       case 0xFF29:    // FULLWIDTH LATIN CAPITAL LETTER I
+                               *out = 'I'; return 1;
+                       case 0x0134:    // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+                       case 0xFF2A:    // FULLWIDTH LATIN CAPITAL LETTER J
+                               *out = 'J'; return 1;
+                       case 0x0136:    // LATIN CAPITAL LETTER K WITH CEDILLA
+                       case 0x0198:    // LATIN CAPITAL LETTER K WITH HOOK
+                       case 0x01E8:    // LATIN CAPITAL LETTER K WITH CARON
+                       case 0x1E30:    // LATIN CAPITAL LETTER K WITH ACUTE
+                       case 0x1E32:    // LATIN CAPITAL LETTER K WITH DOT BELOW
+                       case 0x1E34:    // LATIN CAPITAL LETTER K WITH LINE BELOW
+                       case 0xFF2B:    // FULLWIDTH LATIN CAPITAL LETTER K
+                               *out = 'K'; return 1;
+                       case 0x0139:    // LATIN CAPITAL LETTER L WITH ACUTE
+                       case 0x013B:    // LATIN CAPITAL LETTER L WITH CEDILLA
+                       case 0x013D:    // LATIN CAPITAL LETTER L WITH CARON
+                       case 0x013F:    // LATIN CAPITAL LETTER L WITH MIDDLE DOT
+                       case 0x0141:    // LATIN CAPITAL LETTER L WITH STROKE
+                       case 0x1E36:    // LATIN CAPITAL LETTER L WITH DOT BELOW
+                       case 0x1E38:    // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+                       case 0x1E3A:    // LATIN CAPITAL LETTER L WITH LINE BELOW
+                       case 0x1E3C:    // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+                       case 0xFF2C:    // FULLWIDTH LATIN CAPITAL LETTER L
+                               *out = 'L'; return 1;
+                       case 0x1E3E:    // LATIN CAPITAL LETTER M WITH ACUTE
+                       case 0x1E40:    // LATIN CAPITAL LETTER M WITH DOT ABOVE
+                       case 0x1E42:    // LATIN CAPITAL LETTER M WITH DOT BELOW
+                       case 0xFF2D:    // FULLWIDTH LATIN CAPITAL LETTER M
+                               *out = 'M'; return 1;
+                       case 0x00D1:    // LATIN CAPITAL LETTER N WITH TILDE
+                       case 0x0143:    // LATIN CAPITAL LETTER N WITH ACUTE
+                       case 0x0145:    // LATIN CAPITAL LETTER N WITH CEDILLA
+                       case 0x0147:    // LATIN CAPITAL LETTER N WITH CARON
+                       case 0x019D:    // LATIN CAPITAL LETTER N WITH LEFT HOOK
+                       case 0x1E44:    // LATIN CAPITAL LETTER N WITH DOT ABOVE
+                       case 0x1E46:    // LATIN CAPITAL LETTER N WITH DOT BELOW
+                       case 0x1E48:    // LATIN CAPITAL LETTER N WITH LINE BELOW
+                       case 0x1E4A:    // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+                       case 0xFF2E:    // FULLWIDTH LATIN CAPITAL LETTER N
+                               *out = 'N'; return 1;
+                       case 0x00D2:    // LATIN CAPITAL LETTER O WITH GRAVE
+                       case 0x00D3:    // LATIN CAPITAL LETTER O WITH ACUTE
+                       case 0x00D4:    // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+                       case 0x00D5:    // LATIN CAPITAL LETTER O WITH TILDE
+                       case 0x00D6:    // LATIN CAPITAL LETTER O WITH DIAERESIS
+                       case 0x00D8:    // LATIN CAPITAL LETTER O WITH STROKE
+                       case 0x014C:    // LATIN CAPITAL LETTER O WITH MACRON
+                       case 0x014E:    // LATIN CAPITAL LETTER O WITH BREVE
+                       case 0x0150:    // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+                       case 0x019F:    // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+                       case 0x01A0:    // LATIN CAPITAL LETTER O WITH HORN
+                       case 0x01D1:    // LATIN CAPITAL LETTER O WITH CARON
+                       case 0x01EA:    // LATIN CAPITAL LETTER O WITH OGONEK
+                       case 0x01EC:    // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+                       case 0x01FE:    // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+                       case 0x020C:    // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+                       case 0x020E:    // LATIN CAPITAL LETTER O WITH INVERTED BREVE
+                       case 0x1E4C:    // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+                       case 0x1E4E:    // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+                       case 0x1E50:    // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+                       case 0x1E52:    // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+                       case 0x1ECC:    // LATIN CAPITAL LETTER O WITH DOT BELOW
+                       case 0x1ECE:    // LATIN CAPITAL LETTER O WITH HOOK ABOVE
+                       case 0x1ED0:    // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+                       case 0x1ED2:    // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+                       case 0x1ED4:    // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+                       case 0x1ED6:    // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+                       case 0x1ED8:    // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+                       case 0x1EDA:    // LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+                       case 0x1EDC:    // LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+                       case 0x1EDE:    // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+                       case 0x1EE0:    // LATIN CAPITAL LETTER O WITH HORN AND TILDE
+                       case 0x1EE2:    // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+                       case 0xFF2F:    // FULLWIDTH LATIN CAPITAL LETTER O
+                               *out = 'O'; return 1;
+                       case 0x01A4:    // LATIN CAPITAL LETTER P WITH HOOK
+                       case 0x1E54:    // LATIN CAPITAL LETTER P WITH ACUTE
+                       case 0x1E56:    // LATIN CAPITAL LETTER P WITH DOT ABOVE
+                       case 0xFF30:    // FULLWIDTH LATIN CAPITAL LETTER P
+                               *out = 'P'; return 1;
+                       case 0xFF31:    // FULLWIDTH LATIN CAPITAL LETTER Q
+                               *out = 'Q'; return 1;
+                       case 0x0154:    // LATIN CAPITAL LETTER R WITH ACUTE
+                       case 0x0156:    // LATIN CAPITAL LETTER R WITH CEDILLA
+                       case 0x0158:    // LATIN CAPITAL LETTER R WITH CARON
+                       case 0x0210:    // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+                       case 0x0212:    // LATIN CAPITAL LETTER R WITH INVERTED BREVE
+                       case 0x1E58:    // LATIN CAPITAL LETTER R WITH DOT ABOVE
+                       case 0x1E5A:    // LATIN CAPITAL LETTER R WITH DOT BELOW
+                       case 0x1E5C:    // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+                       case 0x1E5E:    // LATIN CAPITAL LETTER R WITH LINE BELOW
+                       case 0xFF32:    // FULLWIDTH LATIN CAPITAL LETTER R
+                               *out = 'R'; return 1;
+                       case 0x015A:    // LATIN CAPITAL LETTER S WITH ACUTE
+                       case 0x015C:    // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+                       case 0x015E:    // LATIN CAPITAL LETTER S WITH CEDILLA
+                       case 0x0160:    // LATIN CAPITAL LETTER S WITH CARON
+                       case 0x1E60:    // LATIN CAPITAL LETTER S WITH DOT ABOVE
+                       case 0x1E62:    // LATIN CAPITAL LETTER S WITH DOT BELOW
+                       case 0x1E64:    // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+                       case 0x1E66:    // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+                       case 0x1E68:    // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+                       case 0xFF33:    // FULLWIDTH LATIN CAPITAL LETTER S
+                               *out = 'S'; return 1;
+                       case 0x0162:    // LATIN CAPITAL LETTER T WITH CEDILLA
+                       case 0x0164:    // LATIN CAPITAL LETTER T WITH CARON
+                       case 0x0166:    // LATIN CAPITAL LETTER T WITH STROKE
+                       case 0x01AC:    // LATIN CAPITAL LETTER T WITH HOOK
+                       case 0x01AE:    // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+                       case 0x1E6A:    // LATIN CAPITAL LETTER T WITH DOT ABOVE
+                       case 0x1E6C:    // LATIN CAPITAL LETTER T WITH DOT BELOW
+                       case 0x1E6E:    // LATIN CAPITAL LETTER T WITH LINE BELOW
+                       case 0x1E70:    // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+                       case 0xFF34:    // FULLWIDTH LATIN CAPITAL LETTER T
+                               *out = 'T'; return 1;
+                       case 0x00D9:    // LATIN CAPITAL LETTER U WITH GRAVE
+                       case 0x00DA:    // LATIN CAPITAL LETTER U WITH ACUTE
+                       case 0x00DB:    // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+                       case 0x00DC:    // LATIN CAPITAL LETTER U WITH DIAERESIS
+                       case 0x0168:    // LATIN CAPITAL LETTER U WITH TILDE
+                       case 0x016A:    // LATIN CAPITAL LETTER U WITH MACRON
+                       case 0x016C:    // LATIN CAPITAL LETTER U WITH BREVE
+                       case 0x016E:    // LATIN CAPITAL LETTER U WITH RING ABOVE
+                       case 0x0170:    // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+                       case 0x0172:    // LATIN CAPITAL LETTER U WITH OGONEK
+                       case 0x01AF:    // LATIN CAPITAL LETTER U WITH HORN
+                       case 0x01D3:    // LATIN CAPITAL LETTER U WITH CARON
+                       case 0x01D5:    // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+                       case 0x01D7:    // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+                       case 0x01D9:    // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+                       case 0x01DB:    // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+                       case 0x0214:    // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+                       case 0x0216:    // LATIN CAPITAL LETTER U WITH INVERTED BREVE
+                       case 0x1E72:    // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+                       case 0x1E74:    // LATIN CAPITAL LETTER U WITH TILDE BELOW
+                       case 0x1E76:    // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+                       case 0x1E78:    // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+                       case 0x1E7A:    // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+                       case 0x1EE4:    // LATIN CAPITAL LETTER U WITH DOT BELOW
+                       case 0x1EE6:    // LATIN CAPITAL LETTER U WITH HOOK ABOVE
+                       case 0x1EE8:    // LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+                       case 0x1EEA:    // LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+                       case 0x1EEC:    // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+                       case 0x1EEE:    // LATIN CAPITAL LETTER U WITH HORN AND TILDE
+                       case 0x1EF0:    // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+                       case 0xFF35:    // FULLWIDTH LATIN CAPITAL LETTER U
+                               *out = 'U'; return 1;
+                       case 0x01B2:    // LATIN CAPITAL LETTER V WITH HOOK
+                       case 0x1E7C:    // LATIN CAPITAL LETTER V WITH TILDE
+                       case 0x1E7E:    // LATIN CAPITAL LETTER V WITH DOT BELOW
+                       case 0xFF36:    // FULLWIDTH LATIN CAPITAL LETTER V
+                               *out = 'V'; return 1;
+                       case 0x0174:    // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+                       case 0x1E80:    // LATIN CAPITAL LETTER W WITH GRAVE
+                       case 0x1E82:    // LATIN CAPITAL LETTER W WITH ACUTE
+                       case 0x1E84:    // LATIN CAPITAL LETTER W WITH DIAERESIS
+                       case 0x1E86:    // LATIN CAPITAL LETTER W WITH DOT ABOVE
+                       case 0x1E88:    // LATIN CAPITAL LETTER W WITH DOT BELOW
+                       case 0xFF37:    // FULLWIDTH LATIN CAPITAL LETTER W
+                               *out = 'W'; return 1;
+                       case 0x1E8A:    // LATIN CAPITAL LETTER X WITH DOT ABOVE
+                       case 0x1E8C:    // LATIN CAPITAL LETTER X WITH DIAERESIS
+                       case 0xFF38:    // FULLWIDTH LATIN CAPITAL LETTER X
+                               *out = 'X'; return 1;
+                       case 0x00DD:    // LATIN CAPITAL LETTER Y WITH ACUTE
+                       case 0x0176:    // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+                       case 0x0178:    // LATIN CAPITAL LETTER Y WITH DIAERESIS
+                       case 0x01B3:    // LATIN CAPITAL LETTER Y WITH HOOK
+                       case 0x1E8E:    // LATIN CAPITAL LETTER Y WITH DOT ABOVE
+                       case 0x1EF2:    // LATIN CAPITAL LETTER Y WITH GRAVE
+                       case 0x1EF4:    // LATIN CAPITAL LETTER Y WITH DOT BELOW
+                       case 0x1EF6:    // LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+                       case 0x1EF8:    // LATIN CAPITAL LETTER Y WITH TILDE
+                       case 0xFF39:    // FULLWIDTH LATIN CAPITAL LETTER Y
+                               *out = 'Y'; return 1;
+                       case 0x0179:    // LATIN CAPITAL LETTER Z WITH ACUTE
+                       case 0x017B:    // LATIN CAPITAL LETTER Z WITH DOT ABOVE
+                       case 0x017D:    // LATIN CAPITAL LETTER Z WITH CARON
+                               *out = 'Z'; return 1;
+                       case 0x00E0:    // LATIN SMALL LETTER A WITH GRAVE
+                       case 0x00E1:    // LATIN SMALL LETTER A WITH ACUTE
+                       case 0x00E2:    // LATIN SMALL LETTER A WITH CIRCUMFLEX
+                       case 0x00E3:    // LATIN SMALL LETTER A WITH TILDE
+                       case 0x00E4:    // LATIN SMALL LETTER A WITH DIAERESIS
+                       case 0x00E5:    // LATIN SMALL LETTER A WITH RING ABOVE
+                       case 0x0101:    // LATIN SMALL LETTER A WITH MACRON
+                       case 0x0103:    // LATIN SMALL LETTER A WITH BREVE
+                       case 0x0105:    // LATIN SMALL LETTER A WITH OGONEK
+                       case 0x01CE:    // LATIN SMALL LETTER A WITH CARON
+                       case 0x01DF:    // LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
+                       case 0x01E1:    // LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
+                       case 0x01FB:    // LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
+                       case 0x0201:    // LATIN SMALL LETTER A WITH DOUBLE GRAVE
+                       case 0x0203:    // LATIN SMALL LETTER A WITH INVERTED BREVE
+                       case 0x1E01:    // LATIN SMALL LETTER A WITH RING BELOW
+                       case 0x1E9A:    // LATIN SMALL LETTER A WITH RIGHT HALF RING
+                       case 0x1EA1:    // LATIN SMALL LETTER A WITH DOT BELOW
+                       case 0x1EA3:    // LATIN SMALL LETTER A WITH HOOK ABOVE
+                       case 0x1EA5:    // LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
+                       case 0x1EA7:    // LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
+                       case 0x1EA9:    // LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+                       case 0x1EAB:    // LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
+                       case 0x1EAD:    // LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+                       case 0x1EAF:    // LATIN SMALL LETTER A WITH BREVE AND ACUTE
+                       case 0x1EB1:    // LATIN SMALL LETTER A WITH BREVE AND GRAVE
+                       case 0x1EB3:    // LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
+                       case 0x1EB5:    // LATIN SMALL LETTER A WITH BREVE AND TILDE
+                       case 0x1EB7:    // LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
+                       case 0xFF41:    // FULLWIDTH LATIN SMALL LETTER A
+                               *out = 'a'; return 1;
+                       case 0x0180:    // LATIN SMALL LETTER B WITH STROKE
+                       case 0x0183:    // LATIN SMALL LETTER B WITH TOPBAR
+                       case 0x0253:    // LATIN SMALL LETTER B WITH HOOK
+                       case 0x1E03:    // LATIN SMALL LETTER B WITH DOT ABOVE
+                       case 0x1E05:    // LATIN SMALL LETTER B WITH DOT BELOW
+                       case 0x1E07:    // LATIN SMALL LETTER B WITH LINE BELOW
+                       case 0xFF42:    // FULLWIDTH LATIN SMALL LETTER B
+                               *out = 'b'; return 1;
+                       case 0x00E7:    // LATIN SMALL LETTER C WITH CEDILLA
+                       case 0x0107:    // LATIN SMALL LETTER C WITH ACUTE
+                       case 0x0109:    // LATIN SMALL LETTER C WITH CIRCUMFLEX
+                       case 0x010B:    // LATIN SMALL LETTER C WITH DOT ABOVE
+                       case 0x010D:    // LATIN SMALL LETTER C WITH CARON
+                       case 0x0188:    // LATIN SMALL LETTER C WITH HOOK
+                       case 0x0255:    // LATIN SMALL LETTER C WITH CURL
+                       case 0x1E09:    // LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
+                       case 0xFF43:    // FULLWIDTH LATIN SMALL LETTER C
+                               *out = 'c'; return 1;
+                       case 0x010F:    // LATIN SMALL LETTER D WITH CARON
+                       case 0x0111:    // LATIN SMALL LETTER D WITH STROKE
+                       case 0x018C:    // LATIN SMALL LETTER D WITH TOPBAR
+                       case 0x0256:    // LATIN SMALL LETTER D WITH TAIL
+                       case 0x0257:    // LATIN SMALL LETTER D WITH HOOK
+                       case 0x1E0B:    // LATIN SMALL LETTER D WITH DOT ABOVE
+                       case 0x1E0D:    // LATIN SMALL LETTER D WITH DOT BELOW
+                       case 0x1E0F:    // LATIN SMALL LETTER D WITH LINE BELOW
+                       case 0x1E11:    // LATIN SMALL LETTER D WITH CEDILLA
+                       case 0x1E13:    // LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
+                       case 0xFF44:    // FULLWIDTH LATIN SMALL LETTER D
+                               *out = 'd'; return 1;
+                       case 0x00E8:    // LATIN SMALL LETTER E WITH GRAVE
+                       case 0x00E9:    // LATIN SMALL LETTER E WITH ACUTE
+                       case 0x00EA:    // LATIN SMALL LETTER E WITH CIRCUMFLEX
+                       case 0x00EB:    // LATIN SMALL LETTER E WITH DIAERESIS
+                       case 0x0113:    // LATIN SMALL LETTER E WITH MACRON
+                       case 0x0115:    // LATIN SMALL LETTER E WITH BREVE
+                       case 0x0117:    // LATIN SMALL LETTER E WITH DOT ABOVE
+                       case 0x0119:    // LATIN SMALL LETTER E WITH OGONEK
+                       case 0x011B:    // LATIN SMALL LETTER E WITH CARON
+                       case 0x0205:    // LATIN SMALL LETTER E WITH DOUBLE GRAVE
+                       case 0x0207:    // LATIN SMALL LETTER E WITH INVERTED BREVE
+                       case 0x1E15:    // LATIN SMALL LETTER E WITH MACRON AND GRAVE
+                       case 0x1E17:    // LATIN SMALL LETTER E WITH MACRON AND ACUTE
+                       case 0x1E19:    // LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
+                       case 0x1E1B:    // LATIN SMALL LETTER E WITH TILDE BELOW
+                       case 0x1E1D:    // LATIN SMALL LETTER E WITH CEDILLA AND BREVE
+                       case 0x1EB9:    // LATIN SMALL LETTER E WITH DOT BELOW
+                       case 0x1EBB:    // LATIN SMALL LETTER E WITH HOOK ABOVE
+                       case 0x1EBD:    // LATIN SMALL LETTER E WITH TILDE
+                       case 0x1EBF:    // LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
+                       case 0x1EC1:    // LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
+                       case 0x1EC3:    // LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+                       case 0x1EC5:    // LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
+                       case 0x1EC7:    // LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+                       case 0xFF45:    // FULLWIDTH LATIN SMALL LETTER E
+                               *out = 'e'; return 1;
+                       case 0x0192:    // LATIN SMALL LETTER F WITH HOOK
+                       case 0x1E1F:    // LATIN SMALL LETTER F WITH DOT ABOVE
+                       case 0xFF46:    // FULLWIDTH LATIN SMALL LETTER F
+                               *out = 'f'; return 1;
+                       case 0x011D:    // LATIN SMALL LETTER G WITH CIRCUMFLEX
+                       case 0x011F:    // LATIN SMALL LETTER G WITH BREVE
+                       case 0x0121:    // LATIN SMALL LETTER G WITH DOT ABOVE
+                       case 0x0123:    // LATIN SMALL LETTER G WITH CEDILLA
+                       case 0x01E5:    // LATIN SMALL LETTER G WITH STROKE
+                       case 0x01E7:    // LATIN SMALL LETTER G WITH CARON
+                       case 0x01F5:    // LATIN SMALL LETTER G WITH ACUTE
+                       case 0x0260:    // LATIN SMALL LETTER G WITH HOOK
+                       case 0x1E21:    // LATIN SMALL LETTER G WITH MACRON
+                       case 0xFF47:    // FULLWIDTH LATIN SMALL LETTER G
+                               *out = 'g'; return 1;
+                       case 0x0125:    // LATIN SMALL LETTER H WITH CIRCUMFLEX
+                       case 0x0127:    // LATIN SMALL LETTER H WITH STROKE
+                       case 0x0266:    // LATIN SMALL LETTER H WITH HOOK
+                       case 0x1E23:    // LATIN SMALL LETTER H WITH DOT ABOVE
+                       case 0x1E25:    // LATIN SMALL LETTER H WITH DOT BELOW
+                       case 0x1E27:    // LATIN SMALL LETTER H WITH DIAERESIS
+                       case 0x1E29:    // LATIN SMALL LETTER H WITH CEDILLA
+                       case 0x1E2B:    // LATIN SMALL LETTER H WITH BREVE BELOW
+                       case 0x1E96:    // LATIN SMALL LETTER H WITH LINE BELOW
+                       case 0xFF48:    // FULLWIDTH LATIN SMALL LETTER H
+                               *out = 'h'; return 1;
+                       case 0x00EC:    // LATIN SMALL LETTER I WITH GRAVE
+                       case 0x00ED:    // LATIN SMALL LETTER I WITH ACUTE
+                       case 0x00EE:    // LATIN SMALL LETTER I WITH CIRCUMFLEX
+                       case 0x00EF:    // LATIN SMALL LETTER I WITH DIAERESIS
+                       case 0x0129:    // LATIN SMALL LETTER I WITH TILDE
+                       case 0x012B:    // LATIN SMALL LETTER I WITH MACRON
+                       case 0x012D:    // LATIN SMALL LETTER I WITH BREVE
+                       case 0x012F:    // LATIN SMALL LETTER I WITH OGONEK
+                       case 0x01D0:    // LATIN SMALL LETTER I WITH CARON
+                       case 0x0209:    // LATIN SMALL LETTER I WITH DOUBLE GRAVE
+                       case 0x020B:    // LATIN SMALL LETTER I WITH INVERTED BREVE
+                       case 0x0268:    // LATIN SMALL LETTER I WITH STROKE
+                       case 0x1E2D:    // LATIN SMALL LETTER I WITH TILDE BELOW
+                       case 0x1E2F:    // LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
+                       case 0x1EC9:    // LATIN SMALL LETTER I WITH HOOK ABOVE
+                       case 0x1ECB:    // LATIN SMALL LETTER I WITH DOT BELOW
+                       case 0xFF49:    // FULLWIDTH LATIN SMALL LETTER I
+                               *out = 'i'; return 1;
+                       case 0x0135:    // LATIN SMALL LETTER J WITH CIRCUMFLEX
+                       case 0x01F0:    // LATIN SMALL LETTER J WITH CARON
+                       case 0x029D:    // LATIN SMALL LETTER J WITH CROSSED-TAIL
+                       case 0xFF4A:    // FULLWIDTH LATIN SMALL LETTER J
+                               *out = 'j'; return 1;
+                       case 0x0137:    // LATIN SMALL LETTER K WITH CEDILLA
+                       case 0x0199:    // LATIN SMALL LETTER K WITH HOOK
+                       case 0x01E9:    // LATIN SMALL LETTER K WITH CARON
+                       case 0x1E31:    // LATIN SMALL LETTER K WITH ACUTE
+                       case 0x1E33:    // LATIN SMALL LETTER K WITH DOT BELOW
+                       case 0x1E35:    // LATIN SMALL LETTER K WITH LINE BELOW
+                       case 0xFF4B:    // FULLWIDTH LATIN SMALL LETTER K
+                               *out = 'k'; return 1;
+                       case 0x013A:    // LATIN SMALL LETTER L WITH ACUTE
+                       case 0x013C:    // LATIN SMALL LETTER L WITH CEDILLA
+                       case 0x013E:    // LATIN SMALL LETTER L WITH CARON
+                       case 0x0140:    // LATIN SMALL LETTER L WITH MIDDLE DOT
+                       case 0x0142:    // LATIN SMALL LETTER L WITH STROKE
+                       case 0x019A:    // LATIN SMALL LETTER L WITH BAR
+                       case 0x026B:    // LATIN SMALL LETTER L WITH MIDDLE TILDE
+                       case 0x026C:    // LATIN SMALL LETTER L WITH BELT
+                       case 0x026D:    // LATIN SMALL LETTER L WITH RETROFLEX HOOK
+                       case 0x1E37:    // LATIN SMALL LETTER L WITH DOT BELOW
+                       case 0x1E39:    // LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
+                       case 0x1E3B:    // LATIN SMALL LETTER L WITH LINE BELOW
+                       case 0x1E3D:    // LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
+                       case 0xFF4C:    // FULLWIDTH LATIN SMALL LETTER L
+                               *out = 'l'; return 1;
+                       case 0x0271:    // LATIN SMALL LETTER M WITH HOOK
+                       case 0x1E3F:    // LATIN SMALL LETTER M WITH ACUTE
+                       case 0x1E41:    // LATIN SMALL LETTER M WITH DOT ABOVE
+                       case 0x1E43:    // LATIN SMALL LETTER M WITH DOT BELOW
+                       case 0xFF4D:    // FULLWIDTH LATIN SMALL LETTER M
+                               *out = 'm'; return 1;
+                       case 0x00F1:    // LATIN SMALL LETTER N WITH TILDE
+                       case 0x0144:    // LATIN SMALL LETTER N WITH ACUTE
+                       case 0x0146:    // LATIN SMALL LETTER N WITH CEDILLA
+                       case 0x0148:    // LATIN SMALL LETTER N WITH CARON
+                       case 0x019E:    // LATIN SMALL LETTER N WITH LONG RIGHT LEG
+                       case 0x0272:    // LATIN SMALL LETTER N WITH LEFT HOOK
+                       case 0x0273:    // LATIN SMALL LETTER N WITH RETROFLEX HOOK
+                       case 0x1E45:    // LATIN SMALL LETTER N WITH DOT ABOVE
+                       case 0x1E47:    // LATIN SMALL LETTER N WITH DOT BELOW
+                       case 0x1E49:    // LATIN SMALL LETTER N WITH LINE BELOW
+                       case 0x1E4B:    // LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
+                       case 0xFF4E:    // FULLWIDTH LATIN SMALL LETTER N
+                               *out = 'n'; return 1;
+                       case 0x00F2:    // LATIN SMALL LETTER O WITH GRAVE
+                       case 0x00F3:    // LATIN SMALL LETTER O WITH ACUTE
+                       case 0x00F4:    // LATIN SMALL LETTER O WITH CIRCUMFLEX
+                       case 0x00F5:    // LATIN SMALL LETTER O WITH TILDE
+                       case 0x00F6:    // LATIN SMALL LETTER O WITH DIAERESIS
+                       case 0x00F8:    // LATIN SMALL LETTER O WITH STROKE
+                       case 0x014D:    // LATIN SMALL LETTER O WITH MACRON
+                       case 0x014F:    // LATIN SMALL LETTER O WITH BREVE
+                       case 0x0151:    // LATIN SMALL LETTER O WITH DOUBLE ACUTE
+                       case 0x01A1:    // LATIN SMALL LETTER O WITH HORN
+                       case 0x01D2:    // LATIN SMALL LETTER O WITH CARON
+                       case 0x01EB:    // LATIN SMALL LETTER O WITH OGONEK
+                       case 0x01ED:    // LATIN SMALL LETTER O WITH OGONEK AND MACRON
+                       case 0x01FF:    // LATIN SMALL LETTER O WITH STROKE AND ACUTE
+                       case 0x020D:    // LATIN SMALL LETTER O WITH DOUBLE GRAVE
+                       case 0x020F:    // LATIN SMALL LETTER O WITH INVERTED BREVE
+                       case 0x1E4D:    // LATIN SMALL LETTER O WITH TILDE AND ACUTE
+                       case 0x1E4F:    // LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
+                       case 0x1E51:    // LATIN SMALL LETTER O WITH MACRON AND GRAVE
+                       case 0x1E53:    // LATIN SMALL LETTER O WITH MACRON AND ACUTE
+                       case 0x1ECD:    // LATIN SMALL LETTER O WITH DOT BELOW
+                       case 0x1ECF:    // LATIN SMALL LETTER O WITH HOOK ABOVE
+                       case 0x1ED1:    // LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
+                       case 0x1ED3:    // LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
+                       case 0x1ED5:    // LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+                       case 0x1ED7:    // LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
+                       case 0x1ED9:    // LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+                       case 0x1EDB:    // LATIN SMALL LETTER O WITH HORN AND ACUTE
+                       case 0x1EDD:    // LATIN SMALL LETTER O WITH HORN AND GRAVE
+                       case 0x1EDF:    // LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
+                       case 0x1EE1:    // LATIN SMALL LETTER O WITH HORN AND TILDE
+                       case 0x1EE3:    // LATIN SMALL LETTER O WITH HORN AND DOT BELOW
+                       case 0xFF4F:    // FULLWIDTH LATIN SMALL LETTER O
+                               *out = 'o'; return 1;
+                       case 0x01A5:    // LATIN SMALL LETTER P WITH HOOK
+                       case 0x1E55:    // LATIN SMALL LETTER P WITH ACUTE
+                       case 0x1E57:    // LATIN SMALL LETTER P WITH DOT ABOVE
+                       case 0xFF50:    // FULLWIDTH LATIN SMALL LETTER P
+                               *out = 'p'; return 1;
+                       case 0x02A0:    // LATIN SMALL LETTER Q WITH HOOK
+                       case 0xFF51:    // FULLWIDTH LATIN SMALL LETTER Q
+                               *out = 'q'; return 1;
+                       case 0x0155:    // LATIN SMALL LETTER R WITH ACUTE
+                       case 0x0157:    // LATIN SMALL LETTER R WITH CEDILLA
+                       case 0x0159:    // LATIN SMALL LETTER R WITH CARON
+                       case 0x0211:    // LATIN SMALL LETTER R WITH DOUBLE GRAVE
+                       case 0x0213:    // LATIN SMALL LETTER R WITH INVERTED BREVE
+                       case 0x027C:    // LATIN SMALL LETTER R WITH LONG LEG
+                       case 0x027D:    // LATIN SMALL LETTER R WITH TAIL
+                       case 0x027E:    // LATIN SMALL LETTER R WITH FISHHOOK
+                       case 0x1E59:    // LATIN SMALL LETTER R WITH DOT ABOVE
+                       case 0x1E5B:    // LATIN SMALL LETTER R WITH DOT BELOW
+                       case 0x1E5D:    // LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
+                       case 0x1E5F:    // LATIN SMALL LETTER R WITH LINE BELOW
+                       case 0xFF52:    // FULLWIDTH LATIN SMALL LETTER R
+                               *out = 'r'; return 1;
+                       case 0x015B:    // LATIN SMALL LETTER S WITH ACUTE
+                       case 0x015D:    // LATIN SMALL LETTER S WITH CIRCUMFLEX
+                       case 0x015F:    // LATIN SMALL LETTER S WITH CEDILLA
+                       case 0x0161:    // LATIN SMALL LETTER S WITH CARON
+                       case 0x0282:    // LATIN SMALL LETTER S WITH HOOK
+                       case 0x1E61:    // LATIN SMALL LETTER S WITH DOT ABOVE
+                       case 0x1E63:    // LATIN SMALL LETTER S WITH DOT BELOW
+                       case 0x1E65:    // LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
+                       case 0x1E67:    // LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
+                       case 0x1E69:    // LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
+                       case 0xFF53:    // FULLWIDTH LATIN SMALL LETTER S
+                               *out = 's'; return 1;
+                       case 0x0163:    // LATIN SMALL LETTER T WITH CEDILLA
+                       case 0x0165:    // LATIN SMALL LETTER T WITH CARON
+                       case 0x0167:    // LATIN SMALL LETTER T WITH STROKE
+                       case 0x01AB:    // LATIN SMALL LETTER T WITH PALATAL HOOK
+                       case 0x01AD:    // LATIN SMALL LETTER T WITH HOOK
+                       case 0x0288:    // LATIN SMALL LETTER T WITH RETROFLEX HOOK
+                       case 0x1E6B:    // LATIN SMALL LETTER T WITH DOT ABOVE
+                       case 0x1E6D:    // LATIN SMALL LETTER T WITH DOT BELOW
+                       case 0x1E6F:    // LATIN SMALL LETTER T WITH LINE BELOW
+                       case 0x1E71:    // LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
+                       case 0x1E97:    // LATIN SMALL LETTER T WITH DIAERESIS
+                       case 0xFF54:    // FULLWIDTH LATIN SMALL LETTER T
+                               *out = 't'; return 1;
+                       case 0x00F9:    // LATIN SMALL LETTER U WITH GRAVE
+                       case 0x00FA:    // LATIN SMALL LETTER U WITH ACUTE
+                       case 0x00FB:    // LATIN SMALL LETTER U WITH CIRCUMFLEX
+                       case 0x00FC:    // LATIN SMALL LETTER U WITH DIAERESIS
+                       case 0x0169:    // LATIN SMALL LETTER U WITH TILDE
+                       case 0x016B:    // LATIN SMALL LETTER U WITH MACRON
+                       case 0x016D:    // LATIN SMALL LETTER U WITH BREVE
+                       case 0x016F:    // LATIN SMALL LETTER U WITH RING ABOVE
+                       case 0x0171:    // LATIN SMALL LETTER U WITH DOUBLE ACUTE
+                       case 0x0173:    // LATIN SMALL LETTER U WITH OGONEK
+                       case 0x01B0:    // LATIN SMALL LETTER U WITH HORN
+                       case 0x01D4:    // LATIN SMALL LETTER U WITH CARON
+                       case 0x01D6:    // LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
+                       case 0x01D8:    // LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
+                       case 0x01DA:    // LATIN SMALL LETTER U WITH DIAERESIS AND CARON
+                       case 0x01DC:    // LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
+                       case 0x0215:    // LATIN SMALL LETTER U WITH DOUBLE GRAVE
+                       case 0x0217:    // LATIN SMALL LETTER U WITH INVERTED BREVE
+                       case 0x0289:    // LATIN SMALL LETTER U BAR
+                       case 0x1E73:    // LATIN SMALL LETTER U WITH DIAERESIS BELOW
+                       case 0x1E75:    // LATIN SMALL LETTER U WITH TILDE BELOW
+                       case 0x1E77:    // LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
+                       case 0x1E79:    // LATIN SMALL LETTER U WITH TILDE AND ACUTE
+                       case 0x1E7B:    // LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
+                       case 0x1EE5:    // LATIN SMALL LETTER U WITH DOT BELOW
+                       case 0x1EE7:    // LATIN SMALL LETTER U WITH HOOK ABOVE
+                       case 0x1EE9:    // LATIN SMALL LETTER U WITH HORN AND ACUTE
+                       case 0x1EEB:    // LATIN SMALL LETTER U WITH HORN AND GRAVE
+                       case 0x1EED:    // LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
+                       case 0x1EEF:    // LATIN SMALL LETTER U WITH HORN AND TILDE
+                       case 0x1EF1:    // LATIN SMALL LETTER U WITH HORN AND DOT BELOW
+                       case 0xFF55:    // FULLWIDTH LATIN SMALL LETTER U
+                               *out = 'u'; return 1;
+                       case 0x028B:    // LATIN SMALL LETTER V WITH HOOK
+                       case 0x1E7D:    // LATIN SMALL LETTER V WITH TILDE
+                       case 0x1E7F:    // LATIN SMALL LETTER V WITH DOT BELOW
+                       case 0xFF56:    // FULLWIDTH LATIN SMALL LETTER V
+                               *out = 'v'; return 1;
+                       case 0x0175:    // LATIN SMALL LETTER W WITH CIRCUMFLEX
+                       case 0x1E81:    // LATIN SMALL LETTER W WITH GRAVE
+                       case 0x1E83:    // LATIN SMALL LETTER W WITH ACUTE
+                       case 0x1E85:    // LATIN SMALL LETTER W WITH DIAERESIS
+                       case 0x1E87:    // LATIN SMALL LETTER W WITH DOT ABOVE
+                       case 0x1E89:    // LATIN SMALL LETTER W WITH DOT BELOW
+                       case 0x1E98:    // LATIN SMALL LETTER W WITH RING ABOVE
+                       case 0xFF57:    // FULLWIDTH LATIN SMALL LETTER W
+                               *out = 'w'; return 1;
+                       case 0x1E8B:    // LATIN SMALL LETTER X WITH DOT ABOVE
+                       case 0x1E8D:    // LATIN SMALL LETTER X WITH DIAERESIS
+                       case 0xFF58:    // FULLWIDTH LATIN SMALL LETTER X
+                               *out = 'x'; return 1;
+                       case 0x00FD:    // LATIN SMALL LETTER Y WITH ACUTE
+                       case 0x00FF:    // LATIN SMALL LETTER Y WITH DIAERESIS
+                       case 0x0177:    // LATIN SMALL LETTER Y WITH CIRCUMFLEX
+                       case 0x01B4:    // LATIN SMALL LETTER Y WITH HOOK
+                       case 0x1E8F:    // LATIN SMALL LETTER Y WITH DOT ABOVE
+                       case 0x1E99:    // LATIN SMALL LETTER Y WITH RING ABOVE
+                       case 0x1EF3:    // LATIN SMALL LETTER Y WITH GRAVE
+                       case 0x1EF5:    // LATIN SMALL LETTER Y WITH DOT BELOW
+                       case 0x1EF7:    // LATIN SMALL LETTER Y WITH HOOK ABOVE
+                       case 0x1EF9:    // LATIN SMALL LETTER Y WITH TILDE
+                       case 0xFF59:    // FULLWIDTH LATIN SMALL LETTER Y
+                               *out = 'y'; return 1;
+                       case 0x017A:    // LATIN SMALL LETTER Z WITH ACUTE
+                       case 0x017C:    // LATIN SMALL LETTER Z WITH DOT ABOVE
+                       case 0x017E:    // LATIN SMALL LETTER Z WITH CARON
+                       case 0x01B6:    // LATIN SMALL LETTER Z WITH STROKE
+                       case 0x0290:    // LATIN SMALL LETTER Z WITH RETROFLEX HOOK
+                       case 0x0291:    // LATIN SMALL LETTER Z WITH CURL
+                       case 0x1E91:    // LATIN SMALL LETTER Z WITH CIRCUMFLEX
+                       case 0x1E93:    // LATIN SMALL LETTER Z WITH DOT BELOW
+                       case 0x1E95:    // LATIN SMALL LETTER Z WITH LINE BELOW
+                       case 0xFF5A:    // FULLWIDTH LATIN SMALL LETTER Z
+                               *out = 'z'; return 1;
+                       case 0x0660:    // ARABIC-INDIC DIGIT ZERO
+                       case 0x06F0:    // EXTENDED ARABIC-INDIC DIGIT ZERO
+                       case 0x0966:    // DEVANAGARI DIGIT ZERO
+                       case 0x09E6:    // BENGALI DIGIT ZERO
+                       case 0x0A66:    // GURMUKHI DIGIT ZERO
+                       case 0x0AE6:    // GUJARATI DIGIT ZERO
+                       case 0x0B66:    // ORIYA DIGIT ZERO
+                       case 0x0BE6:    // TAMIL DIGIT ZERO
+                       case 0x0C66:    // TELUGU DIGIT ZERO
+                       case 0x0CE6:    // KANNADA DIGIT ZERO
+                       case 0x0D66:    // MALAYALAM DIGIT ZERO
+                       case 0x0E50:    // THAI DIGIT ZERO
+                       case 0x0ED0:    // LAO DIGIT ZERO
+                       case 0x0F20:    // TIBETAN DIGIT ZERO
+                       case 0xFF10:    // FULLWIDTH DIGIT ZERO
+                               *out = '0'; return 1;
+                       case 0x0661:    // ARABIC-INDIC DIGIT ONE
+                       case 0x06F1:    // EXTENDED ARABIC-INDIC DIGIT ONE
+                       case 0x0967:    // DEVANAGARI DIGIT ONE
+                       case 0x09E7:    // BENGALI DIGIT ONE
+                       case 0x0A67:    // GURMUKHI DIGIT ONE
+                       case 0x0AE7:    // GUJARATI DIGIT ONE
+                       case 0x0B67:    // ORIYA DIGIT ONE
+                       case 0x0BE7:    // TAMIL DIGIT ONE
+                       case 0x0C67:    // TELUGU DIGIT ONE
+                       case 0x0CE7:    // KANNADA DIGIT ONE
+                       case 0x0D67:    // MALAYALAM DIGIT ONE
+                       case 0x0E51:    // THAI DIGIT ONE
+                       case 0x0ED1:    // LAO DIGIT ONE
+                       case 0x0F21:    // TIBETAN DIGIT ONE
+                       case 0xFF11:    // FULLWIDTH DIGIT ONE
+                               *out = '1'; return 1;
+                       case 0x0662:    // ARABIC-INDIC DIGIT TWO
+                       case 0x06F2:    // EXTENDED ARABIC-INDIC DIGIT TWO
+                       case 0x0968:    // DEVANAGARI DIGIT TWO
+                       case 0x09E8:    // BENGALI DIGIT TWO
+                       case 0x0A68:    // GURMUKHI DIGIT TWO
+                       case 0x0AE8:    // GUJARATI DIGIT TWO
+                       case 0x0B68:    // ORIYA DIGIT TWO
+                       case 0x0BE8:    // TAMIL DIGIT TWO
+                       case 0x0C68:    // TELUGU DIGIT TWO
+                       case 0x0CE8:    // KANNADA DIGIT TWO
+                       case 0x0D68:    // MALAYALAM DIGIT TWO
+                       case 0x0E52:    // THAI DIGIT TWO
+                       case 0x0ED2:    // LAO DIGIT TWO
+                       case 0x0F22:    // TIBETAN DIGIT TWO
+                       case 0xFF12:    // FULLWIDTH DIGIT TWO
+                               *out = '2'; return 1;
+                       case 0x0663:    // ARABIC-INDIC DIGIT THREE
+                       case 0x06F3:    // EXTENDED ARABIC-INDIC DIGIT THREE
+                       case 0x0969:    // DEVANAGARI DIGIT THREE
+                       case 0x09E9:    // BENGALI DIGIT THREE
+                       case 0x0A69:    // GURMUKHI DIGIT THREE
+                       case 0x0AE9:    // GUJARATI DIGIT THREE
+                       case 0x0B69:    // ORIYA DIGIT THREE
+                       case 0x0BE9:    // TAMIL DIGIT THREE
+                       case 0x0C69:    // TELUGU DIGIT THREE
+                       case 0x0CE9:    // KANNADA DIGIT THREE
+                       case 0x0D69:    // MALAYALAM DIGIT THREE
+                       case 0x0E53:    // THAI DIGIT THREE
+                       case 0x0ED3:    // LAO DIGIT THREE
+                       case 0x0F23:    // TIBETAN DIGIT THREE
+                       case 0xFF13:    // FULLWIDTH DIGIT THREE
+                               *out = '3'; return 1;
+                       case 0x0664:    // ARABIC-INDIC DIGIT FOUR
+                       case 0x06F4:    // EXTENDED ARABIC-INDIC DIGIT FOUR
+                       case 0x096A:    // DEVANAGARI DIGIT FOUR
+                       case 0x09EA:    // BENGALI DIGIT FOUR
+                       case 0x0A6A:    // GURMUKHI DIGIT FOUR
+                       case 0x0AEA:    // GUJARATI DIGIT FOUR
+                       case 0x0B6A:    // ORIYA DIGIT FOUR
+                       case 0x0BEA:    // TAMIL DIGIT FOUR
+                       case 0x0C6A:    // TELUGU DIGIT FOUR
+                       case 0x0CEA:    // KANNADA DIGIT FOUR
+                       case 0x0D6A:    // MALAYALAM DIGIT FOUR
+                       case 0x0E54:    // THAI DIGIT FOUR
+                       case 0x0ED4:    // LAO DIGIT FOUR
+                       case 0x0F24:    // TIBETAN DIGIT FOUR
+                       case 0xFF14:    // FULLWIDTH DIGIT FOUR
+                               *out = '4'; return 1;
+                       case 0x0665:    // ARABIC-INDIC DIGIT FIVE
+                       case 0x06F5:    // EXTENDED ARABIC-INDIC DIGIT FIVE
+                       case 0x096B:    // DEVANAGARI DIGIT FIVE
+                       case 0x09EB:    // BENGALI DIGIT FIVE
+                       case 0x0A6B:    // GURMUKHI DIGIT FIVE
+                       case 0x0AEB:    // GUJARATI DIGIT FIVE
+                       case 0x0B6B:    // ORIYA DIGIT FIVE
+                       case 0x0BEB:    // TAMIL DIGIT FIVE
+                       case 0x0C6B:    // TELUGU DIGIT FIVE
+                       case 0x0CEB:    // KANNADA DIGIT FIVE
+                       case 0x0D6B:    // MALAYALAM DIGIT FIVE
+                       case 0x0E55:    // THAI DIGIT FIVE
+                       case 0x0ED5:    // LAO DIGIT FIVE
+                       case 0x0F25:    // TIBETAN DIGIT FIVE
+                       case 0xFF15:    // FULLWIDTH DIGIT FIVE
+                               *out = '5'; return 1;
+                       case 0x0666:    // ARABIC-INDIC DIGIT SIX
+                       case 0x06F6:    // EXTENDED ARABIC-INDIC DIGIT SIX
+                       case 0x096C:    // DEVANAGARI DIGIT SIX
+                       case 0x09EC:    // BENGALI DIGIT SIX
+                       case 0x0A6C:    // GURMUKHI DIGIT SIX
+                       case 0x0AEC:    // GUJARATI DIGIT SIX
+                       case 0x0B6C:    // ORIYA DIGIT SIX
+                       case 0x0BEC:    // TAMIL DIGIT SIX
+                       case 0x0C6C:    // TELUGU DIGIT SIX
+                       case 0x0CEC:    // KANNADA DIGIT SIX
+                       case 0x0D6C:    // MALAYALAM DIGIT SIX
+                       case 0x0E56:    // THAI DIGIT SIX
+                       case 0x0ED6:    // LAO DIGIT SIX
+                       case 0x0F26:    // TIBETAN DIGIT SIX
+                       case 0xFF16:    // FULLWIDTH DIGIT SIX
+                               *out = '6'; return 1;
+                       case 0x0667:    // ARABIC-INDIC DIGIT SEVEN
+                       case 0x06F7:    // EXTENDED ARABIC-INDIC DIGIT SEVEN
+                       case 0x096D:    // DEVANAGARI DIGIT SEVEN
+                       case 0x09ED:    // BENGALI DIGIT SEVEN
+                       case 0x0A6D:    // GURMUKHI DIGIT SEVEN
+                       case 0x0AED:    // GUJARATI DIGIT SEVEN
+                       case 0x0B6D:    // ORIYA DIGIT SEVEN
+                       case 0x0BED:    // TAMIL DIGIT SEVEN
+                       case 0x0C6D:    // TELUGU DIGIT SEVEN
+                       case 0x0CED:    // KANNADA DIGIT SEVEN
+                       case 0x0D6D:    // MALAYALAM DIGIT SEVEN
+                       case 0x0E57:    // THAI DIGIT SEVEN
+                       case 0x0ED7:    // LAO DIGIT SEVEN
+                       case 0x0F27:    // TIBETAN DIGIT SEVEN
+                       case 0xFF17:    // FULLWIDTH DIGIT SEVEN
+                               *out = '7'; return 1;
+                       case 0x0668:    // ARABIC-INDIC DIGIT EIGHT
+                       case 0x06F8:    // EXTENDED ARABIC-INDIC DIGIT EIGHT
+                       case 0x096E:    // DEVANAGARI DIGIT EIGHT
+                       case 0x09EE:    // BENGALI DIGIT EIGHT
+                       case 0x0A6E:    // GURMUKHI DIGIT EIGHT
+                       case 0x0AEE:    // GUJARATI DIGIT EIGHT
+                       case 0x0B6E:    // ORIYA DIGIT EIGHT
+                       case 0x0BEE:    // TAMIL DIGIT EIGHT
+                       case 0x0C6E:    // TELUGU DIGIT EIGHT
+                       case 0x0CEE:    // KANNADA DIGIT EIGHT
+                       case 0x0D6E:    // MALAYALAM DIGIT EIGHT
+                       case 0x0E58:    // THAI DIGIT EIGHT
+                       case 0x0ED8:    // LAO DIGIT EIGHT
+                       case 0x0F28:    // TIBETAN DIGIT EIGHT
+                       case 0xFF18:    // FULLWIDTH DIGIT EIGHT
+                               *out = '8'; return 1;
+                       case 0x0669:    // ARABIC-INDIC DIGIT NINE
+                       case 0x06F9:    // EXTENDED ARABIC-INDIC DIGIT NINE
+                       case 0x096F:    // DEVANAGARI DIGIT NINE
+                       case 0x09EF:    // BENGALI DIGIT NINE
+                       case 0x0A6F:    // GURMUKHI DIGIT NINE
+                       case 0x0AEF:    // GUJARATI DIGIT NINE
+                       case 0x0B6F:    // ORIYA DIGIT NINE
+                       case 0x0BEF:    // TAMIL DIGIT NINE
+                       case 0x0C6F:    // TELUGU DIGIT NINE
+                       case 0x0CEF:    // KANNADA DIGIT NINE
+                       case 0x0D6F:    // MALAYALAM DIGIT NINE
+                       case 0x0E59:    // THAI DIGIT NINE
+                       case 0x0ED9:    // LAO DIGIT NINE
+                       case 0x0F29:    // TIBETAN DIGIT NINE
+                       case 0xFF19:    // FULLWIDTH DIGIT NINE
+                               *out = '9'; return 1;
+                       case 0x00A1:    // INVERTED EXCLAMATION MARK
+                               *out = '!'; return 1;
+                       case 0x00A6:    // BROKEN BAR
+                               *out = '|'; return 1;
+                       case 0x00AD:    // SOFT HYPHEN
+                       case 0x02D7:    // MODIFIER LETTER MINUS SIGN
+                       case 0x2010:    // HYPHEN
+                       case 0x2011:    // NON-BREAKING HYPHEN
+                       case 0x2212:    // MINUS SIGN
+                       case 0xFE63:    // SMALL HYPHEN-MINUS
+                       case 0xFF0D:    // FULLWIDTH HYPHEN-MINUS
+                               *out = '-'; return 1;
+                       case 0x00BF:    // INVERTED QUESTION MARK
+                               *out = '?'; return 1;
+                       case 0x00D7:    // MULTIPLICATION SIGN
+                               *out = 'x'; return 1;
+                       case 0x2018:    // LEFT SINGLE QUOTATION MARK
+                       case 0x2019:    // RIGHT SINGLE QUOTATION MARK
+                       case 0xFF07:    // FULLWIDTH APOSTROPHE
+                               *out = '\''; return 1;
+                       case 0x201c:    // LEFT DOUBLE QUOTATION MARK
+                       case 0x201d:    // RIGHT DOUBLE QUOTATION MARK
+                       case 0xFF02:    // FULLWIDTH QUOTATION MARK
                                *out = '"'; return 1;
                        default:
                                return 0;
@@ -486,6 +1354,10 @@
 };
 
 
+/*
+ TODO I'm pretty sure you can't break Korean at any character.
+ And what about Japanese Katakana and Hiragana?
+*/
 static const _rmap can_break_words_data[]=
 {
        {"0"}, /* default value - can't break words at any character. */    
@@ -787,7 +1669,8 @@
        }
        {
            if (cjk_locale()) {
-               /* CJK guys should do something similar to 'else' branch */     
+                       /* CJK guys should do something similar to 'else' branch */
+                       TexPrologue = " ";
            } else {
                char buf[500];
                int len = 0;
@@ -876,6 +1759,11 @@
     return TexPrologue;
 };
 
+// Warning:
+// This code forces us to use "GB2312", "BIG5", etc instead
+// of "CP936", "CP950", etc even when our iconv supports
+// the "CPxxx" form and the encodings differ.
+// Be sure this is what you want if you call this function.
 const char* XAP_EncodingManager::charsetFromCodepage(int lid) const
 {
     static char buf[100];
Index: src/af/xap/xp/xap_EncodingManager.h
===================================================================
RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.h,v
retrieving revision 1.19
diff -u -r1.19 xap_EncodingManager.h
--- src/af/xap/xp/xap_EncodingManager.h 2001/05/25 05:52:12     1.19
+++ src/af/xap/xp/xap_EncodingManager.h 2001/06/03 07:18:23
@@ -52,9 +52,14 @@
     /*
        this shouldn't return NULL. Don't free or write to returned string. 
        The string should be uppercased (extra font tarballs assume this).
+       TODO isn't iconv case sensitive?  Mac encoding names are mixed case!
     */
     virtual const char* getNativeEncodingName() const;
 
+       /*
+       This should return true for any Unicode locale:
+       UTF-8 on *nix, UCS-2 on Windows, etc
+       */
        inline virtual bool isUnicodeLocale() const {return m_bIsUnicodeLocale;}
 
     /*

Reply via email to