This patch enables the RTF importer to make use of the \fcharset and
\fcpg properties of the font table and switch between encodings
when it encounters \f.

There are a few edge cases and charsets I couldn't find information
on, so please contact me or implement these if you know about them.

Andrew Dunbar.

-- 
http://linguaphile.sourceforge.net
Index: src/wp/impexp/xp/ie_imp_RTF.cpp
===================================================================
RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_RTF.cpp,v
retrieving revision 1.61
diff -u -r1.61 ie_imp_RTF.cpp
--- src/wp/impexp/xp/ie_imp_RTF.cpp     2001/05/08 04:19:38     1.61
+++ src/wp/impexp/xp/ie_imp_RTF.cpp     2001/05/21 08:16:57
@@ -149,10 +149,89 @@
        m_family = fontFamily;
        m_charSet = charSet;
        m_codepage = codepage;
+       m_szCodepage = "MS-ANSI";
        m_pitch = pitch;
        memcpy(m_panose, panose, 10*sizeof(unsigned char));
        m_pFontName = pFontName;
        m_pAlternativeFontName = pAlternativeFontName;
+
+       // Set charset/codepage converter
+       if (m_codepage && m_charSet)
+       {
+               UT_DEBUGMSG(("RTF Font has codepage *and* charset\n"));
+               UT_ASSERT(UT_NOT_IMPLEMENTED);
+       }
+       else if (m_codepage)
+       {
+               m_szCodepage = XAP_EncodingManager::get_instance()->charsetFromCodepage(m_codepage);
+       }
+       else if (m_charSet)
+       {
+               switch (m_charSet)
+               {
+                       case 0:         // ANSI_CHARSET
+                               m_szCodepage = "MS-ANSI";       // CP1252
+                               break;
+                       case 2:         // SYMBOL_CHARSET
+                               UT_DEBUGMSG(("RTF Font charset 'Symbol' not implemented\n"));
+                               UT_ASSERT(UT_NOT_IMPLEMENTED);
+                               break;
+                       case 128:       // SHIFTJIS_CHARSET
+                               m_szCodepage = "SHIFT-JIS";
+                               break;
+                       case 161:       // GREEK_CHARSET
+                               m_szCodepage = "MS-GREEK";      // CP1253
+                               break;
+                       case 162:       // TURKISH_CHARSET
+                               m_szCodepage = "MS-TURK";               // CP1254
+                               break;
+                       // TODO What is different?  Iconv only supports one MS Hebrew codepage.
+                       case 181:       // HEBREWUSER_CHARSET
+                               UT_DEBUGMSG(("RTF Font charset 'HEBREWUSER'??\n"));
+                       case 177:       // HEBREW_CHARSET
+                               m_szCodepage = "MS-HEBR";               // CP1255
+                               break;
+                       // TODO What is different?  Iconv only supports one MS Arabic codepage.
+                       case 178:       // ARABICSIMPLIFIED_CHARSET
+                               UT_DEBUGMSG(("RTF Font charset 'ARABICSIMPLIFIED'??\n"));
+                               m_szCodepage = "MS-ARAB";               // CP1256
+                               break;
+                       case 179:       // ARABICTRADITIONAL_CHARSET
+                               UT_DEBUGMSG(("RTF Font charset 'ARABICTRADITIONAL'??\n"));
+                               m_szCodepage = "MS-ARAB";               // CP1256
+                               break;
+                       case 180:       // ARABICUSER_CHARSET
+                               UT_DEBUGMSG(("RTF Font charset 'ARABICUSER'??\n"));
+                               m_szCodepage = "MS-ARAB";               // CP1256
+                               break;
+                       case 204:       // CYRILLIC_CHARSET
+                               m_szCodepage = "MS-CYRL";       // CP1251
+                               break;
+                       case 238:       // EASTERNEUROPE_CHARSET
+                               m_szCodepage = "MS-EE"; // CP1250
+                               break;
+                       case 254:       // PC437_CHARSET
+                               // TODO What is this and can iconv do it?
+                               UT_DEBUGMSG(("RTF Font charset 'PC437'??\n"));
+                               UT_ASSERT(UT_NOT_IMPLEMENTED);
+                               break;
+                       case 255:       // OEM_CHARSET
+                               // TODO Can iconv do this?
+                               UT_DEBUGMSG(("RTF Font charset 'OEM'??\n"));
+                               UT_ASSERT(UT_NOT_IMPLEMENTED);
+                               break;
+                       default:
+                               UT_DEBUGMSG(("RTF Font charset unknown: %d\n", m_charSet));
+                               // TODO Unknown charset
+                               UT_ASSERT(UT_NOT_IMPLEMENTED);
+               }
+       }
+       else
+       {
+               // TODO No codepage or charset - what do we do?
+               UT_DEBUGMSG(("RTF Font has neither codepage *nor* charset\n"));
+               // UT_ASSERT(UT_NOT_IMPLEMENTED);
+       }
 }
 
 
@@ -3499,6 +3578,10 @@
 
 bool IE_Imp_RTF::HandleFace(UT_uint32 fontNumber)
 {
+       RTFFontTableItem* pFont = GetNthTableFont(fontNumber);
+       if (pFont != NULL)
+               m_mbtowc.setInCharset(pFont->m_szCodepage);
+
        return HandleU32CharacterProp(fontNumber, &m_currentRTFState.m_charProps.m_fontNumber);
 }
 
Index: src/wp/impexp/xp/ie_imp_RTF.h
===================================================================
RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_RTF.h,v
retrieving revision 1.32
diff -u -r1.32 ie_imp_RTF.h
--- src/wp/impexp/xp/ie_imp_RTF.h       2001/05/09 12:34:18     1.32
+++ src/wp/impexp/xp/ie_imp_RTF.h       2001/05/21 08:16:59
@@ -51,6 +51,7 @@
        FontFamilyEnum m_family;
        int m_charSet;
        int m_codepage;
+       const char* m_szCodepage;
        FontPitch m_pitch;
        unsigned char m_panose[10];
        char* m_pFontName;

Reply via email to