Tag: cws_src680_dba205b
User: oj      
Date: 2006/08/11 01:17:09

Modified:
   dba/dbaccess/source/ui/misc/HtmlReader.cxx
   dba/dbaccess/source/ui/inc/HtmlReader.hxx

Log:
 #i46408# set correct encoding

File Changes:

Directory: /dba/dbaccess/source/ui/misc/
========================================

File [changed]: HtmlReader.cxx
Url: 
http://dba.openoffice.org/source/browse/dba/dbaccess/source/ui/misc/HtmlReader.cxx?r1=1.25&r2=1.25.30.1
Delta lines:  +35 -75
---------------------
--- HtmlReader.cxx      20 Jun 2006 03:20:01 -0000      1.25
+++ HtmlReader.cxx      11 Aug 2006 08:17:06 -0000      1.25.30.1
@@ -4,9 +4,9 @@
  *
  *  $RCSfile: HtmlReader.cxx,v $
  *
- *  $Revision: 1.25 $
+ *  $Revision: 1.25.30.1 $
  *
- *  last change: $Author: hr $ $Date: 2006/06/20 03:20:01 $
+ *  last change: $Author: oj $ $Date: 2006/08/11 08:17:06 $
  *
  *  The Contents of this file are made available subject to
  *  the terms of GNU Lesser General Public License Version 2.1.
@@ -50,9 +50,18 @@
 #ifndef DBACCESS_SHARED_DBUSTRINGS_HRC
 #include "dbustrings.hrc"
 #endif
+#ifndef _SFXDOCINF_HXX
+#include <sfx2/docinf.hxx>
+#endif
+#ifndef _SFXHTML_HXX
+#include <sfx2/sfxhtml.hxx>
+#endif
 #ifndef _TOOLS_DEBUG_HXX
 #include <tools/debug.hxx>
 #endif
+#ifndef _TOOLS_TENCCVT_HXX
+#include <tools/tenccvt.hxx>
+#endif
 #ifndef _DBAUI_MODULE_DBU_HXX_
 #include "moduledbu.hxx"
 #endif
@@ -197,12 +206,9 @@
        ,m_bSDNum(sal_False)
 {
        DBG_CTOR(OHTMLReader,NULL);
-       // If the system encoding is ANSI, this encoding is used as default
-       // source encoding. Otherwise ISO-8859-1 will be used, because this
-       // is the real default encoding.
-       SetSrcEncoding( RTL_TEXTENCODING_MS_1252 == gsl_getSystemTextEncoding()
-                                               ? RTL_TEXTENCODING_MS_1252
-                                               : RTL_TEXTENCODING_ISO_8859_1 );
+       SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
+    // If the file starts with a BOM, switch to UCS2.
+    SetSwitchToUCS2( TRUE );
 }
 // ---------------------------------------------------------------------------
 OHTMLReader::OHTMLReader(SvStream& rIn,
@@ -221,12 +227,9 @@
        ,m_bSDNum(sal_False)
 {
        DBG_CTOR(OHTMLReader,NULL);
-       // If the system encoding is ANSI, this encoding is used as default
-       // source encoding. Otherwise ISO-8859-1 will be used, because this
-       // is the real default encoding.
-       SetSrcEncoding( RTL_TEXTENCODING_MS_1252 == gsl_getSystemTextEncoding()
-                                               ? RTL_TEXTENCODING_MS_1252
-                                               : RTL_TEXTENCODING_ISO_8859_1 );
+       SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
+    // If the file starts with a BOM, switch to UCS2.
+    SetSwitchToUCS2( TRUE );
 }
 // ---------------------------------------------------------------------------
 OHTMLReader::~OHTMLReader()
@@ -244,33 +247,12 @@
        return m_bFoundTable ? eParseState : SVPAR_ERROR;
 }
 // -----------------------------------------------------------------------------
-rtl_TextEncoding OHTMLReader::GetEncodingByMIME( const String& rMime )
-{
-       DBG_CHKTHIS(OHTMLReader,NULL);
-       ByteString sType;
-       ByteString sSubType;
-       INetContentTypeParameterList aParameters;
-       ByteString sMime( rMime, RTL_TEXTENCODING_ASCII_US );
-       if (INetContentTypes::parse(sMime, sType, sSubType, &aParameters))
-       {
-               const INetContentTypeParameter * pCharset
-                       = aParameters.find("charset");
-               if (pCharset != 0)
-               {
-                       ByteString sValue( pCharset->m_sValue, RTL_TEXTENCODING_ASCII_US );
-                       return rtl_getTextEncodingFromMimeCharset( sValue.GetBuffer() );
-               }
-       }
-       return RTL_TEXTENCODING_DONTKNOW;
-}
-
-// ---------------------------------------------------------------------------
 void OHTMLReader::NextToken( int nToken )
 {
        DBG_CHKTHIS(OHTMLReader,NULL);
        if(m_bError || !m_nRows) // falls Fehler oder keine Rows mehr zur "Uberpr"ufung dann gleich zur"uck
                return;
-       if(!m_bMetaOptions)
+       if ( nToken ==  HTML_META )
                setTextEncoding();
 
        if(m_xConnection.is())    // gibt an welcher CTOR gerufen wurde und damit, ob eine Tabelle erstellt werden soll
@@ -668,43 +650,21 @@
        DBG_CHKTHIS(OHTMLReader,NULL);
        m_bMetaOptions = sal_True;
        USHORT nContentOption = HTML_O_CONTENT;
-       String aName, aContent;
-       USHORT nAction = HTML_META_NONE;
-       BOOL bHTTPEquiv = FALSE;
-       const HTMLOptions *pHtmlOptions = GetOptions(&nContentOption);
-       for( USHORT i = pHtmlOptions->Count(); i; )
-       {
-               const HTMLOption *pOption = (*pHtmlOptions)[ --i ];
-               switch( pOption->GetToken() )
-               {
-               case HTML_O_HTTPEQUIV:
-                       aName = pOption->GetString();
-                       pOption->GetEnum( nAction, getOptions() );
-                       bHTTPEquiv = TRUE;
-                       break;
-               case HTML_O_CONTENT:
-                       aContent = pOption->GetString();
-                       break;
-               }
-       }
-       if( bHTTPEquiv || HTML_META_DESCRIPTION!=nAction )
-       {
-               // wenn's keine Description ist CRs und LFs aus dem CONTENT entfernen
-               aContent.EraseAllChars( _CR );
-               aContent.EraseAllChars( _LF );
-       }
-       else
-       {
-               // fuer die Beschreibung die Zeilen-Umbrueche entsprechen wandeln
-               aContent.ConvertLineEnd();
-       }
-       switch( nAction )
-       {
-               case HTML_META_CONTENT_TYPE:
-                       if( aContent.Len() )
-                               SetSrcEncoding(GetEncodingByMIME( aContent ));
-                       break;
-       }
+       rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+    USHORT nMetaTags = 0;
+
+    ::std::auto_ptr<SfxDocumentInfo> pInfo(new SfxDocumentInfo());
+       BOOL bRet = SfxHTMLParser::ParseMetaOptions( pInfo.get(), NULL,
+                                                                 GetOptions(&nContentOption),
+                                                                 nMetaTags, eEnc );
+
+       // If the encoding is set by a META tag, it may only overwrite the
+       // current encoding if both, the current and the new encoding, are 1-BYTE
+       // encodings. Everything else cannot lead to reasonable results.
+       if( RTL_TEXTENCODING_DONTKNOW != eEnc &&
+               rtl_isOctetTextEncoding( eEnc ) &&
+               rtl_isOctetTextEncoding( GetSrcEncoding() ) )
+               SetSrcEncoding( eEnc );
 }
 // -----------------------------------------------------------------------------
 void OHTMLReader::release()

Directory: /dba/dbaccess/source/ui/inc/
=======================================

File [changed]: HtmlReader.hxx
Url: 
http://dba.openoffice.org/source/browse/dba/dbaccess/source/ui/inc/HtmlReader.hxx?r1=1.12&r2=1.12.34.1
Delta lines:  +4 -5
-------------------
--- HtmlReader.hxx      20 Jun 2006 03:11:46 -0000      1.12
+++ HtmlReader.hxx      11 Aug 2006 08:17:07 -0000      1.12.34.1
@@ -4,9 +4,9 @@
  *
  *  $RCSfile: HtmlReader.hxx,v $
  *
- *  $Revision: 1.12 $
+ *  $Revision: 1.12.34.1 $
  *
- *  last change: $Author: hr $ $Date: 2006/06/20 03:11:46 $
+ *  last change: $Author: oj $ $Date: 2006/08/11 08:17:07 $
  *
  *  The Contents of this file are made available subject to
  *  the terms of GNU Lesser General Public License Version 2.1.
@@ -76,10 +76,9 @@
                void                            TableDataOn(SvxCellHorJustify& eVal,int nToken);
                void                            TableFontOn(::com::sun::star::awt::FontDescriptor& _rFont,sal_Int32 &_rTextColor);
                sal_Int16                       GetWidthPixel( const HTMLOption* pOption );
-               rtl_TextEncoding        GetEncodingByMIME( const String& rMime );
                void                            setTextEncoding();
         void                fetchOptions();
-               ~OHTMLReader();
+               virtual ~OHTMLReader();
        public:
                OHTMLReader(SvStream& rIn,
                                        const SharedConnection& _rxConnection,




---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to