Tag: cws_src680_mingwport03
User: vg      
Date: 2006/11/07 06:33:31

Modified:
   dba/dbaccess/source/ui/misc/HtmlReader.cxx

Log:
 RESYNC: (1.25-1.28); FILE MERGED

File Changes:

Directory: /dba/dbaccess/source/ui/misc/
========================================

File [changed]: HtmlReader.cxx
Url: http://dba.openoffice.org/source/browse/dba/dbaccess/source/ui/misc/HtmlReader.cxx?r1=1.25.50.1&r2=1.25.50.2
Delta lines:  +42 -75
---------------------
--- HtmlReader.cxx      7 Sep 2006 10:36:14 -0000       1.25.50.1
+++ HtmlReader.cxx      7 Nov 2006 14:33:29 -0000       1.25.50.2
@@ -32,6 +32,9 @@
  *    MA  02111-1307  USA
  *
  ************************************************************************/
+
+// MARKER(update_precomp.py): autogen include statement, do not remove
+#include "precompiled_dbaccess.hxx"
 #ifndef DBAUI_HTMLREADER_HXX
 #include "HtmlReader.hxx"
 #endif
@@ -41,6 +44,9 @@
 #ifndef _CONNECTIVITY_DBTOOLS_HXX_
 #include <connectivity/dbtools.hxx>
 #endif
+#ifndef _TOOLS_TENCCVT_HXX
+#include <tools/tenccvt.hxx>
+#endif
 #ifndef _COMPHELPER_EXTRACT_HXX_
 #include <comphelper/extract.hxx>
 #endif
@@ -50,9 +56,18 @@
 #ifndef DBACCESS_SHARED_DBUSTRINGS_HRC
 #include "dbustrings.hrc"
 #endif
+#ifndef _SFXDOCINF_HXX
+#include <sfx2/docinf.hxx>
+#endif
+#ifndef _SFXHTML_HXX
+#include <sfx2/sfxhtml.hxx>
+#endif
 #ifndef _TOOLS_DEBUG_HXX
 #include <tools/debug.hxx>
 #endif
+#ifndef _TOOLS_TENCCVT_HXX
+#include <tools/tenccvt.hxx>
+#endif
 #ifndef _DBAUI_MODULE_DBU_HXX_
 #include "moduledbu.hxx"
 #endif
@@ -201,12 +216,9 @@
        ,m_bSDNum(sal_False)
 {
        DBG_CTOR(OHTMLReader,NULL);
-       // If the system encoding is ANSI, this encoding is used as default
-       // source encoding. Otherwise ISO-8859-1 will be used, because this
-       // is the real default encoding.
-       SetSrcEncoding( RTL_TEXTENCODING_MS_1252 == gsl_getSystemTextEncoding()
-                                               ? RTL_TEXTENCODING_MS_1252
-                                               : RTL_TEXTENCODING_ISO_8859_1 );
+       SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
+    // If the file starts with a BOM, switch to UCS2.
+    SetSwitchToUCS2( TRUE );
 }
 // ---------------------------------------------------------------------------
 OHTMLReader::OHTMLReader(SvStream& rIn,
@@ -225,12 +237,9 @@
        ,m_bSDNum(sal_False)
 {
        DBG_CTOR(OHTMLReader,NULL);
-       // If the system encoding is ANSI, this encoding is used as default
-       // source encoding. Otherwise ISO-8859-1 will be used, because this
-       // is the real default encoding.
-       SetSrcEncoding( RTL_TEXTENCODING_MS_1252 == gsl_getSystemTextEncoding()
-                                               ? RTL_TEXTENCODING_MS_1252
-                                               : RTL_TEXTENCODING_ISO_8859_1 );
+       SetSrcEncoding( GetExtendedCompatibilityTextEncoding( RTL_TEXTENCODING_ISO_8859_1 ) );
+    // If the file starts with a BOM, switch to UCS2.
+    SetSwitchToUCS2( TRUE );
 }
 // ---------------------------------------------------------------------------
 OHTMLReader::~OHTMLReader()
@@ -248,33 +257,12 @@
        return m_bFoundTable ? eParseState : SVPAR_ERROR;
 }
 // -----------------------------------------------------------------------------
-rtl_TextEncoding OHTMLReader::GetEncodingByMIME( const String& rMime )
-{
-       DBG_CHKTHIS(OHTMLReader,NULL);
-       ByteString sType;
-       ByteString sSubType;
-       INetContentTypeParameterList aParameters;
-       ByteString sMime( rMime, RTL_TEXTENCODING_ASCII_US );
-       if (INetContentTypes::parse(sMime, sType, sSubType, &aParameters))
-       {
-               const INetContentTypeParameter * pCharset
-                       = aParameters.find("charset");
-               if (pCharset != 0)
-               {
-                       ByteString sValue( pCharset->m_sValue, RTL_TEXTENCODING_ASCII_US );
-                       return rtl_getTextEncodingFromMimeCharset( sValue.GetBuffer() );
-               }
-       }
-       return RTL_TEXTENCODING_DONTKNOW;
-}
-
-// ---------------------------------------------------------------------------
 void OHTMLReader::NextToken( int nToken )
 {
        DBG_CHKTHIS(OHTMLReader,NULL);
        if(m_bError || !m_nRows) // falls Fehler oder keine Rows mehr zur "Uberpr"ufung dann gleich zur"uck
                return;
-       if(!m_bMetaOptions)
+       if ( nToken ==  HTML_META )
                setTextEncoding();
 
        if(m_xConnection.is())    // gibt an welcher CTOR gerufen wurde und damit, ob eine Tabelle erstellt werden soll
@@ -672,43 +660,21 @@
        DBG_CHKTHIS(OHTMLReader,NULL);
        m_bMetaOptions = sal_True;
        USHORT nContentOption = HTML_O_CONTENT;
-       String aName, aContent;
-       USHORT nAction = HTML_META_NONE;
-       BOOL bHTTPEquiv = FALSE;
-       const HTMLOptions *pHtmlOptions = GetOptions(&nContentOption);
-       for( USHORT i = pHtmlOptions->Count(); i; )
-       {
-               const HTMLOption *pOption = (*pHtmlOptions)[ --i ];
-               switch( pOption->GetToken() )
-               {
-               case HTML_O_HTTPEQUIV:
-                       aName = pOption->GetString();
-                       pOption->GetEnum( nAction, getOptions() );
-                       bHTTPEquiv = TRUE;
-                       break;
-               case HTML_O_CONTENT:
-                       aContent = pOption->GetString();
-                       break;
-               }
-       }
-       if( bHTTPEquiv || HTML_META_DESCRIPTION!=nAction )
-       {
-               // wenn's keine Description ist CRs und LFs aus dem CONTENT entfernen
-               aContent.EraseAllChars( _CR );
-               aContent.EraseAllChars( _LF );
-       }
-       else
-       {
-               // fuer die Beschreibung die Zeilen-Umbrueche entsprechen wandeln
-               aContent.ConvertLineEnd();
-       }
-       switch( nAction )
-       {
-               case HTML_META_CONTENT_TYPE:
-                       if( aContent.Len() )
-                               SetSrcEncoding(GetEncodingByMIME( aContent ));
-                       break;
-       }
+       rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
+    USHORT nMetaTags = 0;
+
+    ::std::auto_ptr<SfxDocumentInfo> pInfo(new SfxDocumentInfo());
+       SfxHTMLParser::ParseMetaOptions( pInfo.get(), NULL,
+                                                                 GetOptions(&nContentOption),
+                                                                 nMetaTags, eEnc );
+
+       // If the encoding is set by a META tag, it may only overwrite the
+       // current encoding if both, the current and the new encoding, are 1-BYTE
+       // encodings. Everything else cannot lead to reasonable results.
+       if( RTL_TEXTENCODING_DONTKNOW != eEnc &&
+               rtl_isOctetTextEncoding( eEnc ) &&
+               rtl_isOctetTextEncoding( GetSrcEncoding() ) )
+               SetSrcEncoding( eEnc );
 }
 // -----------------------------------------------------------------------------
 void OHTMLReader::release()
@@ -723,3 +689,4 @@
        return new OWizHTMLExtend(_pParent,rInput);
 }
 // -----------------------------------------------------------------------------
+




---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to