sw/qa/extras/rtfexport/data/tdf95706_2.rtf      |   17 ++
 sw/qa/extras/rtfexport/rtfexport4.cxx           |   12 +
 writerfilter/source/rtftok/rtfdispatchvalue.cxx |    4 
 writerfilter/source/rtftok/rtfdocumentimpl.cxx  |  162 +++++++++++++-----------
 writerfilter/source/rtftok/rtfdocumentimpl.hxx  |    1 
 5 files changed, 122 insertions(+), 74 deletions(-)

New commits:
commit 844be7358f1eec00094a55fa1fb4fadadb8cd1bf
Author:     Vasily Melenchuk <vasily.melenc...@cib.de>
AuthorDate: Thu Apr 7 20:59:08 2022 +0300
Commit:     Miklos Vajna <vmik...@collabora.com>
CommitDate: Fri Apr 8 11:22:54 2022 +0200

    tdf#95706: RTF import: tolerant font table parsing
    
    While a font name in the font table should end with a semicolon
    ({\fonttbl{\f42 Arial;}}), this is not always the case, and
    MS Word tolerates it: it is still able to parse such input
    correctly. It seems LO should not require strict spec
    conformance either.
    
    So the approach to font parsing is changed: instead of inserting
    the font on a semicolon, it is inserted on the next \fN or at the
    destination end. All text collected up to that point is the font name.
    
    Change-Id: I6b41951217442a71fd2ebbfc58a3fc79f6f913db
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/132686
    Tested-by: Jenkins
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>

diff --git a/sw/qa/extras/rtfexport/data/tdf95706_2.rtf 
b/sw/qa/extras/rtfexport/data/tdf95706_2.rtf
new file mode 100644
index 000000000000..d36d2ccd2396
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/tdf95706_2.rtf
@@ -0,0 +1,17 @@
+{\rtf\ansi
+{\fonttbl
+{\f1 Arial}
+\f2 Impact
+\f3 T\'69mes New Roman
+\f4 T
+a
+h
+o
+m
+a
+}
+\pard\f1\fs26 Arial\par
+\pard\f2\fs26 Impact\par
+\pard\f3\fs26 Times New Roman\par
+\pard\f4\fs26 Tahoma\par
+}
diff --git a/sw/qa/extras/rtfexport/rtfexport4.cxx 
b/sw/qa/extras/rtfexport/rtfexport4.cxx
index 2d036538d6fe..c2af71a93f63 100644
--- a/sw/qa/extras/rtfexport/rtfexport4.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport4.cxx
@@ -553,6 +553,18 @@ DECLARE_RTFEXPORT_TEST(testTdf95706, "tdf95706.rtf")
     CPPUNIT_ASSERT_EQUAL(OUString("Arial"), getProperty<OUString>(xRun16, 
"CharFontName"));
 }
 
+DECLARE_RTFEXPORT_TEST(testTdf95706_2, "tdf95706_2.rtf")
+{
+    CPPUNIT_ASSERT_EQUAL(OUString("Arial"),
+                         getProperty<OUString>(getRun(getParagraph(1), 1), 
"CharFontName"));
+    CPPUNIT_ASSERT_EQUAL(OUString("Impact"),
+                         getProperty<OUString>(getRun(getParagraph(2), 1), 
"CharFontName"));
+    CPPUNIT_ASSERT_EQUAL(OUString("Times New Roman"),
+                         getProperty<OUString>(getRun(getParagraph(3), 1), 
"CharFontName"));
+    CPPUNIT_ASSERT_EQUAL(OUString("Tahoma"),
+                         getProperty<OUString>(getRun(getParagraph(4), 1), 
"CharFontName"));
+}
+
 DECLARE_RTFEXPORT_TEST(testTdf111851, "tdf111851.rtf")
 {
     uno::Reference<text::XTextTable> xTable(getParagraphOrTable(1), 
uno::UNO_QUERY);
diff --git a/writerfilter/source/rtftok/rtfdispatchvalue.cxx 
b/writerfilter/source/rtftok/rtfdispatchvalue.cxx
index b3c04bb0f1f1..ca092fb66a37 100644
--- a/writerfilter/source/rtftok/rtfdispatchvalue.cxx
+++ b/writerfilter/source/rtftok/rtfdispatchvalue.cxx
@@ -762,6 +762,10 @@ RTFError RTFDocumentImpl::dispatchValue(RTFKeyword 
nKeyword, int nParam)
             if (m_aStates.top().getDestination() == Destination::FONTTABLE
                 || m_aStates.top().getDestination() == Destination::FONTENTRY)
             {
+                // Some text in buffer? It is font name. So previous font 
definition is complete
+                if (m_aStates.top().getCurrentDestinationText()->getLength())
+                    handleFontTableEntry();
+
                 m_aFontIndexes.push_back(nParam);
                 m_nCurrentFontIndex = getFontIndex(nParam);
             }
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx 
b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
index 5a19ccebb20a..47349ac8aaba 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx
@@ -1332,6 +1332,74 @@ void RTFDocumentImpl::singleChar(sal_uInt8 nValue, bool 
bRunProps)
     }
 }
 
+void RTFDocumentImpl::handleFontTableEntry()
+{
+    OUString aName = 
m_aStates.top().getCurrentDestinationText()->makeStringAndClear();
+
+    if (aName.isEmpty())
+        return;
+
+    if (aName.endsWith(";"))
+    {
+        aName = aName.copy(0, aName.getLength() - 1);
+    }
+
+    // Old documents may contain no encoding information in fontinfo,
+    // but there can be font name suffixes: Arial CE is not a special
+    // font, it is ordinary Arial, but using the cp1250 encoding.
+    // Moreover, these suffixes take priority over \cpgN and \fcharsetN
+    // in MS Word.
+    OUString aFontSuffix;
+    OUString aNameNoSuffix(aName);
+    sal_Int32 nLastSpace = aName.lastIndexOf(' ');
+    if (nLastSpace >= 0)
+    {
+        aFontSuffix = aName.copy(nLastSpace + 1);
+        aNameNoSuffix = aName.copy(0, nLastSpace);
+        sal_Int32 nEncoding = RTL_TEXTENCODING_DONTKNOW;
+        for (int i = 0; aRTFFontNameSuffixes[i].codepage != 
RTL_TEXTENCODING_DONTKNOW; i++)
+        {
+            if (aFontSuffix.equalsAscii(aRTFFontNameSuffixes[i].suffix))
+            {
+                nEncoding = aRTFFontNameSuffixes[i].codepage;
+                break;
+            }
+        }
+        if (nEncoding > RTL_TEXTENCODING_DONTKNOW)
+        {
+            m_nCurrentEncoding = nEncoding;
+            m_aStates.top().setCurrentEncoding(m_nCurrentEncoding);
+        }
+        else
+        {
+            // Unknown suffix: looks like it is just a part of font name, 
restore it
+            aNameNoSuffix = aName;
+        }
+    }
+
+    m_aFontNames[m_nCurrentFontIndex] = aNameNoSuffix;
+    if (m_nCurrentEncoding >= 0)
+    {
+        m_aFontEncodings[m_nCurrentFontIndex] = m_nCurrentEncoding;
+        m_nCurrentEncoding = -1;
+    }
+    m_aStates.top().getTableAttributes().set(NS_ooxml::LN_CT_Font_name,
+                                             new RTFValue(aNameNoSuffix));
+
+    writerfilter::Reference<Properties>::Pointer_t const pProp(new 
RTFReferenceProperties(
+        m_aStates.top().getTableAttributes(), 
m_aStates.top().getTableSprms()));
+
+    //See fdo#47347 initial invalid font entry properties are inserted first,
+    //so when we attempt to insert the correct ones, there's already an
+    //entry in the map for them, so the new ones aren't inserted.
+    auto lb = m_aFontTableEntries.lower_bound(m_nCurrentFontIndex);
+    if (lb != m_aFontTableEntries.end()
+        && !(m_aFontTableEntries.key_comp()(m_nCurrentFontIndex, lb->first)))
+        lb->second = pProp;
+    else
+        m_aFontTableEntries.insert(lb, std::make_pair(m_nCurrentFontIndex, 
pProp));
+}
+
 void RTFDocumentImpl::text(OUString& rString)
 {
     if (rString.getLength() == 1 && m_aStates.top().getDestination() != 
Destination::DOCCOMM)
@@ -1345,10 +1413,7 @@ void RTFDocumentImpl::text(OUString& rString)
     bool bRet = true;
     switch (m_aStates.top().getDestination())
     {
-        // Note: in fonttbl there may or may not be groups; in stylesheet
-        // and revtbl groups are mandatory
-        case Destination::FONTTABLE:
-        case Destination::FONTENTRY:
+        // Note: in stylesheet and revtbl groups are mandatory
         case Destination::STYLEENTRY:
         case Destination::LISTNAME:
         case Destination::REVISIONENTRY:
@@ -1368,68 +1433,6 @@ void RTFDocumentImpl::text(OUString& rString)
                     = 
m_aStates.top().getCurrentDestinationText()->makeStringAndClear();
                 switch (m_aStates.top().getDestination())
                 {
-                    case Destination::FONTTABLE:
-                    case Destination::FONTENTRY:
-                    {
-                        // Old documents can contain no encoding information 
in fontinfo,
-                        // but there can be font name suffixes: Arial CE is 
not a special
-                        // font, it is ordinal Arial, but with used cp 1250 
encoding.
-                        // Moreover these suffixes have priority over \cpgN 
and \fcharsetN
-                        // in MS Word.
-                        OUString aFontSuffix;
-                        OUString aNameNoSuffix(aName);
-                        sal_Int32 nLastSpace = aName.lastIndexOf(' ');
-                        if (nLastSpace >= 0)
-                        {
-                            aFontSuffix = aName.copy(nLastSpace + 1);
-                            aNameNoSuffix = aName.copy(0, nLastSpace);
-                            sal_Int32 nEncoding = RTL_TEXTENCODING_DONTKNOW;
-                            for (int i = 0;
-                                 aRTFFontNameSuffixes[i].codepage != 
RTL_TEXTENCODING_DONTKNOW; i++)
-                            {
-                                if 
(aFontSuffix.equalsAscii(aRTFFontNameSuffixes[i].suffix))
-                                {
-                                    nEncoding = 
aRTFFontNameSuffixes[i].codepage;
-                                    break;
-                                }
-                            }
-                            if (nEncoding > RTL_TEXTENCODING_DONTKNOW)
-                            {
-                                m_nCurrentEncoding = nEncoding;
-                                
m_aStates.top().setCurrentEncoding(m_nCurrentEncoding);
-                            }
-                            else
-                            {
-                                // Unknown suffix: looks like it is just a 
part of font name, restore it
-                                aNameNoSuffix = aName;
-                            }
-                        }
-
-                        m_aFontNames[m_nCurrentFontIndex] = aNameNoSuffix;
-                        if (m_nCurrentEncoding >= 0)
-                        {
-                            m_aFontEncodings[m_nCurrentFontIndex] = 
m_nCurrentEncoding;
-                            m_nCurrentEncoding = -1;
-                        }
-                        
m_aStates.top().getTableAttributes().set(NS_ooxml::LN_CT_Font_name,
-                                                                 new 
RTFValue(aNameNoSuffix));
-
-                        writerfilter::Reference<Properties>::Pointer_t const 
pProp(
-                            new 
RTFReferenceProperties(m_aStates.top().getTableAttributes(),
-                                                       
m_aStates.top().getTableSprms()));
-
-                        //See fdo#47347 initial invalid font entry properties 
are inserted first,
-                        //so when we attempt to insert the correct ones, 
there's already an
-                        //entry in the map for them, so the new ones aren't 
inserted.
-                        auto lb = 
m_aFontTableEntries.lower_bound(m_nCurrentFontIndex);
-                        if (lb != m_aFontTableEntries.end()
-                            && 
!(m_aFontTableEntries.key_comp()(m_nCurrentFontIndex, lb->first)))
-                            lb->second = pProp;
-                        else
-                            m_aFontTableEntries.insert(lb,
-                                                       
std::make_pair(m_nCurrentFontIndex, pProp));
-                    }
-                    break;
                     case Destination::STYLEENTRY:
                     {
                         RTFValue::Pointer_t pType
@@ -1467,6 +1470,8 @@ void RTFDocumentImpl::text(OUString& rString)
             }
         }
         break;
+        case Destination::FONTTABLE:
+        case Destination::FONTENTRY:
         case Destination::LEVELTEXT:
         case Destination::SHAPEPROPERTYNAME:
         case Destination::SHAPEPROPERTYVALUE:
@@ -2216,17 +2221,26 @@ RTFError 
RTFDocumentImpl::beforePopState(RTFParserState& rState)
 {
     switch (rState.getDestination())
     {
+        //Note: in fonttbl there may or may not be groups, so process it as no 
groups
         case Destination::FONTTABLE:
+        case Destination::FONTENTRY:
         {
-            writerfilter::Reference<Table>::Pointer_t const pTable(
-                new RTFReferenceTable(m_aFontTableEntries));
-            Mapper().table(NS_ooxml::LN_FONTTABLE, pTable);
-            if (m_nDefaultFontIndex >= 0)
+            // Some text unhandled? Seems it is last font name
+            if (m_aStates.top().getCurrentDestinationText()->getLength())
+                handleFontTableEntry();
+
+            if (rState.getDestination() == Destination::FONTTABLE)
             {
-                auto pValue = new 
RTFValue(m_aFontNames[getFontIndex(m_nDefaultFontIndex)]);
-                putNestedAttribute(m_aDefaultState.getCharacterSprms(),
-                                   NS_ooxml::LN_EG_RPrBase_rFonts, 
NS_ooxml::LN_CT_Fonts_ascii,
-                                   pValue);
+                writerfilter::Reference<Table>::Pointer_t const pTable(
+                    new RTFReferenceTable(m_aFontTableEntries));
+                Mapper().table(NS_ooxml::LN_FONTTABLE, pTable);
+                if (m_nDefaultFontIndex >= 0)
+                {
+                    auto pValue = new 
RTFValue(m_aFontNames[getFontIndex(m_nDefaultFontIndex)]);
+                    putNestedAttribute(m_aDefaultState.getCharacterSprms(),
+                                       NS_ooxml::LN_EG_RPrBase_rFonts, 
NS_ooxml::LN_CT_Fonts_ascii,
+                                       pValue);
+                }
             }
         }
         break;
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx 
b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
index 66e27a509be5..14ffc2f630a4 100644
--- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx
+++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx
@@ -777,6 +777,7 @@ private:
     writerfilter::Reference<Properties>::Pointer_t
     getProperties(const RTFSprms& rAttributes, RTFSprms const& rSprms, Id 
nStyleType);
     void checkNeedPap();
+    void handleFontTableEntry();
     void sectBreak(bool bFinal = false);
     void prepareProperties(RTFParserState& rState,
                            writerfilter::Reference<Properties>::Pointer_t& 
o_rpParagraphProperties,

Reply via email to