sw/qa/extras/ooxmlexport/data/custom-styles-TOC-comma.docx |binary
 sw/qa/extras/ooxmlexport/ooxmlexport17.cxx                 |   25 +++++++++++
 writerfilter/source/dmapper/DomainMapper_Impl.cxx          |    4 +
 writerfilter/source/dmapper/StyleSheetTable.cxx            |   29 +++++++++++++
 4 files changed, 57 insertions(+), 1 deletion(-)

New commits:
commit d42e7d50225d5ff7359cfc765ec9694cf83c9749
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Wed Jan 18 15:41:57 2023 +0100
Commit:     Miklos Vajna <vmik...@collabora.com>
CommitDate: Thu Jan 19 12:43:03 2023 +0000

    tdf#153083 writerfilter: import locale-dependent TOC \t style names
    
    The bugdoc contains this style:
    
      <w:style w:type="paragraph" w:styleId="IntensivesZitat">
        <w:name w:val="Intense Quote"/>
        <w:basedOn w:val="Standard"/>
        <w:next w:val="Standard"/>
        <w:link w:val="IntensivesZitatZchn"/>
        ...
    
    which is referred to by:
    
      TOC \o "1-3" \h \z \t "Intensives Zitat;3;Custom1;3;_MyStyle0;3"
    
    Word in an "en" locale is unable to match the "Intensives Zitat" in the
    TOC field with the style "Intense Quote", which is a built-in style
    in Word (no equivalent in Writer).
    
    At first glance nothing in styles.xml matches the localised built-in
    style name in the TOC field.
    
    But it looks like the w:styleId value is somehow generated from the
    localised style name by omitting certain characters like SPACE and
    non-ASCII letters.
    
    Change-Id: I2050f7cf7f8d80bee1f667ee53b7f9981bbf7b49
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/145745
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>
    (cherry picked from commit ecbad22fdf81c6f072b6c9f9c16dbba47fe4748c)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/145721
    Tested-by: Michael Stahl <michael.st...@allotropia.de>
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>

diff --git a/sw/qa/extras/ooxmlexport/data/custom-styles-TOC-comma.docx 
b/sw/qa/extras/ooxmlexport/data/custom-styles-TOC-comma.docx
new file mode 100644
index 000000000000..1b013086dcc6
Binary files /dev/null and 
b/sw/qa/extras/ooxmlexport/data/custom-styles-TOC-comma.docx differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx 
b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx
index d9d5802e0606..b216e14d22fe 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport17.cxx
@@ -15,6 +15,7 @@
 #include <com/sun/star/style/ParagraphAdjust.hpp>
 #include <com/sun/star/text/WritingMode2.hpp>
 #include <com/sun/star/text/XBookmarksSupplier.hpp>
+#include <com/sun/star/text/XDocumentIndex.hpp>
 #include <com/sun/star/text/XFootnotesSupplier.hpp>
 #include <com/sun/star/text/XTextFieldsSupplier.hpp>
 #include <com/sun/star/text/XTextField.hpp>
@@ -701,6 +702,30 @@ DECLARE_OOXMLEXPORT_TEST(testTdf148361, "tdf148361.docx")
     CPPUNIT_ASSERT_EQUAL(OUString("[Type text]"), aActual);
 }
 
+DECLARE_OOXMLEXPORT_TEST(testTdf153082_comma, "custom-styles-TOC-comma.docx")
+{
+    uno::Reference<text::XDocumentIndexesSupplier> xIndexSupplier(mxComponent, 
uno::UNO_QUERY);
+    uno::Reference<container::XIndexAccess> xIndexes = 
xIndexSupplier->getDocumentIndexes();
+    uno::Reference<text::XDocumentIndex> xTOC(xIndexes->getByIndex(0), 
uno::UNO_QUERY);
+    // check styles
+    uno::Reference<container::XIndexAccess> xParaStyles =
+        getProperty<uno::Reference<container::XIndexAccess>>(xTOC, 
"LevelParagraphStyles");
+    uno::Sequence<OUString> styles;
+    xParaStyles->getByIndex(0) >>= styles;
+    CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"_MyStyle0"}, styles);
+    xParaStyles->getByIndex(1) >>= styles;
+    CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"Custom1"}, styles);
+    xParaStyles->getByIndex(2) >>= styles;
+    // the first one is a built-in Word style that was localised DE "Intensives 
Zitat" in the file
+    CPPUNIT_ASSERT_EQUAL(uno::Sequence<OUString>{"Intense Quote"}, styles);
+    xTOC->update();
+    OUString const tocContent(xTOC->getAnchor()->getString());
+    CPPUNIT_ASSERT(tocContent.startsWith("Table of Contents"));
+    CPPUNIT_ASSERT(tocContent.indexOf("Lorem ipsum dolor sit amet, 
consectetuer adipiscing elit.") != -1);
+    CPPUNIT_ASSERT(tocContent.indexOf("Fusce posuere, magna sed pulvinar 
ultricies, purus lectus malesuada libero, sit amet commodo magna eros quis 
urna.") != -1);
+    CPPUNIT_ASSERT(tocContent.indexOf("Pellentesque habitant morbi tristique 
senectus et netus et malesuada fames ac turpis egestas.") != -1);
+}
+
 DECLARE_OOXMLEXPORT_TEST(testTdf142407, "tdf142407.docx")
 {
     uno::Reference<container::XNameAccess> xPageStyles = 
getStyles("PageStyles");
diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx 
b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
index bcfcbc7c72dd..3f33280f5a22 100644
--- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx
+++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx
@@ -6279,7 +6279,9 @@ void DomainMapper_Impl::handleToc
                     uno::Sequence< OUString> aStyles( nLevelCount );
                     for ( auto& rStyle : asNonConstRange(aStyles) )
                     {
-                        rStyle = (aTOCStyleIter++)->second;
+                        // tdf#153082 must map w:styleId to w:name
+                        rStyle = 
GetStyleSheetTable()->ConvertStyleName(aTOCStyleIter->second, true);
+                        ++aTOCStyleIter;
                     }
                     xParaStyles->replaceByIndex(nLevel - 1, uno::Any(aStyles));
                 }
diff --git a/writerfilter/source/dmapper/StyleSheetTable.cxx 
b/writerfilter/source/dmapper/StyleSheetTable.cxx
index 9641a2a032bd..1a767712ea94 100644
--- a/writerfilter/source/dmapper/StyleSheetTable.cxx
+++ b/writerfilter/source/dmapper/StyleSheetTable.cxx
@@ -42,6 +42,7 @@
 #include <map>
 #include <osl/diagnose.h>
 #include <rtl/ustrbuf.hxx>
+#include <rtl/character.hxx>
 #include <sal/log.hxx>
 #include <comphelper/propertyvalue.hxx>
 #include <comphelper/string.hxx>
@@ -1412,6 +1413,30 @@ const StyleSheetEntryPtr & 
StyleSheetTable::GetCurrentEntry() const
     return m_pImpl->m_pCurrentEntry;
 }
 
+/**
+ This is a heuristic to find Word's w:styleId value from localised style name.
+ It's not clear how exactly it works, but apparently Word stores into
+ w:styleId some filtered representation of the localised style name.
+ Tragically there are references to the localised style name itself in TOC
+ fields.
+ Hopefully this works and a complete map of >100 built-in style names
+ localised to all languages isn't needed.
+*/
+static auto FilterChars(OUString const& rStyleName) -> OUString
+{
+    OUStringBuffer ret;
+    sal_Int32 index(0);
+    while (index < rStyleName.getLength())
+    {
+        auto const c(rStyleName.iterateCodePoints(&index));
+        if (rtl::isAsciiAlphanumeric(c))
+        {
+            ret.appendUtf32(c);
+        }
+    }
+    return ret.makeStringAndClear();
+}
+
 OUString StyleSheetTable::ConvertStyleName( const OUString& rWWName, bool 
bExtendedSearch)
 {
     OUString sRet( rWWName );
@@ -1419,6 +1444,10 @@ OUString StyleSheetTable::ConvertStyleName( const 
OUString& rWWName, bool bExten
     {
         //search for the rWWName in the IdentifierD of the existing styles and 
convert the sStyleName member
         auto findIt = m_pImpl->m_aStyleSheetEntriesMap.find(rWWName);
+        if (findIt == m_pImpl->m_aStyleSheetEntriesMap.end())
+        {
+            findIt = 
m_pImpl->m_aStyleSheetEntriesMap.find(FilterChars(rWWName));
+        }
         if (findIt != m_pImpl->m_aStyleSheetEntriesMap.end())
             sRet = findIt->second->sStyleName;
     }

Reply via email to