include/svtools/parhtml.hxx                              |    1 
 sw/qa/extras/htmlexport/data/test_no_space_preserve.fodt |   25 ++
 sw/qa/extras/htmlexport/htmlexport.cxx                   |  147 +++++++++++++++
 sw/qa/extras/tiledrendering/data/multiline.odt           |binary
 sw/qa/extras/tiledrendering/tiledrendering.cxx           |   40 ++++
 sw/source/filter/html/htmlatr.cxx                        |   83 ++++++--
 sw/source/filter/html/swhtml.cxx                         |    2 
 sw/source/filter/html/wrthtml.cxx                        |   14 +
 sw/source/filter/html/wrthtml.hxx                        |    4 
 sw/source/uibase/dochdl/swdtflvr.cxx                     |    2 
 10 files changed, 298 insertions(+), 20 deletions(-)

New commits:
commit ea2c1bf8b48070b3869ba35fdf9c84aec57cf678
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Tue Nov 14 11:05:05 2023 +0300
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Thu Nov 23 22:57:50 2023 +0300

    Limit PreserveSpaces HTML/ReqIF export option to proper space characters
    
    We only need to care about SPACE, TAB, LF, and CR. We don't need to care
    about LF in some cases, because it gets converted to a <br/> element, and
    handled specially. And we don't need to care about other "whitespace"
    characters, like control characters and Unicode space.
    
    Our own object placeholder characters were treated as "space", which was
    wrong.
    
    One peculiarity needs to be noted. In ReqIF case, a SPACE before LF needs
    no 'xml:space="preserve"', but an LF before SPACE needs one, because it
    needs to disambiguate between a <br> followed by a significant space vs.
    a <br> followed by a pretty-printed newline.
    
    Change-Id: I74273592df05bb94d8e4ecaea2c069c0e086b7d8
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159853
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>

diff --git a/include/svtools/parhtml.hxx b/include/svtools/parhtml.hxx
index a6a15b93e3c1..1a17e98b2997 100644
--- a/include/svtools/parhtml.hxx
+++ b/include/svtools/parhtml.hxx
@@ -189,6 +189,7 @@ protected:
 
     void SetNamespace(std::u16string_view rNamespace);
 
+    bool GetPreserveSpaces() const { return m_bPreserveSpaces; }
     void SetPreserveSpaces(bool val) { m_bPreserveSpaces = val; }
 
 public:
diff --git a/sw/qa/extras/htmlexport/data/test_no_space_preserve.fodt 
b/sw/qa/extras/htmlexport/data/test_no_space_preserve.fodt
new file mode 100644
index 000000000000..e0d9a3506e9f
--- /dev/null
+++ b/sw/qa/extras/htmlexport/data/test_no_space_preserve.fodt
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<office:document 
xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" 
xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" 
xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" 
office:version="1.3" office:mimetype="application/vnd.oasis.opendocument.text">
+ <office:body>
+  <office:text>
+   <text:p>No special spaces</text:p>
+   <text:p><text:s/>Leading space</text:p>
+   <text:p>Trailing space </text:p>
+   <text:p>Double <text:s/>space</text:p>
+   <text:p><text:line-break/>Leading/trailing breaks<text:line-break/></text:p>
+   <text:p><text:line-break/> Leading break + space</text:p>
+   <text:p>Trailing space + break <text:line-break/></text:p>
+   <text:p>Middle<text:line-break/>break</text:p>
+   <text:p>Middle space <text:line-break/>+ break</text:p>
+   <text:p>Middle break<text:line-break/> + space</text:p>
+   <text:p>Trailing space and SVG <draw:frame text:anchor-type="as-char" 
svg:width="5.59mm" svg:height="5.59mm" draw:z-index="0"><draw:image 
draw:mime-type="image/svg+xml">
+      
<office:binary-data>PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB4bWxucz0iaHR0
+       cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZlcnNpb249IjEuMSIgdmlld0JveD0iMCAwIDIw
+       IDIwIiBzdHJva2U9ImJsYWNrIj4KPHBhdGggZD0iTTEsMUwxOSwxOSIvPgo8L3N2Zz4=
+      </office:binary-data>
+     </draw:image>
+    </draw:frame></text:p>
+  </office:text>
+ </office:body>
+</office:document>
\ No newline at end of file
diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx 
b/sw/qa/extras/htmlexport/htmlexport.cxx
index 6b8de215d27c..d72e2b1daf9e 100644
--- a/sw/qa/extras/htmlexport/htmlexport.cxx
+++ b/sw/qa/extras/htmlexport/htmlexport.cxx
@@ -2816,6 +2816,153 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, 
testReqIF_PreserveSpaces)
     CPPUNIT_ASSERT_EQUAL(paraText, getParagraph(1)->getString());
 }
 
+CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testHTML_NoPreserveSpaces)
+{
+    // Test cases where "PreserveSpaces" should not introduce respective markup
+
+    const auto assertXPath_NoWhiteSpaceInStyle
+        = [this](const xmlDocUniquePtr& pDoc, const OString& rXPath) {
+              xmlXPathObjectPtr pXmlObj = getXPathNode(pDoc, rXPath);
+              xmlNodeSetPtr pXmlNodes = pXmlObj->nodesetval;
+              CPPUNIT_ASSERT_EQUAL_MESSAGE(rXPath.getStr(), 1, 
xmlXPathNodeSetGetLength(pXmlNodes));
+              xmlNodePtr pXmlNode = pXmlNodes->nodeTab[0];
+              if (xmlChar* prop = xmlGetProp(pXmlNode, BAD_CAST("style")))
+              {
+                  OUString style = OUString::fromUtf8(reinterpret_cast<const 
char*>(prop));
+                  CPPUNIT_ASSERT_MESSAGE(rXPath.getStr(), 
style.indexOf("white-space:") < 0);
+              }
+              xmlXPathFreeObject(pXmlObj);
+          };
+    const auto assertXPath_HasWhiteSpaceInStyle
+        = [this](const xmlDocUniquePtr& pDoc, const OString& rXPath) {
+              const OUString style = getXPath(pDoc, rXPath, "style");
+              CPPUNIT_ASSERT_MESSAGE(rXPath.getStr(), 
style.indexOf("white-space: pre-wrap") >= 0);
+          };
+
+    createSwDoc("test_no_space_preserve.fodt");
+
+    // Export to plain HTML, using PreserveSpaces:
+    uno::Reference<css::frame::XStorable> xStorable(mxComponent, 
uno::UNO_QUERY_THROW);
+    css::uno::Sequence<css::beans::PropertyValue> aStoreProperties = {
+        comphelper::makePropertyValue("FilterName", OUString("HTML 
(StarWriter)")),
+        comphelper::makePropertyValue("PreserveSpaces", true),
+    };
+    xStorable->storeToURL(maTempFile.GetURL(), aStoreProperties);
+
+    htmlDocUniquePtr pHtmlDoc = parseHtml(maTempFile);
+    CPPUNIT_ASSERT(pHtmlDoc);
+
+    // No whitespace preservation, where no leading / trailing / double 
whitespace
+    assertXPath_NoWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[1]");
+    // Whitespace preserved for a leading space
+    assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[2]");
+    // Whitespace preserved for a trailing space
+    assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[3]");
+    // Whitespace preserved for a double space
+    assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[4]");
+    // No whitespace preservation for leading / trailing breaks
+    assertXPath_NoWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[5]");
+    // Whitespace preserved for a leading break + space
+    assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[6]");
+    // Whitespace preserved for a trailing space + break
+    assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[7]");
+    // No whitespace preservation for a middle break
+    assertXPath_NoWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[8]");
+    // Whitespace preserved for a middle space + break
+    assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[9]");
+    // Whitespace preserved for a middle break + space
+    assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[10]");
+    // No whitespace preservation for a trailing space and SVG
+    assertXPath_NoWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[11]");
+
+    // Test import
+
+    setImportFilterName("HTML (StarWriter)");
+    UnoApiTest::load(maTempFile.GetURL());
+
+    CPPUNIT_ASSERT_EQUAL(OUString("No special spaces"), 
getParagraph(1)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString(" Leading space"), 
getParagraph(2)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Trailing space "), 
getParagraph(3)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Double  space"), 
getParagraph(4)->getString());
+    // Trailing break is removed in SwHTMLParser::AppendTextNode, and replaced 
with para spacing
+    CPPUNIT_ASSERT_EQUAL(OUString("\nLeading/trailing breaks"), 
getParagraph(5)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("\n Leading break + space"), 
getParagraph(6)->getString());
+    // Trailing break is removed in SwHTMLParser::AppendTextNode, and replaced 
with para spacing
+    CPPUNIT_ASSERT_EQUAL(OUString("Trailing space + break "), 
getParagraph(7)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Middle\nbreak"), 
getParagraph(8)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Middle space \n+ break"), 
getParagraph(9)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Middle break\n + space"), 
getParagraph(10)->getString());
+    // The SVG is replaced by a space in SwXParagraph::getString()
+    CPPUNIT_ASSERT_EQUAL(OUString("Trailing space and SVG  "), 
getParagraph(11)->getString());
+}
+
+CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testReqIF_NoPreserveSpaces)
+{
+    // Test cases where "PreserveSpaces" should not introduce respective markup
+
+    createSwDoc("test_no_space_preserve.fodt");
+
+    // Export to ReqIF, using PreserveSpaces:
+    uno::Reference<css::frame::XStorable> xStorable(mxComponent, 
uno::UNO_QUERY_THROW);
+    css::uno::Sequence<css::beans::PropertyValue> aStoreProperties = {
+        comphelper::makePropertyValue("FilterName", OUString("HTML 
(StarWriter)")),
+        comphelper::makePropertyValue("FilterOptions", 
OUString("xhtmlns=reqif-xhtml")),
+        comphelper::makePropertyValue("PreserveSpaces", true),
+    };
+    xStorable->storeToURL(maTempFile.GetURL(), aStoreProperties);
+
+    SvMemoryStream aStream;
+    WrapReqifFromTempFile(aStream);
+    xmlDocUniquePtr pXmlDoc = parseXmlStream(&aStream);
+
+    // No whitespace preservation, where no leading / trailing / double 
whitespace
+    assertXPathNoAttribute(pXmlDoc, 
"/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[1]", "space");
+    // Whitespace preserved for a leading space
+    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[2]", 
"space",
+                u"preserve");
+    // Whitespace preserved for a trailing space
+    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[3]", 
"space",
+                u"preserve");
+    // Whitespace preserved for a double space
+    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[4]", 
"space",
+                u"preserve");
+    // No whitespace preservation for leading / trailing breaks
+    assertXPathNoAttribute(pXmlDoc, 
"/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[5]", "space");
+    // Whitespace preserved for a leading break + space
+    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[6]", 
"space",
+                u"preserve");
+    // No whitespace preservation for a trailing space + break
+    assertXPathNoAttribute(pXmlDoc, 
"/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[7]", "space");
+    // No whitespace preservation for a middle break
+    assertXPathNoAttribute(pXmlDoc, 
"/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[8]", "space");
+    // No whitespace preservation for a middle space + break
+    assertXPathNoAttribute(pXmlDoc, 
"/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[9]", "space");
+    // Whitespace preserved for a middle break + space
+    assertXPath(pXmlDoc, 
"/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[10]", "space",
+                u"preserve");
+    // No whitespace preservation for a trailing space and SVG
+    assertXPathNoAttribute(pXmlDoc, 
"/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[11]", "space");
+
+    // Test import
+
+    setImportFilterOptions("xhtmlns=reqif-xhtml");
+    setImportFilterName("HTML (StarWriter)");
+    UnoApiTest::load(maTempFile.GetURL());
+
+    CPPUNIT_ASSERT_EQUAL(OUString("No special spaces"), 
getParagraph(1)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString(" Leading space"), 
getParagraph(2)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Trailing space "), 
getParagraph(3)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Double  space"), 
getParagraph(4)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("\nLeading/trailing breaks\n"), 
getParagraph(5)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("\n Leading break + space"), 
getParagraph(6)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Trailing space + break \n"), 
getParagraph(7)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Middle\nbreak"), 
getParagraph(8)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Middle space \n+ break"), 
getParagraph(9)->getString());
+    CPPUNIT_ASSERT_EQUAL(OUString("Middle break\n + space"), 
getParagraph(10)->getString());
+    // The SVG is replaced by a space in SwXParagraph::getString()
+    CPPUNIT_ASSERT_EQUAL(OUString("Trailing space and SVG  "), 
getParagraph(11)->getString());
+}
+
 CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testReqIF_ExportFormulasAsPDF)
 {
     // Given a document with a formula:
diff --git a/sw/source/filter/html/htmlatr.cxx 
b/sw/source/filter/html/htmlatr.cxx
index e0722596d368..a99140564301 100644
--- a/sw/source/filter/html/htmlatr.cxx
+++ b/sw/source/filter/html/htmlatr.cxx
@@ -769,7 +769,7 @@ static void OutHTML_SwFormat( Writer& rWrt, const SwFormat& 
rFormat,
     if( rInfo.bInNumberBulletList && bNumberedForListItem )
     {
         HtmlWriter html(rWrt.Strm(), rHWrt.maNamespace);
-        html.prettyPrint(rHWrt.m_bPrettyPrint);
+        html.prettyPrint(rHWrt.IsPrettyPrint());
         html.start(OOO_STRING_SVTOOLS_HTML_li);
         if (!bNumbered)
         {
@@ -1011,7 +1011,7 @@ static void OutHTML_SwFormatOff( Writer& rWrt, const 
SwHTMLTextCollOutputInfo& r
 
     if( rInfo.ShouldOutputToken() )
     {
-        if (rHWrt.m_bPrettyPrint && rHWrt.IsLFPossible())
+        if (rHWrt.IsPrettyPrint() && rHWrt.IsLFPossible())
             rHWrt.OutNewLine( true );
 
         // if necessary, for BLOCKQUOTE, ADDRESS and DD another paragraph token
@@ -2006,26 +2006,70 @@ void HTMLEndPosLst::OutEndAttrs( SwHTMLWriter& rHWrt, 
sal_Int32 nPos )
     }
 }
 
-static bool NeedPreserveWhitespace(std::u16string_view str)
+static constexpr bool IsLF(sal_Unicode ch) { return ch == '\n'; }
+
+static constexpr bool IsWhitespaceExcludingLF(sal_Unicode ch)
+{
+    return ch == ' ' || ch == '\t' || ch == '\r';
+}
+
+static constexpr bool IsWhitespaceIncludingLF(sal_Unicode ch)
+{
+    return IsWhitespaceExcludingLF(ch) || IsLF(ch);
+}
+
+static bool NeedPreserveWhitespace(std::u16string_view str, bool xml)
 {
     if (str.empty())
         return false;
     // leading / trailing spaces
-    if (o3tl::internal::implIsWhitespace(str.front())
-        || o3tl::internal::implIsWhitespace(str.back()))
+    // A leading / trailing \n would turn into a leading / trailing <br/>,
+    // and will not disappear, even without space preserving option
+    if (IsWhitespaceExcludingLF(str.front()) || 
IsWhitespaceExcludingLF(str.back()))
         return true;
-    bool bWasSpace = false;
-    for (auto ch : str)
+    for (size_t i = 0; i < str.size(); ++i)
     {
-        if (o3tl::internal::implIsWhitespace(ch))
+        if (xml)
         {
-            if (bWasSpace)
-                return true; // Second whitespace in a row
-            else
-                bWasSpace = true;
+            // No need to consider \n, which convert to <br/>, when it's after 
a space
+            // (but handle it *before* a space)
+            if (IsWhitespaceIncludingLF(str[i]))
+            {
+                do
+                {
+                    ++i;
+                    if (i == str.size())
+                        return false;
+                } while (IsLF(str[i]));
+                if (IsWhitespaceExcludingLF(str[i]))
+                    return true; // Second whitespace in a row
+            }
+        }
+        else // html
+        {
+            // Only consider \n, when an adjacent space is not \n - which 
would be eaten
+            // without a space preserving option
+            if (IsWhitespaceExcludingLF(str[i]))
+            {
+                ++i;
+                if (i == str.size())
+                    return false;
+                if (IsWhitespaceIncludingLF(str[i]))
+                    return true; // Any whitespace after a non-LF whitespace
+            }
+            else if (IsLF(str[i]))
+            {
+                do
+                {
+                    ++i;
+                    if (i == str.size())
+                        return false;
+                }
+                while (IsLF(str[i]));
+                if (IsWhitespaceExcludingLF(str[i]))
+                    return true; // A non-LF whitespace after a LF
+            }
         }
-        else
-            bWasSpace = false;
     }
     return false;
 }
@@ -2062,7 +2106,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
         rHTMLWrt.SetLFPossible(true);
 
         HtmlWriter aHtml(rWrt.Strm(), rHTMLWrt.maNamespace);
-        aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint);
+        aHtml.prettyPrint(rHTMLWrt.IsPrettyPrint());
         aHtml.start(OOO_STRING_SVTOOLS_HTML_horzrule);
 
         const SfxItemSet* pItemSet = pNd->GetpSwAttrSet();
@@ -2240,7 +2284,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
     bool bOldLFPossible = rHTMLWrt.IsLFPossible();
     bool bOldSpacePreserve = rHTMLWrt.IsSpacePreserve();
     if (rHTMLWrt.IsPreserveSpacesOnWritePrefSet())
-        rHTMLWrt.SetSpacePreserve(NeedPreserveWhitespace(rStr));
+        rHTMLWrt.SetSpacePreserve(NeedPreserveWhitespace(rStr, 
rHTMLWrt.mbReqIF));
     OutHTML_SwFormat( rWrt, rFormat, pNd->GetpSwAttrSet(), aFormatInfo );
 
     // If we didn't open a new line before the paragraph tag, we do that now
@@ -2255,7 +2299,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
     // now it's a good opportunity again for an LF - if it is still allowed
     // FIXME: for LOK case we set rHTMLWrt.m_nWishLineLen as -1, for now keep 
old flow
     // when LOK side will be fixed - don't insert new line at the beginning
-    if( rHTMLWrt.IsLFPossible() && rHTMLWrt.m_bPrettyPrint && 
rHTMLWrt.m_nWishLineLen >= 0 &&
+    if( rHTMLWrt.IsLFPossible() && rHTMLWrt.IsPrettyPrint() && 
rHTMLWrt.m_nWishLineLen >= 0 &&
         rHTMLWrt.GetLineLen() >= rHTMLWrt.m_nWishLineLen )
     {
         rHTMLWrt.OutNewLine();
@@ -2494,7 +2538,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
                         nWordLen = nEnd;
                     nWordLen -= nStrPos;
 
-                    if( rHTMLWrt.m_bPrettyPrint && rHTMLWrt.m_nWishLineLen >= 
0 &&
+                    if( rHTMLWrt.IsPrettyPrint() && rHTMLWrt.m_nWishLineLen >= 
0 &&
                         (nLineLen >= rHTMLWrt.m_nWishLineLen ||
                         (nLineLen+nWordLen) >= rHTMLWrt.m_nWishLineLen ) )
                     {
@@ -2510,7 +2554,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
                     {
                         HTMLOutFuncs::FlushToAscii( rWrt.Strm() );
                         HtmlWriter aHtml(rWrt.Strm(), rHTMLWrt.maNamespace);
-                        aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint);
+                        aHtml.prettyPrint(rHTMLWrt.IsPrettyPrint());
                         aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak);
                     }
                     else if (c == CH_TXT_ATR_FORMELEMENT)
@@ -2589,7 +2633,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
         else
         {
             HtmlWriter aHtml(rHTMLWrt.Strm(), rHTMLWrt.maNamespace);
-            aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint);
+            aHtml.prettyPrint(rHTMLWrt.IsPrettyPrint());
             aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak);
             const SvxULSpaceItem& rULSpace = 
pNd->GetSwAttrSet().Get(RES_UL_SPACE);
             if (rULSpace.GetLower() > 0 && !bEndOfCell)
@@ -2616,7 +2660,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
         }
 
         HtmlWriter aHtml(rHTMLWrt.Strm(), rHTMLWrt.maNamespace);
-        aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint);
+        aHtml.prettyPrint(rHTMLWrt.IsPrettyPrint());
         aHtml.start(OOO_STRING_SVTOOLS_HTML_linebreak);
         aHtml.attribute(OOO_STRING_SVTOOLS_HTML_O_clear, pString);
         aHtml.end();
diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx
index 2d8f99c44d95..e96ab81d5af1 100644
--- a/sw/source/filter/html/swhtml.cxx
+++ b/sw/source/filter/html/swhtml.cxx
@@ -1524,7 +1524,7 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken )
     case HtmlTokenId::TEXTTOKEN:
     case HtmlTokenId::CDATA:
         // insert string without spanning attributes at the end.
-        if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() )
+        if (!aToken.isEmpty() && ' ' == aToken[0] && !IsReadPRE() && 
!GetPreserveSpaces())
         {
             sal_Int32 nPos = m_pPam->GetPoint()->GetContentIndex();
             const SwTextNode* pTextNode = nPos ? 
m_pPam->GetPoint()->GetNode().GetTextNode() : nullptr;
diff --git a/sw/source/filter/html/wrthtml.cxx 
b/sw/source/filter/html/wrthtml.cxx
index 1826cee5bd14..be83b5331266 100644
--- a/sw/source/filter/html/wrthtml.cxx
+++ b/sw/source/filter/html/wrthtml.cxx
@@ -153,7 +153,6 @@ SwHTMLWriter::SwHTMLWriter( const OUString& rBaseURL, 
std::u16string_view rFilte
     , mbEmbedImages(false)
     , m_bCfgPrintLayout( false )
     , m_bParaDotLeaders( false )
-    , m_bPrettyPrint( true )
 {
     SetBaseURL(rBaseURL);
 
diff --git a/sw/source/filter/html/wrthtml.hxx 
b/sw/source/filter/html/wrthtml.hxx
index b6cb0b6bd830..423fd265063e 100644
--- a/sw/source/filter/html/wrthtml.hxx
+++ b/sw/source/filter/html/wrthtml.hxx
@@ -278,6 +278,7 @@ class SW_DLLPUBLIC SwHTMLWriter : public Writer
 
     FieldUnit m_eCSS1Unit;
 
+    bool m_bPrettyPrint = true; // Allows to add new lines to make it more 
readable
     bool m_bLFPossible = false; // a line break can be inserted
     bool m_bSpacePreserve = false; // Using xml::space="preserve", or 
"white-space: pre-wrap" style
     bool m_bPreserveSpacesOnWrite = false; // If export should use 
m_bSpacePreserve
@@ -419,7 +420,6 @@ public:
 #define sCSS2_P_CLASS_leaders "leaders"
     bool m_bCfgPrintLayout : 1;       // PrintLayout option for TOC dot leaders
     bool m_bParaDotLeaders : 1;       // for TOC dot leaders
-    bool m_bPrettyPrint : 1;          // Allows to add new lines to make it 
more readable
     // 26
 
     /// Tracks which text portion attributes are currently open: a which id -> 
open count map.
@@ -624,6 +624,7 @@ public:
     /// Determines the prefix string needed to respect the requested namespace 
alias.
     OString GetNamespace() const;
 
+    bool IsPrettyPrint() const { return !m_bSpacePreserve && m_bPrettyPrint; }
     bool IsLFPossible() const { return !m_bSpacePreserve && m_bLFPossible; }
     void SetLFPossible(bool val) { m_bLFPossible = val; }
     bool IsSpacePreserve() const { return m_bSpacePreserve; }
commit 42e0d2e6c0ea9a3152a0a76587cded500ec93553
Author:     Szymon Kłos <szymon.k...@collabora.com>
AuthorDate: Fri Nov 19 12:38:18 2021 +0100
Commit:     Mike Kaganski <mike.kagan...@collabora.com>
CommitDate: Thu Nov 23 17:19:00 2023 +0300

    lok: don't pretty print html for online
    
    followup for "lok: don't limit line length in HTMLWriter"
    
    Pretty printing makes HTML more readable for humans, but introduces
    lots of newline characters, which are later interpreted by the client
    as valid new lines in the document content.
    
    This caused line breaks to multiply when inserting hyperlinks,
    where we read the current selection's content and then pass it back
    to the core as the link's content (with the added new lines).
    
    This change needs a matching cypress change, so that some content read
    from the document is no longer expected to contain "\n" at the beginning.
    
    This also affects copying from the online document to the clipboard.
    
    Change-Id: I2b17d62398d947fcf1d3fb1ed6005c3063d114f2
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/136893
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com>
    Reviewed-by: Henry Castro <hcas...@collabora.com>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141479
    Tested-by: Jenkins
    Reviewed-by: Szymon Kłos <szymon.k...@collabora.com>

diff --git a/sw/qa/extras/tiledrendering/data/multiline.odt 
b/sw/qa/extras/tiledrendering/data/multiline.odt
new file mode 100644
index 000000000000..4c60b58decb2
Binary files /dev/null and b/sw/qa/extras/tiledrendering/data/multiline.odt 
differ
diff --git a/sw/qa/extras/tiledrendering/tiledrendering.cxx 
b/sw/qa/extras/tiledrendering/tiledrendering.cxx
index e39dc418f8a2..8263fcaf6249 100644
--- a/sw/qa/extras/tiledrendering/tiledrendering.cxx
+++ b/sw/qa/extras/tiledrendering/tiledrendering.cxx
@@ -424,6 +424,46 @@ CPPUNIT_TEST_FIXTURE(SwTiledRenderingTest, 
testGetTextSelectionLineLimit)
     CPPUNIT_ASSERT(sHtmlText.match(sExpectedHtml, nStart));
 }
 
+CPPUNIT_TEST_FIXTURE(SwTiledRenderingTest, testGetTextSelectionMultiLine)
+{
+    // Test will check if correct number of new line marks / paragraphs is 
generated
+    static OStringLiteral sOriginalText(u8"Heading\n\
+Let's have text; we need to be able to select the text inside the shape, but 
also the various individual ones too:\n\
+\n\
+\n\
+\n\
+\n\
+\n\
+And this is all for Writer shape objects\n\
+Heading on second page");
+
+    static OStringLiteral sExpectedHtml(u8"Heading</h2>\n\
+<p>Let's have text; we need to be able to select the text inside the shape, 
but also the various individual ones too:</p>\n\
+<p><br/><br/></p>\n\
+<p><br/><br/></p>\n\
+<p><br/><br/></p>\n\
+<p><br/><br/></p>\n\
+<p><br/><br/></p>\n\
+<h1 class=\"western\">And this is all for Writer shape objects</h1>\n\
+<h2 class=\"western\">Heading on second page</h2>");
+
+    SwXTextDocument* pXTextDocument = createDoc("multiline.odt");
+
+    SwWrtShell* pWrtShell = pXTextDocument->GetDocShell()->GetWrtShell();
+    // Create a selection.
+    pWrtShell->SelAll();
+
+    OString sPlainText = 
apitest::helper::transferable::getTextSelection(pXTextDocument->getSelection(), 
"text/plain;charset=utf-8");
+
+    CPPUNIT_ASSERT_EQUAL(OString(sOriginalText), sPlainText.trim());
+
+    OString sHtmlText = 
apitest::helper::transferable::getTextSelection(pXTextDocument->getSelection(), 
"text/html");
+
+    int nStart = sHtmlText.indexOf(u8"Heading");
+
+    CPPUNIT_ASSERT(sHtmlText.match(sExpectedHtml, nStart));
+}
+
 CPPUNIT_TEST_FIXTURE(SwTiledRenderingTest, testSetGraphicSelection)
 {
     SwXTextDocument* pXTextDocument = createDoc("shape.fodt");
diff --git a/sw/source/filter/html/htmlatr.cxx 
b/sw/source/filter/html/htmlatr.cxx
index 114d6de16b3a..e0722596d368 100644
--- a/sw/source/filter/html/htmlatr.cxx
+++ b/sw/source/filter/html/htmlatr.cxx
@@ -769,6 +769,7 @@ static void OutHTML_SwFormat( Writer& rWrt, const SwFormat& 
rFormat,
     if( rInfo.bInNumberBulletList && bNumberedForListItem )
     {
         HtmlWriter html(rWrt.Strm(), rHWrt.maNamespace);
+        html.prettyPrint(rHWrt.m_bPrettyPrint);
         html.start(OOO_STRING_SVTOOLS_HTML_li);
         if (!bNumbered)
         {
@@ -1010,7 +1011,7 @@ static void OutHTML_SwFormatOff( Writer& rWrt, const 
SwHTMLTextCollOutputInfo& r
 
     if( rInfo.ShouldOutputToken() )
     {
-        if (rHWrt.IsLFPossible())
+        if (rHWrt.m_bPrettyPrint && rHWrt.IsLFPossible())
             rHWrt.OutNewLine( true );
 
         // if necessary, for BLOCKQUOTE, ADDRESS and DD another paragraph token
@@ -2061,6 +2062,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
         rHTMLWrt.SetLFPossible(true);
 
         HtmlWriter aHtml(rWrt.Strm(), rHTMLWrt.maNamespace);
+        aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint);
         aHtml.start(OOO_STRING_SVTOOLS_HTML_horzrule);
 
         const SfxItemSet* pItemSet = pNd->GetpSwAttrSet();
@@ -2253,8 +2255,8 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
     // now it's a good opportunity again for an LF - if it is still allowed
     // FIXME: for LOK case we set rHTMLWrt.m_nWishLineLen as -1, for now keep 
old flow
     // when LOK side will be fixed - don't insert new line at the beginning
-    if( rHTMLWrt.IsLFPossible() &&
-        rHTMLWrt.GetLineLen() >= (rHTMLWrt.m_nWishLineLen >= 0 ? 
rHTMLWrt.m_nWishLineLen : 70 ) )
+    if( rHTMLWrt.IsLFPossible() && rHTMLWrt.m_bPrettyPrint && 
rHTMLWrt.m_nWishLineLen >= 0 &&
+        rHTMLWrt.GetLineLen() >= rHTMLWrt.m_nWishLineLen )
     {
         rHTMLWrt.OutNewLine();
     }
@@ -2492,7 +2494,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
                         nWordLen = nEnd;
                     nWordLen -= nStrPos;
 
-                    if( rHTMLWrt.m_nWishLineLen >= 0 &&
+                    if( rHTMLWrt.m_bPrettyPrint && rHTMLWrt.m_nWishLineLen >= 
0 &&
                         (nLineLen >= rHTMLWrt.m_nWishLineLen ||
                         (nLineLen+nWordLen) >= rHTMLWrt.m_nWishLineLen ) )
                     {
@@ -2508,6 +2510,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
                     {
                         HTMLOutFuncs::FlushToAscii( rWrt.Strm() );
                         HtmlWriter aHtml(rWrt.Strm(), rHTMLWrt.maNamespace);
+                        aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint);
                         aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak);
                     }
                     else if (c == CH_TXT_ATR_FORMELEMENT)
@@ -2586,6 +2589,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
         else
         {
             HtmlWriter aHtml(rHTMLWrt.Strm(), rHTMLWrt.maNamespace);
+            aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint);
             aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak);
             const SvxULSpaceItem& rULSpace = 
pNd->GetSwAttrSet().Get(RES_UL_SPACE);
             if (rULSpace.GetLower() > 0 && !bEndOfCell)
@@ -2612,6 +2616,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const 
SwContentNode& rNode )
         }
 
         HtmlWriter aHtml(rHTMLWrt.Strm(), rHTMLWrt.maNamespace);
+        aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint);
         aHtml.start(OOO_STRING_SVTOOLS_HTML_linebreak);
         aHtml.attribute(OOO_STRING_SVTOOLS_HTML_O_clear, pString);
         aHtml.end();
diff --git a/sw/source/filter/html/wrthtml.cxx 
b/sw/source/filter/html/wrthtml.cxx
index 48c6f8718665..1826cee5bd14 100644
--- a/sw/source/filter/html/wrthtml.cxx
+++ b/sw/source/filter/html/wrthtml.cxx
@@ -153,6 +153,7 @@ SwHTMLWriter::SwHTMLWriter( const OUString& rBaseURL, 
std::u16string_view rFilte
     , mbEmbedImages(false)
     , m_bCfgPrintLayout( false )
     , m_bParaDotLeaders( false )
+    , m_bPrettyPrint( true )
 {
     SetBaseURL(rBaseURL);
 
@@ -227,6 +228,12 @@ void SwHTMLWriter::SetupFilterOptions(std::u16string_view 
rFilterOptions)
         aStoreMap["NoLineLimit"] <<= true;
     }
 
+    // this option can be "on" together with any of above
+    if (rFilterOptions.find(u"NoPrettyPrint") != std::u16string_view::npos)
+    {
+        aStoreMap["NoPrettyPrint"] <<= true;
+    }
+
     const uno::Sequence<OUString> aOptionSeq
         = comphelper::string::convertCommaSeparated(rFilterOptions);
     static const OUStringLiteral aXhtmlNsKey(u"xhtmlns=");
@@ -291,6 +298,14 @@ void SwHTMLWriter::SetupFilterFromPropertyValues(
         mbSkipHeaderFooter = bVal;
     }
 
+    // this option can be "on" together with any of above
+    it = aStoreMap.find("NoPrettyPrint");
+    if (it != aStoreMap.end())
+    {
+        m_nWishLineLen = -1;
+        m_bPrettyPrint = false;
+    }
+
     it = aStoreMap.find("EmbedImages");
     if (it != aStoreMap.end())
     {
diff --git a/sw/source/filter/html/wrthtml.hxx 
b/sw/source/filter/html/wrthtml.hxx
index 721c901d379b..b6cb0b6bd830 100644
--- a/sw/source/filter/html/wrthtml.hxx
+++ b/sw/source/filter/html/wrthtml.hxx
@@ -419,7 +419,8 @@ public:
 #define sCSS2_P_CLASS_leaders "leaders"
     bool m_bCfgPrintLayout : 1;       // PrintLayout option for TOC dot leaders
     bool m_bParaDotLeaders : 1;       // for TOC dot leaders
-    // 25
+    bool m_bPrettyPrint : 1;          // Allows to add new lines to make it 
more readable
+    // 26
 
     /// Tracks which text portion attributes are currently open: a which id -> 
open count map.
     std::map<sal_uInt16, int> maStartedAttributes;
diff --git a/sw/source/uibase/dochdl/swdtflvr.cxx 
b/sw/source/uibase/dochdl/swdtflvr.cxx
index d9a6db5c1bb5..c8787952d524 100644
--- a/sw/source/uibase/dochdl/swdtflvr.cxx
+++ b/sw/source/uibase/dochdl/swdtflvr.cxx
@@ -782,7 +782,7 @@ bool SwTransferable::WriteObject( 
tools::SvRef<SotTempStream>& xStream,
     case SWTRANSFER_OBJECTTYPE_HTML:
     {
         // LOK is interested in getting images embedded for copy/paste support.
-        GetHTMLWriter( comphelper::LibreOfficeKit::isActive() ? 
OUString("EmbedImages;NoLineLimit") : OUString(), OUString(), xWrt );
+        GetHTMLWriter( comphelper::LibreOfficeKit::isActive() ? 
OUString("EmbedImages;NoPrettyPrint") : OUString(), OUString(), xWrt );
         break;
     }
 

Reply via email to