include/svtools/parhtml.hxx | 1 sw/qa/extras/htmlexport/data/test_no_space_preserve.fodt | 25 ++ sw/qa/extras/htmlexport/htmlexport.cxx | 147 +++++++++++++++ sw/qa/extras/tiledrendering/data/multiline.odt |binary sw/qa/extras/tiledrendering/tiledrendering.cxx | 40 ++++ sw/source/filter/html/htmlatr.cxx | 83 ++++++-- sw/source/filter/html/swhtml.cxx | 2 sw/source/filter/html/wrthtml.cxx | 14 + sw/source/filter/html/wrthtml.hxx | 4 sw/source/uibase/dochdl/swdtflvr.cxx | 2 10 files changed, 298 insertions(+), 20 deletions(-)
New commits: commit ea2c1bf8b48070b3869ba35fdf9c84aec57cf678 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Tue Nov 14 11:05:05 2023 +0300 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Thu Nov 23 22:57:50 2023 +0300 Limit PreserveSpaces HTML/ReqIF export option to proper space characters We only need to care about SPACE, TAB, LF, and CR. We don't need to care about LF in some cases, because it gets converted to a <br/> element, and handled specially. And we don't need to care about other "whitespace" characters, like control characters and Unicode space. Our own object placeholder characters were treated as "space", which was wrong. One peculiarity needs to be noted. In ReqIF case, a SPACE before LF needs no 'xml:space="preserve"', but an LF before SPACE needs one, because it needs to disambiguate between a <br> followed by a significant space vs. a <br> followed by a pretty-printed newline. Change-Id: I74273592df05bb94d8e4ecaea2c069c0e086b7d8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159853 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> diff --git a/include/svtools/parhtml.hxx b/include/svtools/parhtml.hxx index a6a15b93e3c1..1a17e98b2997 100644 --- a/include/svtools/parhtml.hxx +++ b/include/svtools/parhtml.hxx @@ -189,6 +189,7 @@ protected: void SetNamespace(std::u16string_view rNamespace); + bool GetPreserveSpaces() const { return m_bPreserveSpaces; } void SetPreserveSpaces(bool val) { m_bPreserveSpaces = val; } public: diff --git a/sw/qa/extras/htmlexport/data/test_no_space_preserve.fodt b/sw/qa/extras/htmlexport/data/test_no_space_preserve.fodt new file mode 100644 index 000000000000..e0d9a3506e9f --- /dev/null +++ b/sw/qa/extras/htmlexport/data/test_no_space_preserve.fodt @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" 
xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" office:version="1.3" office:mimetype="application/vnd.oasis.opendocument.text"> + <office:body> + <office:text> + <text:p>No special spaces</text:p> + <text:p><text:s/>Leading space</text:p> + <text:p>Trailing space </text:p> + <text:p>Double <text:s/>space</text:p> + <text:p><text:line-break/>Leading/trailing breaks<text:line-break/></text:p> + <text:p><text:line-break/> Leading break + space</text:p> + <text:p>Trailing space + break <text:line-break/></text:p> + <text:p>Middle<text:line-break/>break</text:p> + <text:p>Middle space <text:line-break/>+ break</text:p> + <text:p>Middle break<text:line-break/> + space</text:p> + <text:p>Trailing space and SVG <draw:frame text:anchor-type="as-char" svg:width="5.59mm" svg:height="5.59mm" draw:z-index="0"><draw:image draw:mime-type="image/svg+xml"> + <office:binary-data>PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiPz4KPHN2ZyB4bWxucz0iaHR0 + cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZlcnNpb249IjEuMSIgdmlld0JveD0iMCAwIDIw + IDIwIiBzdHJva2U9ImJsYWNrIj4KPHBhdGggZD0iTTEsMUwxOSwxOSIvPgo8L3N2Zz4= + </office:binary-data> + </draw:image> + </draw:frame></text:p> + </office:text> + </office:body> +</office:document> \ No newline at end of file diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx index 6b8de215d27c..d72e2b1daf9e 100644 --- a/sw/qa/extras/htmlexport/htmlexport.cxx +++ b/sw/qa/extras/htmlexport/htmlexport.cxx @@ -2816,6 +2816,153 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testReqIF_PreserveSpaces) CPPUNIT_ASSERT_EQUAL(paraText, getParagraph(1)->getString()); } +CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testHTML_NoPreserveSpaces) +{ + // Test cases where "PreserveSpaces" should not introduce respective markup + + const auto assertXPath_NoWhiteSpaceInStyle + = [this](const xmlDocUniquePtr& pDoc, const OString& rXPath) { + xmlXPathObjectPtr pXmlObj = 
getXPathNode(pDoc, rXPath); + xmlNodeSetPtr pXmlNodes = pXmlObj->nodesetval; + CPPUNIT_ASSERT_EQUAL_MESSAGE(rXPath.getStr(), 1, xmlXPathNodeSetGetLength(pXmlNodes)); + xmlNodePtr pXmlNode = pXmlNodes->nodeTab[0]; + if (xmlChar* prop = xmlGetProp(pXmlNode, BAD_CAST("style"))) + { + OUString style = OUString::fromUtf8(reinterpret_cast<const char*>(prop)); + CPPUNIT_ASSERT_MESSAGE(rXPath.getStr(), style.indexOf("white-space:") < 0); + } + xmlXPathFreeObject(pXmlObj); + }; + const auto assertXPath_HasWhiteSpaceInStyle + = [this](const xmlDocUniquePtr& pDoc, const OString& rXPath) { + const OUString style = getXPath(pDoc, rXPath, "style"); + CPPUNIT_ASSERT_MESSAGE(rXPath.getStr(), style.indexOf("white-space: pre-wrap") >= 0); + }; + + createSwDoc("test_no_space_preserve.fodt"); + + // Export to plain HTML, using PreserveSpaces: + uno::Reference<css::frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY_THROW); + css::uno::Sequence<css::beans::PropertyValue> aStoreProperties = { + comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")), + comphelper::makePropertyValue("PreserveSpaces", true), + }; + xStorable->storeToURL(maTempFile.GetURL(), aStoreProperties); + + htmlDocUniquePtr pHtmlDoc = parseHtml(maTempFile); + CPPUNIT_ASSERT(pHtmlDoc); + + // No whitespace preservation, where no leading / trailing / double whitespace + assertXPath_NoWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[1]"); + // Whitespace preserved for a leading space + assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[2]"); + // Whitespace preserved for a trailing space + assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[3]"); + // Whitespace preserved for a double space + assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[4]"); + // No whitespace preservation for leading / trailing breaks + assertXPath_NoWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[5]"); + // Whitespace preserved for a leading break + space + assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, 
"/html/body/p[6]"); + // Whitespace preserved for a trailing space + break + assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[7]"); + // No whitespace preservation for a middle break + assertXPath_NoWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[8]"); + // Whitespace preserved for a middle space + break + assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[9]"); + // Whitespace preserved for a middle break + space + assertXPath_HasWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[10]"); + // No whitespace preservation for a trailing space and SVG + assertXPath_NoWhiteSpaceInStyle(pHtmlDoc, "/html/body/p[11]"); + + // Test import + + setImportFilterName("HTML (StarWriter)"); + UnoApiTest::load(maTempFile.GetURL()); + + CPPUNIT_ASSERT_EQUAL(OUString("No special spaces"), getParagraph(1)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString(" Leading space"), getParagraph(2)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Trailing space "), getParagraph(3)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Double space"), getParagraph(4)->getString()); + // Trailing break is removed in SwHTMLParser::AppendTextNode, and replaced with para spacing + CPPUNIT_ASSERT_EQUAL(OUString("\nLeading/trailing breaks"), getParagraph(5)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("\n Leading break + space"), getParagraph(6)->getString()); + // Trailing break is removed in SwHTMLParser::AppendTextNode, and replaced with para spacing + CPPUNIT_ASSERT_EQUAL(OUString("Trailing space + break "), getParagraph(7)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Middle\nbreak"), getParagraph(8)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Middle space \n+ break"), getParagraph(9)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Middle break\n + space"), getParagraph(10)->getString()); + // The SVG is replaced by a space in SwXParagraph::getString() + CPPUNIT_ASSERT_EQUAL(OUString("Trailing space and SVG "), getParagraph(11)->getString()); +} + +CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, 
testReqIF_NoPreserveSpaces) +{ + // Test cases where "PreserveSpaces" should not introduce respective markup + + createSwDoc("test_no_space_preserve.fodt"); + + // Export to ReqIF, using PreserveSpaces: + uno::Reference<css::frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY_THROW); + css::uno::Sequence<css::beans::PropertyValue> aStoreProperties = { + comphelper::makePropertyValue("FilterName", OUString("HTML (StarWriter)")), + comphelper::makePropertyValue("FilterOptions", OUString("xhtmlns=reqif-xhtml")), + comphelper::makePropertyValue("PreserveSpaces", true), + }; + xStorable->storeToURL(maTempFile.GetURL(), aStoreProperties); + + SvMemoryStream aStream; + WrapReqifFromTempFile(aStream); + xmlDocUniquePtr pXmlDoc = parseXmlStream(&aStream); + + // No whitespace preservation, where no leading / trailing / double whitespace + assertXPathNoAttribute(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[1]", "space"); + // Whitespace preserved for a leading space + assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[2]", "space", + u"preserve"); + // Whitespace preserved for a trailing space + assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[3]", "space", + u"preserve"); + // Whitespace preserved for a double space + assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[4]", "space", + u"preserve"); + // No whitespace preservation for leading / trailing breaks + assertXPathNoAttribute(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[5]", "space"); + // Whitespace preserved for a leading break + space + assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[6]", "space", + u"preserve"); + // No whitespace preservation for a trailing space + break + assertXPathNoAttribute(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[7]", "space"); + // No whitespace preservation for a middle break + assertXPathNoAttribute(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[8]", 
"space"); + // No whitespace preservation for a middle space + break + assertXPathNoAttribute(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[9]", "space"); + // Whitespace preserved for a middle break + space + assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[10]", "space", + u"preserve"); + // No whitespace preservation for a trailing space and SVG + assertXPathNoAttribute(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p[11]", "space"); + + // Test import + + setImportFilterOptions("xhtmlns=reqif-xhtml"); + setImportFilterName("HTML (StarWriter)"); + UnoApiTest::load(maTempFile.GetURL()); + + CPPUNIT_ASSERT_EQUAL(OUString("No special spaces"), getParagraph(1)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString(" Leading space"), getParagraph(2)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Trailing space "), getParagraph(3)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Double space"), getParagraph(4)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("\nLeading/trailing breaks\n"), getParagraph(5)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("\n Leading break + space"), getParagraph(6)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Trailing space + break \n"), getParagraph(7)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Middle\nbreak"), getParagraph(8)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Middle space \n+ break"), getParagraph(9)->getString()); + CPPUNIT_ASSERT_EQUAL(OUString("Middle break\n + space"), getParagraph(10)->getString()); + // The SVG is replaced by a space in SwXParagraph::getString() + CPPUNIT_ASSERT_EQUAL(OUString("Trailing space and SVG "), getParagraph(11)->getString()); +} + CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testReqIF_ExportFormulasAsPDF) { // Given a document with a formula: diff --git a/sw/source/filter/html/htmlatr.cxx b/sw/source/filter/html/htmlatr.cxx index e0722596d368..a99140564301 100644 --- a/sw/source/filter/html/htmlatr.cxx +++ b/sw/source/filter/html/htmlatr.cxx @@ -769,7 
+769,7 @@ static void OutHTML_SwFormat( Writer& rWrt, const SwFormat& rFormat, if( rInfo.bInNumberBulletList && bNumberedForListItem ) { HtmlWriter html(rWrt.Strm(), rHWrt.maNamespace); - html.prettyPrint(rHWrt.m_bPrettyPrint); + html.prettyPrint(rHWrt.IsPrettyPrint()); html.start(OOO_STRING_SVTOOLS_HTML_li); if (!bNumbered) { @@ -1011,7 +1011,7 @@ static void OutHTML_SwFormatOff( Writer& rWrt, const SwHTMLTextCollOutputInfo& r if( rInfo.ShouldOutputToken() ) { - if (rHWrt.m_bPrettyPrint && rHWrt.IsLFPossible()) + if (rHWrt.IsPrettyPrint() && rHWrt.IsLFPossible()) rHWrt.OutNewLine( true ); // if necessary, for BLOCKQUOTE, ADDRESS and DD another paragraph token @@ -2006,26 +2006,70 @@ void HTMLEndPosLst::OutEndAttrs( SwHTMLWriter& rHWrt, sal_Int32 nPos ) } } -static bool NeedPreserveWhitespace(std::u16string_view str) +static constexpr bool IsLF(sal_Unicode ch) { return ch == '\n'; } + +static constexpr bool IsWhitespaceExcludingLF(sal_Unicode ch) +{ + return ch == ' ' || ch == '\t' || ch == '\r'; +} + +static constexpr bool IsWhitespaceIncludingLF(sal_Unicode ch) +{ + return IsWhitespaceExcludingLF(ch) || IsLF(ch); +} + +static bool NeedPreserveWhitespace(std::u16string_view str, bool xml) { if (str.empty()) return false; // leading / trailing spaces - if (o3tl::internal::implIsWhitespace(str.front()) - || o3tl::internal::implIsWhitespace(str.back())) + // A leading / trailing \n would turn into a leading / trailing <br/>, + // and will not disappear, even without space preserving option + if (IsWhitespaceExcludingLF(str.front()) || IsWhitespaceExcludingLF(str.back())) return true; - bool bWasSpace = false; - for (auto ch : str) + for (size_t i = 0; i < str.size(); ++i) { - if (o3tl::internal::implIsWhitespace(ch)) + if (xml) { - if (bWasSpace) - return true; // Second whitespace in a row - else - bWasSpace = true; + // No need to consider \n, which convert to <br/>, when it's after a space + // (but handle it *before* a space) + if 
(IsWhitespaceIncludingLF(str[i])) + { + do + { + ++i; + if (i == str.size()) + return false; + } while (IsLF(str[i])); + if (IsWhitespaceExcludingLF(str[i])) + return true; // Second whitespace in a row + } + } + else // html + { + // Only consider \n, when an adjacent space is not \n - which would be eaten + // without a space preserving option + if (IsWhitespaceExcludingLF(str[i])) + { + ++i; + if (i == str.size()) + return false; + if (IsWhitespaceIncludingLF(str[i])) + return true; // Any whitespace after a non-LF whitespace + } + else if (IsLF(str[i])) + { + do + { + ++i; + if (i == str.size()) + return false; + } + while (IsLF(str[i])); + if (IsWhitespaceExcludingLF(str[i])) + return true; // A non-LF whitespace after a LF + } } - else - bWasSpace = false; } return false; } @@ -2062,7 +2106,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) rHTMLWrt.SetLFPossible(true); HtmlWriter aHtml(rWrt.Strm(), rHTMLWrt.maNamespace); - aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint); + aHtml.prettyPrint(rHTMLWrt.IsPrettyPrint()); aHtml.start(OOO_STRING_SVTOOLS_HTML_horzrule); const SfxItemSet* pItemSet = pNd->GetpSwAttrSet(); @@ -2240,7 +2284,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) bool bOldLFPossible = rHTMLWrt.IsLFPossible(); bool bOldSpacePreserve = rHTMLWrt.IsSpacePreserve(); if (rHTMLWrt.IsPreserveSpacesOnWritePrefSet()) - rHTMLWrt.SetSpacePreserve(NeedPreserveWhitespace(rStr)); + rHTMLWrt.SetSpacePreserve(NeedPreserveWhitespace(rStr, rHTMLWrt.mbReqIF)); OutHTML_SwFormat( rWrt, rFormat, pNd->GetpSwAttrSet(), aFormatInfo ); // If we didn't open a new line before the paragraph tag, we do that now @@ -2255,7 +2299,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) // now it's a good opportunity again for an LF - if it is still allowed // FIXME: for LOK case we set rHTMLWrt.m_nWishLineLen as -1, for now keep old flow // when LOK side will be fixed - don't insert new line at the beginning - 
if( rHTMLWrt.IsLFPossible() && rHTMLWrt.m_bPrettyPrint && rHTMLWrt.m_nWishLineLen >= 0 && + if( rHTMLWrt.IsLFPossible() && rHTMLWrt.IsPrettyPrint() && rHTMLWrt.m_nWishLineLen >= 0 && rHTMLWrt.GetLineLen() >= rHTMLWrt.m_nWishLineLen ) { rHTMLWrt.OutNewLine(); @@ -2494,7 +2538,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) nWordLen = nEnd; nWordLen -= nStrPos; - if( rHTMLWrt.m_bPrettyPrint && rHTMLWrt.m_nWishLineLen >= 0 && + if( rHTMLWrt.IsPrettyPrint() && rHTMLWrt.m_nWishLineLen >= 0 && (nLineLen >= rHTMLWrt.m_nWishLineLen || (nLineLen+nWordLen) >= rHTMLWrt.m_nWishLineLen ) ) { @@ -2510,7 +2554,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) { HTMLOutFuncs::FlushToAscii( rWrt.Strm() ); HtmlWriter aHtml(rWrt.Strm(), rHTMLWrt.maNamespace); - aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint); + aHtml.prettyPrint(rHTMLWrt.IsPrettyPrint()); aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak); } else if (c == CH_TXT_ATR_FORMELEMENT) @@ -2589,7 +2633,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) else { HtmlWriter aHtml(rHTMLWrt.Strm(), rHTMLWrt.maNamespace); - aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint); + aHtml.prettyPrint(rHTMLWrt.IsPrettyPrint()); aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak); const SvxULSpaceItem& rULSpace = pNd->GetSwAttrSet().Get(RES_UL_SPACE); if (rULSpace.GetLower() > 0 && !bEndOfCell) @@ -2616,7 +2660,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) } HtmlWriter aHtml(rHTMLWrt.Strm(), rHTMLWrt.maNamespace); - aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint); + aHtml.prettyPrint(rHTMLWrt.IsPrettyPrint()); aHtml.start(OOO_STRING_SVTOOLS_HTML_linebreak); aHtml.attribute(OOO_STRING_SVTOOLS_HTML_O_clear, pString); aHtml.end(); diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx index 2d8f99c44d95..e96ab81d5af1 100644 --- a/sw/source/filter/html/swhtml.cxx +++ b/sw/source/filter/html/swhtml.cxx @@ -1524,7 +1524,7 @@ void 
SwHTMLParser::NextToken( HtmlTokenId nToken ) case HtmlTokenId::TEXTTOKEN: case HtmlTokenId::CDATA: // insert string without spanning attributes at the end. - if( !aToken.isEmpty() && ' '==aToken[0] && !IsReadPRE() ) + if (!aToken.isEmpty() && ' ' == aToken[0] && !IsReadPRE() && !GetPreserveSpaces()) { sal_Int32 nPos = m_pPam->GetPoint()->GetContentIndex(); const SwTextNode* pTextNode = nPos ? m_pPam->GetPoint()->GetNode().GetTextNode() : nullptr; diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx index 1826cee5bd14..be83b5331266 100644 --- a/sw/source/filter/html/wrthtml.cxx +++ b/sw/source/filter/html/wrthtml.cxx @@ -153,7 +153,6 @@ SwHTMLWriter::SwHTMLWriter( const OUString& rBaseURL, std::u16string_view rFilte , mbEmbedImages(false) , m_bCfgPrintLayout( false ) , m_bParaDotLeaders( false ) - , m_bPrettyPrint( true ) { SetBaseURL(rBaseURL); diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx index b6cb0b6bd830..423fd265063e 100644 --- a/sw/source/filter/html/wrthtml.hxx +++ b/sw/source/filter/html/wrthtml.hxx @@ -278,6 +278,7 @@ class SW_DLLPUBLIC SwHTMLWriter : public Writer FieldUnit m_eCSS1Unit; + bool m_bPrettyPrint = true; // Allows to add new lines to make it more readable bool m_bLFPossible = false; // a line break can be inserted bool m_bSpacePreserve = false; // Using xml::space="preserve", or "white-space: pre-wrap" style bool m_bPreserveSpacesOnWrite = false; // If export should use m_bSpacePreserve @@ -419,7 +420,6 @@ public: #define sCSS2_P_CLASS_leaders "leaders" bool m_bCfgPrintLayout : 1; // PrintLayout option for TOC dot leaders bool m_bParaDotLeaders : 1; // for TOC dot leaders - bool m_bPrettyPrint : 1; // Allows to add new lines to make it more readable // 26 /// Tracks which text portion attributes are currently open: a which id -> open count map. @@ -624,6 +624,7 @@ public: /// Determines the prefix string needed to respect the requested namespace alias. 
OString GetNamespace() const; + bool IsPrettyPrint() const { return !m_bSpacePreserve && m_bPrettyPrint; } bool IsLFPossible() const { return !m_bSpacePreserve && m_bLFPossible; } void SetLFPossible(bool val) { m_bLFPossible = val; } bool IsSpacePreserve() const { return m_bSpacePreserve; } commit 42e0d2e6c0ea9a3152a0a76587cded500ec93553 Author: Szymon Kłos <szymon.k...@collabora.com> AuthorDate: Fri Nov 19 12:38:18 2021 +0100 Commit: Mike Kaganski <mike.kagan...@collabora.com> CommitDate: Thu Nov 23 17:19:00 2023 +0300 lok: don't pretty print html for online followup for "lok: don't limit line length in HTMLWriter" Pretty printing makes html more readable for humans but introduces lots of new line marks which are later interpreted in the client as valid new lines in the document content. This was causing multiplying the line breaks when inserting hyperlinks where we read current selection's content and then we pass it back to the core as link's content (with added new lines). This change needs cypress change to not contain "\n" at the beginning of some content read from the document. This also affects copying from the online document to clipboard. 
Change-Id: I2b17d62398d947fcf1d3fb1ed6005c3063d114f2 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/136893 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com> Reviewed-by: Henry Castro <hcas...@collabora.com> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141479 Tested-by: Jenkins Reviewed-by: Szymon Kłos <szymon.k...@collabora.com> diff --git a/sw/qa/extras/tiledrendering/data/multiline.odt b/sw/qa/extras/tiledrendering/data/multiline.odt new file mode 100644 index 000000000000..4c60b58decb2 Binary files /dev/null and b/sw/qa/extras/tiledrendering/data/multiline.odt differ diff --git a/sw/qa/extras/tiledrendering/tiledrendering.cxx b/sw/qa/extras/tiledrendering/tiledrendering.cxx index e39dc418f8a2..8263fcaf6249 100644 --- a/sw/qa/extras/tiledrendering/tiledrendering.cxx +++ b/sw/qa/extras/tiledrendering/tiledrendering.cxx @@ -424,6 +424,46 @@ CPPUNIT_TEST_FIXTURE(SwTiledRenderingTest, testGetTextSelectionLineLimit) CPPUNIT_ASSERT(sHtmlText.match(sExpectedHtml, nStart)); } +CPPUNIT_TEST_FIXTURE(SwTiledRenderingTest, testGetTextSelectionMultiLine) +{ + // Test will check if correct number of new line marks / paragraphs is generated + static OStringLiteral sOriginalText(u8"Heading\n\ +Let's have text; we need to be able to select the text inside the shape, but also the various individual ones too:\n\ +\n\ +\n\ +\n\ +\n\ +\n\ +And this is all for Writer shape objects\n\ +Heading on second page"); + + static OStringLiteral sExpectedHtml(u8"Heading</h2>\n\ +<p>Let's have text; we need to be able to select the text inside the shape, but also the various individual ones too:</p>\n\ +<p><br/><br/></p>\n\ +<p><br/><br/></p>\n\ +<p><br/><br/></p>\n\ +<p><br/><br/></p>\n\ +<p><br/><br/></p>\n\ +<h1 class=\"western\">And this is all for Writer shape objects</h1>\n\ +<h2 class=\"western\">Heading on second page</h2>"); + + SwXTextDocument* pXTextDocument = createDoc("multiline.odt"); + + SwWrtShell* pWrtShell = 
pXTextDocument->GetDocShell()->GetWrtShell(); + // Create a selection. + pWrtShell->SelAll(); + + OString sPlainText = apitest::helper::transferable::getTextSelection(pXTextDocument->getSelection(), "text/plain;charset=utf-8"); + + CPPUNIT_ASSERT_EQUAL(OString(sOriginalText), sPlainText.trim()); + + OString sHtmlText = apitest::helper::transferable::getTextSelection(pXTextDocument->getSelection(), "text/html"); + + int nStart = sHtmlText.indexOf(u8"Heading"); + + CPPUNIT_ASSERT(sHtmlText.match(sExpectedHtml, nStart)); +} + CPPUNIT_TEST_FIXTURE(SwTiledRenderingTest, testSetGraphicSelection) { SwXTextDocument* pXTextDocument = createDoc("shape.fodt"); diff --git a/sw/source/filter/html/htmlatr.cxx b/sw/source/filter/html/htmlatr.cxx index 114d6de16b3a..e0722596d368 100644 --- a/sw/source/filter/html/htmlatr.cxx +++ b/sw/source/filter/html/htmlatr.cxx @@ -769,6 +769,7 @@ static void OutHTML_SwFormat( Writer& rWrt, const SwFormat& rFormat, if( rInfo.bInNumberBulletList && bNumberedForListItem ) { HtmlWriter html(rWrt.Strm(), rHWrt.maNamespace); + html.prettyPrint(rHWrt.m_bPrettyPrint); html.start(OOO_STRING_SVTOOLS_HTML_li); if (!bNumbered) { @@ -1010,7 +1011,7 @@ static void OutHTML_SwFormatOff( Writer& rWrt, const SwHTMLTextCollOutputInfo& r if( rInfo.ShouldOutputToken() ) { - if (rHWrt.IsLFPossible()) + if (rHWrt.m_bPrettyPrint && rHWrt.IsLFPossible()) rHWrt.OutNewLine( true ); // if necessary, for BLOCKQUOTE, ADDRESS and DD another paragraph token @@ -2061,6 +2062,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) rHTMLWrt.SetLFPossible(true); HtmlWriter aHtml(rWrt.Strm(), rHTMLWrt.maNamespace); + aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint); aHtml.start(OOO_STRING_SVTOOLS_HTML_horzrule); const SfxItemSet* pItemSet = pNd->GetpSwAttrSet(); @@ -2253,8 +2255,8 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) // now it's a good opportunity again for an LF - if it is still allowed // FIXME: for LOK case we set 
rHTMLWrt.m_nWishLineLen as -1, for now keep old flow // when LOK side will be fixed - don't insert new line at the beginning - if( rHTMLWrt.IsLFPossible() && - rHTMLWrt.GetLineLen() >= (rHTMLWrt.m_nWishLineLen >= 0 ? rHTMLWrt.m_nWishLineLen : 70 ) ) + if( rHTMLWrt.IsLFPossible() && rHTMLWrt.m_bPrettyPrint && rHTMLWrt.m_nWishLineLen >= 0 && + rHTMLWrt.GetLineLen() >= rHTMLWrt.m_nWishLineLen ) { rHTMLWrt.OutNewLine(); } @@ -2492,7 +2494,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) nWordLen = nEnd; nWordLen -= nStrPos; - if( rHTMLWrt.m_nWishLineLen >= 0 && + if( rHTMLWrt.m_bPrettyPrint && rHTMLWrt.m_nWishLineLen >= 0 && (nLineLen >= rHTMLWrt.m_nWishLineLen || (nLineLen+nWordLen) >= rHTMLWrt.m_nWishLineLen ) ) { @@ -2508,6 +2510,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) { HTMLOutFuncs::FlushToAscii( rWrt.Strm() ); HtmlWriter aHtml(rWrt.Strm(), rHTMLWrt.maNamespace); + aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint); aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak); } else if (c == CH_TXT_ATR_FORMELEMENT) @@ -2586,6 +2589,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) else { HtmlWriter aHtml(rHTMLWrt.Strm(), rHTMLWrt.maNamespace); + aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint); aHtml.single(OOO_STRING_SVTOOLS_HTML_linebreak); const SvxULSpaceItem& rULSpace = pNd->GetSwAttrSet().Get(RES_UL_SPACE); if (rULSpace.GetLower() > 0 && !bEndOfCell) @@ -2612,6 +2616,7 @@ Writer& OutHTML_SwTextNode( Writer& rWrt, const SwContentNode& rNode ) } HtmlWriter aHtml(rHTMLWrt.Strm(), rHTMLWrt.maNamespace); + aHtml.prettyPrint(rHTMLWrt.m_bPrettyPrint); aHtml.start(OOO_STRING_SVTOOLS_HTML_linebreak); aHtml.attribute(OOO_STRING_SVTOOLS_HTML_O_clear, pString); aHtml.end(); diff --git a/sw/source/filter/html/wrthtml.cxx b/sw/source/filter/html/wrthtml.cxx index 48c6f8718665..1826cee5bd14 100644 --- a/sw/source/filter/html/wrthtml.cxx +++ b/sw/source/filter/html/wrthtml.cxx @@ -153,6 +153,7 @@ 
SwHTMLWriter::SwHTMLWriter( const OUString& rBaseURL, std::u16string_view rFilte , mbEmbedImages(false) , m_bCfgPrintLayout( false ) , m_bParaDotLeaders( false ) + , m_bPrettyPrint( true ) { SetBaseURL(rBaseURL); @@ -227,6 +228,12 @@ void SwHTMLWriter::SetupFilterOptions(std::u16string_view rFilterOptions) aStoreMap["NoLineLimit"] <<= true; } + // this option can be "on" together with any of above + if (rFilterOptions.find(u"NoPrettyPrint") != std::u16string_view::npos) + { + aStoreMap["NoPrettyPrint"] <<= true; + } + const uno::Sequence<OUString> aOptionSeq = comphelper::string::convertCommaSeparated(rFilterOptions); static const OUStringLiteral aXhtmlNsKey(u"xhtmlns="); @@ -291,6 +298,14 @@ void SwHTMLWriter::SetupFilterFromPropertyValues( mbSkipHeaderFooter = bVal; } + // this option can be "on" together with any of above + it = aStoreMap.find("NoPrettyPrint"); + if (it != aStoreMap.end()) + { + m_nWishLineLen = -1; + m_bPrettyPrint = false; + } + it = aStoreMap.find("EmbedImages"); if (it != aStoreMap.end()) { diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx index 721c901d379b..b6cb0b6bd830 100644 --- a/sw/source/filter/html/wrthtml.hxx +++ b/sw/source/filter/html/wrthtml.hxx @@ -419,7 +419,8 @@ public: #define sCSS2_P_CLASS_leaders "leaders" bool m_bCfgPrintLayout : 1; // PrintLayout option for TOC dot leaders bool m_bParaDotLeaders : 1; // for TOC dot leaders - // 25 + bool m_bPrettyPrint : 1; // Allows to add new lines to make it more readable + // 26 /// Tracks which text portion attributes are currently open: a which id -> open count map. 
std::map<sal_uInt16, int> maStartedAttributes; diff --git a/sw/source/uibase/dochdl/swdtflvr.cxx b/sw/source/uibase/dochdl/swdtflvr.cxx index d9a6db5c1bb5..c8787952d524 100644 --- a/sw/source/uibase/dochdl/swdtflvr.cxx +++ b/sw/source/uibase/dochdl/swdtflvr.cxx @@ -782,7 +782,7 @@ bool SwTransferable::WriteObject( tools::SvRef<SotTempStream>& xStream, case SWTRANSFER_OBJECTTYPE_HTML: { // LOK is interested in getting images embedded for copy/paste support. - GetHTMLWriter( comphelper::LibreOfficeKit::isActive() ? OUString("EmbedImages;NoLineLimit") : OUString(), OUString(), xWrt ); + GetHTMLWriter( comphelper::LibreOfficeKit::isActive() ? OUString("EmbedImages;NoPrettyPrint") : OUString(), OUString(), xWrt ); break; }