sw/qa/filter/md/md.cxx | 60 ++++++++++++++++++++++++++++++++++++++++++ sw/source/filter/md/wrtmd.cxx | 15 ++++++++-- sw/source/filter/md/wrtmd.hxx | 1 3 files changed, 73 insertions(+), 3 deletions(-)
New commits: commit 51cce2385a7a7cd6980e13e5a2ed889963b1d72c Author: Miklos Vajna <vmik...@collabora.com> AuthorDate: Mon Sep 22 08:35:16 2025 +0200 Commit: Caolán McNamara <caolan.mcnam...@collabora.com> CommitDate: Mon Sep 22 13:48:56 2025 +0200 tdf#167564 sw markdown export: handle multi-para table cells Save the bugdoc as markdown: <https://github.github.com/gfm/#tables-extension-> says only "inlines" are allowed in a table cell, but multiple paragraphs would be block elements, which is invalid. An additional problem is that if the table cell has 3 paragraphs, the middle one won't even be detected as an in-table one, since we only generate "cell info" instances for the start and end of the cell. Fix the problem by tracking cell ends, so we can emit " " between in-table paragraphs (but not at the end), and also by checking if we're in a table (and not if we're at cell start/end) to decide when to strip away unwanted newlines. With this, the paragraph breaks are lost during export, but you get valid output and a reasonable fallback for paragraphs in table cells. 
Change-Id: I58c5226610ce89c6ac1f066ad87765ed8106be29 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/191308 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com> Reviewed-by: Caolán McNamara <caolan.mcnam...@collabora.com> diff --git a/sw/qa/filter/md/md.cxx b/sw/qa/filter/md/md.cxx index 6096c0829b57..e18ed1a50f17 100644 --- a/sw/qa/filter/md/md.cxx +++ b/sw/qa/filter/md/md.cxx @@ -619,6 +619,66 @@ CPPUNIT_TEST_FIXTURE(Test, testImageDescTitleExport) CPPUNIT_ASSERT_EQUAL(aExpected, aActual); } +CPPUNIT_TEST_FIXTURE(Test, testMultiParaTableMdExport) +{ + // Given a document that has a table, 3 paragraphs in the A1 cell: + createSwDoc(); + SwDocShell* pDocShell = getSwDocShell(); + SwWrtShell* pWrtShell = pDocShell->GetWrtShell(); + pWrtShell->Insert(u"A"_ustr); + SwInsertTableOptions aInsertTableOptions(SwInsertTableFlags::DefaultBorder, + /*nRowsToRepeat=*/0); + pWrtShell->InsertTable(aInsertTableOptions, /*nRows=*/3, /*nCols=*/3); + pWrtShell->Insert(u"Z"_ustr); + pWrtShell->SttPara(); + pWrtShell->MoveTable(GotoPrevTable, fnTableStart); + pWrtShell->Insert(u"A1 first"_ustr); + pWrtShell->SplitNode(); + pWrtShell->Insert(u"A1 second"_ustr); + pWrtShell->SplitNode(); + pWrtShell->Insert(u"A1 third"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"B1"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"C1"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"A2"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"B2"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"C2"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"A3"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"B3"_ustr); + pWrtShell->GoNextCell(); + pWrtShell->Insert(u"C3"_ustr); + + // When saving that to markdown: + save(mpFilter); + + // Then make sure the A1 cell still only has inlines: + std::string aActual = TempFileToString(); + std::string aExpected( + // clang-format off + "A" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "| A1 first A1 
second A1 third | B1 | C1 |" SAL_NEWLINE_STRING + "| - | - | - |" SAL_NEWLINE_STRING + "| A2 | B2 | C2 |" SAL_NEWLINE_STRING + "| A3 | B3 | C3 |" SAL_NEWLINE_STRING + SAL_NEWLINE_STRING + "Z" SAL_NEWLINE_STRING + // clang-format on + ); + // Without the accompanying fix in place, this test would have failed with: + // - Expected: A1 first A1 second A1 third + // - Actual : A1 first A1 second A1 third + // i.e. multiple paragraphs were not merged into a single paragraph to form just a list of + // inline blocks, as required by the spec. + CPPUNIT_ASSERT_EQUAL(aExpected, aActual); +} + CPPUNIT_PLUGIN_IMPLEMENT(); /* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/sw/source/filter/md/wrtmd.cxx b/sw/source/filter/md/wrtmd.cxx index 50bf3810d917..67f02bf926d3 100644 --- a/sw/source/filter/md/wrtmd.cxx +++ b/sw/source/filter/md/wrtmd.cxx @@ -472,8 +472,10 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir { std::optional<SwMDCellInfo> oCellInfo; std::stack<SwMDTableInfo>& rTableInfos = rWrt.GetTableInfos(); + bool bInTable = false; if (!rTableInfos.empty()) { + bInTable = true; SwMDTableInfo& aTableInfo = rTableInfos.top(); auto it = aTableInfo.aCellInfos.find(rNode.GetIndex()); if (it != aTableInfo.aCellInfos.end()) @@ -499,7 +501,7 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir if (!rNodeText.isEmpty()) { // Paragraphs separate by empty lines - if (!bFirst && !oCellInfo) + if (!bFirst && !bInTable) rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); const SwFormatColl* pFormatColl = rNode.GetFormatColl(); @@ -724,7 +726,13 @@ void OutMarkdown_SwTextNode(SwMDWriter& rWrt, const SwTextNode& rNode, bool bFir } } - if (!oCellInfo || bRowEnd) + bool bCellEnd = oCellInfo && oCellInfo->bCellEnd; + if (bInTable && !bCellEnd) + { + // Separator is a space between two in-table-cell paragraphs. 
+ rWrt.Strm().WriteUnicodeOrByteText(u" "); + } + else if (!bInTable || bRowEnd) { rWrt.Strm().WriteUnicodeOrByteText(u"" SAL_NEWLINE_STRING); } @@ -767,9 +775,10 @@ void OutMarkdown_SwTableNode(SwMDWriter& rWrt, const SwTableNode& rTableNode) { rStartInfo.bRowStart = true; } + SwMDCellInfo& rEndInfo = aTableInfo.aCellInfos[pEnd->GetIndex() - 1]; + rEndInfo.bCellEnd = true; if (nBox == pLine->GetTabBoxes().size() - 1) { - SwMDCellInfo& rEndInfo = aTableInfo.aCellInfos[pEnd->GetIndex() - 1]; rEndInfo.bRowEnd = true; if (nLine == 0) { diff --git a/sw/source/filter/md/wrtmd.hxx b/sw/source/filter/md/wrtmd.hxx index d3f6d15db58f..77b6e965c792 100644 --- a/sw/source/filter/md/wrtmd.hxx +++ b/sw/source/filter/md/wrtmd.hxx @@ -33,6 +33,7 @@ struct SwMDCellInfo { bool bCellStart = false; + bool bCellEnd = false; bool bRowStart = false; bool bRowEnd = false; bool bFirstRowEnd = false;