sw/qa/extras/rtfexport/data/tdf136445-1-min.rtf | 17 +++++++ sw/qa/extras/rtfexport/rtfexport8.cxx | 14 +++++ sw/qa/extras/rtfimport/rtfimport.cxx | 2 writerfilter/source/dmapper/DomainMapper_Impl.cxx | 52 +++++++++------------- writerfilter/source/dmapper/DomainMapper_Impl.hxx | 50 +++++++++++---------- writerfilter/source/rtftok/rtfdispatchsymbol.cxx | 2 6 files changed, 83 insertions(+), 54 deletions(-)
New commits: commit 86ad08f9d25110e91e92a0badf6de75e785b3644 Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Thu Feb 1 12:53:07 2024 +0100 Commit: Michael Stahl <michael.st...@allotropia.de> CommitDate: Fri Feb 2 17:17:28 2024 +0100 writerfilter: fix missing paragraph break on tdf136445-1.rtf This causes an assert: crossrefbookmark.cxx:44: sw::mark::CrossRefBookmark::CrossRefBookmark(): Assertion `IDocumentMarkAccess::IsLegalPaMForCrossRefHeadingBookmark(rPaM) && "<CrossRefBookmark::CrossRefBookmark(..)>" "- creation of cross-reference bookmark with an illegal PaM that does not expand over exactly one whole paragraph."' failed. The problem is that there is an annotation at the end of a paragraph, and reading the annotation changes various members of DomainMapper_Impl, in particular m_bParaSectpr and m_bParaChanged, causing "bRemove" in DomainMapper::lcl_utext() to be erroneously true, removing the just inserted paragraph break again. Move all flags that are evaluated for bRemove to SubstreamContext. This causes one test failure, but it turns out that the new result is the same as in Word 2013. Test name: (anonymous namespace)::testTdf108947::TestBody equality assertion failed - Expected: Header Page 2 ? - Actual : Header Page 2 ? (regression from commit 15b886f460919ea3dce425a621dc017c2992a96b) Change-Id: I44a7a8928ab04c600d4d3c43bc4e4deeafe57d89 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/162932 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> diff --git a/sw/qa/extras/rtfexport/data/tdf136445-1-min.rtf b/sw/qa/extras/rtfexport/data/tdf136445-1-min.rtf new file mode 100644 index 000000000000..c0abd0d293be --- /dev/null +++ b/sw/qa/extras/rtfexport/data/tdf136445-1-min.rtf @@ -0,0 +1,17 @@ +{ tf1 +{\*\listtable +{\list\listtemplateid8 +{\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02E\'00;}{\levelnumbers\'02;}i-720\li720}\listid8} +} +{\listoverridetable{\listoverride\listid8\listoverridecount0\ls8}} + +\ltrpar \sectd +\pard\plain \ltrpar{ tlch\hich \ltrch\loch +I ax xoixx xuxixx xxe xixxx. (Xaxxexx 1989 x.x. xaxax a)}{ tlch\hich \ltrch\loch +{\*tnid CL}{\*tnauthor Christian}+sic!}}} +\par \pard\plain {\listtext\pard\plain E119 ab}\ilvl0\ls8 \li720 i0\lin720 in0i-720\ql\ltrpar{{\*kmkstart __RefNumPara__395941_134077278}{\*kmkend __RefNumPara__395941_134077278} tlch\hich \ltrch\loch +Xix ab xaxa ab x-a ab ab ab xix ab }{ tlch\hich \ltrch\langfe0\dbch\loch\lang255\loch +x xi = xa.} +\par +} diff --git a/sw/qa/extras/rtfexport/rtfexport8.cxx b/sw/qa/extras/rtfexport/rtfexport8.cxx index 167539197051..08ca8452f928 100644 --- a/sw/qa/extras/rtfexport/rtfexport8.cxx +++ b/sw/qa/extras/rtfexport/rtfexport8.cxx @@ -109,6 +109,20 @@ DECLARE_RTFEXPORT_TEST(testTdf158586_lostFrame, "tdf158586_lostFrame.rtf") CPPUNIT_ASSERT_EQUAL(2, getPages()); } +DECLARE_RTFEXPORT_TEST(testAnnotationPar, "tdf136445-1-min.rtf") +{ + // the problem was that the paragraph break following annotation was missing + CPPUNIT_ASSERT_EQUAL(2, getParagraphs()); + CPPUNIT_ASSERT_EQUAL( + OUString("Annotation"), + getProperty<OUString>( + getRun(getParagraph(1, "I ax xoixx xuxixx xxe xixxx. (Xaxxexx 1989 x.x. xaxax a)"), 2), + "TextPortionType")); + CPPUNIT_ASSERT( + !getProperty<OUString>(getParagraph(2, "Xix xaxa x-a xix x xi = xa."), "ListId") + .isEmpty()); +} + DECLARE_RTFEXPORT_TEST(testTdf158826_extraCR, "tdf158826_extraCR.rtf") { // Note: this is a hand-minimized sample, and very likely doesn't follow RTF { } rules... diff --git a/sw/qa/extras/rtfimport/rtfimport.cxx b/sw/qa/extras/rtfimport/rtfimport.cxx index 629f75c97f0f..b1971ecc478c 100644 --- a/sw/qa/extras/rtfimport/rtfimport.cxx +++ b/sw/qa/extras/rtfimport/rtfimport.cxx @@ -1570,7 +1570,7 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf108947) uno::Reference<text::XText> xHeaderTextLeft = getProperty<uno::Reference<text::XText>>( getStyles("PageStyles")->getByName("Default Page Style"), "HeaderTextLeft"); aActual = xHeaderTextLeft->getString(); - CPPUNIT_ASSERT_EQUAL(OUString("Header Page 2 ?"), aActual); + CPPUNIT_ASSERT_EQUAL(OUString(SAL_NEWLINE_STRING "Header Page 2 ?"), aActual); #endif } diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.cxx b/writerfilter/source/dmapper/DomainMapper_Impl.cxx index 633296ce69e1..c5f0b4ddcf16 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.cxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.cxx @@ -369,15 +369,9 @@ DomainMapper_Impl::DomainMapper_Impl( m_bIsParaMarkerChange( false ), m_bIsParaMarkerMove( false ), m_bRedlineImageInPreviousRun( false ), - m_bParaChanged( false ), - m_bIsFirstParaInSection( true ), - m_bIsFirstParaInSectionAfterRedline( true ), m_bDummyParaAddedForTableInSection( false ), m_bTextFrameInserted(false), - m_bIsPreviousParagraphFramed( false ), - m_bIsLastParaInSection( false ), m_bIsLastSectionGroup( false ), - m_bParaSectpr( false ), m_bUsingEnhancedFields( false ), m_bSdt(false), m_bIsFirstRun(false), @@ -398,7 +392,6 @@ DomainMapper_Impl::DomainMapper_Impl( m_bIsSplitPara(false), m_bIsActualParagraphFramed( false ), m_bParaAutoBefore(false), - m_bParaWithInlineObject(false), m_bSaxError(false) { m_StreamStateStack.emplace(); // add state for document body @@ -935,25 +928,27 @@ void DomainMapper_Impl::SetIsLastSectionGroup( bool bIsLast ) void DomainMapper_Impl::SetIsLastParagraphInSection( bool bIsLast ) { - m_bIsLastParaInSection = bIsLast; + m_StreamStateStack.top().bIsLastParaInSection = bIsLast; } void DomainMapper_Impl::SetIsFirstParagraphInSection( bool bIsFirst ) { - m_bIsFirstParaInSection = bIsFirst; + m_StreamStateStack.top().bIsFirstParaInSection = bIsFirst; } void DomainMapper_Impl::SetIsFirstParagraphInSectionAfterRedline( bool bIsFirstAfterRedline ) { - m_bIsFirstParaInSectionAfterRedline = bIsFirstAfterRedline; + m_StreamStateStack.top().bIsFirstParaInSectionAfterRedline = bIsFirstAfterRedline; } bool DomainMapper_Impl::GetIsFirstParagraphInSection( bool bAfterRedline ) const { // Anchored objects may include multiple paragraphs, // and none of them should be considered the first para in section. - return ( bAfterRedline ? m_bIsFirstParaInSectionAfterRedline : m_bIsFirstParaInSection ) + return (bAfterRedline + ? m_StreamStateStack.top().bIsFirstParaInSectionAfterRedline + : m_StreamStateStack.top().bIsFirstParaInSection) && !IsInShape() && !IsInComments() && !IsInFootOrEndnote(); @@ -975,13 +970,11 @@ void DomainMapper_Impl::SetIsTextFrameInserted( bool bIsInserted ) m_bTextFrameInserted = bIsInserted; } - void DomainMapper_Impl::SetParaSectpr(bool bParaSectpr) { - m_bParaSectpr = bParaSectpr; + m_StreamStateStack.top().bParaSectpr = bParaSectpr; } - void DomainMapper_Impl::SetSdt(bool bSdt) { m_bSdt = bSdt; @@ -2828,7 +2821,7 @@ void DomainMapper_Impl::finishParagraph( const PropertyMapPtr& pPropertyMap, con xCur->goLeft( 1 , true ); // Extend the redline ranges for empty paragraphs - if ( !m_bParaChanged && m_previousRedline ) + if (!m_StreamStateStack.top().bParaChanged && m_previousRedline) CreateRedline( xCur, m_previousRedline ); CheckParaMarkerRedline( xCur ); } @@ -2972,21 +2965,21 @@ void DomainMapper_Impl::finishParagraph( const PropertyMapPtr& pPropertyMap, con else SetIsPreviousParagraphFramed(false); - m_bRemoveThisParagraph = false; + m_StreamStateStack.top().bRemoveThisParagraph = false; if( !IsInHeaderFooter() && !IsInShape() && (!pParaContext || !pParaContext->props().IsFrameMode()) ) { // If the paragraph is in a frame, shape or header/footer, it's not a paragraph of the section itself. SetIsFirstParagraphInSection(false); // don't count an empty deleted paragraph as first paragraph in section to avoid of // the deletion of the next empty paragraph later, resulting loss of the associated page break - if (!m_previousRedline || m_bParaChanged) + if (!m_previousRedline || m_StreamStateStack.top().bParaChanged) { SetIsFirstParagraphInSectionAfterRedline(false); SetIsLastParagraphInSection(false); } } m_previousRedline.clear(); - m_bParaChanged = false; + m_StreamStateStack.top().bParaChanged = false; if (IsInComments() && pParaContext) { @@ -3023,7 +3016,7 @@ void DomainMapper_Impl::finishParagraph( const PropertyMapPtr& pPropertyMap, con } m_bParaAutoBefore = false; - m_bParaWithInlineObject = false; + m_StreamStateStack.top().bParaWithInlineObject = false; #ifdef DBG_UTIL TagLogger::getInstance().endElement(); @@ -3303,7 +3296,7 @@ void DomainMapper_Impl::applyToggleAttributes(const PropertyMapPtr& pPropertyMap m_pParaMarkerRedlineMove.clear(); } CheckRedline( xTextRange ); - m_bParaChanged = true; + m_StreamStateStack.top().bParaChanged = true; //getTableManager( ).handle(xTextRange); } @@ -4153,7 +4146,8 @@ void DomainMapper_Impl::CheckRedline( uno::Reference< text::XTextRange > const& // only export ParagraphFormat, when there is no other redline in the same text portion to avoid missing redline compression, // but always export the first ParagraphFormat redline in a paragraph to keep the paragraph style change data for rejection - if( (!bUsedRange || !m_bParaChanged) && GetTopContextOfType(CONTEXT_PARAGRAPH) ) + if ((!bUsedRange || !m_StreamStateStack.top().bParaChanged) + && GetTopContextOfType(CONTEXT_PARAGRAPH)) { std::vector<RedlineParamsPtr>& avRedLines = GetTopContextOfType(CONTEXT_PARAGRAPH)->Redlines(); for( const auto& rRedline : avRedLines ) @@ -4800,7 +4794,7 @@ void DomainMapper_Impl::PushShapeContext( const uno::Reference< drawing::XShape getPropertyName( PROP_OPAQUE ), uno::Any( true ) ); } - m_bParaChanged = true; + m_StreamStateStack.top().bParaChanged = true; getTableManager().setIsInShape(true); } catch ( const uno::Exception& ) @@ -6783,15 +6777,15 @@ OUString DomainMapper_Impl::extractTocTitle() css::uno::Reference<css::beans::XPropertySet> DomainMapper_Impl::StartIndexSectionChecked(const OUString& sServiceName) { - if (m_bParaChanged) + if (m_StreamStateStack.top().bParaChanged) { - finishParagraph(GetTopContextOfType(CONTEXT_PARAGRAPH), false); // resets m_bParaChanged + finishParagraph(GetTopContextOfType(CONTEXT_PARAGRAPH), false); // resets bParaChanged PopProperties(CONTEXT_PARAGRAPH); PushProperties(CONTEXT_PARAGRAPH); SetIsFirstRun(true); // The first paragraph of the index that is continuation of just finished one needs to be - // removed when finished (unless more content will arrive, which will set m_bParaChanged) - m_bRemoveThisParagraph = true; + // removed when finished (unless more content will arrive, which will set bParaChanged) + m_StreamStateStack.top().bRemoveThisParagraph = true; } const auto& xTextAppend = GetTopTextAppend(); const auto xTextRange = xTextAppend->getEnd(); @@ -8616,7 +8610,7 @@ void DomainMapper_Impl::PopFieldContext() if (m_bStartedTOC || m_bStartIndex || m_bStartBibliography) { // inside SDT, last empty paragraph is also part of index - if (!m_bParaChanged && !m_xSdtEntryStart) + if (!m_StreamStateStack.top().bParaChanged && !m_xSdtEntryStart) { // End of index is the first item on a new paragraph - this paragraph // should not be part of index @@ -8642,7 +8636,7 @@ void DomainMapper_Impl::PopFieldContext() m_bStartedTOC = false; m_aTextAppendStack.pop(); m_StreamStateStack.top().bTextInserted = false; - m_bParaChanged = true; // the paragraph must stay anyway + m_StreamStateStack.top().bParaChanged = true; // the paragraph must stay anyway } m_bStartTOC = false; m_bStartIndex = false; @@ -9239,7 +9233,7 @@ void DomainMapper_Impl::ImportGraphic(const writerfilter::Reference<Properties> } else if (m_eGraphicImportType == IMPORT_AS_DETECTED_INLINE) { - m_bParaWithInlineObject = true; + m_StreamStateStack.top().bParaWithInlineObject = true; // store inline images with track changes, because the anchor point // to set redlining is not available yet diff --git a/writerfilter/source/dmapper/DomainMapper_Impl.hxx b/writerfilter/source/dmapper/DomainMapper_Impl.hxx index 7d3c27ae2fce..0464d5d9fba1 100644 --- a/writerfilter/source/dmapper/DomainMapper_Impl.hxx +++ b/writerfilter/source/dmapper/DomainMapper_Impl.hxx @@ -194,14 +194,26 @@ struct SubstreamContext * inTbl SPRM or not). */ sal_Int32 nTableDepth = 0; - // deferred breaks need to be saved for RTF, probably not for DOCX - bool bIsColumnBreakDeferred = false; - bool bIsPageBreakDeferred = false; + // deferred breaks need to be saved for RTF, also for DOCX annotations + bool bIsColumnBreakDeferred = false; + bool bIsPageBreakDeferred = false; sal_Int32 nLineBreaksDeferred = 0; /// Current paragraph had at least one field in it. - bool bParaHadField = false; + bool bParaHadField = false; /// Current paragraph in a table is first paragraph of a cell - bool bFirstParagraphInCell = true; + bool bFirstParagraphInCell = true; + /// If the current paragraph has any runs. + bool bParaChanged = false; + bool bIsFirstParaInSectionAfterRedline = true; + bool bIsFirstParaInSection = true; + bool bIsLastParaInSection = false; + /// If the current paragraph contains section property definitions. + bool bParaSectpr = false; + bool bIsPreviousParagraphFramed = false; + /// Current paragraph had at least one inline object in it. + bool bParaWithInlineObject = false; + /// This is a continuation of already finished paragraph - e.g., first in an index section + bool bRemoveThisParagraph = false; }; /// Information about a paragraph to be finished after a field end. @@ -614,25 +626,15 @@ private: // text ZWSPs to keep the change tracking of the image in Writer.) bool m_bRedlineImageInPreviousRun; - /// If the current paragraph has any runs. - bool m_bParaChanged; - bool m_bIsFirstParaInSection; - bool m_bIsFirstParaInSectionAfterRedline; bool m_bIsFirstParaInShape = false; bool m_bDummyParaAddedForTableInSection; bool m_bTextFrameInserted; - bool m_bIsPreviousParagraphFramed; - bool m_bIsLastParaInSection; bool m_bIsLastSectionGroup; - /// If the current paragraph contains section property definitions. - bool m_bParaSectpr; bool m_bUsingEnhancedFields; /// If the current paragraph is inside a structured document element. bool m_bSdt; bool m_bIsFirstRun; bool m_bIsOutsideAParagraph; - /// This is a continuation of already finished paragraph - e.g., first in an index section - bool m_bRemoveThisParagraph = false; css::uno::Reference< css::text::XTextCursor > m_xTOCMarkerCursor; @@ -731,7 +733,7 @@ public: void SetIsDecimalComma() { m_bIsDecimalComma = true; }; void SetIsLastParagraphInSection( bool bIsLast ); - bool GetIsLastParagraphInSection() const { return m_bIsLastParaInSection;} + bool GetIsLastParagraphInSection() const { return m_StreamStateStack.top().bIsLastParaInSection; } void SetRubySprmId( sal_uInt32 nSprmId) { m_aRubyInfo.nSprmId = nSprmId ; } void SetRubyText( OUString const &sText, OUString const &sStyle) { m_aRubyInfo.sRubyText = sText; @@ -755,10 +757,11 @@ public: bool GetIsTextFrameInserted() const { return m_bTextFrameInserted;} void SetIsTextDeleted(bool bIsTextDeleted) { m_bTextDeleted = bIsTextDeleted; } - void SetIsPreviousParagraphFramed( bool bIsFramed ) { m_bIsPreviousParagraphFramed = bIsFramed; } - bool GetIsPreviousParagraphFramed() const { return m_bIsPreviousParagraphFramed; } + void SetIsPreviousParagraphFramed(bool const bIsFramed) + { m_StreamStateStack.top().bIsPreviousParagraphFramed = bIsFramed; } + bool GetIsPreviousParagraphFramed() const { return m_StreamStateStack.top().bIsPreviousParagraphFramed; } void SetParaSectpr(bool bParaSectpr); - bool GetParaSectpr() const { return m_bParaSectpr;} + bool GetParaSectpr() const { return m_StreamStateStack.top().bParaSectpr; } void SetSymbolChar( sal_Int32 nSymbol) { m_aSymbolData.cSymbol = sal_Unicode(nSymbol); } void SetSymbolFont( OUString const &rName ) { m_aSymbolData.sFont = rName; } @@ -772,9 +775,10 @@ public: /// Gives access to the currently open run/inline SDTs. const std::stack<BookmarkInsertPosition>& GetSdtStarts() const; - bool GetParaChanged() const { return m_bParaChanged;} + + bool GetParaChanged() const { return m_StreamStateStack.top().bParaChanged; } bool GetParaHadField() const { return m_StreamStateStack.top().bParaHadField; } - bool GetRemoveThisPara() const { return m_bRemoveThisParagraph; } + bool GetRemoveThisPara() const { return m_StreamStateStack.top().bRemoveThisParagraph; } void deferBreak( BreakType deferredBreakType ); bool isBreakDeferred( BreakType deferredBreakType ); @@ -1206,7 +1210,7 @@ public: bool m_bIsActualParagraphFramed; std::deque<css::uno::Any> m_aStoredRedlines[StoredRedlines::NONE]; - bool IsParaWithInlineObject() const { return m_bParaWithInlineObject; } + bool IsParaWithInlineObject() const { return m_StreamStateStack.top().bParaWithInlineObject; } css::uno::Reference< css::embed::XStorage > m_xDocumentStorage; @@ -1236,8 +1240,6 @@ private: css::uno::Reference<css::beans::XPropertySet> m_xPreviousParagraph; /// Current paragraph has automatic before spacing. bool m_bParaAutoBefore; - /// Current paragraph had at least one inline object in it. - bool m_bParaWithInlineObject; /// SAXException was seen so document will be abandoned bool m_bSaxError; diff --git a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx index 6c1c94b944d9..0f01d79f5cd4 100644 --- a/writerfilter/source/rtftok/rtfdispatchsymbol.cxx +++ b/writerfilter/source/rtftok/rtfdispatchsymbol.cxx @@ -406,6 +406,8 @@ RTFError RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword) } sal_uInt8 const sBreak[] = { 0xc }; Mapper().text(sBreak, 1); + // testFdo81892 don't do another \par break directly; because of + // GetSplitPgBreakAndParaMark() it does finishParagraph *twice* m_bNeedCr = true; } }