sw/inc/EnhancedPDFExportHelper.hxx | 3 sw/source/core/text/EnhancedPDFExportHelper.cxx | 49 ++++++++++- vcl/qa/cppunit/pdfexport/pdfexport.cxx | 102 ++++++++++++++++++++++-- 3 files changed, 141 insertions(+), 13 deletions(-)
New commits: commit bc3d8776a49c898710fd689f2d8ba7abf0db9954 Author: Michael Stahl <michael.st...@allotropia.de> AuthorDate: Tue Oct 24 13:51:39 2023 +0200 Commit: Miklos Vajna <vmik...@collabora.com> CommitDate: Wed Oct 25 11:28:35 2023 +0200 tdf#156565 sw: PDF/UA export: only one Link ILSE per link The problem is that for a hyperlink, multiple Link SEs are created, but only one Link annotation; the Link SEs all point to the annotation but the annotation can only point to one Link SE. So try to create only one Link SE for a hyperlink, similar to commit ee3c3fcf5c48964f7bc1d64484409f072c614866. This could be further subdivided by Spans when formatting properties change but it looks complicated and rarely needed. Change-Id: I7d158b599ec744b03e78eeca88d717183f2ba1dc Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158387 Tested-by: Jenkins Reviewed-by: Michael Stahl <michael.st...@allotropia.de> (cherry picked from commit 4c5283a3a11008a06a995c49ed777734dc1f6066) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158286 Reviewed-by: Miklos Vajna <vmik...@collabora.com> diff --git a/sw/inc/EnhancedPDFExportHelper.hxx b/sw/inc/EnhancedPDFExportHelper.hxx index 1ab0f8868af4..542138157d2f 100644 --- a/sw/inc/EnhancedPDFExportHelper.hxx +++ b/sw/inc/EnhancedPDFExportHelper.hxx @@ -39,6 +39,7 @@ class SwPrintData; class SwTextPainter; class SwEditShell; class StringRangeEnumerator; +class SwTextAttr; class SwTextNode; class SwTable; class SwNumberTreeNode; @@ -161,7 +162,7 @@ class SwTaggedPDFHelper void EndCurrentSpan(); void CreateCurrentSpan(SwTextPaintInfo const& rInf, OUString const& rStyleName); - bool CheckContinueSpan(SwTextPaintInfo const& rInf, std::u16string_view rStyleName); + bool CheckContinueSpan(SwTextPaintInfo const& rInf, std::u16string_view rStyleName, SwTextAttr const* pInetFormatAttr); bool CheckReopenTag(); void CheckRestoreTag() const; diff --git a/sw/source/core/text/EnhancedPDFExportHelper.cxx b/sw/source/core/text/EnhancedPDFExportHelper.cxx index 9f99c58b4a35..1abd288b787d 100644 --- a/sw/source/core/text/EnhancedPDFExportHelper.cxx +++ b/sw/source/core/text/EnhancedPDFExportHelper.cxx @@ -156,6 +156,7 @@ struct SwEnhancedPDFState }; ::std::optional<Span> m_oCurrentSpan; + ::std::optional<SwTextAttr const*> m_oCurrentLink; SwEnhancedPDFState(LanguageType const eLanguageDefault) : m_eLanguageDefault(eLanguageDefault) @@ -1597,13 +1598,18 @@ void SwTaggedPDFHelper::BeginBlockStructureElements() void SwTaggedPDFHelper::EndStructureElements() { - if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan) + if (mpFrameInfo != nullptr) { - if (mpFrameInfo != nullptr) + if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan) { // close span at end of paragraph mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan.reset(); ++m_nEndStructureElement; } + if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink) + { // close link at end of paragraph + mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink.reset(); + ++m_nEndStructureElement; + } } while ( m_nEndStructureElement > 0 ) @@ -1640,8 +1646,32 @@ void SwTaggedPDFHelper::CreateCurrentSpan( } bool SwTaggedPDFHelper::CheckContinueSpan( - SwTextPaintInfo const& rInf, std::u16string_view const rStyleName) + SwTextPaintInfo const& rInf, std::u16string_view const rStyleName, + SwTextAttr const*const pInetFormatAttr) { + // for now, don't create span inside of link - this should be very rare + // situation and it looks complicated to implement. + assert(!mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan + || !mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink); + if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink) + { + if (pInetFormatAttr && pInetFormatAttr == *mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink) + { + return true; + } + else + { + mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink.reset(); + EndTag(); + return false; + } + } + if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan && pInetFormatAttr) + { + EndCurrentSpan(); + return false; + } + if (!mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan) return false; @@ -1691,7 +1721,7 @@ void SwTaggedPDFHelper::BeginInlineStructureElements() } // note: ILSE may be nested, so only end the span if needed to start new one - bool const isContinueSpan(CheckContinueSpan(rInf, sStyleName)); + bool const isContinueSpan(CheckContinueSpan(rInf, sStyleName, pInetFormatAttr)); sal_uInt16 nPDFType = USHRT_MAX; OUString aPDFType; @@ -1714,8 +1744,15 @@ void SwTaggedPDFHelper::BeginInlineStructureElements() // Check for Link: if( pInetFormatAttr ) { - nPDFType = vcl::PDFWriter::Link; - aPDFType = aLinkString; + if (!isContinueSpan) + { + nPDFType = vcl::PDFWriter::Link; + aPDFType = aLinkString; + assert(!mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink); + mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink.emplace(pInetFormatAttr); + // leave it open to let next portion decide to merge or close + --m_nEndStructureElement; + } } // Check for Quote/Code character style: else if (sStyleName == aQuotation) diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx index f83210b9acb5..85a7bf215454 100644 --- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx +++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx @@ -3685,6 +3685,21 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf157817) auto pST10 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT10->Lookup("S")); CPPUNIT_ASSERT_EQUAL(OString("Contents#201"), pST10->GetValue()); + auto pKidsT10 = dynamic_cast<vcl::filter::PDFArrayElement*>(pObjectT10->Lookup("K")); + CPPUNIT_ASSERT(pKidsT10); + auto pKidsT10v = pKidsT10->GetElements(); + CPPUNIT_ASSERT_EQUAL(size_t(1), pKidsT10v.size()); + + // there is one and only one Link + auto pRefKidT100 = dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsT10v[0]); + CPPUNIT_ASSERT(pRefKidT100); + auto pObjectT100 = pRefKidT100->LookupObject(); + CPPUNIT_ASSERT(pObjectT100); + auto pTypeT100 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT100->Lookup("Type")); + CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pTypeT100->GetValue()); + auto pST100 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT100->Lookup("S")); + CPPUNIT_ASSERT_EQUAL(OString("Link"), pST100->GetValue()); + auto pRefKidT2 = dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsTv[1]); CPPUNIT_ASSERT(pRefKidT2); auto pObjectT2 = pRefKidT2->LookupObject(); @@ -3707,6 +3722,21 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf157817) auto pST20 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT20->Lookup("S")); CPPUNIT_ASSERT_EQUAL(OString("Contents#201"), pST20->GetValue()); + auto pKidsT20 = dynamic_cast<vcl::filter::PDFArrayElement*>(pObjectT20->Lookup("K")); + CPPUNIT_ASSERT(pKidsT20); + auto pKidsT20v = pKidsT20->GetElements(); + CPPUNIT_ASSERT_EQUAL(size_t(1), pKidsT20v.size()); + + // there is one and only one Link + auto pRefKidT200 = dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsT20v[0]); + CPPUNIT_ASSERT(pRefKidT200); + auto pObjectT200 = pRefKidT200->LookupObject(); + CPPUNIT_ASSERT(pObjectT200); + auto pTypeT200 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT200->Lookup("Type")); + CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pTypeT200->GetValue()); + auto pST200 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT200->Lookup("S")); + CPPUNIT_ASSERT_EQUAL(OString("Link"), pST200->GetValue()); + auto pRefKidT3 = dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsTv[1]); CPPUNIT_ASSERT(pRefKidT3); auto pObjectT3 = pRefKidT3->LookupObject(); @@ -3728,6 +3758,21 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf157817) CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pTypeT30->GetValue()); auto pST30 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT30->Lookup("S")); CPPUNIT_ASSERT_EQUAL(OString("Contents#201"), pST30->GetValue()); + + auto pKidsT30 = dynamic_cast<vcl::filter::PDFArrayElement*>(pObjectT30->Lookup("K")); + CPPUNIT_ASSERT(pKidsT30); + auto pKidsT30v = pKidsT30->GetElements(); + CPPUNIT_ASSERT_EQUAL(size_t(1), pKidsT30v.size()); + + // there is one and only one Link + auto pRefKidT300 = dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsT30v[0]); + CPPUNIT_ASSERT(pRefKidT300); + auto pObjectT300 = pRefKidT300->LookupObject(); + CPPUNIT_ASSERT(pObjectT300); + auto pTypeT300 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT300->Lookup("Type")); + CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pTypeT300->GetValue()); + auto pST300 = dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT300->Lookup("S")); + CPPUNIT_ASSERT_EQUAL(OString("Link"), pST300->GetValue()); } CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf135638) @@ -4078,8 +4123,8 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testSpans) CPPUNIT_ASSERT(pKids10210); // assume there are no MCID ref at this level auto vKids10210 = pKids10210->GetElements(); - // only one span - CPPUNIT_ASSERT_EQUAL(size_t(1), vKids10210.size()); + // 2 span and a hyperlink + CPPUNIT_ASSERT_EQUAL(size_t(3), vKids10210.size()); auto pRefKid102100 = dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids10210[0]); CPPUNIT_ASSERT(pRefKid102100); @@ -4095,11 +4140,56 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testSpans) = dynamic_cast<vcl::filter::PDFArrayElement*>(pObject102100->Lookup("K")); CPPUNIT_ASSERT(pKids102100); auto vKids102100 = pKids102100->GetElements(); - // there is a hyperlink and a footnote - auto nLinks(0); for (size_t i = 0; i < vKids102100.size(); ++i) { auto pKid = dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids102100[i]); + CPPUNIT_ASSERT(!pKid); + } + + auto pRefKid102101 = dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids10210[1]); + CPPUNIT_ASSERT(pRefKid102101); + auto pObject102101 = pRefKid102101->LookupObject(); + CPPUNIT_ASSERT(pObject102101); + auto pType102101 + = dynamic_cast<vcl::filter::PDFNameElement*>(pObject102101->Lookup("Type")); + CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pType102101->GetValue()); + auto pS102101 + = dynamic_cast<vcl::filter::PDFNameElement*>(pObject102101->Lookup("S")); + CPPUNIT_ASSERT_EQUAL(OString("Link"), pS102101->GetValue()); + auto pKids102101 + = dynamic_cast<vcl::filter::PDFArrayElement*>(pObject102101->Lookup("K")); + CPPUNIT_ASSERT(pKids102101); + auto vKids102101 = pKids102101->GetElements(); + auto nRef(0); + for (size_t i = 0; i < vKids102101.size(); ++i) + { + auto pKid = dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids102101[i]); + if (pKid) + { + ++nRef; // annotation + } + } + CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nRef)>(1), nRef); + + auto pRefKid102102 = dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids10210[2]); + CPPUNIT_ASSERT(pRefKid102102); + auto pObject102102 = pRefKid102102->LookupObject(); + CPPUNIT_ASSERT(pObject102102); + auto pType102102 + = dynamic_cast<vcl::filter::PDFNameElement*>(pObject102102->Lookup("Type")); + CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pType102102->GetValue()); + auto pS102102 + = dynamic_cast<vcl::filter::PDFNameElement*>(pObject102102->Lookup("S")); + CPPUNIT_ASSERT_EQUAL(OString("Span"), pS102102->GetValue()); + auto pKids102102 + = dynamic_cast<vcl::filter::PDFArrayElement*>(pObject102102->Lookup("K")); + CPPUNIT_ASSERT(pKids102102); + auto vKids102102 = pKids102102->GetElements(); + // there is a footnote + auto nFtn(0); + for (size_t i = 0; i < vKids102102.size(); ++i) + { + auto pKid = dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids102102[i]); if (pKid) { auto pObject = pKid->LookupObject(); @@ -4109,10 +4199,10 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testSpans) CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pType->GetValue()); auto pS = dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("S")); CPPUNIT_ASSERT_EQUAL(OString("Link"), pS->GetValue()); - ++nLinks; + ++nFtn; } } - CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nLinks)>(2), nLinks); + CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nFtn)>(1), nFtn); auto pRefKid103 = dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids10[3]); CPPUNIT_ASSERT(pRefKid103);