sw/inc/EnhancedPDFExportHelper.hxx              |    3 
 sw/source/core/text/EnhancedPDFExportHelper.cxx |   49 ++++++++++-
 vcl/qa/cppunit/pdfexport/pdfexport.cxx          |  102 ++++++++++++++++++++++--
 3 files changed, 141 insertions(+), 13 deletions(-)

New commits:
commit bc3d8776a49c898710fd689f2d8ba7abf0db9954
Author:     Michael Stahl <michael.st...@allotropia.de>
AuthorDate: Tue Oct 24 13:51:39 2023 +0200
Commit:     Miklos Vajna <vmik...@collabora.com>
CommitDate: Wed Oct 25 11:28:35 2023 +0200

    tdf#156565 sw: PDF/UA export: only one Link ILSE per link
    
    The problem is that for a hyperlink, multiple Link SEs are created, but
    only one Link annotation; the Link SEs all point to the annotation but
    the annotation can only point to one Link SE.
    
    So try to create only one Link SE for a hyperlink, similar to commit
    ee3c3fcf5c48964f7bc1d64484409f072c614866.  The Link SE could be
    further subdivided by Spans when formatting properties change, but that
    looks complicated and is rarely needed.
    
    Change-Id: I7d158b599ec744b03e78eeca88d717183f2ba1dc
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158387
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>
    (cherry picked from commit 4c5283a3a11008a06a995c49ed777734dc1f6066)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/158286
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>

diff --git a/sw/inc/EnhancedPDFExportHelper.hxx 
b/sw/inc/EnhancedPDFExportHelper.hxx
index 1ab0f8868af4..542138157d2f 100644
--- a/sw/inc/EnhancedPDFExportHelper.hxx
+++ b/sw/inc/EnhancedPDFExportHelper.hxx
@@ -39,6 +39,7 @@ class SwPrintData;
 class SwTextPainter;
 class SwEditShell;
 class StringRangeEnumerator;
+class SwTextAttr;
 class SwTextNode;
 class SwTable;
 class SwNumberTreeNode;
@@ -161,7 +162,7 @@ class SwTaggedPDFHelper
 
     void EndCurrentSpan();
     void CreateCurrentSpan(SwTextPaintInfo const& rInf, OUString const& 
rStyleName);
-    bool CheckContinueSpan(SwTextPaintInfo const& rInf, std::u16string_view 
rStyleName);
+    bool CheckContinueSpan(SwTextPaintInfo const& rInf, std::u16string_view 
rStyleName, SwTextAttr const* pInetFormatAttr);
 
     bool CheckReopenTag();
     void CheckRestoreTag() const;
diff --git a/sw/source/core/text/EnhancedPDFExportHelper.cxx 
b/sw/source/core/text/EnhancedPDFExportHelper.cxx
index 9f99c58b4a35..1abd288b787d 100644
--- a/sw/source/core/text/EnhancedPDFExportHelper.cxx
+++ b/sw/source/core/text/EnhancedPDFExportHelper.cxx
@@ -156,6 +156,7 @@ struct SwEnhancedPDFState
     };
 
     ::std::optional<Span> m_oCurrentSpan;
+    ::std::optional<SwTextAttr const*> m_oCurrentLink;
 
     SwEnhancedPDFState(LanguageType const eLanguageDefault)
         : m_eLanguageDefault(eLanguageDefault)
@@ -1597,13 +1598,18 @@ void SwTaggedPDFHelper::BeginBlockStructureElements()
 
 void SwTaggedPDFHelper::EndStructureElements()
 {
-    if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan)
+    if (mpFrameInfo != nullptr)
     {
-        if (mpFrameInfo != nullptr)
+        if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan)
         {   // close span at end of paragraph
             mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan.reset();
             ++m_nEndStructureElement;
         }
+        if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink)
+        {   // close link at end of paragraph
+            mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink.reset();
+            ++m_nEndStructureElement;
+        }
     }
 
     while ( m_nEndStructureElement > 0 )
@@ -1640,8 +1646,32 @@ void SwTaggedPDFHelper::CreateCurrentSpan(
 }
 
 bool SwTaggedPDFHelper::CheckContinueSpan(
-        SwTextPaintInfo const& rInf, std::u16string_view const rStyleName)
+        SwTextPaintInfo const& rInf, std::u16string_view const rStyleName,
+        SwTextAttr const*const pInetFormatAttr)
 {
+    // for now, don't create a span inside of a link - this should be a very
+    // rare situation and it looks complicated to implement.
+    assert(!mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan
+        || !mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink);
+    if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink)
+    {
+        if (pInetFormatAttr && pInetFormatAttr == 
*mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink)
+        {
+            return true;
+        }
+        else
+        {
+            mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink.reset();
+            EndTag();
+            return false;
+        }
+    }
+    if (mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan && pInetFormatAttr)
+    {
+        EndCurrentSpan();
+        return false;
+    }
+
     if (!mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentSpan)
         return false;
 
@@ -1691,7 +1721,7 @@ void SwTaggedPDFHelper::BeginInlineStructureElements()
     }
 
     // note: ILSE may be nested, so only end the span if needed to start new 
one
-    bool const isContinueSpan(CheckContinueSpan(rInf, sStyleName));
+    bool const isContinueSpan(CheckContinueSpan(rInf, sStyleName, 
pInetFormatAttr));
 
     sal_uInt16 nPDFType = USHRT_MAX;
     OUString aPDFType;
@@ -1714,8 +1744,15 @@ void SwTaggedPDFHelper::BeginInlineStructureElements()
                 // Check for Link:
                 if( pInetFormatAttr )
                 {
-                    nPDFType = vcl::PDFWriter::Link;
-                    aPDFType = aLinkString;
+                    if (!isContinueSpan)
+                    {
+                        nPDFType = vcl::PDFWriter::Link;
+                        aPDFType = aLinkString;
+                        
assert(!mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink);
+                        
mpPDFExtOutDevData->GetSwPDFState()->m_oCurrentLink.emplace(pInetFormatAttr);
+                        // leave it open to let next portion decide to merge 
or close
+                        --m_nEndStructureElement;
+                    }
                 }
                 // Check for Quote/Code character style:
                 else if (sStyleName == aQuotation)
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx 
b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index f83210b9acb5..85a7bf215454 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -3685,6 +3685,21 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf157817)
     auto pST10 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT10->Lookup("S"));
     CPPUNIT_ASSERT_EQUAL(OString("Contents#201"), pST10->GetValue());
 
+    auto pKidsT10 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObjectT10->Lookup("K"));
+    CPPUNIT_ASSERT(pKidsT10);
+    auto pKidsT10v = pKidsT10->GetElements();
+    CPPUNIT_ASSERT_EQUAL(size_t(1), pKidsT10v.size());
+
+    // there is one and only one Link
+    auto pRefKidT100 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsT10v[0]);
+    CPPUNIT_ASSERT(pRefKidT100);
+    auto pObjectT100 = pRefKidT100->LookupObject();
+    CPPUNIT_ASSERT(pObjectT100);
+    auto pTypeT100 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT100->Lookup("Type"));
+    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pTypeT100->GetValue());
+    auto pST100 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT100->Lookup("S"));
+    CPPUNIT_ASSERT_EQUAL(OString("Link"), pST100->GetValue());
+
     auto pRefKidT2 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsTv[1]);
     CPPUNIT_ASSERT(pRefKidT2);
     auto pObjectT2 = pRefKidT2->LookupObject();
@@ -3707,6 +3722,21 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf157817)
     auto pST20 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT20->Lookup("S"));
     CPPUNIT_ASSERT_EQUAL(OString("Contents#201"), pST20->GetValue());
 
+    auto pKidsT20 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObjectT20->Lookup("K"));
+    CPPUNIT_ASSERT(pKidsT20);
+    auto pKidsT20v = pKidsT20->GetElements();
+    CPPUNIT_ASSERT_EQUAL(size_t(1), pKidsT20v.size());
+
+    // there is one and only one Link
+    auto pRefKidT200 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsT20v[0]);
+    CPPUNIT_ASSERT(pRefKidT200);
+    auto pObjectT200 = pRefKidT200->LookupObject();
+    CPPUNIT_ASSERT(pObjectT200);
+    auto pTypeT200 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT200->Lookup("Type"));
+    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pTypeT200->GetValue());
+    auto pST200 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT200->Lookup("S"));
+    CPPUNIT_ASSERT_EQUAL(OString("Link"), pST200->GetValue());
+
     auto pRefKidT3 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsTv[1]);
     CPPUNIT_ASSERT(pRefKidT3);
     auto pObjectT3 = pRefKidT3->LookupObject();
@@ -3728,6 +3758,21 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf157817)
     CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pTypeT30->GetValue());
     auto pST30 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT30->Lookup("S"));
     CPPUNIT_ASSERT_EQUAL(OString("Contents#201"), pST30->GetValue());
+
+    auto pKidsT30 = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObjectT30->Lookup("K"));
+    CPPUNIT_ASSERT(pKidsT30);
+    auto pKidsT30v = pKidsT30->GetElements();
+    CPPUNIT_ASSERT_EQUAL(size_t(1), pKidsT30v.size());
+
+    // there is one and only one Link
+    auto pRefKidT300 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(pKidsT30v[0]);
+    CPPUNIT_ASSERT(pRefKidT300);
+    auto pObjectT300 = pRefKidT300->LookupObject();
+    CPPUNIT_ASSERT(pObjectT300);
+    auto pTypeT300 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT300->Lookup("Type"));
+    CPPUNIT_ASSERT_EQUAL(OString("StructElem"), pTypeT300->GetValue());
+    auto pST300 = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObjectT300->Lookup("S"));
+    CPPUNIT_ASSERT_EQUAL(OString("Link"), pST300->GetValue());
 }
 
 CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf135638)
@@ -4078,8 +4123,8 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testSpans)
                 CPPUNIT_ASSERT(pKids10210);
                 // assume there are no MCID ref at this level
                 auto vKids10210 = pKids10210->GetElements();
-                // only one span
-                CPPUNIT_ASSERT_EQUAL(size_t(1), vKids10210.size());
+                // 2 spans and a hyperlink
+                CPPUNIT_ASSERT_EQUAL(size_t(3), vKids10210.size());
 
                 auto pRefKid102100 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids10210[0]);
                 CPPUNIT_ASSERT(pRefKid102100);
@@ -4095,11 +4140,56 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testSpans)
                     = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject102100->Lookup("K"));
                 CPPUNIT_ASSERT(pKids102100);
                 auto vKids102100 = pKids102100->GetElements();
-                // there is a hyperlink and a footnote
-                auto nLinks(0);
                 for (size_t i = 0; i < vKids102100.size(); ++i)
                 {
                     auto pKid = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids102100[i]);
+                    CPPUNIT_ASSERT(!pKid);
+                }
+
+                auto pRefKid102101 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids10210[1]);
+                CPPUNIT_ASSERT(pRefKid102101);
+                auto pObject102101 = pRefKid102101->LookupObject();
+                CPPUNIT_ASSERT(pObject102101);
+                auto pType102101
+                    = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject102101->Lookup("Type"));
+                CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType102101->GetValue());
+                auto pS102101
+                    = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject102101->Lookup("S"));
+                CPPUNIT_ASSERT_EQUAL(OString("Link"), pS102101->GetValue());
+                auto pKids102101
+                    = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject102101->Lookup("K"));
+                CPPUNIT_ASSERT(pKids102101);
+                auto vKids102101 = pKids102101->GetElements();
+                auto nRef(0);
+                for (size_t i = 0; i < vKids102101.size(); ++i)
+                {
+                    auto pKid = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids102101[i]);
+                    if (pKid)
+                    {
+                        ++nRef; // annotation
+                    }
+                }
+                CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nRef)>(1), nRef);
+
+                auto pRefKid102102 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids10210[2]);
+                CPPUNIT_ASSERT(pRefKid102102);
+                auto pObject102102 = pRefKid102102->LookupObject();
+                CPPUNIT_ASSERT(pObject102102);
+                auto pType102102
+                    = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject102102->Lookup("Type"));
+                CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType102102->GetValue());
+                auto pS102102
+                    = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject102102->Lookup("S"));
+                CPPUNIT_ASSERT_EQUAL(OString("Span"), pS102102->GetValue());
+                auto pKids102102
+                    = 
dynamic_cast<vcl::filter::PDFArrayElement*>(pObject102102->Lookup("K"));
+                CPPUNIT_ASSERT(pKids102102);
+                auto vKids102102 = pKids102102->GetElements();
+                // there is a footnote
+                auto nFtn(0);
+                for (size_t i = 0; i < vKids102102.size(); ++i)
+                {
+                    auto pKid = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids102102[i]);
                     if (pKid)
                     {
                         auto pObject = pKid->LookupObject();
@@ -4109,10 +4199,10 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testSpans)
                         CPPUNIT_ASSERT_EQUAL(OString("StructElem"), 
pType->GetValue());
                         auto pS = 
dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("S"));
                         CPPUNIT_ASSERT_EQUAL(OString("Link"), pS->GetValue());
-                        ++nLinks;
+                        ++nFtn;
                     }
                 }
-                CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nLinks)>(2), nLinks);
+                CPPUNIT_ASSERT_EQUAL(static_cast<decltype(nFtn)>(1), nFtn);
 
                 auto pRefKid103 = 
dynamic_cast<vcl::filter::PDFReferenceElement*>(vKids10[3]);
                 CPPUNIT_ASSERT(pRefKid103);

Reply via email to