sd/qa/unit/data/pdf/ErrareHumanumEst.pdf |binary
 sd/qa/unit/export-tests.cxx              |   25 ++++++++++
 svx/source/svdraw/svdpdf.cxx             |    6 ++
 vcl/qa/cppunit/pdfexport/pdfexport2.cxx  |    2 
 vcl/source/pdf/PDFiumLibrary.cxx         |   74 ++++++++++++++++++++++++++++---
 5 files changed, 101 insertions(+), 6 deletions(-)

New commits:
commit 184e53c833e199264e5f0fed5ea301eefcd3eeda
Author:     Caolán McNamara <[email protected]>
AuthorDate: Wed Oct 8 21:23:57 2025 +0100
Commit:     Caolán McNamara <[email protected]>
CommitDate: Fri Oct 17 09:23:14 2025 +0200

    use FPDFText_GetUnicode to get text
    
    instead of FPDFTextObj_GetText, which is returning 0x2
    for some hyphens. If we use the slightly lower level
    apis we can get info as to substituted hyphens.
    
    Change-Id: I26efa9f1acb5ba819b63034399da4f1961373f13
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/192081
    Tested-by: Jenkins CollaboraOffice <[email protected]>
    Reviewed-by: Miklos Vajna <[email protected]>
    (cherry picked from commit 25550b2daf29a4eb766dd22692c43b7be354a87c)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/192360
    Reviewed-by: Caolán McNamara <[email protected]>
    Tested-by: Jenkins

diff --git a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
index 5b8398f1ef16..bf9c5f05a693 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport2.cxx
@@ -5577,7 +5577,7 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf162194SoftHyphen)
 
     CPPUNIT_ASSERT_EQUAL(u"Waffle"_ustr, aText.at(0).trim());
     CPPUNIT_ASSERT_EQUAL(u"AAA Waf"_ustr, aText.at(1).trim());
-    CPPUNIT_ASSERT_EQUAL(u""_ustr, aText.at(2).trim());
+    CPPUNIT_ASSERT_EQUAL(u"-"_ustr, aText.at(2).trim());
     CPPUNIT_ASSERT_EQUAL(u"fle"_ustr, aText.at(3).trim());
 }
 
diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx
index 478d08e7797e..4b159141764a 100644
--- a/vcl/source/pdf/PDFiumLibrary.cxx
+++ b/vcl/source/pdf/PDFiumLibrary.cxx
@@ -465,8 +465,6 @@ public:
     PDFiumTextPageImpl(FPDF_TEXTPAGE pTextPage);
     ~PDFiumTextPageImpl();
 
-    FPDF_TEXTPAGE getPointer() { return mpTextPage; }
-
     int countChars() override;
     unsigned int getUnicode(int index) override;
     std::unique_ptr<PDFiumSearchHandle> findStart(const OUString& rFindWhat, PDFFindFlags nFlags,
@@ -474,6 +472,73 @@ public:
 
     /// Returned rect is no longer upside down and is in mm100.
     basegfx::B2DRectangle getCharBox(int nIndex, double fPageHeight) override;
+
+    OUString getText(FPDF_PAGEOBJECT pPageObject)
+    {
+        OUStringBuffer aResult;
+
+        bool containsPreChar = false;
+        bool addLineFeed = false;
+        double posY(0), originX(0.0), originY(0.0);
+
+        // FPDFTextObj_GetText also does a similar loop over the entire
+        // contents of the text page; this is intended to be the equivalent
+        // of that, except for the (currently) added recovery of hyphens.
+        int count = FPDFText_CountChars(mpTextPage);
+        for (int i = 0; i < count; ++i)
+        {
+            FPDF_PAGEOBJECT pOwner = FPDFText_GetTextObject(mpTextPage, i);
+            sal_Unicode cUnicode = FPDFText_GetUnicode(mpTextPage, i);
+            if (pOwner == pPageObject)
+            {
+                FPDFText_GetCharOrigin(mpTextPage, i, &originX, &originY);
+
+                if (fabs(posY - originY) > 0 && !containsPreChar && addLineFeed)
+                {
+                    posY = originY;
+                    if (!aResult.isEmpty())
+                        aResult.append("\r\n");
+                }
+                containsPreChar = true;
+                addLineFeed = false;
+
+                switch (cUnicode)
+                {
+                    case 0:
+                        SAL_INFO("vcl.filter", "PDFiumImpl: cannot get unicode for char");
+                        break;
+                    default:
+                        aResult.append(cUnicode);
+                        break;
+                    case 0x2: // oddly pdfium replaces some '-' with 2.
+                    {
+                        int isHyphen = FPDFText_IsHyphen(mpTextPage, i);
+                        if (isHyphen == 1)
+                            aResult.append('-');
+                        else
+                        {
+                            SAL_WARN_IF(isHyphen == -1, "vcl.filter",
+                                        "PDFiumImpl: FPDFText_IsHyphen failure");
+                            aResult.append(cUnicode);
+                        }
+                    }
+                    break;
+                }
+            }
+            else if (cUnicode == ' ' && containsPreChar)
+            {
+                aResult.append(' ');
+                containsPreChar = false;
+                addLineFeed = false;
+            }
+            else
+            {
+                containsPreChar = false;
+                addLineFeed = true;
+            }
+        }
+        return aResult.toString();
+    }
 };
 
 class PDFiumSignatureImpl final : public PDFiumSignature
@@ -1090,9 +1155,8 @@ PDFiumPageObjectImpl::PDFiumPageObjectImpl(FPDF_PAGEOBJECT pPageObject)
 OUString PDFiumPageObjectImpl::getText(std::unique_ptr<PDFiumTextPage> const& rTextPage)
 {
     auto pTextPage = static_cast<PDFiumTextPageImpl*>(rTextPage.get());
-    return getUnicodeString([this, pTextPage](FPDF_WCHAR* buffer, unsigned long length) {
-        return FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), buffer, length);
-    });
+    // FPDFTextObj_GetText may report some hyphens as 0x2
+    return pTextPage->getText(mpPageObject);
 }
 
 PDFPageObjectType PDFiumPageObjectImpl::getType()
commit 73c5466eb53e62c28df4a71341afea3815f169e2
Author:     Caolán McNamara <[email protected]>
AuthorDate: Mon Oct 6 12:16:22 2025 +0100
Commit:     Caolán McNamara <[email protected]>
CommitDate: Fri Oct 17 09:23:05 2025 +0200

    font version needs to exist and be in a non-0 fractional format
    
    Change-Id: I72420866185a890b3b2af2acf2339bad3fe0080d
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/191961
    Reviewed-by: Miklos Vajna <[email protected]>
    Tested-by: Jenkins CollaboraOffice <[email protected]>
    (cherry picked from commit 10a2e4ea3df5d1314de3af5c7e93c1eac96c31ed)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/192359
    Tested-by: Jenkins
    Reviewed-by: Caolán McNamara <[email protected]>

diff --git a/sd/qa/unit/data/pdf/ErrareHumanumEst.pdf b/sd/qa/unit/data/pdf/ErrareHumanumEst.pdf
new file mode 100644
index 000000000000..f27ba2ed886e
Binary files /dev/null and b/sd/qa/unit/data/pdf/ErrareHumanumEst.pdf differ
diff --git a/sd/qa/unit/export-tests.cxx b/sd/qa/unit/export-tests.cxx
index 0277d2127f3f..d6829ec1f9e6 100644
--- a/sd/qa/unit/export-tests.cxx
+++ b/sd/qa/unit/export-tests.cxx
@@ -1185,6 +1185,31 @@ CPPUNIT_TEST_FIXTURE(SdExportTest, testExplodedPdfGrayscaleImageUnderInvisibleTe
     CPPUNIT_ASSERT_MESSAGE("Shape should be Invisible", !bVisible);
 }
 
+CPPUNIT_TEST_FIXTURE(SdExportTest, testExplodedPdfMissingFontVersion)
+{
+    auto pPdfium = vcl::pdf::PDFiumLibrary::get();
+    if (!pPdfium)
+        return;
+    UsePdfium aGuard;
+
+    loadFromFile(u"pdf/ErrareHumanumEst.pdf");
+
+    setFilterOptions("{\"DecomposePDF\":{\"type\":\"boolean\",\"value\":\"true\"}}");
+    setImportFilterName(u"OpenDocument Drawing Flat XML"_ustr);
+    saveAndReload(u"OpenDocument Drawing Flat XML"_ustr);
+
+    const SdrPage* pPage = GetPage(1);
+
+    const SdrObject* pObj = pPage->GetObj(0);
+    CPPUNIT_ASSERT(pObj);
+    const SdrObjGroup* pObjGroup = dynamic_cast<const SdrObjGroup*>(pObj);
+    CPPUNIT_ASSERT(pObjGroup);
+    const SdrTextObj* pTextObj = DynCastSdrTextObj(pObjGroup->GetObj(0));
+    OUString sText = pTextObj->GetOutlinerParaObject()->GetTextObject().GetText(0);
+    // Without fix this fails to import at all
+    CPPUNIT_ASSERT_EQUAL(u"Errare humanum est"_ustr, sText);
+}
+
 CPPUNIT_TEST_FIXTURE(SdExportTest, testEmbeddedText)
 {
     createSdDrawDoc("objectwithtext.fodg");
diff --git a/svx/source/svdraw/svdpdf.cxx b/svx/source/svdraw/svdpdf.cxx
index 261aacfabfd8..9943ae8254b2 100644
--- a/svx/source/svdraw/svdpdf.cxx
+++ b/svx/source/svdraw/svdpdf.cxx
@@ -1116,6 +1116,12 @@ static bool toPfaCID(SubSetInfo& rSubSetInfo, const OUString& fileUrl,
     if (version.isEmpty())
         version = CIDFontVersion;
 
+    if (version.isEmpty() || version.toDouble() == 0.0)
+    {
+        SAL_WARN("sd.filter", "Font version cannot be empty or 0.0");
+        version = "0.001"_ostr;
+    }
+
     if (!brokenFontName.isEmpty())
         FontName = postScriptName.toUtf8();
 

Reply via email to