include/vcl/filter/pdfdocument.hxx     |    2 
 vcl/Library_vcl.mk                     |    1 
 vcl/inc/pdf/ExternalPDFStreams.hxx     |    2 
 vcl/inc/pdf/pdfcompat.hxx              |   45 +++++++++++
 vcl/source/filter/ipdf/pdfcompat.cxx   |  129 +++++++++++++++++++++++++++++++++
 vcl/source/filter/ipdf/pdfdocument.cxx |   19 ++++
 vcl/source/filter/ipdf/pdfread.cxx     |  112 +---------------------------
 7 files changed, 202 insertions(+), 108 deletions(-)

New commits:
commit 7b5e440dc7b7b710d695a00c51c3b061bc5729ba
Author:     Dennis Francis <dennis.fran...@collabora.com>
AuthorDate: Tue Oct 18 16:14:37 2022 +0530
Commit:     Miklos Vajna <vmik...@collabora.com>
CommitDate: Tue Oct 25 15:57:57 2022 +0200

    vcl: re-exporting broken pdfs -> empty pages
    
    Certain pdf documents, when loaded in LO_IMPORT_USE_PDFIUM=1 mode, even if
    pdf-version < v1.6, sometimes have missing objects referred to by other
    objects, for determining their stream length for instance.  As a result
    parsing fails and results in a pdf with empty pages.  A round trip
    through pdfium and exporting to v1.6 seems to cure the issue. Possibly
    it does some repairing work to determine the length of the stream in an
    independent pass through the file.
    
    Change-Id: Id09f67eddab4163ed12a3a3f3a73baf92e2912aa
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141495
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com>
    Reviewed-by: Miklos Vajna <vmik...@collabora.com>

diff --git a/include/vcl/filter/pdfdocument.hxx 
b/include/vcl/filter/pdfdocument.hxx
index 7f7cc8dfb641..6ab6adc2468a 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -575,6 +575,8 @@ public:
     //@{
     /// Read elements from the start of the stream till its end.
     bool Read(SvStream& rStream);
+    /// Calls Read() first; if that fails, tries to fix up the stream and then retries.
+    bool ReadWithPossibleFixup(SvStream& rStream);
     void SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine);
     void SetSignaturePage(size_t nPage);
     /// Sign the read document with xCertificate in the edit buffer.
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index 9c6e7220330e..8c19497ff6cd 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -428,6 +428,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
     vcl/source/filter/GraphicFormatDetector \
     vcl/source/filter/igif/decode \
     vcl/source/filter/igif/gifread \
+    vcl/source/filter/ipdf/pdfcompat \
     vcl/source/filter/ipdf/pdfread \
     vcl/source/filter/ipdf/pdfdocument \
     vcl/source/filter/ixbm/xbmread \
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx 
b/vcl/inc/pdf/ExternalPDFStreams.hxx
index 0a1997fe7dc7..71448910ac0f 100644
--- a/vcl/inc/pdf/ExternalPDFStreams.hxx
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -41,7 +41,7 @@ struct VCL_DLLPUBLIC ExternalPDFStream
             aPDFStream.WriteBytes(maData.data(), maData.size());
             aPDFStream.Seek(0);
             auto pPDFDocument = std::make_shared<filter::PDFDocument>();
-            if (!pPDFDocument->Read(aPDFStream))
+            if (!pPDFDocument->ReadWithPossibleFixup(aPDFStream))
             {
                 SAL_WARN("vcl.pdfwriter",
                          "PDFWriterImpl::writeReferenceXObject: reading the 
PDF document failed");
diff --git a/vcl/inc/pdf/pdfcompat.hxx b/vcl/inc/pdf/pdfcompat.hxx
new file mode 100644
index 000000000000..29de3901a436
--- /dev/null
+++ b/vcl/inc/pdf/pdfcompat.hxx
@@ -0,0 +1,45 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <config_features.h>
+#include <tools/gen.hxx>
+#include <tools/stream.hxx>
+#include <vcl/vectorgraphicdata.hxx>
+
+namespace vcl::pdf
+{
+#if HAVE_FEATURE_PDFIUM
+
+/// Convert a length in points (1/72 inch) to pixels at fResolutionDPI.
+inline double pointToPixel(const double fPoint, const double fResolutionDPI)
+{
+    return fPoint * fResolutionDPI / 72.;
+}
+
+/// Decide if PDF data is old enough to be compatible.
+bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize);
+
+/// Converts to highest supported format version (currently 1.6).
+/// Usually used to deal with missing referenced objects in the
+/// source pdf stream.
+bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream);
+
+#endif // HAVE_FEATURE_PDFIUM
+
+/// Takes care of transparently downgrading the version of the PDF stream in
+/// case it's too new for our PDF export.
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream);
+
+VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream);
+
+} // end of vcl::pdf namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfcompat.cxx 
b/vcl/source/filter/ipdf/pdfcompat.cxx
new file mode 100644
index 000000000000..3136e2940249
--- /dev/null
+++ b/vcl/source/filter/ipdf/pdfcompat.cxx
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <pdf/pdfcompat.hxx>
+
+#if HAVE_FEATURE_PDFIUM
+#include <fpdfview.h>
+#include <fpdf_edit.h>
+#include <tools/UnitConversion.hxx>
+#endif
+
+#include <vcl/filter/PDFiumLibrary.hxx>
+#include <sal/log.hxx>
+
+namespace vcl::pdf
+{
+#if HAVE_FEATURE_PDFIUM
+
+/// Decide if the PDF data at nPos is old enough (header version <= 1.6) to be compatible.
+bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
+{
+    if (nSize < 8)
+        return false;
+
+    // Header is "%PDF-x.y": bytes 5 and 7 hold the major and minor version digits.
+    sal_uInt8 aFirstBytes[8];
+    rInStream.Seek(nPos);
+    sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
+    if (nRead < 8)
+        return false;
+
+    if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
+        || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
+        return false;
+
+    sal_Int32 nMajor = OString(aFirstBytes[5]).toInt32();
+    sal_Int32 nMinor = OString(aFirstBytes[7]).toInt32();
+    return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
+}
+
+/// Re-saves rInStream through pdfium as PDF-1.6 (the highest supported version)
+/// into rOutStream; usually used to deal with missing referenced objects.
+/// Returns false if pdfium cannot load or save it, or rOutStream is not good.
+bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream)
+{
+    sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
+    sal_uInt64 nSize = STREAM_SEEK_TO_END;
+    rInStream.Seek(nPos);
+    // The conversion to PDF-1.6 is done by pdfium.
+    auto pPdfium = vcl::pdf::PDFiumLibrary::get();
+    if (!pPdfium)
+        return false;
+
+    // Read the whole input into a memory buffer pdfium can open.
+    SvMemoryStream aInBuffer;
+    aInBuffer.WriteStream(rInStream, nSize);
+
+    SvMemoryStream aSaved;
+    {
+        // Load the buffer using pdfium.
+        std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
+            = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize());
+        if (!pPdfDocument)
+            return false;
+
+        // 16 means PDF-1.6.
+        if (!pPdfDocument->saveWithVersion(aSaved, 16))
+            return false;
+    }
+
+    aSaved.Seek(STREAM_SEEK_TO_BEGIN);
+    rOutStream.WriteStream(aSaved);
+
+    return rOutStream.good();
+}
+
+/// Copies rInStream to rOutStream, transparently converting it to PDF-1.6 if
+/// its version is too new for our PDF export; false if conversion/copy failed.
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
+{
+    sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
+    sal_uInt64 nSize = STREAM_SEEK_TO_END;
+    bool bCompatible = isCompatible(rInStream, nPos, nSize);
+    rInStream.Seek(nPos);
+    if (bCompatible)
+        // Not converting.
+        rOutStream.WriteStream(rInStream, nSize);
+    else if (!convertToHighestSupported(rInStream, rOutStream))
+        return false; // Conversion failed; report it instead of an empty stream.
+
+    return rOutStream.good();
+}
+#else
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
+{
+    rInStream.Seek(STREAM_SEEK_TO_BEGIN); // No pdfium: copy the input unchanged.
+    rOutStream.WriteStream(rInStream, STREAM_SEEK_TO_END);
+    return rOutStream.good();
+}
+#endif // HAVE_FEATURE_PDFIUM
+
+VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream)
+{
+    // Buffer the (possibly converted) PDF data; an empty array is returned on failure.
+    SvMemoryStream aMemoryStream;
+    if (!getCompatibleStream(rStream, aMemoryStream))
+        return VectorGraphicDataArray();
+
+    const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
+
+    VectorGraphicDataArray aPdfData(nStreamLength);
+
+    aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
+    aMemoryStream.ReadBytes(aPdfData.begin(), nStreamLength);
+    if (aMemoryStream.GetError())
+        return VectorGraphicDataArray();
+
+    return aPdfData;
+}
+
+} // end vcl::pdf namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx 
b/vcl/source/filter/ipdf/pdfdocument.cxx
index 8a3ee8924d04..7569deede0f5 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -8,6 +8,8 @@
  */
 
 #include <vcl/filter/pdfdocument.hxx>
+#include <pdf/pdfcompat.hxx>
+#include <config_features.h>
 
 #include <map>
 #include <memory>
@@ -29,6 +31,7 @@
 #include <o3tl/safeint.hxx>
 
 #include <pdf/objectcopier.hxx>
+#include <vcl/pdfread.hxx>
 
 using namespace com::sun::star;
 
@@ -1350,6 +1353,22 @@ void PDFDocument::SetIDObject(size_t nID, 
PDFObjectElement* pObject)
     m_aIDObjects[nID] = pObject;
 }
 
+bool PDFDocument::ReadWithPossibleFixup(SvStream& rStream)
+{
+#if HAVE_FEATURE_PDFIUM
+    if (Read(rStream))
+        return true;
+
+    // Read failed: round-trip through pdfium, retry only if that succeeded.
+    rStream.Seek(0);
+    SvMemoryStream aStandardizedStream;
+    return vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream)
+           && Read(aStandardizedStream);
+#else
+    return Read(rStream); // Without pdfium there is nothing to repair with.
+#endif
+}
+
 bool PDFDocument::Read(SvStream& rStream)
 {
     // Check file magic.
diff --git a/vcl/source/filter/ipdf/pdfread.cxx 
b/vcl/source/filter/ipdf/pdfread.cxx
index cda9f9d2304b..05b9a966fd02 100644
--- a/vcl/source/filter/ipdf/pdfread.cxx
+++ b/vcl/source/filter/ipdf/pdfread.cxx
@@ -8,6 +8,7 @@
  */
 
 #include <vcl/pdfread.hxx>
+#include <pdf/pdfcompat.hxx>
 
 #include <config_features.h>
 
@@ -28,109 +29,6 @@
 
 using namespace com::sun::star;
 
-namespace
-{
-#if HAVE_FEATURE_PDFIUM
-
-/// Convert to inch, then assume 96 DPI.
-inline double pointToPixel(const double fPoint, const double fResolutionDPI)
-{
-    return fPoint * fResolutionDPI / 72.;
-}
-
-/// Decide if PDF data is old enough to be compatible.
-bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
-{
-    if (nSize < 8)
-        return false;
-
-    // %PDF-x.y
-    sal_uInt8 aFirstBytes[8];
-    rInStream.Seek(nPos);
-    sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
-    if (nRead < 8)
-        return false;
-
-    if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
-        || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
-        return false;
-
-    sal_Int32 nMajor = OString(aFirstBytes[5]).toInt32();
-    sal_Int32 nMinor = OString(aFirstBytes[7]).toInt32();
-    return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
-}
-
-/// Takes care of transparently downgrading the version of the PDF stream in
-/// case it's too new for our PDF export.
-bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
-{
-    sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
-    sal_uInt64 nSize = STREAM_SEEK_TO_END;
-    bool bCompatible = isCompatible(rInStream, nPos, nSize);
-    rInStream.Seek(nPos);
-    if (bCompatible)
-        // Not converting.
-        rOutStream.WriteStream(rInStream, nSize);
-    else
-    {
-        // Downconvert to PDF-1.6.
-        auto pPdfium = vcl::pdf::PDFiumLibrary::get();
-        if (!pPdfium)
-            return false;
-
-        // Read input into a buffer.
-        SvMemoryStream aInBuffer;
-        aInBuffer.WriteStream(rInStream, nSize);
-
-        SvMemoryStream aSaved;
-        {
-            // Load the buffer using pdfium.
-            std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
-                = pPdfium->openDocument(aInBuffer.GetData(), 
aInBuffer.GetSize());
-            if (!pPdfDocument)
-                return false;
-
-            // 16 means PDF-1.6.
-            if (!pPdfDocument->saveWithVersion(aSaved, 16))
-                return false;
-        }
-
-        aSaved.Seek(STREAM_SEEK_TO_BEGIN);
-        rOutStream.WriteStream(aSaved);
-    }
-
-    return rOutStream.good();
-}
-#else
-bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
-{
-    rInStream.Seek(STREAM_SEEK_TO_BEGIN);
-    rOutStream.WriteStream(rInStream, STREAM_SEEK_TO_END);
-    return rOutStream.good();
-}
-#endif // HAVE_FEATURE_PDFIUM
-
-VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream)
-{
-    // Save the original PDF stream for later use.
-    SvMemoryStream aMemoryStream;
-    if (!getCompatibleStream(rStream, aMemoryStream))
-        return VectorGraphicDataArray();
-
-    const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
-
-    VectorGraphicDataArray aPdfData(nStreamLength);
-
-    aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
-    aMemoryStream.ReadBytes(aPdfData.begin(), nStreamLength);
-    if (aMemoryStream.GetError())
-        return VectorGraphicDataArray();
-
-    return aPdfData;
-}
-
-} // end anonymous namespace
-
 namespace vcl
 {
 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& 
rBitmaps,
@@ -168,8 +66,8 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, 
std::vector<BitmapEx>& r
         }
 
         // Returned unit is points, convert that to pixel.
-        const size_t nPageWidth = pointToPixel(nPageWidthPoints, 
fResolutionDPI);
-        const size_t nPageHeight = pointToPixel(nPageHeightPoints, 
fResolutionDPI);
+        const size_t nPageWidth = vcl::pdf::pointToPixel(nPageWidthPoints, 
fResolutionDPI);
+        const size_t nPageHeight = vcl::pdf::pointToPixel(nPageHeightPoints, 
fResolutionDPI);
         std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
             = pPdfium->createBitmap(nPageWidth, nPageHeight, /*alpha=*/1);
         if (!pPdfBitmap)
@@ -236,7 +134,7 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, 
std::vector<BitmapEx>& r
 
 bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
 {
-    VectorGraphicDataArray aPdfDataArray = 
createVectorGraphicDataArray(rStream);
+    VectorGraphicDataArray aPdfDataArray = 
vcl::pdf::createVectorGraphicDataArray(rStream);
     if (!aPdfDataArray.hasElements())
     {
         SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
@@ -437,7 +335,7 @@ size_t ImportPDFUnloaded(const OUString& rURL, 
std::vector<PDFGraphicResult>& rG
         ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | 
StreamMode::SHARE_DENYNONE));
 
     // Save the original PDF stream for later use.
-    VectorGraphicDataArray aPdfDataArray = 
createVectorGraphicDataArray(*xStream);
+    VectorGraphicDataArray aPdfDataArray = 
vcl::pdf::createVectorGraphicDataArray(*xStream);
     if (!aPdfDataArray.hasElements())
         return 0;
 

Reply via email to