include/vcl/filter/pdfdocument.hxx     |    2 
 vcl/Library_vcl.mk                     |    1 
 vcl/inc/pdf/ExternalPDFStreams.hxx     |    2 
 vcl/inc/pdf/pdfcompat.hxx              |   42 ++++++++++++
 vcl/source/filter/ipdf/pdfcompat.cxx   |  113 +++++++++++++++++++++++++++++++++
 vcl/source/filter/ipdf/pdfdocument.cxx |   14 ++++
 vcl/source/filter/ipdf/pdfread.cxx     |  104 +-----------------------------
 7 files changed, 178 insertions(+), 100 deletions(-)

New commits:
commit 03a0b41ba21f7d546160c819e088e0b0023b68bb
Author:     Dennis Francis <dennis.fran...@collabora.com>
AuthorDate: Tue Oct 18 16:14:37 2022 +0530
Commit:     Andras Timar <andras.ti...@collabora.com>
CommitDate: Wed Oct 26 20:38:17 2022 +0200

    vcl: re-exporting broken pdfs -> empty pages
    
    Certain PDF documents, when loaded in LO_IMPORT_USE_PDFIUM=1 mode, can
    have missing objects that other objects refer to (for instance, to
    determine a stream's length), even if their PDF version is < 1.6.  As a
    result, parsing fails and re-export produces a PDF with empty pages.  A
    round trip through pdfium, exporting to v1.6, seems to cure the issue.
    Possibly pdfium does some repair work to determine the length of the
    stream in an independent pass through the file.
    
    Change-Id: Id09f67eddab4163ed12a3a3f3a73baf92e2912aa
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141854
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com>
    Reviewed-by: Andras Timar <andras.ti...@collabora.com>

diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index dd03029227d2..fbe0be89cdc6 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -576,6 +576,8 @@ public:
     //@{
     /// Read elements from the start of the stream till its end.
     bool Read(SvStream& rStream);
+    /// Calls Read() first; if it fails, tries to fix up the stream and then retries Read().
+    bool ReadWithPossibleFixup(SvStream& rStream);
     void SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine);
     void SetSignaturePage(size_t nPage);
     /// Sign the read document with xCertificate in the edit buffer.
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index b0a6ee533133..25f6a0ef9562 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -450,6 +450,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
     vcl/source/filter/ipict/ipict \
     vcl/source/filter/ipsd/ipsd \
     vcl/source/filter/ipict/shape \
+    vcl/source/filter/ipdf/pdfcompat \
     vcl/source/filter/ipdf/pdfread \
     vcl/source/filter/ipdf/pdfdocument \
     vcl/source/filter/iras/iras \
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx
index 7840217630c8..45b15f7a74bc 100644
--- a/vcl/inc/pdf/ExternalPDFStreams.hxx
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -42,7 +42,7 @@ struct VCL_DLLPUBLIC ExternalPDFStream
             aPDFStream.WriteBytes(maDataContainer.getData(), 
maDataContainer.getSize());
             aPDFStream.Seek(0);
             auto pPDFDocument = std::make_shared<filter::PDFDocument>();
-            if (!pPDFDocument->Read(aPDFStream))
+            if (!pPDFDocument->ReadWithPossibleFixup(aPDFStream))
             {
                 SAL_WARN("vcl.pdfwriter",
                          "PDFWriterImpl::writeReferenceXObject: reading the 
PDF document failed");
diff --git a/vcl/inc/pdf/pdfcompat.hxx b/vcl/inc/pdf/pdfcompat.hxx
new file mode 100644
index 000000000000..8f629b3bc8ee
--- /dev/null
+++ b/vcl/inc/pdf/pdfcompat.hxx
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <config_features.h>
+#include <tools/gen.hxx>
+#include <tools/stream.hxx>
+#include <tools/UnitConversion.hxx>
+#include <vcl/graph.hxx>
+
+namespace vcl::pdf
+{
+/// Convert points to inches, then scale by the given resolution (DPI).
+inline double pointToPixel(const double fPoint, const double fResolutionDPI)
+{
+    return o3tl::convert(fPoint, o3tl::Length::pt, o3tl::Length::in) * fResolutionDPI;
+}
+
+/// Decide if PDF data is old enough to be compatible.
+bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize);
+
+/// Converts to highest supported format version (currently 1.6).
+/// Usually used to deal with missing referenced objects in the
+/// source pdf stream.
+bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream);
+
+/// Takes care of transparently downgrading the version of the PDF stream in
+/// case it's too new for our PDF export.
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream);
+
+BinaryDataContainer createBinaryDataContainer(SvStream& rStream);
+
+} // end of vcl::pdf namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfcompat.cxx b/vcl/source/filter/ipdf/pdfcompat.cxx
new file mode 100644
index 000000000000..52be1f3b2c07
--- /dev/null
+++ b/vcl/source/filter/ipdf/pdfcompat.cxx
@@ -0,0 +1,113 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <pdf/pdfcompat.hxx>
+
+#include <vcl/filter/PDFiumLibrary.hxx>
+#include <sal/log.hxx>
+
+namespace vcl::pdf
+{
+/// Decide if PDF data is old enough to be compatible.
+bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
+{
+    if (nSize < 8)
+        return false;
+
+    // %PDF-x.y
+    sal_uInt8 aFirstBytes[8];
+    rInStream.Seek(nPos);
+    sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
+    if (nRead < 8)
+        return false;
+
+    if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
+        || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
+        return false;
+
+    sal_Int32 nMajor = OString(char(aFirstBytes[5])).toInt32();
+    sal_Int32 nMinor = OString(char(aFirstBytes[7])).toInt32();
+    return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
+}
+
+/// Converts to highest supported format version (1.6).
+/// Usually used to deal with missing referenced objects in source
+/// pdf stream.
+bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream)
+{
+    sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
+    sal_uInt64 nSize = STREAM_SEEK_TO_END;
+    rInStream.Seek(nPos);
+    // Convert to PDF-1.6.
+    auto pPdfium = vcl::pdf::PDFiumLibrary::get();
+    if (!pPdfium)
+        return false;
+
+    // Read input into a buffer.
+    SvMemoryStream aInBuffer;
+    aInBuffer.WriteStream(rInStream, nSize);
+
+    SvMemoryStream aSaved;
+    {
+        // Load the buffer using pdfium.
+        std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
+            = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize(), OString());
+        if (!pPdfDocument)
+            return false;
+
+        // 16 means PDF-1.6.
+        if (!pPdfDocument->saveWithVersion(aSaved, 16))
+            return false;
+    }
+
+    aSaved.Seek(STREAM_SEEK_TO_BEGIN);
+    rOutStream.WriteStream(aSaved);
+
+    return rOutStream.good();
+}
+
+/// Takes care of transparently downgrading the version of the PDF stream in
+/// case it's too new for our PDF export.
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
+{
+    sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
+    sal_uInt64 nSize = STREAM_SEEK_TO_END;
+    bool bCompatible = isCompatible(rInStream, nPos, nSize);
+    rInStream.Seek(nPos);
+    if (bCompatible)
+        // Not converting.
+        rOutStream.WriteStream(rInStream, nSize);
+    else
+        convertToHighestSupported(rInStream, rOutStream);
+
+    return rOutStream.good();
+}
+
+BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
+{
+    // Save the original PDF stream for later use.
+    SvMemoryStream aMemoryStream;
+    if (!getCompatibleStream(rStream, aMemoryStream))
+        return {};
+
+    const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
+
+    auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
+
+    aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
+    aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
+    if (aMemoryStream.GetError())
+        return {};
+
+    return { std::move(aPdfData) };
+}
+
+} // end vcl::pdf namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 4573d414cfc6..4430c7217fc7 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -8,6 +8,8 @@
  */
 
 #include <vcl/filter/pdfdocument.hxx>
+#include <pdf/pdfcompat.hxx>
+#include <config_features.h>
 
 #include <map>
 #include <memory>
@@ -1348,6 +1350,18 @@ void PDFDocument::SetIDObject(size_t nID, 
PDFObjectElement* pObject)
     m_aIDObjects[nID] = pObject;
 }
 
+bool PDFDocument::ReadWithPossibleFixup(SvStream& rStream)
+{
+    if (Read(rStream))
+        return true;
+
+    // Read failed, try a roundtrip through pdfium and then retry.
+    rStream.Seek(0);
+    SvMemoryStream aStandardizedStream;
+    vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream);
+    return Read(aStandardizedStream);
+}
+
 bool PDFDocument::Read(SvStream& rStream)
 {
     // Check file magic.
diff --git a/vcl/source/filter/ipdf/pdfread.cxx b/vcl/source/filter/ipdf/pdfread.cxx
index 7a6209c9aa31..392e76ac0cd1 100644
--- a/vcl/source/filter/ipdf/pdfread.cxx
+++ b/vcl/source/filter/ipdf/pdfread.cxx
@@ -8,8 +8,7 @@
  */
 
 #include <vcl/pdfread.hxx>
-
-#include <tools/UnitConversion.hxx>
+#include <pdf/pdfcompat.hxx>
 
 #include <pdf/PdfConfig.hxx>
 #include <vcl/graph.hxx>
@@ -22,99 +21,6 @@
 
 using namespace com::sun::star;
 
-namespace
-{
-/// Convert to inch, then assume 96 DPI.
-inline double pointToPixel(const double fPoint, const double fResolutionDPI)
-{
-    return o3tl::convert(fPoint, o3tl::Length::pt, o3tl::Length::in) * 
fResolutionDPI;
-}
-
-/// Decide if PDF data is old enough to be compatible.
-bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
-{
-    if (nSize < 8)
-        return false;
-
-    // %PDF-x.y
-    sal_uInt8 aFirstBytes[8];
-    rInStream.Seek(nPos);
-    sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
-    if (nRead < 8)
-        return false;
-
-    if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
-        || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
-        return false;
-
-    sal_Int32 nMajor = OString(char(aFirstBytes[5])).toInt32();
-    sal_Int32 nMinor = OString(char(aFirstBytes[7])).toInt32();
-    return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
-}
-
-/// Takes care of transparently downgrading the version of the PDF stream in
-/// case it's too new for our PDF export.
-bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
-{
-    sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
-    sal_uInt64 nSize = STREAM_SEEK_TO_END;
-    bool bCompatible = isCompatible(rInStream, nPos, nSize);
-    rInStream.Seek(nPos);
-    if (bCompatible)
-        // Not converting.
-        rOutStream.WriteStream(rInStream, nSize);
-    else
-    {
-        // Downconvert to PDF-1.6.
-        auto pPdfium = vcl::pdf::PDFiumLibrary::get();
-        if (!pPdfium)
-            return false;
-
-        // Read input into a buffer.
-        SvMemoryStream aInBuffer;
-        aInBuffer.WriteStream(rInStream, nSize);
-
-        SvMemoryStream aSaved;
-        {
-            // Load the buffer using pdfium.
-            std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
-                = pPdfium->openDocument(aInBuffer.GetData(), 
aInBuffer.GetSize(), OString());
-            if (!pPdfDocument)
-                return false;
-
-            // 16 means PDF-1.6.
-            if (!pPdfDocument->saveWithVersion(aSaved, 16))
-                return false;
-        }
-
-        aSaved.Seek(STREAM_SEEK_TO_BEGIN);
-        rOutStream.WriteStream(aSaved);
-    }
-
-    return rOutStream.good();
-}
-
-BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
-{
-    // Save the original PDF stream for later use.
-    SvMemoryStream aMemoryStream;
-    if (!getCompatibleStream(rStream, aMemoryStream))
-        return {};
-
-    const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
-
-    auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
-
-    aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
-    aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
-    if (aMemoryStream.GetError())
-        return {};
-
-    return { std::move(aPdfData) };
-}
-
-} // end anonymous namespace
-
 namespace vcl
 {
 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& 
rBitmaps,
@@ -157,8 +63,8 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, 
std::vector<BitmapEx>& r
         }
 
         // Returned unit is points, convert that to pixel.
-        const size_t nPageWidth = pointToPixel(nPageWidthPoints, 
fResolutionDPI);
-        const size_t nPageHeight = pointToPixel(nPageHeightPoints, 
fResolutionDPI);
+        const size_t nPageWidth = vcl::pdf::pointToPixel(nPageWidthPoints, 
fResolutionDPI);
+        const size_t nPageHeight = vcl::pdf::pointToPixel(nPageHeightPoints, 
fResolutionDPI);
         std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
             = pPdfium->createBitmap(nPageWidth, nPageHeight, /*nAlpha=*/1);
         if (!pPdfBitmap)
@@ -217,7 +123,7 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, 
std::vector<BitmapEx>& r
 bool importPdfVectorGraphicData(SvStream& rStream,
                                 std::shared_ptr<VectorGraphicData>& 
rVectorGraphicData)
 {
-    BinaryDataContainer aDataContainer = createBinaryDataContainer(rStream);
+    BinaryDataContainer aDataContainer = 
vcl::pdf::createBinaryDataContainer(rStream);
     if (aDataContainer.isEmpty())
     {
         SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
@@ -428,7 +334,7 @@ size_t ImportPDFUnloaded(const OUString& rURL, 
std::vector<PDFGraphicResult>& rG
         ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | 
StreamMode::SHARE_DENYNONE));
 
     // Save the original PDF stream for later use.
-    BinaryDataContainer aDataContainer = createBinaryDataContainer(*xStream);
+    BinaryDataContainer aDataContainer = 
vcl::pdf::createBinaryDataContainer(*xStream);
     if (aDataContainer.isEmpty())
         return 0;
 

Reply via email to