external/boost/UnpackedTarball_boost.mk                                        
           |    2 
 external/boost/boost.file_iterator.sharing_win.patch                           
           |  158 ++++++++++
 filter/CppunitTest_filter_textfilterdetect.mk                                  
           |    4 
 
filter/qa/unit/data/hybrid_calc_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
    |binary
 
filter/qa/unit/data/hybrid_impress_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
 |binary
 
filter/qa/unit/data/hybrid_writer_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
  |binary
 filter/qa/unit/textfilterdetect.cxx                                            
           |   41 ++
 include/o3tl/char16_t2wchar_t.hxx                                              
           |    1 
 sdext/source/pdfimport/filterdet.cxx                                           
           |    4 
 sdext/source/pdfimport/inc/pdfparse.hxx                                        
           |    5 
 sdext/source/pdfimport/pdfparse/pdfparse.cxx                                   
           |   11 
 sdext/source/pdfimport/test/pdfunzip.cxx                                       
           |    3 
 sdext/source/pdfimport/wrapper/wrapper.cxx                                     
           |    3 
 13 files changed, 216 insertions(+), 16 deletions(-)

New commits:
commit 873c56df149bb463e5267162e43c755ad16779b8
Author:     Mike Kaganski <mike.kagan...@collabora.com>
AuthorDate: Fri Dec 1 16:48:44 2023 +0300
Commit:     Thorsten Behrens <thorsten.behr...@allotropia.de>
CommitDate: Mon Jan 1 23:28:09 2024 +0100

    tdf#158442: fix opening hybrid PDFs on Windows
    
    Commit 046e9545956d8ad1d69345d6b4a4c0a33714d179 (Try to revert to use
    of file_iterator from boost on Windows, 2023-10-31) had introduced a
    problem that pdfparse::PDFReader::read couldn't create file_iterator
    for files already opened with write access: mmap_file_iterator ctor
    on Windows used single FILE_SHARE_READ as dwSharedMode parameter for
    CreateFileA WinAPI; and that failed, when the file was already opened
    using GENERIC_WRITE in dwDesiredAccess - which happens when opening
    stream in TypeDetection::impl_detectTypeFlatAndDeep.
    
    Fix this by patching boosts' mmap_file_iterator constructor to use
    FILE_SHARE_READ | FILE_SHARE_WRITE, like we do in osl_openFile.
    
    But there was a pre-existing problem of using char-based CreateFileA
    API, which disallows opening any files with names not representable
    in current Windows codepage. Such hybrid PDF files would still fail
    creation of the file_iterator, and open as PDF.
    
    Fix that by further patching boost to have wstring-based constructors
    for file_iterator and mmap_file_iterator on Windows, which would call
    CreateFileW.
    
    Change-Id: Ib190bc090636159ade390b3dd120957d06d7b89b
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160218
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com>
    Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160306
    Reviewed-by: Thorsten Behrens <thorsten.behr...@allotropia.de>

diff --git a/external/boost/UnpackedTarball_boost.mk 
b/external/boost/UnpackedTarball_boost.mk
index acdc5d331c76..d8bd131ac8df 100644
--- a/external/boost/UnpackedTarball_boost.mk
+++ b/external/boost/UnpackedTarball_boost.mk
@@ -35,6 +35,8 @@ boost_patches += boost-ios.patch.0
 # violations":
 boost_patches += 
0001-Avoid-boost-phoenix-placeholders-uarg1.10-ODR-violat.patch.2
 
+boost_patches += boost.file_iterator.sharing_win.patch
+
 $(eval $(call gb_UnpackedTarball_UnpackedTarball,boost))
 
 $(eval $(call gb_UnpackedTarball_set_tarball,boost,$(BOOST_TARBALL)))
diff --git a/external/boost/boost.file_iterator.sharing_win.patch 
b/external/boost/boost.file_iterator.sharing_win.patch
new file mode 100644
index 000000000000..b3b8bea3f3ff
--- /dev/null
+++ b/external/boost/boost.file_iterator.sharing_win.patch
@@ -0,0 +1,158 @@
+--- 
foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp.orig
++++ foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp
+@@ -181,67 +181,28 @@ public:
+     {}
+ 
+     explicit mmap_file_iterator(std::string const& fileName)
+-      : m_filesize(0), m_curChar(0)
+-    {
+-        HANDLE hFile = ::CreateFileA(
++      : mmap_file_iterator(::CreateFileA(
+             fileName.c_str(),
+             GENERIC_READ,
+-            FILE_SHARE_READ,
++            FILE_SHARE_READ | FILE_SHARE_WRITE,
+             NULL,
+             OPEN_EXISTING,
+             FILE_FLAG_SEQUENTIAL_SCAN,
+             NULL
+-        );
+-
+-        if (hFile == INVALID_HANDLE_VALUE)
+-            return;
+-
+-        // Store the size of the file, it's used to construct
+-        //  the end iterator
+-        m_filesize = ::GetFileSize(hFile, NULL);
++        ))
++    {}
+ 
+-        HANDLE hMap = ::CreateFileMapping(
+-            hFile,
++    explicit mmap_file_iterator(std::wstring const& fileName)
++      : mmap_file_iterator(::CreateFileW(
++            fileName.c_str(),
++            GENERIC_READ,
++            FILE_SHARE_READ | FILE_SHARE_WRITE,
+             NULL,
+-            PAGE_READONLY,
+-            0, 0,
++            OPEN_EXISTING,
++            FILE_FLAG_SEQUENTIAL_SCAN,
+             NULL
+-        );
+-
+-        if (hMap == NULL)
+-        {
+-            ::CloseHandle(hFile);
+-            return;
+-        }
+-
+-        LPVOID pMem = ::MapViewOfFile(
+-            hMap,
+-            FILE_MAP_READ,
+-            0, 0, 0
+-        );
+-
+-        if (pMem == NULL)
+-        {
+-            ::CloseHandle(hMap);
+-            ::CloseHandle(hFile);
+-            return;
+-        }
+-
+-        // We hold both the file handle and the memory pointer.
+-        // We can close the hMap handle now because Windows holds internally
+-        //  a reference to it since there is a view mapped.
+-        ::CloseHandle(hMap);
+-
+-        // It seems like we can close the file handle as well (because
+-        //  a reference is hold by the filemap object).
+-        ::CloseHandle(hFile);
+-
+-        // Store the handles inside the shared_ptr (with the custom 
destructors)
+-        m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
+-
+-        // Start of the file
+-        m_curChar = m_mem.get();
+-    }
++        ))
++    {}
+ 
+     mmap_file_iterator(const mmap_file_iterator& iter)
+     { *this = iter; }
+@@ -290,6 +251,59 @@ private:
+     boost::shared_ptr<CharT> m_mem;
+     std::size_t m_filesize;
+     CharT* m_curChar;
++    
++    explicit mmap_file_iterator(HANDLE hFile)
++      : m_filesize(0), m_curChar(0)
++    {
++        if (hFile == INVALID_HANDLE_VALUE)
++            return;
++
++        // Store the size of the file, it's used to construct
++        //  the end iterator
++        m_filesize = ::GetFileSize(hFile, NULL);
++
++        HANDLE hMap = ::CreateFileMapping(
++            hFile,
++            NULL,
++            PAGE_READONLY,
++            0, 0,
++            NULL
++        );
++
++        if (hMap == NULL)
++        {
++            ::CloseHandle(hFile);
++            return;
++        }
++
++        LPVOID pMem = ::MapViewOfFile(
++            hMap,
++            FILE_MAP_READ,
++            0, 0, 0
++        );
++
++        if (pMem == NULL)
++        {
++            ::CloseHandle(hMap);
++            ::CloseHandle(hFile);
++            return;
++        }
++
++        // We hold both the file handle and the memory pointer.
++        // We can close the hMap handle now because Windows holds internally
++        //  a reference to it since there is a view mapped.
++        ::CloseHandle(hMap);
++
++        // It seems like we can close the file handle as well (because
++        //  a reference is hold by the filemap object).
++        ::CloseHandle(hFile);
++
++        // Store the handles inside the shared_ptr (with the custom 
destructors)
++        m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
++
++        // Start of the file
++        m_curChar = m_mem.get();
++    }
+ };
+ 
+ #endif // BOOST_SPIRIT_FILEITERATOR_WINDOWS
+--- foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp.orig
++++ foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp
+@@ -170,6 +170,12 @@ public:
+     :   base_t(adapted_t(fileName))
+     {}
+ 
++#ifdef BOOST_SPIRIT_FILEITERATOR_WINDOWS
++    file_iterator(std::wstring const& fileName)
++    :   base_t(adapted_t(fileName))
++    {}
++#endif
++
+     file_iterator(const base_t& iter)
+     :   base_t(iter)
+     {}
diff --git a/filter/CppunitTest_filter_textfilterdetect.mk 
b/filter/CppunitTest_filter_textfilterdetect.mk
index be2697a17406..e931a5741a1d 100644
--- a/filter/CppunitTest_filter_textfilterdetect.mk
+++ b/filter/CppunitTest_filter_textfilterdetect.mk
@@ -42,4 +42,8 @@ $(eval $(call 
gb_CppunitTest_use_rdb,filter_textfilterdetect,services))
 
 $(eval $(call gb_CppunitTest_use_configuration,filter_textfilterdetect))
 
+$(eval $(call gb_CppunitTest_use_custom_headers,filter_textfilterdetect, \
+    officecfg/registry \
+))
+
 # vim: set noet sw=4 ts=4:
diff --git "a/filter/qa/unit/data/hybrid_calc_абв_αβγ.pdf" 
"b/filter/qa/unit/data/hybrid_calc_абв_αβγ.pdf"
new file mode 100644
index 000000000000..b104113a5238
Binary files /dev/null and "b/filter/qa/unit/data/hybrid_calc_абв_αβγ.pdf" 
differ
diff --git "a/filter/qa/unit/data/hybrid_impress_абв_αβγ.pdf" 
"b/filter/qa/unit/data/hybrid_impress_абв_αβγ.pdf"
new file mode 100644
index 000000000000..78e7136211f4
Binary files /dev/null and "b/filter/qa/unit/data/hybrid_impress_абв_αβγ.pdf" 
differ
diff --git "a/filter/qa/unit/data/hybrid_writer_абв_αβγ.pdf" 
"b/filter/qa/unit/data/hybrid_writer_абв_αβγ.pdf"
new file mode 100644
index 000000000000..00cf3de44c5c
Binary files /dev/null and "b/filter/qa/unit/data/hybrid_writer_абв_αβγ.pdf" 
differ
diff --git a/filter/qa/unit/textfilterdetect.cxx 
b/filter/qa/unit/textfilterdetect.cxx
index 293f211a46e3..47bc0b8707c6 100644
--- a/filter/qa/unit/textfilterdetect.cxx
+++ b/filter/qa/unit/textfilterdetect.cxx
@@ -16,7 +16,9 @@
 #include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
 #include <com/sun/star/text/XTextDocument.hpp>
 
+#include <comphelper/configuration.hxx>
 #include <comphelper/propertyvalue.hxx>
+#include <officecfg/Office/Common.hxx>
 #include <sfx2/docfac.hxx>
 #include <unotools/mediadescriptor.hxx>
 #include <unotools/streamwrap.hxx>
@@ -31,6 +33,11 @@ using namespace com::sun::star;
 
 namespace
 {
+bool supportsService(const uno::Reference<lang::XComponent>& x, const 
OUString& s)
+{
+    return uno::Reference<lang::XServiceInfo>(x, 
uno::UNO_QUERY_THROW)->supportsService(s);
+}
+
 /// Test class for PlainTextFilterDetect.
 class TextFilterDetectTest : public UnoApiTest
 {
@@ -63,10 +70,6 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testTdf114428)
 
 CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
 {
-    auto supportsService = [](const uno::Reference<lang::XComponent>& x, const 
OUString& s) {
-        return uno::Reference<lang::XServiceInfo>(x, 
uno::UNO_QUERY_THROW)->supportsService(s);
-    };
-
     // Given an empty file, with a pptx extension
     // When loading the file
     loadFromURL(u"empty.pptx");
@@ -172,6 +175,36 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
         CPPUNIT_ASSERT_EQUAL(OUString(u"Writer template’s first line"), 
xParagraph->getString());
     }
 }
+
+CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testHybridPDFFile)
+{
+    // Make sure that file locking is ON
+    {
+        std::shared_ptr<comphelper::ConfigurationChanges> xChanges(
+            comphelper::ConfigurationChanges::create());
+        
officecfg::Office::Common::Misc::UseDocumentSystemFileLocking::set(true, 
xChanges);
+        xChanges->commit();
+    }
+
+    // Given a hybrid PDF file
+
+    // Created in Writer
+    loadFromURL(u"hybrid_writer_абв_αβγ.pdf");
+    // Make sure it opens in Writer.
+    // Without the accompanying fix in place, this test would have failed on 
Windows, as it was
+    // opened in Draw instead.
+    CPPUNIT_ASSERT(supportsService(mxComponent, 
"com.sun.star.text.TextDocument"));
+
+    // Created in Calc
+    loadFromURL(u"hybrid_calc_абв_αβγ.pdf");
+    // Make sure it opens in Calc.
+    CPPUNIT_ASSERT(supportsService(mxComponent, 
"com.sun.star.sheet.SpreadsheetDocument"));
+
+    // Created in Impress
+    loadFromURL(u"hybrid_impress_абв_αβγ.pdf");
+    // Make sure it opens in Impress.
+    CPPUNIT_ASSERT(supportsService(mxComponent, 
"com.sun.star.presentation.PresentationDocument"));
+}
 }
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/include/o3tl/char16_t2wchar_t.hxx 
b/include/o3tl/char16_t2wchar_t.hxx
index fcbbaf079ce3..c2640f627109 100644
--- a/include/o3tl/char16_t2wchar_t.hxx
+++ b/include/o3tl/char16_t2wchar_t.hxx
@@ -41,6 +41,7 @@ inline char16_t* toU(wchar_t* p) { return 
reinterpret_cast<char16_t*>(p); }
 inline char16_t const* toU(wchar_t const* p) { return 
reinterpret_cast<char16_t const*>(p); }
 
 inline std::u16string_view toU(std::wstring_view v) { return { toU(v.data()), 
v.size() }; }
+inline std::wstring_view toW(std::u16string_view v) { return { toW(v.data()), 
v.size() }; }
 #endif
 }
 
diff --git a/sdext/source/pdfimport/filterdet.cxx 
b/sdext/source/pdfimport/filterdet.cxx
index ef29e8a2c022..32248993ff6c 100644
--- a/sdext/source/pdfimport/filterdet.cxx
+++ b/sdext/source/pdfimport/filterdet.cxx
@@ -520,13 +520,11 @@ uno::Reference< io::XStream > getAdditionalStream( const 
OUString&
                                                    bool                        
                  bMayUseUI )
 {
     uno::Reference< io::XStream > xEmbed;
-    OString aPDFFile;
     OUString aSysUPath;
     if( osl_getSystemPathFromFileURL( rInPDFFileURL.pData, &aSysUPath.pData ) 
!= osl_File_E_None )
         return xEmbed;
-    aPDFFile = OUStringToOString( aSysUPath, osl_getThreadTextEncoding() );
 
-    std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( 
aPDFFile.getStr() ));
+    std::unique_ptr<pdfparse::PDFEntry> 
pEntry(pdfparse::PDFReader::read(aSysUPath));
     if( pEntry )
     {
         pdfparse::PDFFile* pPDFFile = 
dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
diff --git a/sdext/source/pdfimport/inc/pdfparse.hxx 
b/sdext/source/pdfimport/inc/pdfparse.hxx
index 7891419471d3..542a9ed4b1a5 100644
--- a/sdext/source/pdfimport/inc/pdfparse.hxx
+++ b/sdext/source/pdfimport/inc/pdfparse.hxx
@@ -292,10 +292,7 @@ struct PDFReader
 {
     PDFReader() = delete;
 
-    static std::unique_ptr<PDFEntry> read( const char* pFileName );
-#ifdef _WIN32
-    static std::unique_ptr<PDFEntry> read( const char* pBuffer, unsigned int 
nLen );
-#endif
+    static std::unique_ptr<PDFEntry> read(std::u16string_view aFileName);
 };
 
 } // namespace
diff --git a/sdext/source/pdfimport/pdfparse/pdfparse.cxx 
b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
index cdd3ac13ff35..8b3da7eb39d7 100644
--- a/sdext/source/pdfimport/pdfparse/pdfparse.cxx
+++ b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
@@ -36,7 +36,9 @@
 
 #include <string.h>
 
+#include <o3tl/char16_t2wchar_t.hxx>
 #include <o3tl/safeint.hxx>
+#include <osl/thread.h>
 #include <rtl/strbuf.hxx>
 #include <rtl/ustrbuf.hxx>
 #include <sal/log.hxx>
@@ -558,9 +560,14 @@ public:
 
 }
 
-std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
+std::unique_ptr<PDFEntry> PDFReader::read(std::u16string_view aFileName)
 {
-    file_iterator<> file_start( pFileName );
+#ifdef _WIN32
+    file_iterator<> file_start(std::wstring(o3tl::toW(aFileName)));
+#else
+    file_iterator<> file_start(
+        std::string(OUStringToOString(aFileName, 
osl_getThreadTextEncoding())));
+#endif
     if( ! file_start )
         return nullptr;
     file_iterator<> file_end = file_start.make_end();
diff --git a/sdext/source/pdfimport/test/pdfunzip.cxx 
b/sdext/source/pdfimport/test/pdfunzip.cxx
index c4d5b6bc0b71..14e534c0439c 100644
--- a/sdext/source/pdfimport/test/pdfunzip.cxx
+++ b/sdext/source/pdfimport/test/pdfunzip.cxx
@@ -224,7 +224,8 @@ typedef int(*PDFFileHdl)(const char*, const char*, 
PDFFile*);
 static int handleFile( const char* pInFile, const char* pOutFile, const char* 
pPassword, PDFFileHdl pHdl )
 {
     int nRet = 0;
-    std::unique_ptr<PDFEntry> pEntry = pdfparse::PDFReader::read( pInFile );
+    std::unique_ptr<PDFEntry> pEntry
+        = pdfparse::PDFReader::read(OStringToOUString(pInFile, 
osl_getThreadTextEncoding()));
     if( pEntry )
     {
         PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry.get());
diff --git a/sdext/source/pdfimport/wrapper/wrapper.cxx 
b/sdext/source/pdfimport/wrapper/wrapper.cxx
index f75821788d0f..8dfc060580c1 100644
--- a/sdext/source/pdfimport/wrapper/wrapper.cxx
+++ b/sdext/source/pdfimport/wrapper/wrapper.cxx
@@ -911,9 +911,8 @@ static bool checkEncryption( std::u16string_view            
               i_rPa
                              )
 {
     bool bSuccess = false;
-    OString aPDFFile = OUStringToOString( i_rPath, osl_getThreadTextEncoding() 
);
 
-    std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( 
aPDFFile.getStr() ));
+    std::unique_ptr<pdfparse::PDFEntry> 
pEntry(pdfparse::PDFReader::read(i_rPath));
     if( pEntry )
     {
         pdfparse::PDFFile* pPDFFile = 
dynamic_cast<pdfparse::PDFFile*>(pEntry.get());

Reply via email to