.git-hooks/pre-commit                                                          
           |   31 -
 external/boost/UnpackedTarball_boost.mk                                        
           |    2 
 external/boost/boost.file_iterator.sharing_win.patch                           
           |  158 ++++++++++
 filter/CppunitTest_filter_textfilterdetect.mk                                  
           |    4 
 
filter/qa/unit/data/hybrid_calc_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
    |binary
 
filter/qa/unit/data/hybrid_impress_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
 |binary
 
filter/qa/unit/data/hybrid_writer_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
  |binary
 filter/qa/unit/textfilterdetect.cxx                                            
           |   41 ++
 include/o3tl/char16_t2wchar_t.hxx                                              
           |    1 
 sdext/source/pdfimport/filterdet.cxx                                           
           |    5 
 sdext/source/pdfimport/inc/pdfparse.hxx                                        
           |    5 
 sdext/source/pdfimport/pdfparse/pdfparse.cxx                                   
           |   11 
 sdext/source/pdfimport/test/pdfunzip.cxx                                       
           |    3 
 sdext/source/pdfimport/wrapper/wrapper.cxx                                     
           |    3 
 14 files changed, 216 insertions(+), 48 deletions(-)

New commits:
commit a5a49657dc17609a05dca59a8521fd71d14fe76e
Author:     Mike Kaganski <[email protected]>
AuthorDate: Fri Dec 1 16:48:44 2023 +0300
Commit:     Mike Kaganski <[email protected]>
CommitDate: Fri Dec 1 16:13:38 2023 +0100

    tdf#158442: fix opening hybrid PDFs on Windows
    
    Commit 046e9545956d8ad1d69345d6b4a4c0a33714d179 (Try to revert to use
    of file_iterator from boost on Windows, 2023-10-31) had introduced a
    problem that pdfparse::PDFReader::read couldn't create file_iterator
    for files already opened with write access: mmap_file_iterator ctor
    on Windows used single FILE_SHARE_READ as dwSharedMode parameter for
    CreateFileA WinAPI; and that failed, when the file was already opened
    using GENERIC_WRITE in dwDesiredAccess - which happens when opening
    stream in TypeDetection::impl_detectTypeFlatAndDeep.
    
    Fix this by patching boosts' mmap_file_iterator constructor to use
    FILE_SHARE_READ | FILE_SHARE_WRITE, like we do in osl_openFile.
    
    But there was a pre-existing problem of using char-based CreateFileA
    API, which disallows opening any files with names not representable
    in current Windows codepage. Such hybrid PDF files would still fail
    creation of the file_iterator, and open as PDF.
    
    Fix that by further patching boost to have wstring-based constructors
    for file_iterator and mmap_file_iterator on Windows, which would call
    CreateFileW.
    
    Change-Id: Ib190bc090636159ade390b3dd120957d06d7b89b
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160218
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/external/boost/UnpackedTarball_boost.mk 
b/external/boost/UnpackedTarball_boost.mk
index acdc5d331c76..d8bd131ac8df 100644
--- a/external/boost/UnpackedTarball_boost.mk
+++ b/external/boost/UnpackedTarball_boost.mk
@@ -35,6 +35,8 @@ boost_patches += boost-ios.patch.0
 # violations":
 boost_patches += 
0001-Avoid-boost-phoenix-placeholders-uarg1.10-ODR-violat.patch.2
 
+boost_patches += boost.file_iterator.sharing_win.patch
+
 $(eval $(call gb_UnpackedTarball_UnpackedTarball,boost))
 
 $(eval $(call gb_UnpackedTarball_set_tarball,boost,$(BOOST_TARBALL)))
diff --git a/external/boost/boost.file_iterator.sharing_win.patch 
b/external/boost/boost.file_iterator.sharing_win.patch
new file mode 100644
index 000000000000..b3b8bea3f3ff
--- /dev/null
+++ b/external/boost/boost.file_iterator.sharing_win.patch
@@ -0,0 +1,158 @@
+--- 
foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp.orig
++++ foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp
+@@ -181,67 +181,28 @@ public:
+     {}
+ 
+     explicit mmap_file_iterator(std::string const& fileName)
+-      : m_filesize(0), m_curChar(0)
+-    {
+-        HANDLE hFile = ::CreateFileA(
++      : mmap_file_iterator(::CreateFileA(
+             fileName.c_str(),
+             GENERIC_READ,
+-            FILE_SHARE_READ,
++            FILE_SHARE_READ | FILE_SHARE_WRITE,
+             NULL,
+             OPEN_EXISTING,
+             FILE_FLAG_SEQUENTIAL_SCAN,
+             NULL
+-        );
+-
+-        if (hFile == INVALID_HANDLE_VALUE)
+-            return;
+-
+-        // Store the size of the file, it's used to construct
+-        //  the end iterator
+-        m_filesize = ::GetFileSize(hFile, NULL);
++        ))
++    {}
+ 
+-        HANDLE hMap = ::CreateFileMapping(
+-            hFile,
++    explicit mmap_file_iterator(std::wstring const& fileName)
++      : mmap_file_iterator(::CreateFileW(
++            fileName.c_str(),
++            GENERIC_READ,
++            FILE_SHARE_READ | FILE_SHARE_WRITE,
+             NULL,
+-            PAGE_READONLY,
+-            0, 0,
++            OPEN_EXISTING,
++            FILE_FLAG_SEQUENTIAL_SCAN,
+             NULL
+-        );
+-
+-        if (hMap == NULL)
+-        {
+-            ::CloseHandle(hFile);
+-            return;
+-        }
+-
+-        LPVOID pMem = ::MapViewOfFile(
+-            hMap,
+-            FILE_MAP_READ,
+-            0, 0, 0
+-        );
+-
+-        if (pMem == NULL)
+-        {
+-            ::CloseHandle(hMap);
+-            ::CloseHandle(hFile);
+-            return;
+-        }
+-
+-        // We hold both the file handle and the memory pointer.
+-        // We can close the hMap handle now because Windows holds internally
+-        //  a reference to it since there is a view mapped.
+-        ::CloseHandle(hMap);
+-
+-        // It seems like we can close the file handle as well (because
+-        //  a reference is hold by the filemap object).
+-        ::CloseHandle(hFile);
+-
+-        // Store the handles inside the shared_ptr (with the custom 
destructors)
+-        m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
+-
+-        // Start of the file
+-        m_curChar = m_mem.get();
+-    }
++        ))
++    {}
+ 
+     mmap_file_iterator(const mmap_file_iterator& iter)
+     { *this = iter; }
+@@ -290,6 +251,59 @@ private:
+     boost::shared_ptr<CharT> m_mem;
+     std::size_t m_filesize;
+     CharT* m_curChar;
++    
++    explicit mmap_file_iterator(HANDLE hFile)
++      : m_filesize(0), m_curChar(0)
++    {
++        if (hFile == INVALID_HANDLE_VALUE)
++            return;
++
++        // Store the size of the file, it's used to construct
++        //  the end iterator
++        m_filesize = ::GetFileSize(hFile, NULL);
++
++        HANDLE hMap = ::CreateFileMapping(
++            hFile,
++            NULL,
++            PAGE_READONLY,
++            0, 0,
++            NULL
++        );
++
++        if (hMap == NULL)
++        {
++            ::CloseHandle(hFile);
++            return;
++        }
++
++        LPVOID pMem = ::MapViewOfFile(
++            hMap,
++            FILE_MAP_READ,
++            0, 0, 0
++        );
++
++        if (pMem == NULL)
++        {
++            ::CloseHandle(hMap);
++            ::CloseHandle(hFile);
++            return;
++        }
++
++        // We hold both the file handle and the memory pointer.
++        // We can close the hMap handle now because Windows holds internally
++        //  a reference to it since there is a view mapped.
++        ::CloseHandle(hMap);
++
++        // It seems like we can close the file handle as well (because
++        //  a reference is hold by the filemap object).
++        ::CloseHandle(hFile);
++
++        // Store the handles inside the shared_ptr (with the custom 
destructors)
++        m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
++
++        // Start of the file
++        m_curChar = m_mem.get();
++    }
+ };
+ 
+ #endif // BOOST_SPIRIT_FILEITERATOR_WINDOWS
+--- foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp.orig
++++ foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp
+@@ -170,6 +170,12 @@ public:
+     :   base_t(adapted_t(fileName))
+     {}
+ 
++#ifdef BOOST_SPIRIT_FILEITERATOR_WINDOWS
++    file_iterator(std::wstring const& fileName)
++    :   base_t(adapted_t(fileName))
++    {}
++#endif
++
+     file_iterator(const base_t& iter)
+     :   base_t(iter)
+     {}
diff --git a/filter/CppunitTest_filter_textfilterdetect.mk 
b/filter/CppunitTest_filter_textfilterdetect.mk
index be2697a17406..e931a5741a1d 100644
--- a/filter/CppunitTest_filter_textfilterdetect.mk
+++ b/filter/CppunitTest_filter_textfilterdetect.mk
@@ -42,4 +42,8 @@ $(eval $(call 
gb_CppunitTest_use_rdb,filter_textfilterdetect,services))
 
 $(eval $(call gb_CppunitTest_use_configuration,filter_textfilterdetect))
 
+$(eval $(call gb_CppunitTest_use_custom_headers,filter_textfilterdetect, \
+    officecfg/registry \
+))
+
 # vim: set noet sw=4 ts=4:
diff --git 
"a/filter/qa/unit/data/hybrid_calc_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
 
"b/filter/qa/unit/data/hybrid_calc_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
new file mode 100644
index 000000000000..b104113a5238
Binary files /dev/null and 
"b/filter/qa/unit/data/hybrid_calc_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
 differ
diff --git 
"a/filter/qa/unit/data/hybrid_impress_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
 
"b/filter/qa/unit/data/hybrid_impress_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
new file mode 100644
index 000000000000..78e7136211f4
Binary files /dev/null and 
"b/filter/qa/unit/data/hybrid_impress_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
 differ
diff --git 
"a/filter/qa/unit/data/hybrid_writer_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
 
"b/filter/qa/unit/data/hybrid_writer_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
new file mode 100644
index 000000000000..00cf3de44c5c
Binary files /dev/null and 
"b/filter/qa/unit/data/hybrid_writer_\320\260\320\261\320\262_\316\261\316\262\316\263.pdf"
 differ
diff --git a/filter/qa/unit/textfilterdetect.cxx 
b/filter/qa/unit/textfilterdetect.cxx
index e8de97a8df61..c88de0e8790d 100644
--- a/filter/qa/unit/textfilterdetect.cxx
+++ b/filter/qa/unit/textfilterdetect.cxx
@@ -16,7 +16,9 @@
 #include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
 #include <com/sun/star/text/XTextDocument.hpp>
 
+#include <comphelper/configuration.hxx>
 #include <comphelper/propertyvalue.hxx>
+#include <officecfg/Office/Common.hxx>
 #include <sfx2/docfac.hxx>
 #include <unotools/mediadescriptor.hxx>
 #include <unotools/streamwrap.hxx>
@@ -31,6 +33,11 @@ using namespace com::sun::star;
 
 namespace
 {
+bool supportsService(const uno::Reference<lang::XComponent>& x, const 
OUString& s)
+{
+    return uno::Reference<lang::XServiceInfo>(x, 
uno::UNO_QUERY_THROW)->supportsService(s);
+}
+
 /// Test class for PlainTextFilterDetect.
 class TextFilterDetectTest : public UnoApiTest
 {
@@ -63,10 +70,6 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testTdf114428)
 
 CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
 {
-    auto supportsService = [](const uno::Reference<lang::XComponent>& x, const 
OUString& s) {
-        return uno::Reference<lang::XServiceInfo>(x, 
uno::UNO_QUERY_THROW)->supportsService(s);
-    };
-
     // Given an empty file, with a pptx extension
     // When loading the file
     loadFromURL(u"empty.pptx");
@@ -172,6 +175,36 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
         CPPUNIT_ASSERT_EQUAL(u"Writer template’s first line"_ustr, 
xParagraph->getString());
     }
 }
+
+CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testHybridPDFFile)
+{
+    // Make sure that file locking is ON
+    {
+        std::shared_ptr<comphelper::ConfigurationChanges> xChanges(
+            comphelper::ConfigurationChanges::create());
+        
officecfg::Office::Common::Misc::UseDocumentSystemFileLocking::set(true, 
xChanges);
+        xChanges->commit();
+    }
+
+    // Given a hybrid PDF file
+
+    // Created in Writer
+    loadFromURL(u"hybrid_writer_абв_αβγ.pdf");
+    // Make sure it opens in Writer.
+    // Without the accompanying fix in place, this test would have failed on 
Windows, as it was
+    // opened in Draw instead.
+    CPPUNIT_ASSERT(supportsService(mxComponent, 
"com.sun.star.text.TextDocument"));
+
+    // Created in Calc
+    loadFromURL(u"hybrid_calc_абв_αβγ.pdf");
+    // Make sure it opens in Calc.
+    CPPUNIT_ASSERT(supportsService(mxComponent, 
"com.sun.star.sheet.SpreadsheetDocument"));
+
+    // Created in Impress
+    loadFromURL(u"hybrid_impress_абв_αβγ.pdf");
+    // Make sure it opens in Impress.
+    CPPUNIT_ASSERT(supportsService(mxComponent, 
"com.sun.star.presentation.PresentationDocument"));
+}
 }
 
 CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/include/o3tl/char16_t2wchar_t.hxx 
b/include/o3tl/char16_t2wchar_t.hxx
index fcbbaf079ce3..c2640f627109 100644
--- a/include/o3tl/char16_t2wchar_t.hxx
+++ b/include/o3tl/char16_t2wchar_t.hxx
@@ -41,6 +41,7 @@ inline char16_t* toU(wchar_t* p) { return 
reinterpret_cast<char16_t*>(p); }
 inline char16_t const* toU(wchar_t const* p) { return 
reinterpret_cast<char16_t const*>(p); }
 
 inline std::u16string_view toU(std::wstring_view v) { return { toU(v.data()), 
v.size() }; }
+inline std::wstring_view toW(std::u16string_view v) { return { toW(v.data()), 
v.size() }; }
 #endif
 }
 
diff --git a/sdext/source/pdfimport/filterdet.cxx 
b/sdext/source/pdfimport/filterdet.cxx
index e8d2e11d30ba..d9fdcc03c6ab 100644
--- a/sdext/source/pdfimport/filterdet.cxx
+++ b/sdext/source/pdfimport/filterdet.cxx
@@ -555,7 +555,6 @@ uno::Reference< io::XStream > getAdditionalStream( const 
OUString&
                                                    bool                        
                  bMayUseUI )
 {
     uno::Reference< io::XStream > xEmbed;
-    OString aPDFFile;
     OUString aSysUPath;
     if( osl_getSystemPathFromFileURL( rInPDFFileURL.pData, &aSysUPath.pData ) 
!= osl_File_E_None )
         return xEmbed;
@@ -563,9 +562,7 @@ uno::Reference< io::XStream > getAdditionalStream( const 
OUString&
     if (!detectHasAdditionalStreams(aSysUPath))
         return xEmbed;
 
-    aPDFFile = OUStringToOString( aSysUPath, osl_getThreadTextEncoding() );
-
-    std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( 
aPDFFile.getStr() ));
+    std::unique_ptr<pdfparse::PDFEntry> 
pEntry(pdfparse::PDFReader::read(aSysUPath));
     if( pEntry )
     {
         pdfparse::PDFFile* pPDFFile = 
dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
diff --git a/sdext/source/pdfimport/inc/pdfparse.hxx 
b/sdext/source/pdfimport/inc/pdfparse.hxx
index 7891419471d3..542a9ed4b1a5 100644
--- a/sdext/source/pdfimport/inc/pdfparse.hxx
+++ b/sdext/source/pdfimport/inc/pdfparse.hxx
@@ -292,10 +292,7 @@ struct PDFReader
 {
     PDFReader() = delete;
 
-    static std::unique_ptr<PDFEntry> read( const char* pFileName );
-#ifdef _WIN32
-    static std::unique_ptr<PDFEntry> read( const char* pBuffer, unsigned int 
nLen );
-#endif
+    static std::unique_ptr<PDFEntry> read(std::u16string_view aFileName);
 };
 
 } // namespace
diff --git a/sdext/source/pdfimport/pdfparse/pdfparse.cxx 
b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
index cdd3ac13ff35..8b3da7eb39d7 100644
--- a/sdext/source/pdfimport/pdfparse/pdfparse.cxx
+++ b/sdext/source/pdfimport/pdfparse/pdfparse.cxx
@@ -36,7 +36,9 @@
 
 #include <string.h>
 
+#include <o3tl/char16_t2wchar_t.hxx>
 #include <o3tl/safeint.hxx>
+#include <osl/thread.h>
 #include <rtl/strbuf.hxx>
 #include <rtl/ustrbuf.hxx>
 #include <sal/log.hxx>
@@ -558,9 +560,14 @@ public:
 
 }
 
-std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
+std::unique_ptr<PDFEntry> PDFReader::read(std::u16string_view aFileName)
 {
-    file_iterator<> file_start( pFileName );
+#ifdef _WIN32
+    file_iterator<> file_start(std::wstring(o3tl::toW(aFileName)));
+#else
+    file_iterator<> file_start(
+        std::string(OUStringToOString(aFileName, 
osl_getThreadTextEncoding())));
+#endif
     if( ! file_start )
         return nullptr;
     file_iterator<> file_end = file_start.make_end();
diff --git a/sdext/source/pdfimport/test/pdfunzip.cxx 
b/sdext/source/pdfimport/test/pdfunzip.cxx
index b9bf3a62f14f..6db3b740d668 100644
--- a/sdext/source/pdfimport/test/pdfunzip.cxx
+++ b/sdext/source/pdfimport/test/pdfunzip.cxx
@@ -224,7 +224,8 @@ typedef int(*PDFFileHdl)(const char*, const char*, 
PDFFile*);
 static int handleFile( const char* pInFile, const char* pOutFile, const char* 
pPassword, PDFFileHdl pHdl )
 {
     int nRet = 0;
-    std::unique_ptr<PDFEntry> pEntry = pdfparse::PDFReader::read( pInFile );
+    std::unique_ptr<PDFEntry> pEntry
+        = pdfparse::PDFReader::read(OStringToOUString(pInFile, 
osl_getThreadTextEncoding()));
     if( pEntry )
     {
         PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry.get());
diff --git a/sdext/source/pdfimport/wrapper/wrapper.cxx 
b/sdext/source/pdfimport/wrapper/wrapper.cxx
index ad25f85ebc09..ade4dc5edb6b 100644
--- a/sdext/source/pdfimport/wrapper/wrapper.cxx
+++ b/sdext/source/pdfimport/wrapper/wrapper.cxx
@@ -911,9 +911,8 @@ static bool checkEncryption( std::u16string_view            
               i_rPa
                              )
 {
     bool bSuccess = false;
-    OString aPDFFile = OUStringToOString( i_rPath, osl_getThreadTextEncoding() 
);
 
-    std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( 
aPDFFile.getStr() ));
+    std::unique_ptr<pdfparse::PDFEntry> 
pEntry(pdfparse::PDFReader::read(i_rPath));
     if( pEntry )
     {
         pdfparse::PDFFile* pPDFFile = 
dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
commit 8a0015c35f3f137e4f3a80e40616bc078e265a1c
Author:     Mike Kaganski <[email protected]>
AuthorDate: Fri Dec 1 16:43:49 2023 +0300
Commit:     Mike Kaganski <[email protected]>
CommitDate: Fri Dec 1 16:13:32 2023 +0100

    Drop allownonascii check from pre-commit checks
    
    Supposedly, at this day and age, it is OK to use non-ascii file names.
    Specifically, this is intended to allow such names for bugdocs, which
    allows simpler testing of problems with handling those.
    
    An alternative would be to rename bugdocs at runtime; but that still
    requires that the target filesystem supports such names, so...
    
    Change-Id: I25da2402f311d59c5777c4cd302147d6965dea5f
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160217
    Tested-by: Jenkins
    Reviewed-by: Mike Kaganski <[email protected]>

diff --git a/.git-hooks/pre-commit b/.git-hooks/pre-commit
index 521ed202b9ac..48f131920fd3 100755
--- a/.git-hooks/pre-commit
+++ b/.git-hooks/pre-commit
@@ -381,37 +381,6 @@ if ( system( "git rev-parse --verify HEAD >/dev/null 2>&1" 
) == 0 )
     $against="HEAD"
 }
 
-# If you want to allow non-ascii filenames set this variable to true.
-my $allownonascii=`git config hooks.allownonascii`;
-chomp $allownonascii;
-
-# Cross platform projects tend to avoid non-ascii filenames; prevent
-# them from being added to the repository. We exploit the fact that the
-# printable range starts at the space character and ends with tilde.
-if ( $allownonascii ne "true" &&
-    # Note that the use of brackets around a tr range is ok here, (it's
-    # even required, for portability to Solaris 10's /usr/bin/tr), since
-    # the square bracket bytes happen to fall in the designated range.
-    `git diff --cached --name-only --diff-filter=A -z $against | \
-     LC_ALL=C tr -d '[ -~]\\0'` ne "" )
-{
-    print <<EOM;
-Error: Attempt to add a non-ascii file name.
-
-This can cause problems if you want to work
-with people on other platforms.
-
-To be portable it is advisable to rename the file ...
-
-If you know what you are doing you can disable this
-check using:
-
-  git config hooks.allownonascii true
-
-EOM
-    exit( 1 );
-}
-
 # Block large files.
 open( FILES, "git diff --cached --name-only --diff-filter=A -z $against |" ) 
||  die "Cannot run git diff-index.";
 while (<FILES>)

Reply via email to