Rebased ref, commits from common ancestor:
commit 12f2ddf58c4b4b7ce35814be09a8f0b99bde18fb
Author: Andras Timar <andras.ti...@collabora.com>
AuthorDate: Mon May 10 22:48:59 2021 +0200
Commit: Andras Timar <andras.ti...@collabora.com>
CommitDate: Mon May 10 23:31:56 2021 +0200
Bump version to 5.4.7.2.M7 Change-Id: I0e5314312bbefd3bd59cd39f0f8b876de8463e25 diff --git a/configure.ac b/configure.ac index 9c25ac076c8c..bab32d23e6fa 100644 --- a/configure.ac +++ b/configure.ac @@ -9,7 +9,7 @@ dnl in order to create a configure script. # several non-alphanumeric characters, those are split off and used only for the # ABOUTBOXPRODUCTVERSIONSUFFIX in openoffice.lst. Why that is necessary, no idea. -AC_INIT([LibreOffice],[5.4.7.2.M6],[],[],[http://documentfoundation.org/]) +AC_INIT([LibreOffice],[5.4.7.2.M7],[],[],[http://documentfoundation.org/]) AC_PREREQ([2.59]) commit 036e62326d08cfdcda8b390720ab9c0cf9f8c3f7 Author: Luboš Luňák <l.lu...@collabora.com> AuthorDate: Thu Apr 29 20:10:34 2021 +0200 Commit: Andras Timar <andras.ti...@collabora.com> CommitDate: Mon May 10 23:31:56 2021 +0200 allow utf-8 in xml names (liborcus) (tdf#141672) Change-Id: Ib150d55b588a572e4352396f18de2331983b2aae Reviewed-on: https://gerrit.libreoffice.org/c/core/+/114892 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lu...@collabora.com> diff --git a/external/liborcus/UnpackedTarball_liborcus.mk b/external/liborcus/UnpackedTarball_liborcus.mk index 791436e66016..6df45cb2fb39 100644 --- a/external/liborcus/UnpackedTarball_liborcus.mk +++ b/external/liborcus/UnpackedTarball_liborcus.mk @@ -21,6 +21,10 @@ $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\ external/liborcus/0001-protect-the-self-closing-xml-element-code-against-se.patch \ )) +$(eval $(call gb_UnpackedTarball_add_patches,liborcus,\ + external/liborcus/allow-utf-8-in-xml-names.patch \ +)) + ifeq ($(OS),WNT) $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\ external/liborcus/windows-constants-hack.patch \ diff --git a/external/liborcus/allow-utf-8-in-xml-names.patch b/external/liborcus/allow-utf-8-in-xml-names.patch new file mode 100644 index 000000000000..efef24b84053 --- /dev/null +++ b/external/liborcus/allow-utf-8-in-xml-names.patch @@ -0,0 +1,289 @@ +From 
9889cb660372bc6c3da22fc274c73ea11040415f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= <l.lu...@centrum.cz> +Date: Thu, 29 Apr 2021 19:12:20 +0200 +Subject: [PATCH] allow utf-8 in xml names (#137) + +https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar +has a list of all allowed characters. +--- + include/orcus/sax_parser_base.hpp | 3 + + src/orcus_test_xml.cpp | 1 + + src/parser/sax_parser_base.cpp | 186 ++++++++++++++++++++++++++++-- + test/xml/non-ascii/check.txt | 4 + + test/xml/non-ascii/input.xml | 4 + + 5 files changed, 190 insertions(+), 8 deletions(-) + create mode 100644 test/xml/non-ascii/check.txt + create mode 100644 test/xml/non-ascii/input.xml + +diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp +index a117b3a2..1aeb8b38 100644 +--- a/include/orcus/sax_parser_base.hpp ++++ b/include/orcus/sax_parser_base.hpp +@@ -217,6 +217,9 @@ protected: + void element_name(parser_element& elem, const char* begin_pos); + void attribute_name(pstring& attr_ns, pstring& attr_name); + void characters_with_encoded_char(cell_buffer& buf); ++ ++ int is_name_char(); ++ int is_name_start_char(); + }; + + }} +diff --git a/src/orcus_test_xml.cpp b/src/orcus_test_xml.cpp +index 98e83297..89c8af72 100644 +--- a/src/orcus_test_xml.cpp ++++ b/src/orcus_test_xml.cpp +@@ -73,6 +73,7 @@ const char* sax_parser_test_dirs[] = { + SRCDIR"/test/xml/bom/", + SRCDIR"/test/xml/custom-decl-1/", + SRCDIR"/test/xml/cdata-1/" ++ SRCDIR"/test/xml/non-ascii/", + }; + + const char* sax_parser_parse_only_test_dirs[] = { +diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp +index 743130da..ecbd7f99 100644 +--- a/src/parser/sax_parser_base.cpp ++++ b/src/parser/sax_parser_base.cpp +@@ -296,20 +296,22 @@ void parser_base::value_with_encoded_char(cell_buffer& buf, pstring& str) + str = pstring(buf.get(), buf.size()); + + // Skip the closing quote. 
+- assert(cur_char() == '"'); ++ assert(!has_char() || cur_char() == '"'); + next(); + } + + bool parser_base::value(pstring& str, bool decode) + { + char c = cur_char(); +- if (c != '"') ++ if (c != '"' && c != '\'') + throw malformed_xml_error("value must be quoted", offset()); + ++ char quote_char = c; ++ + c = next_char_checked(); + + const char* p0 = mp_char; +- for (; c != '"'; c = next_char_checked()) ++ for (; c != quote_char; c = next_char_checked()) + { + if (decode && c == '&') + { +@@ -330,19 +332,187 @@ bool parser_base::value(pstring& str, bool decode) + return false; + } + ++// https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar ++// Return length of the character in bytes, otherwise 0. ++template< bool only_start_name > ++static ++int is_name_char_helper(const char* mp_char, const char* mp_end) ++{ ++ const unsigned char first = mp_char[0]; ++ // Note that ':' technically is an allowed name character, but it is handled separately ++ // e.g. in element_name(), so here pretend it isn't. ++ if (/*first == ':' ||*/ first == '_' || (first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z')) ++ return 1; ++ if (!only_start_name && (first == '-' || first == '.' || (first >= '0' && first <= '9'))) ++ return 1; ++ ++ if (first < 0x7f) // other ascii characters are not allowed ++ return 0; ++ if (mp_end < mp_char + 1) ++ return 0; ++ const unsigned char second = mp_char[1]; ++ ++ // 0xb7 = 0xc2 0xb7 utf-8 ++ if (!only_start_name && first == 0xc2 && second == 0xb7) ++ return 2; ++ ++ // [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] ++ // 0xc0 = 0xc3 0x80 utf-8 ++ if (first < 0xc3) ++ return 0; ++ // xd7 = 0xc3 0x97 utf-8, 0xf7 = 0xc3 0xb7 utf-8 ++ if (first == 0xc3) ++ return second >= 0x80 && second <= 0xff && second != 0x97 && second != 0xb7 ? 
2 : 0; ++ // 0x2ff = 0xcb 0xbf utf-8, 0x300 = 0xcc 0x80 utf-8 ++ if (first >= 0xc4 && first <= 0xcb) ++ return 2; ++ ++ // [#x0300-#x036F] ++ // 0x0300 = 0xcc 0x80 utf-8, 0x36f = 0xcd 0xaf utf-8 ++ if (!only_start_name && first == 0xcc) ++ return 2; ++ if (!only_start_name && first == 0xcd && second <= 0xaf) ++ return 2; ++ ++ // [#x370-#x37D] | [#x37F-#x1FFF] ++ // 0x370 = 0xcd 0xb0 utf-8, 0x37e = 0xcd 0xbe ++ if (first < 0xcd) ++ return 0; ++ if (first == 0xcd) ++ return second >= 0xb0 && second != 0xbe ? 2 : 0; ++ // 0x07ff = 0xdf 0xbf utf-8 (the last 2-byte utf-8) ++ if (first <= 0xdf) ++ return 2; ++ ++ if (first < 0xe0) ++ return 0; ++ if (mp_end < mp_char + 2) ++ return 0; ++ const unsigned char third = mp_char[2]; ++ ++ // 0x0800 = 0xe0 0xa0 0x80 utf-8, 0x1fff = 0xe1 0xbf 0xbf utf-8, 0x2000 = 0xe2 0x80 0x80 ++ if (first == 0xe0 || first == 0xe1) ++ return 3; ++ ++ // [#x200C-#x200D] ++ // 0x200c = 0xe2 0x80 0x8c utf-8, 0x200d = 0xe2 0x80 0x8d utf-8 ++ if (first < 0xe2) ++ return 0; ++ if (first == 0xe2 && second == 0x80 && (third == 0x8c || third == 0x8d)) ++ return 3; ++ ++ // [#x203F-#x2040] ++ // 0x203f = 0xe2 0x80 0xbf utf-8, 0x2040 = 0xe2 0x81 0x80 utf-8 ++ if (!only_start_name && first == 0xe2 && second == 0x80 && third == 0xbf) ++ return 3; ++ if (!only_start_name && first == 0xe2 && second == 0x81 && third == 0x80) ++ return 3; ++ ++ // [#x2070-#x218F] ++ // 0x2070 = 0xe2 0x81 0xb0 utf-8, 0x218f = 0xe2 0x86 0x8f utf-8 ++ if (first == 0xe2) ++ { ++ if (second < 0x81) ++ return 0; ++ if (second >= 0x81 && second < 0x86) ++ return 3; ++ if (second == 0x86 && third <= 0x8f) ++ return 3; ++ } ++ ++ // [#x2C00-#x2FEF] ++ // 0x2c00 = 0xe2 0xb0 0x80 utf-8, 0x2fef = 0xe2 0xbf 0xaf utf-8 ++ if (first == 0xe2) ++ { ++ if (second < 0xb0) ++ return 0; ++ if (second < 0xbf) ++ return 3; ++ if (second == 0xbf && third <= 0xaf) ++ return 3; ++ } ++ ++ // [#x3001-#xD7FF] ++ // 0x3001 = 0xe3 0x80 0x81 utf-8, 0xd7ff = 0xed 0x9f 0xbf utf-8, 0xd800 = 0xed 0xa0 0x80 
utf-8 ++ if (first < 0xe3) ++ return 0; ++ if (first < 0xed) ++ return 3; ++ if (first == 0xed && second <= 0x9f) ++ return 3; ++ ++ // [#xF900-#xFDCF] ++ // 0xf900 = 0xef 0xa4 0x80 utf-8, 0xfdcf = 0xef 0xb7 0x8f utf-8 ++ if (first == 0xef) ++ { ++ if (second < 0xa4) ++ return 0; ++ if (second < 0xb7) ++ return 3; ++ if (second == 0xb7 && third <= 0x8f) ++ return 3; ++ } ++ ++ // [#xFDF0-#xFFFD] ++ // 0xfdf0 = 0xef 0xb7 0xb0 utf-8, 0xfffd = 0xef 0xbf 0xbd utf-8 ++ if (first == 0xef) ++ { ++ assert(second >= 0xb7); ++ if (second == 0xb7 && third < 0xb0) ++ return 0; ++ if (second < 0xbe) ++ return 3; ++ if (second == 0xbf && third <= 0xbd) ++ return 3; ++ } ++ ++ if (first < 0xf0) ++ return 0; ++ if (mp_end < mp_char + 3) ++ return 0; ++ // const unsigned char fourth = mp_char[3]; ++ ++ // [#x10000-#xEFFFF] ++ // 0x10000 = 0xf0 0x90 0x80 0x80 utf-8, 0xeffff = 0xf3 0xaf 0xbf 0xbf utf-8, ++ // 0xf0000 = 0xf3 0xb0 0x80 0x80 utf-8 ++ if (first >= 0xf0 && first < 0xf2) ++ return 4; ++ if (first == 0xf3 && second < 0xb0) ++ return 4; ++ ++ return 0; ++} ++ ++int parser_base::is_name_char() ++{ ++ return is_name_char_helper<false>(mp_char, mp_end); ++} ++ ++int parser_base::is_name_start_char() ++{ ++ return is_name_char_helper<true>(mp_char, mp_end); ++} ++ + void parser_base::name(pstring& str) + { + const char* p0 = mp_char; +- char c = cur_char(); +- if (!is_alpha(c)) ++ int skip = is_name_start_char(); ++ if (skip == 0) + { + ::std::ostringstream os; +- os << "name must begin with an alphabet, but got this instead '" << c << "'"; ++ os << "name must begin with an alphabet, but got this instead '" << cur_char() << "'"; + throw malformed_xml_error(os.str(), offset()); + } ++ next(skip); + +- while (is_alpha(c) || is_numeric(c) || is_name_char(c)) +- c = next_char_checked(); ++ for(;;) ++ { ++ cur_char_checked(); // check end of xml stream ++ skip = is_name_char(); ++ if(skip == 0) ++ break; ++ next(skip); ++ } + + str = pstring(p0, mp_char-p0); + } +diff --git 
a/test/xml/non-ascii/check.txt b/test/xml/non-ascii/check.txt +new file mode 100644 +index 00000000..77b7c003 +--- /dev/null ++++ b/test/xml/non-ascii/check.txt +@@ -0,0 +1,4 @@ ++/Myšička ++/Myšička@jméno="Žužla" ++/Myšička/Nožičky ++/Myšička/Nožičky"4" +diff --git a/test/xml/non-ascii/input.xml b/test/xml/non-ascii/input.xml +new file mode 100644 +index 00000000..c516744b +--- /dev/null ++++ b/test/xml/non-ascii/input.xml +@@ -0,0 +1,4 @@ ++<?xml version="1.0" encoding="UTF-8"?> ++<Myšička jméno="Žužla"> ++ <Nožičky>4</Nožičky> ++</Myšička> +-- +2.26.2 + commit 70acef544db618ded97e9550a2519930d30e3fba Author: Stephan Bergmann <sberg...@redhat.com> AuthorDate: Tue Feb 16 09:30:09 2021 +0100 Commit: Andras Timar <andras.ti...@collabora.com> CommitDate: Mon May 10 23:31:54 2021 +0200 Improve checkExtension Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110970 Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> Tested-by: Jenkins (cherry picked from commit f456c4dacf700e064e112ef068ff7edb04239754) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110922 Reviewed-by: Michael Stahl <michael.st...@allotropia.de> (cherry picked from commit f19d95986756412e5d72047656eec17a720c5e57) Change-Id: Iff416a9c5930ad5903f7ee51a2abbc94d5f40800 diff --git a/shell/source/win32/SysShExec.cxx b/shell/source/win32/SysShExec.cxx index 1d0932e5db8d..19e0223f37d0 100644 --- a/shell/source/win32/SysShExec.cxx +++ b/shell/source/win32/SysShExec.cxx @@ -389,21 +389,28 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa } } pathname = OUString(SAL_U(path)); + // ShellExecuteExW appears to ignore trailing dots, so remove them: + while (pathname.endsWith(".", &pathname)) {} auto const n = pathname.lastIndexOf('.'); if (n > pathname.lastIndexOf('\\')) { auto const ext = pathname.copy(n + 1); - OUString env; - if (osl_getEnvironment(OUString("PATHEXT").pData, &env.pData) != osl_Process_E_None) - { - SAL_INFO("shell", "osl_getEnvironment(PATHEXT) 
failed"); - } - if (!(checkExtension(ext, env) - && checkExtension( - ext, - ".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.CLASS;.JAR"))) - { - throw css::lang::IllegalArgumentException( - "XSystemShellExecute.execute, cannot process <" + aCommand + ">", {}, 0); + if (!ext.isEmpty()) { + OUString env; + if (osl_getEnvironment(OUString("PATHEXT").pData, &env.pData) + != osl_Process_E_None) + { + SAL_INFO("shell", "osl_getEnvironment(PATHEXT) failed"); + } + if (!(checkExtension(ext, env) + && checkExtension( + ext, + ".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.CLASS;" + ".JAR;.APPLICATION;.LNK;.SCR"))) + { + throw css::lang::IllegalArgumentException( + "XSystemShellExecute.execute, cannot process <" + aCommand + ">", {}, + 0); + } } } } commit af34e2c563a1535e25c4ca9a933cb90299b8eef2 Author: Mike Kaganski <mike.kagan...@collabora.com> AuthorDate: Tue Jan 28 01:28:24 2020 +0300 Commit: Andras Timar <andras.ti...@collabora.com> CommitDate: Mon May 10 23:31:24 2021 +0200 tdf#130216: normalize paths with .. segments ... which obviously are rejected by SHGetFileInfoW and SHParseDisplayName that it calls internally. 
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/87565 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kagan...@collabora.com> Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org> Reviewed-on: https://gerrit.libreoffice.org/c/core/+/87737 Reviewed-by: Miklos Vajna <vmik...@collabora.com> (cherry picked from commit fc043d38c256243fb782cc48e7708feaeabba4ae) Change-Id: I2f5f3c675ea6aa1c2d92eef30be4399a8d600255 diff --git a/shell/source/win32/SysShExec.cxx b/shell/source/win32/SysShExec.cxx index a9e5a2c4ec7d..1d0932e5db8d 100644 --- a/shell/source/win32/SysShExec.cxx +++ b/shell/source/win32/SysShExec.cxx @@ -39,6 +39,7 @@ #endif #include <windows.h> #include <shellapi.h> +#include <Shlobj.h> #include <Shobjidl.h> #include <objbase.h> #if defined _MSC_VER @@ -307,21 +308,33 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa + "> failed with " + OUString::number(e1)), {}, 0); } + const int MAX_LONG_PATH = 32767; // max longpath on WinNT + if (pathname.getLength() >= MAX_LONG_PATH) + { + throw css::lang::IllegalArgumentException( + "XSystemShellExecute.execute, path <" + pathname + "> too long", {}, 0); + } + wchar_t path[MAX_LONG_PATH]; + wcscpy_s(path, SAL_W(pathname.getStr())); for (int i = 0;; ++i) { + // tdf#130216: normalize c:\path\to\something\..\else into c:\path\to\else + if (PathResolve(path, nullptr, PRF_VERIFYEXISTS | PRF_REQUIREABSOLUTE) == 0) + { + throw css::lang::IllegalArgumentException( + "XSystemShellExecute.execute, PathResolve(" + OUString(SAL_U(path)) + + ") failed", + {}, 0); + } SHFILEINFOW info; - if (SHGetFileInfoW( - SAL_W(pathname.getStr()), 0, &info, sizeof info, SHGFI_EXETYPE) - != 0) + if (SHGetFileInfoW(path, 0, &info, sizeof info, SHGFI_EXETYPE) != 0) { throw css::lang::IllegalArgumentException( "XSystemShellExecute.execute, cannot process <" + aCommand + ">", {}, 0); } - if (SHGetFileInfoW( - SAL_W(pathname.getStr()), 0, &info, sizeof info, SHGFI_ATTRIBUTES) - == 0) + if (SHGetFileInfoW(path, 
0, &info, sizeof info, SHGFI_ATTRIBUTES) == 0) { throw css::lang::IllegalArgumentException( - "XSystemShellExecute.execute, SHGetFileInfoW(" + pathname + ") failed", {}, + "XSystemShellExecute.execute, SHGetFileInfoW(" + OUString(SAL_U(path)) + ") failed", {}, 0); } if ((info.dwAttributes & SFGAO_LINK) == 0) { @@ -346,7 +359,7 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa + o3tl::runtimeToOUString(e3.what())), {}, 0); } - e2 = file->Load(SAL_W(pathname.getStr()), STGM_READ); + e2 = file->Load(path, STGM_READ); if (FAILED(e2)) { throw css::lang::IllegalArgumentException( ("XSystemShellExecute.execute, IPersistFile.Load failed with " @@ -360,16 +373,14 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa + OUString::number(e2)), {}, 0); } - wchar_t path[MAX_PATH]; WIN32_FIND_DATAW wfd; - e2 = link->GetPath(path, MAX_PATH, &wfd, SLGP_RAWPATH); + e2 = link->GetPath(path, SAL_N_ELEMENTS(path), &wfd, SLGP_RAWPATH); if (FAILED(e2)) { throw css::lang::IllegalArgumentException( ("XSystemShellExecute.execute, IShellLink.GetPath failed with " + OUString::number(e2)), {}, 0); } - pathname = SAL_U(path); // Fail at some arbitrary nesting depth, to avoid an infinite loop: if (i == 30) { throw css::lang::IllegalArgumentException( @@ -377,6 +388,7 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa {}, 0); } } + pathname = OUString(SAL_U(path)); auto const n = pathname.lastIndexOf('.'); if (n > pathname.lastIndexOf('\\')) { auto const ext = pathname.copy(n + 1); _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits