Changeset: 246b51d94896 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/246b51d94896
Modified Files:
MonetDB.spec
debian/control
sql/test/BugTracker-2025/Tests/All
Branch: default
Log Message:
merged with Mar2025
diffs (truncated from 987 to 300 lines):
diff --git a/MonetDB.spec b/MonetDB.spec
--- a/MonetDB.spec
+++ b/MonetDB.spec
@@ -140,7 +140,7 @@ BuildRequires: pkgconfig(openssl) >= 1.1
%global with_openssl 1
%endif
%if %{with pcre}
-BuildRequires: pkgconfig(libpcre) >= 4.5
+BuildRequires: pkgconfig(libpcre2-8)
%endif
BuildRequires: pkgconfig(zlib)
BuildRequires: pkgconfig(liblz4) >= 1.8
diff --git a/NT/mksqlwxs.py b/NT/mksqlwxs.py
--- a/NT/mksqlwxs.py
+++ b/NT/mksqlwxs.py
@@ -169,7 +169,7 @@ def main():
vcpkg.format(r'bin\libxml2.dll'),
vcpkg.format(r'bin\lz4.dll'),
vcpkg.format(r'bin\liblzma.dll'),
- vcpkg.format(r'bin\pcre.dll'),
+ vcpkg.format(r'bin\pcre2-8.dll'),
vcpkg.format(r'bin\zlib1.dll')])
id = comp(debug, id, 14,
[r'bin\mclient.pdb',
@@ -230,7 +230,7 @@ def main():
vcpkg.format(r'lib\libxml2.lib'),
vcpkg.format(r'lib\lz4.lib'),
vcpkg.format(r'lib\lzma.lib'),
- vcpkg.format(r'lib\pcre.lib'),
+ vcpkg.format(r'lib\pcre2-8.lib'),
vcpkg.format(r'lib\zlib.lib')])
print(r' </Directory>')
print(r' <Directory Id="share" Name="share">')
diff --git a/cmake/Modules/FindPCRE.cmake b/cmake/Modules/FindPCRE.cmake
--- a/cmake/Modules/FindPCRE.cmake
+++ b/cmake/Modules/FindPCRE.cmake
@@ -24,9 +24,9 @@
# PCRE_VERSION - The version found.
# PCRE_FOUND - True if pcre found.
-find_path(PCRE_INCLUDE_DIR NAMES pcre.h)
+find_path(PCRE_INCLUDE_DIR NAMES pcre2.h)
-find_library(PCRE_LIBRARIES NAMES pcre)
+find_library(PCRE_LIBRARIES NAMES pcre2-8)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PCRE
@@ -37,9 +37,9 @@ find_package_handle_standard_args(PCRE
mark_as_advanced(PCRE_INCLUDE_DIR PCRE_LIBRARIES PCRE_VERSION)
if(PCRE_FOUND)
- file(STRINGS "${PCRE_INCLUDE_DIR}/pcre.h" PCRE_VERSION_LINES REGEX "[ \t]*#define[ \t]+PCRE_(MAJOR|MINOR)")
- string(REGEX REPLACE ".*PCRE_MAJOR *\([0-9]*\).*" "\\1" PCRE_VERSION_MAJOR "${PCRE_VERSION_LINES}")
- string(REGEX REPLACE ".*PCRE_MINOR *\([0-9]*\).*" "\\1" PCRE_VERSION_MINOR "${PCRE_VERSION_LINES}")
+ file(STRINGS "${PCRE_INCLUDE_DIR}/pcre2.h" PCRE_VERSION_LINES REGEX "[ \t]*#define[ \t]+PCRE2_(MAJOR|MINOR)")
+ string(REGEX REPLACE ".*PCRE2_MAJOR *\([0-9]*\).*" "\\1" PCRE_VERSION_MAJOR "${PCRE_VERSION_LINES}")
+ string(REGEX REPLACE ".*PCRE2_MINOR *\([0-9]*\).*" "\\1" PCRE_VERSION_MINOR "${PCRE_VERSION_LINES}")
set(PCRE_VERSION "${PCRE_VERSION_MAJOR}.${PCRE_VERSION_MINOR}")
if(NOT TARGET PCRE::PCRE AND
diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -7,7 +7,7 @@ Vcs-Browser: https://dev.monetdb.org/hg/
Vcs-Hg: https://dev.monetdb.org/hg/MonetDB/ -b default
Build-Depends: debhelper-compat (= 12), cmake (>= 3.12), bison,
libbz2-dev, libcurl4-gnutls-dev, libgeos-dev (>= 3.10.0),
- libpcre3-dev, libreadline-dev, liblzma-dev, liblz4-dev (>= 1.8.0),
+ libpcre2-dev, libreadline-dev, liblzma-dev, liblz4-dev (>= 1.8.0),
libxml2-dev, pkg-config,
python3, python3-dev, python3-numpy,
unixodbc-dev, zlib1g-dev, r-base-dev,
diff --git a/monetdb5/ChangeLog.Mar2025 b/monetdb5/ChangeLog.Mar2025
--- a/monetdb5/ChangeLog.Mar2025
+++ b/monetdb5/ChangeLog.Mar2025
@@ -1,3 +1,9 @@
# ChangeLog file for MonetDB5
# This file is updated with Maddlog
+* Fri Aug 1 2025 Sjoerd Mullender <[email protected]>
+- The PCRE module has been ported to the PCRE2 version of the library.
+ The main difference is in the regexp_replace function which now no
+ longer accepts \ to introduce replacements. Only $ is accepted (it
+ was already accepted before).
+
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -15,12 +15,7 @@
* PCRE library interface
* The PCRE library is a set of functions that implement regular
* expression pattern matching using the same syntax and semantics as Perl,
- * with just a few differences. The current implementation of PCRE
- * (release 4.x) corresponds approximately with Perl 5.8, including support
- * for UTF-8 encoded strings. However, this support has to be
- * explicitly enabled; it is not the default.
- *
- * ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
+ * with just a few differences.
*/
#include "monetdb_config.h"
#include <string.h>
@@ -34,19 +29,15 @@
#include <wctype.h>
#ifdef HAVE_LIBPCRE
-#include <pcre.h>
-#ifndef PCRE_STUDY_JIT_COMPILE
-/* old library version on e.g. EPEL 6 */
-#define pcre_free_study(x) pcre_free(x)
-#define PCRE_STUDY_JIT_COMPILE 0
-#endif
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
#define JIT_COMPILE_MIN 1024 /* when to try JIT compilation of patterns */
#else
#include <regex.h>
-typedef regex_t pcre;
+typedef regex_t pcre2_code;
#endif
/* current implementation assumes simple %keyword% [keyw%]* */
@@ -316,26 +307,6 @@ mnre_create(const char *pat, bool caseig
return NULL;
}
-#ifdef HAVE_LIBPCRE
-static str
-pcre_compile_wrap(pcre **res, const char *pattern, bit insensitive)
-{
- pcre *r;
- const char *err_p = NULL;
- int errpos = 0;
- int options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK | PCRE_MULTILINE;
- if (insensitive)
- options |= PCRE_CASELESS;
-
- if ((r = pcre_compile(pattern, options, &err_p, &errpos, NULL)) == NULL) {
- throw(MAL, "pcre.compile", OPERATION_FAILED
- " with\n'%s'\nat %d in\n'%s'.\n", err_p, errpos, pattern);
- }
- *res = r;
- return MAL_SUCCEED;
-}
-#endif
-
/* maximum number of back references and quoted \ or $ in replacement string */
#define MAX_NR_REFS 20
@@ -346,160 +317,26 @@ struct backref {
};
#ifdef HAVE_LIBPCRE
-/* fill in parameter backrefs (length maxrefs) with information about
- * back references in the replacement string; a back reference is a
- * dollar or backslash followed by a number */
-static int
-parse_replacement(const char *replacement, int len_replacement,
- struct backref *backrefs, int maxrefs)
-{
- int nbackrefs = 0;
-
- for (int i = 0; i < len_replacement && nbackrefs < maxrefs; i++) {
- if (replacement[i] == '$' || replacement[i] == '\\') {
- char *endptr;
- backrefs[nbackrefs].idx = strtol(replacement + i + 1, &endptr, 10);
- if (endptr > replacement + i + 1) {
- int k = (int) (endptr - (replacement + i + 1));
- backrefs[nbackrefs].start = i;
- backrefs[nbackrefs].end = i + k + 1;
- nbackrefs++;
- } else if (replacement[i] == replacement[i + 1]) {
- /* doubled $ or \, we must copy just one to the output */
- backrefs[nbackrefs].idx = INT_MAX; /* impossible value > 0 */
- backrefs[nbackrefs].start = i;
- backrefs[nbackrefs].end = i + 1;
- i++; /* don't look at second $ or \ again */
- nbackrefs++;
- }
- /* else: $ or \ followed by something we don't recognize,
- * so just leave it */
- }
- }
- return nbackrefs;
-}
-
-static char *
-single_replace(pcre *pcre_code, pcre_extra *extra,
- const char *origin_str, int len_origin_str,
- int exec_options, int *ovector, int ovecsize,
- const char *replacement, int len_replacement,
- struct backref *backrefs, int nbackrefs,
- bool global, char *result, int *max_result)
+static PCRE2_UCHAR *
+single_replace(pcre2_code *pcre_code, pcre2_match_data *match_data,
+ PCRE2_SPTR origin_str, PCRE2_SIZE len_origin_str,
+ uint32_t exec_options,
+ PCRE2_SPTR replacement, PCRE2_SIZE len_replacement,
+ PCRE2_UCHAR *result, PCRE2_SIZE *max_result)
{
- int offset = 0;
- int len_result = 0;
- int addlen;
- int empty_match_correction = 0;
- char *tmp;
-
- do {
- int j = pcre_exec(pcre_code, extra, origin_str, len_origin_str, offset,
- exec_options, ovector, ovecsize);
- if (j <= 0)
- break;
-
- empty_match_correction = ovector[0] == ovector[1] ? 1 : 0;
-
- // calculate the length of the string that will be appended to result
- addlen = ovector[0] - offset
- + (nbackrefs == 0 ? len_replacement : 0) + empty_match_correction;
- if (len_result + addlen >= *max_result) {
- tmp = GDKrealloc(result, len_result + addlen + 1);
- if (tmp == NULL) {
- GDKfree(result);
- return NULL;
- }
- result = tmp;
- *max_result = len_result + addlen + 1;
- }
- // append to the result the parts of the original string that are left unchanged
- if (ovector[0] > offset) {
- strncpy(result + len_result, origin_str + offset,
- ovector[0] - offset);
- len_result += ovector[0] - offset;
- }
- // append to the result the replacement of the matched string
- if (nbackrefs == 0) {
- strncpy(result + len_result, replacement, len_replacement);
- len_result += len_replacement;
- } else {
- int prevend = 0;
- for (int i = 0; i < nbackrefs; i++) {
- int off, len;
- if (backrefs[i].idx >= ovecsize / 3) {
- /* out of bounds, replace with empty string */
- off = 0;
- len = 0;
- } else {
- off = ovector[backrefs[i].idx * 2];
- len = ovector[backrefs[i].idx * 2 + 1] - off;
- }
- addlen = backrefs[i].start - prevend + len;
- if (len_result + addlen >= *max_result) {
- tmp = GDKrealloc(result, len_result + addlen + 1);
- if (tmp == NULL) {
- GDKfree(result);
- return NULL;
- }
- result = tmp;
- *max_result = len_result + addlen + 1;
- }
- if (backrefs[i].start > prevend) {
- strncpy(result + len_result, replacement + prevend,
- backrefs[i].start - prevend);
- len_result += backrefs[i].start - prevend;
- }
- if (len > 0) {
- strncpy(result + len_result, origin_str + off, len);
- len_result += len;
- }
- prevend = backrefs[i].end;
- }
- /* copy rest of replacement string (after last backref) */
- addlen = len_replacement - prevend;
- if (addlen > 0) {
- if (len_result + addlen >= *max_result) {
- tmp = GDKrealloc(result, len_result + addlen + 1);
- if (tmp == NULL) {
- GDKfree(result);
- return NULL;
- }
- result = tmp;
- *max_result = len_result + addlen + 1;
- }
- strncpy(result + len_result, replacement + prevend, addlen);
- len_result += addlen;
- }
- }
- // In case of an empty match just advance the offset by 1
- offset = ovector[1] + empty_match_correction;
- // and copy the character that we just advanced over
- if (empty_match_correction) {
- strncpy(result + len_result, origin_str + ovector[1], 1);
- ++len_result;
- }
- // before we loop around check with the offset - 1 if we had an empty match
- // since we manually advanced the offset by one. otherwise we gonna skip a
- // replacement at the end of the string
- } while ((offset - empty_match_correction) < len_origin_str && global);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]