MonetDB: default - merged with Mar2025

Niels Nes via checkin-list Sun, 03 Aug 2025 03:59:12 -0700

Changeset: 246b51d94896 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/246b51d94896
Modified Files:
        MonetDB.spec
        debian/control
        sql/test/BugTracker-2025/Tests/All
Branch: default
Log Message:


merged with Mar2025


diffs (truncated from 987 to 300 lines):

diff --git a/MonetDB.spec b/MonetDB.spec
--- a/MonetDB.spec
+++ b/MonetDB.spec
@@ -140,7 +140,7 @@ BuildRequires: pkgconfig(openssl) >= 1.1
 %global with_openssl 1
 %endif
 %if %{with pcre}
-BuildRequires: pkgconfig(libpcre) >= 4.5
+BuildRequires: pkgconfig(libpcre2-8)
 %endif
 BuildRequires: pkgconfig(zlib)
 BuildRequires: pkgconfig(liblz4) >= 1.8
diff --git a/NT/mksqlwxs.py b/NT/mksqlwxs.py
--- a/NT/mksqlwxs.py
+++ b/NT/mksqlwxs.py
@@ -169,7 +169,7 @@ def main():
                vcpkg.format(r'bin\libxml2.dll'),
                vcpkg.format(r'bin\lz4.dll'),
                vcpkg.format(r'bin\liblzma.dll'),
-               vcpkg.format(r'bin\pcre.dll'),
+               vcpkg.format(r'bin\pcre2-8.dll'),
                vcpkg.format(r'bin\zlib1.dll')])
     id = comp(debug, id, 14,
               [r'bin\mclient.pdb',
@@ -230,7 +230,7 @@ def main():
                vcpkg.format(r'lib\libxml2.lib'),
                vcpkg.format(r'lib\lz4.lib'),
                vcpkg.format(r'lib\lzma.lib'),
-               vcpkg.format(r'lib\pcre.lib'),
+               vcpkg.format(r'lib\pcre2-8.lib'),
                vcpkg.format(r'lib\zlib.lib')])
     print(r'            </Directory>')
     print(r'            <Directory Id="share" Name="share">')
diff --git a/cmake/Modules/FindPCRE.cmake b/cmake/Modules/FindPCRE.cmake
--- a/cmake/Modules/FindPCRE.cmake
+++ b/cmake/Modules/FindPCRE.cmake
@@ -24,9 +24,9 @@
 # PCRE_VERSION - The version found.
 # PCRE_FOUND   - True if pcre found.
 
-find_path(PCRE_INCLUDE_DIR NAMES pcre.h)
+find_path(PCRE_INCLUDE_DIR NAMES pcre2.h)
 
-find_library(PCRE_LIBRARIES NAMES pcre)
+find_library(PCRE_LIBRARIES NAMES pcre2-8)
 
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(PCRE
@@ -37,9 +37,9 @@ find_package_handle_standard_args(PCRE
 mark_as_advanced(PCRE_INCLUDE_DIR PCRE_LIBRARIES PCRE_VERSION)
 
 if(PCRE_FOUND)
-  file(STRINGS "${PCRE_INCLUDE_DIR}/pcre.h" PCRE_VERSION_LINES REGEX "[ \t]*#define[ \t]+PCRE_(MAJOR|MINOR)")
-  string(REGEX REPLACE ".*PCRE_MAJOR *\([0-9]*\).*" "\\1" PCRE_VERSION_MAJOR "${PCRE_VERSION_LINES}")
-  string(REGEX REPLACE ".*PCRE_MINOR *\([0-9]*\).*" "\\1" PCRE_VERSION_MINOR "${PCRE_VERSION_LINES}")
+  file(STRINGS "${PCRE_INCLUDE_DIR}/pcre2.h" PCRE_VERSION_LINES REGEX "[ \t]*#define[ \t]+PCRE2_(MAJOR|MINOR)")
+  string(REGEX REPLACE ".*PCRE2_MAJOR *\([0-9]*\).*" "\\1" PCRE_VERSION_MAJOR "${PCRE_VERSION_LINES}")
+  string(REGEX REPLACE ".*PCRE2_MINOR *\([0-9]*\).*" "\\1" PCRE_VERSION_MINOR "${PCRE_VERSION_LINES}")
   set(PCRE_VERSION "${PCRE_VERSION_MAJOR}.${PCRE_VERSION_MINOR}")
 
   if(NOT TARGET PCRE::PCRE AND
diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -7,7 +7,7 @@ Vcs-Browser: https://dev.monetdb.org/hg/
 Vcs-Hg: https://dev.monetdb.org/hg/MonetDB/ -b default
 Build-Depends: debhelper-compat (= 12), cmake (>= 3.12), bison,
  libbz2-dev, libcurl4-gnutls-dev, libgeos-dev (>= 3.10.0),
- libpcre3-dev, libreadline-dev, liblzma-dev, liblz4-dev (>= 1.8.0),
+ libpcre2-dev, libreadline-dev, liblzma-dev, liblz4-dev (>= 1.8.0),
  libxml2-dev, pkg-config,
  python3, python3-dev, python3-numpy,
  unixodbc-dev, zlib1g-dev, r-base-dev,
diff --git a/monetdb5/ChangeLog.Mar2025 b/monetdb5/ChangeLog.Mar2025
--- a/monetdb5/ChangeLog.Mar2025
+++ b/monetdb5/ChangeLog.Mar2025
@@ -1,3 +1,9 @@
 # ChangeLog file for MonetDB5
 # This file is updated with Maddlog
 
+* Fri Aug  1 2025 Sjoerd Mullender <[email protected]>
+- The PCRE module has been ported to the PCRE2 version of the library.
+  The main difference is in the regexp_replace function which now no
+  longer accepts \ to introduce replacements.  Only $ is accepted (it
+  was already accepted before).
+
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -15,12 +15,7 @@
  * PCRE library interface
  * The  PCRE library is a set of functions that implement regular
  * expression pattern matching using the same syntax  and  semantics  as  Perl,
- * with  just  a  few  differences.  The  current  implementation of PCRE
- * (release 4.x) corresponds approximately with Perl 5.8, including  support
- * for  UTF-8  encoded  strings.   However,  this support has to be
- * explicitly enabled; it is not the default.
- *
- * ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
+ * with  just  a  few  differences.
  */
 #include "monetdb_config.h"
 #include <string.h>
@@ -34,19 +29,15 @@
 #include <wctype.h>
 
 #ifdef HAVE_LIBPCRE
-#include <pcre.h>
-#ifndef PCRE_STUDY_JIT_COMPILE
-/* old library version on e.g. EPEL 6 */
-#define pcre_free_study(x)             pcre_free(x)
-#define PCRE_STUDY_JIT_COMPILE 0
-#endif
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 #define JIT_COMPILE_MIN        1024    /* when to try JIT compilation of patterns */
 
 #else
 
 #include <regex.h>
 
-typedef regex_t pcre;
+typedef regex_t pcre2_code;
 #endif
 
 /* current implementation assumes simple %keyword% [keyw%]* */
@@ -316,26 +307,6 @@ mnre_create(const char *pat, bool caseig
        return NULL;
 }
 
-#ifdef HAVE_LIBPCRE
-static str
-pcre_compile_wrap(pcre **res, const char *pattern, bit insensitive)
-{
-       pcre *r;
-       const char *err_p = NULL;
-       int errpos = 0;
-       int options = PCRE_UTF8 | PCRE_NO_UTF8_CHECK | PCRE_MULTILINE;
-       if (insensitive)
-               options |= PCRE_CASELESS;
-
-       if ((r = pcre_compile(pattern, options, &err_p, &errpos, NULL)) == NULL) {
-               throw(MAL, "pcre.compile", OPERATION_FAILED
-                         " with\n'%s'\nat %d in\n'%s'.\n", err_p, errpos, pattern);
-       }
-       *res = r;
-       return MAL_SUCCEED;
-}
-#endif
-
 /* maximum number of back references and quoted \ or $ in replacement string */
 #define MAX_NR_REFS            20
 
@@ -346,160 +317,26 @@ struct backref {
 };
 
 #ifdef HAVE_LIBPCRE
-/* fill in parameter backrefs (length maxrefs) with information about
- * back references in the replacement string; a back reference is a
- * dollar or backslash followed by a number */
-static int
-parse_replacement(const char *replacement, int len_replacement,
-                                 struct backref *backrefs, int maxrefs)
-{
-       int nbackrefs = 0;
-
-       for (int i = 0; i < len_replacement && nbackrefs < maxrefs; i++) {
-               if (replacement[i] == '$' || replacement[i] == '\\') {
-                       char *endptr;
-                       backrefs[nbackrefs].idx = strtol(replacement + i + 1, &endptr, 10);
-                       if (endptr > replacement + i + 1) {
-                               int k = (int) (endptr - (replacement + i + 1));
-                               backrefs[nbackrefs].start = i;
-                               backrefs[nbackrefs].end = i + k + 1;
-                               nbackrefs++;
-                       } else if (replacement[i] == replacement[i + 1]) {
-                               /* doubled $ or \, we must copy just one to the output */
-                               backrefs[nbackrefs].idx = INT_MAX;      /* impossible value > 0 */
-                               backrefs[nbackrefs].start = i;
-                               backrefs[nbackrefs].end = i + 1;
-                               i++;                    /* don't look at second $ or \ again */
-                               nbackrefs++;
-                       }
-                       /* else: $ or \ followed by something we don't recognize,
-                        * so just leave it */
-               }
-       }
-       return nbackrefs;
-}
-
-static char *
-single_replace(pcre *pcre_code, pcre_extra *extra,
-                          const char *origin_str, int len_origin_str,
-                          int exec_options, int *ovector, int ovecsize,
-                          const char *replacement, int len_replacement,
-                          struct backref *backrefs, int nbackrefs,
-                          bool global, char *result, int *max_result)
+static PCRE2_UCHAR *
+single_replace(pcre2_code *pcre_code, pcre2_match_data *match_data,
+                          PCRE2_SPTR origin_str, PCRE2_SIZE len_origin_str,
+                          uint32_t exec_options,
+                          PCRE2_SPTR replacement, PCRE2_SIZE len_replacement,
+                          PCRE2_UCHAR *result, PCRE2_SIZE *max_result)
 {
-       int offset = 0;
-       int len_result = 0;
-       int addlen;
-       int empty_match_correction = 0;
-       char *tmp;
-
-       do {
-               int j = pcre_exec(pcre_code, extra, origin_str, len_origin_str, offset,
-                                                 exec_options, ovector, ovecsize);
-               if (j <= 0)
-                       break;
-
-               empty_match_correction = ovector[0] == ovector[1] ? 1 : 0;
-
-               // calculate the length of the string that will be appended to result
-               addlen = ovector[0] - offset
-                               + (nbackrefs == 0 ? len_replacement : 0) + empty_match_correction;
-               if (len_result + addlen >= *max_result) {
-                       tmp = GDKrealloc(result, len_result + addlen + 1);
-                       if (tmp == NULL) {
-                               GDKfree(result);
-                               return NULL;
-                       }
-                       result = tmp;
-                       *max_result = len_result + addlen + 1;
-               }
-               // append to the result the parts of the original string that are left unchanged
-               if (ovector[0] > offset) {
-                       strncpy(result + len_result, origin_str + offset,
-                                       ovector[0] - offset);
-                       len_result += ovector[0] - offset;
-               }
-               // append to the result the replacement of the matched string
-               if (nbackrefs == 0) {
-                       strncpy(result + len_result, replacement, len_replacement);
-                       len_result += len_replacement;
-               } else {
-                       int prevend = 0;
-                       for (int i = 0; i < nbackrefs; i++) {
-                               int off, len;
-                               if (backrefs[i].idx >= ovecsize / 3) {
-                                       /* out of bounds, replace with empty string */
-                                       off = 0;
-                                       len = 0;
-                               } else {
-                                       off = ovector[backrefs[i].idx * 2];
-                                       len = ovector[backrefs[i].idx * 2 + 1] - off;
-                               }
-                               addlen = backrefs[i].start - prevend + len;
-                               if (len_result + addlen >= *max_result) {
-                                       tmp = GDKrealloc(result, len_result + addlen + 1);
-                                       if (tmp == NULL) {
-                                               GDKfree(result);
-                                               return NULL;
-                                       }
-                                       result = tmp;
-                                       *max_result = len_result + addlen + 1;
-                               }
-                               if (backrefs[i].start > prevend) {
-                                       strncpy(result + len_result, replacement + prevend,
-                                                       backrefs[i].start - prevend);
-                                       len_result += backrefs[i].start - prevend;
-                               }
-                               if (len > 0) {
-                                       strncpy(result + len_result, origin_str + off, len);
-                                       len_result += len;
-                               }
-                               prevend = backrefs[i].end;
-                       }
-                       /* copy rest of replacement string (after last backref) */
-                       addlen = len_replacement - prevend;
-                       if (addlen > 0) {
-                               if (len_result + addlen >= *max_result) {
-                                       tmp = GDKrealloc(result, len_result + addlen + 1);
-                                       if (tmp == NULL) {
-                                               GDKfree(result);
-                                               return NULL;
-                                       }
-                                       result = tmp;
-                                       *max_result = len_result + addlen + 1;
-                               }
-                               strncpy(result + len_result, replacement + prevend, addlen);
-                               len_result += addlen;
-                       }
-               }
-               // In case of an empty match just advance the offset by 1
-               offset = ovector[1] + empty_match_correction;
-               // and copy the character that we just advanced over
-               if (empty_match_correction) {
-                       strncpy(result + len_result, origin_str + ovector[1], 1);
-                       ++len_result;
-               }
-               // before we loop around check with the offset - 1 if we had an empty match
-               // since we manually advanced the offset by one. otherwise we gonna skip a
-               // replacement at the end of the string
-       } while ((offset - empty_match_correction) < len_origin_str && global);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

MonetDB: default - merged with Mar2025

Reply via email to