This is an automated email from the ASF dual-hosted git repository.

bcall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e1c1b9faf Change Regex class to use PCRE2 (#11014)
8e1c1b9faf is described below

commit 8e1c1b9fafb13df87ca431a1e855f64223b06ffb
Author: Bryan Call <bc...@apache.org>
AuthorDate: Thu Feb 29 12:06:24 2024 -0800

    Change Regex class to use PCRE2 (#11014)
---
 CMakeLists.txt                                 |   2 +-
 include/proxy/http/remap/UrlRewrite.h          |   2 +-
 include/tsutil/Regex.h                         | 106 +++++---
 plugins/experimental/tls_bridge/CMakeLists.txt |   2 +-
 src/proxy/http/remap/UrlRewrite.cc             |  14 +-
 src/tsutil/CMakeLists.txt                      |   3 +-
 src/tsutil/Regex.cc                            | 329 +++++++++++++++++--------
 src/tsutil/unit_tests/test_Regex.cc            | 138 ++++++++++-
 8 files changed, 440 insertions(+), 156 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 63ec25723a..a276d827cc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -253,7 +253,7 @@ if(LibLZMA_FOUND)
 endif()
 
 find_package(PCRE REQUIRED)
-find_package(PCRE2 COMPONENTS 8BIT)
+pkg_check_modules(PCRE2 REQUIRED IMPORTED_TARGET libpcre2-8)
 
 include(CheckOpenSSLIsBoringSSL)
 include(CheckOpenSSLIsQuictls)
diff --git a/include/proxy/http/remap/UrlRewrite.h 
b/include/proxy/http/remap/UrlRewrite.h
index 86dcb50a07..797ad94c47 100644
--- a/include/proxy/http/remap/UrlRewrite.h
+++ b/include/proxy/http/remap/UrlRewrite.h
@@ -232,7 +232,7 @@ private:
                             int request_host_len);
   bool _regexMappingLookup(RegexMappingList &regex_mappings, URL *request_url, 
int request_port, const char *request_host,
                            int request_host_len, int rank_ceiling, 
UrlMappingContainer &mapping_container);
-  int _expandSubstitutions(int *matches_info, const RegexMapping *reg_map, 
const char *matched_string, char *dest_buf,
+  int _expandSubstitutions(size_t *matches_info, const RegexMapping *reg_map, 
const char *matched_string, char *dest_buf,
                            int dest_buf_size);
   void _destroyTable(std::unique_ptr<URLTable> &h_table);
   void _destroyList(RegexMappingList &regexes);
diff --git a/include/tsutil/Regex.h b/include/tsutil/Regex.h
index a1c51e3661..c4ca8feb03 100644
--- a/include/tsutil/Regex.h
+++ b/include/tsutil/Regex.h
@@ -28,24 +28,60 @@
 #include <vector>
 #include <memory>
 
-#include "swoc/MemSpan.h"
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
-/// Match flags for regular expression evaluation.
+/// @brief Match flags for regular expression evaluation.
 enum REFlags {
-  RE_CASE_INSENSITIVE = 0x0001, ///< Ignore case (default: case sensitive).
-  RE_UNANCHORED       = 0x0002, ///< Unanchored (DFA defaults to anchored).
-  RE_ANCHORED         = 0x0004, ///< Anchored (Regex defaults to unanchored).
+  RE_CASE_INSENSITIVE = PCRE2_CASELESS,  ///< Ignore case (default: case 
sensitive).
+  RE_UNANCHORED       = PCRE2_MULTILINE, ///< Unanchored (DFA defaults to 
anchored).
+  RE_ANCHORED         = PCRE2_ANCHORED,  ///< Anchored (Regex defaults to 
unanchored).
 };
 
-/** Wrapper for PCRE evaluation.
- *
- */
-class Regex
+/// @brief Wrapper for PCRE2 match data.
+class RegexMatches
 {
+  friend class Regex;
+
 public:
-  /// Default number of capture groups.
-  static constexpr size_t DEFAULT_GROUP_COUNT = 10;
+  /** Construct a new RegexMatches object.
+   *
+   * @param size The number of matches to allocate space for.
+   */
+  RegexMatches(uint32_t size = DEFAULT_MATCHES);
+  ~RegexMatches();
+
+  /** Get the match at the given index.
+   *
+   * @return The match at the given index.
+   */
+  std::string_view operator[](size_t index) const;
+  /** Get the ovector pointer for the capture groups.  Don't use this unless 
you know what you are doing.
+   *
+   * @return ovector pointer.
+   */
+  size_t *get_ovector_pointer();
+  int32_t size() const;
+
+protected:
+  pcre2_match_data *get_match_data();
+  void set_subject(std::string_view subject);
+  void set_size(int32_t size);
+
+private:
+  constexpr static uint32_t DEFAULT_MATCHES = 10;
+  static void *malloc(size_t size, void *caller);
+  pcre2_match_data *_match_data = nullptr;
+  std::string_view _subject;
+  char _buffer[24 + 96 + 16 * DEFAULT_MATCHES]; // 24 bytes for the general 
context, 96 bytes overhead, 16 bytes per match.
+  size_t _buffer_bytes_used = 0;
+  int32_t _size             = 0;
+};
 
+/// @brief Wrapper for PCRE2 regular expression.
+class Regex
+{
+public:
   Regex()              = default;
   Regex(Regex const &) = delete; // No copying.
   Regex(Regex &&that) noexcept;
@@ -59,46 +95,43 @@ public:
    *
    * @a flags should be the bitwise @c or of @c REFlags values.
    */
-  bool compile(const char *pattern, unsigned flags = 0);
+  bool compile(std::string_view pattern, uint32_t flags = 0);
 
-  /** Execute the regular expression.
+  /** Compile the @a pattern into a regular expression.
    *
-   * @param str String to match against.
-   * @return @c true if the pattern matched, @a false if not.
+   * @param pattern Source pattern for regular expression.
+   * @param error String to receive the error message if compilation fails.
+   * @param erroffset Integer to receive the error offset if compilation fails.
+   * @param flags Compilation flags.
+   * @return @a true if compiled successfully, @a false otherwise.
    *
-   * It is safe to call this method concurrently on the same instance of @a 
this.
+   * @a flags should be the bitwise @c or of @c REFlags values.
    */
-  bool exec(std::string_view const &str) const;
+  bool compile(std::string_view pattern, std::string &error, int &erroffset, 
unsigned flags = 0);
 
   /** Execute the regular expression.
    *
-   * @param str String to match against.
-   * @param ovector Capture results.
-   * @param ovecsize Number of elements in @a ovector.
+   * @param subject String to match against.
    * @return @c true if the pattern matched, @a false if not.
    *
    * It is safe to call this method concurrently on the same instance of @a 
this.
-   *
-   * Each capture group takes 3 elements of @a ovector, therefore @a ovecsize 
must
-   * be a multiple of 3 and at least three times the number of desired capture 
groups.
    */
-  bool exec(std::string_view const &str, int *ovector, int ovecsize) const;
+  bool exec(std::string_view subject) const;
 
   /** Execute the regular expression.
    *
-   * @param str String to match against.
-   * @param ovector Capture results.
-   * @param ovecsize Number of elements in @a ovector.
-   * @return @c true if the pattern matched, @a false if not.
+   * @param subject String to match against.
+   * @param matches Place to store the capture groups.
+   * @return The number of matched groups, counting the whole match. < 0 if 
there was no match or an error occurred. 0 if @a matches is too small or 
nothing was compiled.
    *
    * It is safe to call this method concurrently on the same instance of @a 
this.
    *
    * Each capture group takes 3 elements of @a ovector, therefore @a ovecsize 
must
    * be a multiple of 3 and at least three times the number of desired capture 
groups.
    */
-  bool exec(std::string_view str, swoc::MemSpan<int> groups) const;
+  int exec(std::string_view subject, RegexMatches &matches) const;
 
-  /// @return The number of groups captured in the last call to @c exec.
+  /// @return The number of capture groups in the compiled pattern.
   int get_capture_count();
 
 private:
@@ -106,8 +139,7 @@ private:
   // enough to use as pointers. For some reason the header defines in name 
only a struct and
   // then aliases it to the standard name, rather than simply declare the 
latter in name only.
   // The goal is completely wrap PCRE and not include that header in client 
code.
-  void *regex       = nullptr; ///< Compiled expression.
-  void *regex_extra = nullptr; ///< Extra information about the expression.
+  pcre2_code *_code = nullptr;
 };
 
 /** Deterministic Finite state Automata container.
@@ -122,18 +154,18 @@ public:
   ~DFA();
 
   /// @return The number of patterns successfully compiled.
-  int compile(std::string_view const &pattern, unsigned flags = 0);
+  int32_t compile(std::string_view pattern, unsigned flags = 0);
   /// @return The number of patterns successfully compiled.
-  int compile(std::string_view *patterns, int npatterns, unsigned flags = 0);
+  int32_t compile(std::string_view *patterns, int npatterns, unsigned flags = 
0);
   /// @return The number of patterns successfully compiled.
-  int compile(const char **patterns, int npatterns, unsigned flags = 0);
+  int32_t compile(const char **patterns, int npatterns, unsigned flags = 0);
 
   /** Match @a str against the internal patterns.
    *
    * @param str String to match.
    * @return Index of the matched pattern, -1 if no match.
    */
-  int match(std::string_view const &str) const;
+  int32_t match(std::string_view str) const;
 
 private:
   struct Pattern {
@@ -148,7 +180,7 @@ private:
    * @param flags Regular expression compilation flags.
    * @return @c true if @a pattern was successfully compiled, @c false if not.
    */
-  bool build(std::string_view const &pattern, unsigned flags = 0);
+  bool build(std::string_view pattern, unsigned flags = 0);
 
   std::vector<Pattern> _patterns;
 };
diff --git a/plugins/experimental/tls_bridge/CMakeLists.txt 
b/plugins/experimental/tls_bridge/CMakeLists.txt
index e3de8b9249..5430505431 100644
--- a/plugins/experimental/tls_bridge/CMakeLists.txt
+++ b/plugins/experimental/tls_bridge/CMakeLists.txt
@@ -17,5 +17,5 @@
 
 add_atsplugin(tls_bridge tls_bridge.cc)
 
-target_link_libraries(tls_bridge PRIVATE libswoc::libswoc)
+target_link_libraries(tls_bridge PRIVATE ts::tsutil libswoc::libswoc)
 verify_global_plugin(tls_bridge)
diff --git a/src/proxy/http/remap/UrlRewrite.cc 
b/src/proxy/http/remap/UrlRewrite.cc
index 1f8176e97c..e377451921 100644
--- a/src/proxy/http/remap/UrlRewrite.cc
+++ b/src/proxy/http/remap/UrlRewrite.cc
@@ -857,7 +857,7 @@ UrlRewrite::_mappingLookup(MappingsStore &mappings, URL 
*request_url, int reques
 
 // does not null terminate return string
 int
-UrlRewrite::_expandSubstitutions(int *matches_info, const RegexMapping 
*reg_map, const char *matched_string, char *dest_buf,
+UrlRewrite::_expandSubstitutions(size_t *matches_info, const RegexMapping 
*reg_map, const char *matched_string, char *dest_buf,
                                  int dest_buf_size)
 {
   int cur_buf_size = 0;
@@ -908,6 +908,7 @@ UrlRewrite::_regexMappingLookup(RegexMappingList 
&regex_mappings, URL *request_u
                                 int request_host_len, int rank_ceiling, 
UrlMappingContainer &mapping_container)
 {
   bool retval = false;
+  RegexMatches matches;
 
   if (rank_ceiling == -1) { // we will now look at all regex mappings
     rank_ceiling = INT_MAX;
@@ -959,11 +960,9 @@ UrlRewrite::_regexMappingLookup(RegexMappingList 
&regex_mappings, URL *request_u
       continue;
     }
 
-    int matches_info[MAX_REGEX_SUBS * 3];
-    bool match_result =
-      list_iter->regular_expression.exec(std::string_view(request_host, 
request_host_len), matches_info, countof(matches_info));
+    int match_result = 
list_iter->regular_expression.exec(std::string_view(request_host, 
request_host_len), matches);
 
-    if (match_result == true) {
+    if (match_result > 0) {
       Debug("url_rewrite_regex",
             "Request URL host [%.*s] matched regex in mapping of rank %d "
             "with %d possible substitutions",
@@ -975,8 +974,9 @@ UrlRewrite::_regexMappingLookup(RegexMappingList 
&regex_mappings, URL *request_u
       int buf_len;
 
       // Expand substitutions in the host field from the stored template
-      buf_len           = _expandSubstitutions(matches_info, list_iter, 
request_host, buf, sizeof(buf));
-      URL *expanded_url = mapping_container.createNewToURL();
+      size_t *matches_info = matches.get_ovector_pointer();
+      buf_len              = _expandSubstitutions(matches_info, list_iter, 
request_host, buf, sizeof(buf));
+      URL *expanded_url    = mapping_container.createNewToURL();
       expanded_url->copy(&((list_iter->url_map)->toURL));
       expanded_url->host_set(buf, buf_len);
 
diff --git a/src/tsutil/CMakeLists.txt b/src/tsutil/CMakeLists.txt
index 44b83448fd..a747431daa 100644
--- a/src/tsutil/CMakeLists.txt
+++ b/src/tsutil/CMakeLists.txt
@@ -50,9 +50,10 @@ add_library(
   ts_unit_parser.cc
   Regex.cc
 )
+
 add_library(ts::tsutil ALIAS tsutil)
 set_target_properties(tsutil PROPERTIES POSITION_INDEPENDENT_CODE TRUE 
PUBLIC_HEADER "${TSUTIL_PUBLIC_HEADERS}")
-target_link_libraries(tsutil PUBLIC libswoc::libswoc yaml-cpp::yaml-cpp 
PCRE::PCRE)
+target_link_libraries(tsutil PUBLIC libswoc::libswoc yaml-cpp::yaml-cpp 
PkgConfig::PCRE2)
 
 install(
   TARGETS tsutil
diff --git a/src/tsutil/Regex.cc b/src/tsutil/Regex.cc
index 42d9d27c28..faea3b8546 100644
--- a/src/tsutil/Regex.cc
+++ b/src/tsutil/Regex.cc
@@ -26,160 +26,281 @@
 #include <array>
 #include <assert.h>
 
-#if __has_include(<pcre/pcre.h>)
-#include <pcre/pcre.h>
-#else
-#include <pcre.h>
-#endif
-
+//----------------------------------------------------------------------------
 namespace
 {
-inline pcre *
-as_pcre(void *p)
+void *
+my_malloc(size_t size, void * /*caller*/)
 {
-  return static_cast<pcre *>(p);
+  void *ptr = malloc(size);
+  return ptr;
 }
-inline pcre_extra *
-as_extra(void *p)
+
+void
+my_free(void *ptr, void * /*caller*/)
 {
-  return static_cast<pcre_extra *>(p);
+  free(ptr);
 }
 } // namespace
 
-#ifdef PCRE_CONFIG_JIT
-/*
-Using two thread locals avoids the deadlock because without the thread local 
object access, get_jit_stack doesn't call
-the TLS init function which ends up calling __cxx_thread_atexit(which locks 
the dl_whatever mutex). Since the raw
-pointer doesn't have a destructor to call, it doesn't need to call this. 
Interestingly, get_jit_stack was calling the
-TLS init function to setup the destructor call at thread exit whether or not 
the class was declared in the function
-body.
-*/
-namespace
+//----------------------------------------------------------------------------
+class RegexContext
 {
-thread_local pcre_jit_stack *jit_stack;
-
-struct JitStackCleanup {
-  ~JitStackCleanup()
+public:
+  static RegexContext *
+  get_instance()
+  {
+    if (!_regex_context) {
+      _regex_context = new RegexContext();
+    }
+    return _regex_context;
+  }
+  ~RegexContext()
   {
-    if (jit_stack) {
-      pcre_jit_stack_free(jit_stack);
+    if (_general_context != nullptr) {
+      pcre2_general_context_free(_general_context);
+    }
+    if (_compile_context != nullptr) {
+      pcre2_compile_context_free(_compile_context);
+    }
+    if (_match_context != nullptr) {
+      pcre2_match_context_free(_match_context);
+    }
+    if (_jit_stack != nullptr) {
+      pcre2_jit_stack_free(_jit_stack);
     }
   }
+  pcre2_general_context *
+  get_general_context()
+  {
+    return _general_context;
+  }
+  pcre2_compile_context *
+  get_compile_context()
+  {
+    return _compile_context;
+  }
+  pcre2_match_context *
+  get_match_context()
+  {
+    return _match_context;
+  }
+
+private:
+  RegexContext()
+  {
+    _general_context = pcre2_general_context_create(my_malloc, my_free, 
nullptr);
+    _compile_context = pcre2_compile_context_create(_general_context);
+    _match_context   = pcre2_match_context_create(_general_context);
+    _jit_stack       = pcre2_jit_stack_create(4096, 1024 * 1024, nullptr); // 
1 page min and 1MB max
+    pcre2_jit_stack_assign(_match_context, nullptr, _jit_stack);
+  }
+  pcre2_general_context *_general_context = nullptr;
+  pcre2_compile_context *_compile_context = nullptr;
+  pcre2_match_context *_match_context     = nullptr;
+  pcre2_jit_stack *_jit_stack             = nullptr;
+  thread_local static RegexContext *_regex_context;
 };
 
-thread_local JitStackCleanup jsc;
+thread_local RegexContext *RegexContext::_regex_context = nullptr;
 
-pcre_jit_stack *
-get_jit_stack(void *)
+//----------------------------------------------------------------------------
+namespace
 {
-  if (!jit_stack) {
-    jit_stack = pcre_jit_stack_alloc(4096, 1024 * 1024); // 1 page min and 1MB 
max
-  }
-  return jit_stack;
-}
-
-} // end anonymous namespace
-#endif // def PCRE_CONFIG_JIT
+struct RegexContextCleanup {
+  ~RegexContextCleanup() { delete RegexContext::get_instance(); }
+};
+thread_local RegexContextCleanup cleanup;
+} // namespace
 
-Regex::Regex(Regex &&that) noexcept : regex(that.regex), 
regex_extra(that.regex_extra)
+//----------------------------------------------------------------------------
+RegexMatches::RegexMatches(uint32_t size)
 {
-  that.regex       = nullptr;
-  that.regex_extra = nullptr;
+  pcre2_general_context *ctx = pcre2_general_context_create(
+    &RegexMatches::malloc, [](void *, void *) -> void {}, static_cast<void 
*>(this));
+
+  _match_data = pcre2_match_data_create(size, ctx);
 }
 
-bool
-Regex::compile(const char *pattern, const unsigned flags)
+//----------------------------------------------------------------------------
+void *
+RegexMatches::malloc(size_t size, void *caller)
 {
-  const char *error = nullptr;
-  int erroffset     = 0;
-  int options       = 0;
-  int study_opts    = 0;
+  auto *matches = static_cast<RegexMatches *>(caller);
 
-  if (regex) {
-    return false;
+  // allocate from the buffer if possible
+  if (size <= sizeof(matches->_buffer) - matches->_buffer_bytes_used) {
+    void *ptr                    = matches->_buffer + 
matches->_buffer_bytes_used;
+    matches->_buffer_bytes_used += size;
+    return ptr;
   }
 
-  if (flags & RE_CASE_INSENSITIVE) {
-    options |= PCRE_CASELESS;
-  }
+  // otherwise use system malloc if the buffer is too small
+  void *ptr = ::malloc(size);
+  return ptr;
+}
 
-  if (flags & RE_ANCHORED) {
-    options |= PCRE_ANCHORED;
+//----------------------------------------------------------------------------
+RegexMatches::~RegexMatches()
+{
+  if (_match_data != nullptr) {
+    pcre2_match_data_free(_match_data);
   }
+}
 
-  regex = pcre_compile(pattern, options, &error, &erroffset, nullptr);
-  if (error) {
-    regex = nullptr;
-    return false;
-  }
+//----------------------------------------------------------------------------
+size_t *
+RegexMatches::get_ovector_pointer()
+{
+  return pcre2_get_ovector_pointer(_match_data);
+}
+
+//----------------------------------------------------------------------------
+int32_t
+RegexMatches::size() const
+{
+  return _size;
+}
+
+//----------------------------------------------------------------------------
+pcre2_match_data *
+RegexMatches::get_match_data()
+{
+  return _match_data;
+}
 
-#ifdef PCRE_CONFIG_JIT
-  study_opts |= PCRE_STUDY_JIT_COMPILE;
-#endif
+//----------------------------------------------------------------------------
+void
+RegexMatches::set_size(int32_t size)
+{
+  _size = size;
+}
 
-  regex_extra = pcre_study(as_pcre(regex), study_opts, &error);
+//----------------------------------------------------------------------------
+void
+RegexMatches::set_subject(std::string_view subject)
+{
+  _subject = subject;
+}
 
-#ifdef PCRE_CONFIG_JIT
-  if (regex_extra) {
-    pcre_assign_jit_stack(as_extra(regex_extra), &get_jit_stack, nullptr);
+//----------------------------------------------------------------------------
+std::string_view
+RegexMatches::operator[](size_t index) const
+{
+  // check if the index is valid
+  if (index >= pcre2_get_ovector_count(_match_data)) {
+    return std::string_view();
   }
-#endif
 
-  return true;
+  PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(_match_data);
+  return std::string_view(_subject.data() + ovector[2 * index], ovector[2 * 
index + 1] - ovector[2 * index]);
 }
 
-int
-Regex::get_capture_count()
+//----------------------------------------------------------------------------
+Regex::Regex(Regex &&that) noexcept
 {
-  int captures = -1;
-  if (pcre_fullinfo(as_pcre(regex), as_extra(regex_extra), 
PCRE_INFO_CAPTURECOUNT, &captures) != 0) {
-    return -1;
-  }
+  _code      = that._code;
+  that._code = nullptr;
+}
 
-  return captures;
+//----------------------------------------------------------------------------
+Regex::~Regex()
+{
+  if (_code != nullptr) {
+    pcre2_code_free(_code);
+  }
 }
 
+//----------------------------------------------------------------------------
 bool
-Regex::exec(std::string_view const &str) const
+Regex::compile(std::string_view pattern, uint32_t flags)
 {
-  std::array<int, DEFAULT_GROUP_COUNT * 3> ovector = {{0}};
-  return this->exec(str, ovector);
+  std::string error;
+  int erroroffset;
+
+  return this->compile(pattern, error, erroroffset, flags);
 }
 
+//----------------------------------------------------------------------------
 bool
-Regex::exec(std::string_view const &str, int *ovector, int ovecsize) const
+Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, 
uint32_t flags)
 {
-  int rv;
+  if (_code != nullptr) {
+    pcre2_code_free(_code);
+  }
+  PCRE2_SIZE error_offset;
+  int error_code;
+  _code = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(pattern.data()), 
pattern.size(), flags, &error_code, &error_offset,
+                        RegexContext::get_instance()->get_compile_context());
+  if (!_code) {
+    erroroffset = error_offset;
+
+    // get pcre2 error message
+    PCRE2_UCHAR buffer[256];
+    pcre2_get_error_message(error_code, buffer, sizeof(buffer));
+    error.assign((char *)buffer);
+    return false;
+  }
+
+  // support for JIT
+  pcre2_jit_compile(_code, PCRE2_JIT_COMPLETE);
 
-  rv = pcre_exec(as_pcre(regex), as_extra(regex_extra), str.data(), 
static_cast<int>(str.size()), 0, 0, ovector, ovecsize);
-  return rv > 0;
+  return true;
 }
 
+//----------------------------------------------------------------------------
 bool
-Regex::exec(std::string_view str, swoc::MemSpan<int> groups) const
+Regex::exec(std::string_view subject) const
 {
-  return 0 <
-         pcre_exec(as_pcre(regex), as_extra(regex_extra), str.data(), 
int(str.size()), 0, 0, groups.data(), int(groups.count()));
+  if (_code == nullptr) {
+    return false;
+  }
+  RegexMatches matches;
+
+  int count = this->exec(subject, matches);
+  return count > 0;
 }
 
-Regex::~Regex()
+//----------------------------------------------------------------------------
+int32_t
+Regex::exec(std::string_view subject, RegexMatches &matches) const
 {
-  if (regex_extra) {
-#ifdef PCRE_CONFIG_JIT
-    pcre_free_study(as_extra(regex_extra));
-#else
-    pcre_free(regex_extra);
-#endif
+  if (_code == nullptr) {
+    return 0;
+  }
+  int count = pcre2_match(_code, reinterpret_cast<PCRE2_SPTR>(subject.data()), 
subject.size(), 0, 0, matches.get_match_data(),
+                          RegexContext::get_instance()->get_match_context());
+
+  matches.set_size(count);
+
+  if (count < 0) {
+    return count;
   }
-  if (regex) {
-    pcre_free(regex);
+
+  if (count > 0) {
+    matches.set_subject(subject);
+  }
+
+  return count;
+}
+
+//----------------------------------------------------------------------------
+int32_t
+Regex::get_capture_count()
+{
+  int captures = -1;
+  if (pcre2_pattern_info(_code, PCRE2_INFO_CAPTURECOUNT, &captures) != 0) {
+    return -1;
   }
+  return captures;
 }
 
+//----------------------------------------------------------------------------
 DFA::~DFA() {}
 
+//----------------------------------------------------------------------------
 bool
-DFA::build(std::string_view const &pattern, unsigned flags)
+DFA::build(const std::string_view pattern, unsigned flags)
 {
   Regex rxp;
   std::string string{pattern};
@@ -188,22 +309,24 @@ DFA::build(std::string_view const &pattern, unsigned 
flags)
     flags |= RE_ANCHORED;
   }
 
-  if (!rxp.compile(string.c_str(), flags)) {
+  if (!rxp.compile(pattern, flags)) {
     return false;
   }
   _patterns.emplace_back(std::move(rxp), std::move(string));
   return true;
 }
 
-int
-DFA::compile(std::string_view const &pattern, unsigned flags)
+//----------------------------------------------------------------------------
+int32_t
+DFA::compile(std::string_view pattern, unsigned flags)
 {
   assert(_patterns.empty());
   this->build(pattern, flags);
   return _patterns.size();
 }
 
-int
+//----------------------------------------------------------------------------
+int32_t
 DFA::compile(std::string_view *patterns, int npatterns, unsigned flags)
 {
   _patterns.reserve(npatterns); // try to pre-allocate.
@@ -213,7 +336,8 @@ DFA::compile(std::string_view *patterns, int npatterns, 
unsigned flags)
   return _patterns.size();
 }
 
-int
+//----------------------------------------------------------------------------
+int32_t
 DFA::compile(const char **patterns, int npatterns, unsigned flags)
 {
   _patterns.reserve(npatterns); // try to pre-allocate.
@@ -223,8 +347,9 @@ DFA::compile(const char **patterns, int npatterns, unsigned 
flags)
   return _patterns.size();
 }
 
-int
-DFA::match(std::string_view const &str) const
+//----------------------------------------------------------------------------
+int32_t
+DFA::match(std::string_view str) const
 {
   for (auto spot = _patterns.begin(), limit = _patterns.end(); spot != limit; 
++spot) {
     if (spot->_re.exec(str)) {
diff --git a/src/tsutil/unit_tests/test_Regex.cc 
b/src/tsutil/unit_tests/test_Regex.cc
index 16f327dff6..f17d2b17c8 100644
--- a/src/tsutil/unit_tests/test_Regex.cc
+++ b/src/tsutil/unit_tests/test_Regex.cc
@@ -20,8 +20,8 @@
   limitations under the License.
 */
 
-#include <array>
 #include <string_view>
+#include <vector>
 
 #include "tscore/ink_assert.h"
 #include "tscore/ink_defs.h"
@@ -35,22 +35,148 @@ struct subject_match_t {
 
 struct test_t {
   std::string_view regex;
-  std::array<subject_match_t, 4> tests;
+  std::vector<subject_match_t> tests;
 };
 
-std::array<test_t, 2> test_data{
-  {{{"^foo"}, {{{{"foo"}, true}, {{"bar"}, false}, {{"foobar"}, true}, 
{{"foobarbaz"}, true}}}},
-   {{"foo$"}, {{{{"foo"}, true}, {{"bar"}, false}, {{"foobar"}, false}, 
{{"foobarbaz"}, false}}}}}
+std::vector<test_t> test_data{
+  {
+   {{R"(^foo)"}, {{{{"foo"}, true}, {{"bar"}, false}, {{"foobar"}, true}, 
{{"foobarbaz"}, true}}}},
+   {{R"(foo$)"}, {{{{"foo"}, true}, {{"bar"}, false}, {{"foobar"}, false}, 
{{"foobarbaz"}, false}}}},
+   // url regular expression
+    {{R"(^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$)"},
+     {{{{"http://www.example.com"}, true},
+       {{"https://www.example.com"}, true},
+       {{"http://~example.com"}, false},
+       {{"http://www.example.com/foo/bar"}, true}}}},
+   // ip address regular expression
+    
{R"(^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$)",
+     {{{{"1.2.3.4"}, true}, {{"127.0.0.1"}, true}, {{"256.256.256.256"}, 
false}, {{".1.1.1.1"}, false}}}},
+   }
+};
+
+// test case insensitive test data
+std::vector<test_t> test_data_case_insensitive{
+  {
+   {{R"(^foo)"}, {{{{"FoO"}, true}, {{"bar"}, false}, {{"foObar"}, true}, 
{{"foobaRbaz"}, true}}}},
+   {{R"(foo$)"}, {{{{"foO"}, true}, {{"bar"}, false}, {{"foobar"}, false}, 
{{"foobarbaz"}, false}}}},
+   }
+};
+
+// test case for anchored flag
+std::vector<test_t> test_data_anchored{
+  {
+   {{R"(foo)"}, {{{{"foo"}, true}, {{"bar"}, false}, {{"foobar"}, true}, 
{{"foobarbaz"}, true}}}},
+   {{R"(bar)"}, {{{{"foo"}, false}, {{"bar"}, true}, {{"foobar"}, false}, 
{{"foobarbaz"}, false}}}},
+   }
+};
+
+struct submatch_t {
+  std::string_view subject;
+  int32_t count;
+  std::vector<std::string_view> submatches;
+};
+
+struct submatch_test_t {
+  std::string_view regex;
+  int capture_count;
+  std::vector<submatch_t> tests;
+};
+
+std::vector<submatch_test_t> submatch_test_data{
+  {
+   {{R"(^foo)"}, 0, {{{{"foo"}, 1, {{"foo"}}}, {{"bar"}, -1, {}}, {{"foobar"}, 
1, {{"foo"}}}, {{"foobarbaz"}, 1, {{"foo"}}}}}},
+   {{R"(foo$)"}, 0, {{{{"foo"}, 1, {{"foo"}}}, {{"bar"}, -1, {}}, {{"foobar"}, 
-1, {}}, {{"foobarbaz"}, -1, {}}}}},
+   {{R"(^(foo)(bar))"}, 2, {{{{"foobar"}, 3, {{"foobar", "foo", "bar"}}}, 
{{"barfoo"}, -1, {}}, {{"foo"}, -1, {}}}}},
+   }
 };
 
 TEST_CASE("Regex", "[libts][Regex]")
 {
+  // case sensitive test
   for (auto &item : test_data) {
     Regex r;
-    r.compile(item.regex.data());
+    REQUIRE(r.compile(item.regex.data()) == true);
 
     for (auto &test : item.tests) {
       REQUIRE(r.exec(test.subject.data()) == test.match);
     }
   }
+
+  // case insensitive test
+  for (auto &item : test_data_case_insensitive) {
+    Regex r;
+    REQUIRE(r.compile(item.regex.data(), RE_CASE_INSENSITIVE) == true);
+
+    for (auto &test : item.tests) {
+      REQUIRE(r.exec(test.subject.data()) == test.match);
+    }
+  }
+
+  // case anchored test
+  for (auto &item : test_data_anchored) {
+    Regex r;
+    REQUIRE(r.compile(item.regex.data(), RE_ANCHORED) == true);
+
+    for (auto &test : item.tests) {
+      REQUIRE(r.exec(test.subject.data()) == test.match);
+    }
+  }
+
+  // test getting submatches with operator[]
+  for (auto &item : submatch_test_data) {
+    Regex r;
+    REQUIRE(r.compile(item.regex.data()) == true);
+    REQUIRE(r.get_capture_count() == item.capture_count);
+
+    for (auto &test : item.tests) {
+      RegexMatches matches;
+      REQUIRE(r.exec(test.subject.data(), matches) == test.count);
+      REQUIRE(matches.size() == test.count);
+
+      for (int32_t i = 0; i < test.count; i++) {
+        REQUIRE(matches[i] == test.submatches[i]);
+      }
+    }
+  }
+
+  // test getting submatches with ovector pointer
+  for (auto &item : submatch_test_data) {
+    Regex r;
+    REQUIRE(r.compile(item.regex.data()) == true);
+    REQUIRE(r.get_capture_count() == item.capture_count);
+
+    for (auto &test : item.tests) {
+      RegexMatches matches;
+      REQUIRE(r.exec(test.subject.data(), matches) == test.count);
+      REQUIRE(matches.size() == test.count);
+
+      size_t *ovector = matches.get_ovector_pointer();
+      for (int32_t i = 0; i < test.count; i++) {
+        REQUIRE(test.submatches[i] == std::string_view{test.subject.data() + 
ovector[i * 2], ovector[i * 2 + 1] - ovector[i * 2]});
+      }
+    }
+  }
+
+  // test for invalid regular expression
+  {
+    Regex r;
+    REQUIRE(r.compile(R"((\d+)", RE_CASE_INSENSITIVE) == false);
+  }
+
+  // test for not compiling regular expression
+  {
+    Regex r;
+    RegexMatches matches;
+    REQUIRE(r.exec("foo") == false);
+    REQUIRE(r.exec("foo", matches) == 0);
+  }
+
+  // test for recompiling the regular expression
+  {
+    Regex r;
+    REQUIRE(r.compile(R"(foo)") == true);
+    REQUIRE(r.exec("foo") == true);
+    REQUIRE(r.compile(R"(bar)") == true);
+    REQUIRE(r.exec("bar") == true);
+  }
 }

Reply via email to