This is an automated email from the ASF dual-hosted git repository. cmcfarlen pushed a commit to branch 10.0.x in repository https://gitbox.apache.org/repos/asf/trafficserver.git
commit ed2b5731a0fce52046dbaf7d6216ee2991af496b Author: Walt Karas <wka...@yahooinc.com> AuthorDate: Mon Apr 15 10:58:32 2024 -0400 Avoid including pcre2.h in Regex.h. (#11246) (cherry picked from commit 08506ef708b9bb05c6254be1a521f7d60f12e1cf) --- include/tsutil/Regex.h | 46 ++++++++++++-------- src/tsutil/Regex.cc | 115 +++++++++++++++++++++++++++---------------------- 2 files changed, 93 insertions(+), 68 deletions(-) diff --git a/include/tsutil/Regex.h b/include/tsutil/Regex.h index c4ca8feb03..44e10d53b2 100644 --- a/include/tsutil/Regex.h +++ b/include/tsutil/Regex.h @@ -28,14 +28,14 @@ #include <vector> #include <memory> -#define PCRE2_CODE_UNIT_WIDTH 8 -#include <pcre2.h> - /// @brief Match flags for regular expression evaluation. +/// +/// @internal These values are copied from pcre2.h, to avoid having to include it. The values are checked (with +/// static_assert) in Regex.cc against PCRE2 named constants, in case they change in future PCRE2 releases. enum REFlags { - RE_CASE_INSENSITIVE = PCRE2_CASELESS, ///< Ignore case (default: case sensitive). - RE_UNANCHORED = PCRE2_MULTILINE, ///< Unanchored (DFA defaults to anchored). - RE_ANCHORED = PCRE2_ANCHORED, ///< Anchored (Regex defaults to unanchored). + RE_CASE_INSENSITIVE = 0x00000008u, ///< Ignore case (default: case sensitive). + RE_UNANCHORED = 0x00000400u, ///< Unanchored (DFA defaults to anchored). + RE_ANCHORED = 0x80000000u, ///< Anchored (Regex defaults to unanchored). }; /// @brief Wrapper for PCRE2 match data. 
@@ -63,19 +63,25 @@ public: size_t *get_ovector_pointer(); int32_t size() const; -protected: - pcre2_match_data *get_match_data(); - void set_subject(std::string_view subject); - void set_size(int32_t size); - private: constexpr static uint32_t DEFAULT_MATCHES = 10; static void *malloc(size_t size, void *caller); - pcre2_match_data *_match_data = nullptr; std::string_view _subject; char _buffer[24 + 96 + 16 * DEFAULT_MATCHES]; // 24 bytes for the general context, 96 bytes overhead, 16 bytes per match. size_t _buffer_bytes_used = 0; int32_t _size = 0; + + /// @internal This effectively wraps a void* so that we can avoid requiring the pcre2.h include for the user of the Regex + /// API (see Regex.cc). + struct _MatchData; + class _MatchDataPtr + { + friend struct _MatchData; + + private: + void *_ptr = nullptr; + }; + _MatchDataPtr _match_data; }; /// @brief Wrapper for PCRE2 regular expression. @@ -135,11 +141,17 @@ public: int get_capture_count(); private: - // @internal - Because the PCRE header is badly done, we can't forward declare the PCRE - // enough to use as pointers. For some reason the header defines in name only a struct and - // then aliases it to the standard name, rather than simply declare the latter in name only. - // The goal is completely wrap PCRE and not include that header in client code. - pcre2_code *_code = nullptr; + /// @internal This effectively wraps a void* so that we can avoid requiring the pcre2.h include for the user of the Regex + /// API (see Regex.cc). + struct _Code; + class _CodePtr + { + friend struct _Code; + + private: + void *_ptr = nullptr; + }; + _CodePtr _code; }; /** Deterministic Finite state Automata container. 
diff --git a/src/tsutil/Regex.cc b/src/tsutil/Regex.cc index 37e4b4337b..fb2cddbb2e 100644 --- a/src/tsutil/Regex.cc +++ b/src/tsutil/Regex.cc @@ -23,11 +23,18 @@ #include "tsutil/Regex.h" +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> + #include <array> #include <assert.h> #include <vector> #include <mutex> +static_assert(RE_CASE_INSENSITIVE == PCRE2_CASELESS, "Update RE_CASE_INSENSITIVE for current PCRE2 version."); +static_assert(RE_UNANCHORED == PCRE2_MULTILINE, "Update RE_UNANCHORED for current PCRE2 version."); +static_assert(RE_ANCHORED == PCRE2_ANCHORED, "Update RE_ANCHORED for current PCRE2 version."); + //---------------------------------------------------------------------------- namespace { @@ -145,13 +152,27 @@ RegexContextCleanup::push_back(RegexContext *ctx) } // namespace +//---------------------------------------------------------------------------- +struct RegexMatches::_MatchData { + static pcre2_match_data * + get(_MatchDataPtr const &p) + { + return static_cast<pcre2_match_data *>(p._ptr); + } + static void + set(_MatchDataPtr &p, pcre2_match_data *ptr) + { + p._ptr = ptr; + } +}; + //---------------------------------------------------------------------------- RegexMatches::RegexMatches(uint32_t size) { pcre2_general_context *ctx = pcre2_general_context_create( &RegexMatches::malloc, [](void *, void *) -> void {}, static_cast<void *>(this)); - _match_data = pcre2_match_data_create(size, ctx); + _MatchData::set(_match_data, pcre2_match_data_create(size, ctx)); } //---------------------------------------------------------------------------- @@ -175,8 +196,9 @@ RegexMatches::malloc(size_t size, void *caller) //---------------------------------------------------------------------------- RegexMatches::~RegexMatches() { - if (_match_data != nullptr) { - pcre2_match_data_free(_match_data); + auto ptr = _MatchData::get(_match_data); + if (ptr != nullptr) { + pcre2_match_data_free(ptr); } } @@ -184,7 +206,7 @@ RegexMatches::~RegexMatches() size_t * 
RegexMatches::get_ovector_pointer() { - return pcre2_get_ovector_pointer(_match_data); + return pcre2_get_ovector_pointer(_MatchData::get(_match_data)); } //---------------------------------------------------------------------------- @@ -194,52 +216,46 @@ RegexMatches::size() const return _size; } -//---------------------------------------------------------------------------- -pcre2_match_data * -RegexMatches::get_match_data() -{ - return _match_data; -} - -//---------------------------------------------------------------------------- -void -RegexMatches::set_size(int32_t size) -{ - _size = size; -} - -//---------------------------------------------------------------------------- -void -RegexMatches::set_subject(std::string_view subject) -{ - _subject = subject; -} - //---------------------------------------------------------------------------- std::string_view RegexMatches::operator[](size_t index) const { // check if the index is valid - if (index >= pcre2_get_ovector_count(_match_data)) { + if (index >= pcre2_get_ovector_count(_MatchData::get(_match_data))) { return std::string_view(); } - PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(_match_data); + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(_MatchData::get(_match_data)); return std::string_view(_subject.data() + ovector[2 * index], ovector[2 * index + 1] - ovector[2 * index]); } +//---------------------------------------------------------------------------- +struct Regex::_Code { + static pcre2_code * + get(_CodePtr const &p) + { + return static_cast<pcre2_code *>(p._ptr); + } + static void + set(_CodePtr &p, pcre2_code *ptr) + { + p._ptr = ptr; + } +}; + //---------------------------------------------------------------------------- Regex::Regex(Regex &&that) noexcept { - _code = that._code; - that._code = nullptr; + _code = that._code; + _Code::set(that._code, nullptr); } //---------------------------------------------------------------------------- Regex::~Regex() { - if (_code != nullptr) { - 
pcre2_code_free(_code); + auto ptr = _Code::get(_code); + if (ptr != nullptr) { + pcre2_code_free(ptr); } } @@ -258,8 +274,8 @@ bool Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, uint32_t flags) { // free the existing compiled regex if there is one - if (_code != nullptr) { - pcre2_code_free(_code); + if (auto ptr = _Code::get(_code); ptr != nullptr) { + pcre2_code_free(ptr); } // get the RegexContext instance - should only be null when shutting down @@ -270,9 +286,9 @@ Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, u PCRE2_SIZE error_offset; int error_code; - _code = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(pattern.data()), pattern.size(), flags, &error_code, &error_offset, - regex_context->get_compile_context()); - if (!_code) { + auto code = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(pattern.data()), pattern.size(), flags, &error_code, &error_offset, + regex_context->get_compile_context()); + if (!code) { erroroffset = error_offset; // get pcre2 error message @@ -283,7 +299,9 @@ Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, u } // support for JIT - pcre2_jit_compile(_code, PCRE2_JIT_COMPLETE); + pcre2_jit_compile(code, PCRE2_JIT_COMPLETE); + + _Code::set(_code, code); return true; } @@ -292,7 +310,7 @@ Regex::compile(std::string_view pattern, std::string &error, int &erroroffset, u bool Regex::exec(std::string_view subject) const { - if (_code == nullptr) { + if (_Code::get(_code) == nullptr) { return false; } RegexMatches matches; @@ -305,28 +323,23 @@ Regex::exec(std::string_view subject) const int32_t Regex::exec(std::string_view subject, RegexMatches &matches) const { - // check if there is a compiled regex - if (_code == nullptr) { - return 0; - } + auto code = _Code::get(_code); - // get the RegexContext instance - should only be null when shutting down - RegexContext *regex_context = RegexContext::get_instance(); - if (regex_context == nullptr) { + // check 
if there is a compiled regex + if (code == nullptr) { return 0; } + int count = pcre2_match(code, reinterpret_cast<PCRE2_SPTR>(subject.data()), subject.size(), 0, 0, + RegexMatches::_MatchData::get(matches._match_data), RegexContext::get_instance()->get_match_context()); - int count = pcre2_match(_code, reinterpret_cast<PCRE2_SPTR>(subject.data()), subject.size(), 0, 0, matches.get_match_data(), - regex_context->get_match_context()); - - matches.set_size(count); + matches._size = count; if (count < 0) { return count; } if (count > 0) { - matches.set_subject(subject); + matches._subject = subject; } return count; @@ -337,7 +350,7 @@ int32_t Regex::get_capture_count() { int captures = -1; - if (pcre2_pattern_info(_code, PCRE2_INFO_CAPTURECOUNT, &captures) != 0) { + if (pcre2_pattern_info(_Code::get(_code), PCRE2_INFO_CAPTURECOUNT, &captures) != 0) { return -1; } return captures;