This is an automated email from the ASF dual-hosted git repository.
bnolsen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 67593d9f9f regex_revalidate: migrate from pcre to Regex (#12574)
67593d9f9f is described below
commit 67593d9f9f232379b91e11ca28e4d9d12d60d281
Author: Brian Olsen <[email protected]>
AuthorDate: Fri Oct 24 06:30:23 2025 -0600
regex_revalidate: migrate from pcre to Regex (#12574)
---
plugins/regex_revalidate/CMakeLists.txt | 1 -
plugins/regex_revalidate/regex_revalidate.cc | 153 +++++++++++----------
.../regex_revalidate/regex_revalidate.test.py | 1 -
3 files changed, 80 insertions(+), 75 deletions(-)
diff --git a/plugins/regex_revalidate/CMakeLists.txt b/plugins/regex_revalidate/CMakeLists.txt
index e095fd5684..b4e04fd202 100644
--- a/plugins/regex_revalidate/CMakeLists.txt
+++ b/plugins/regex_revalidate/CMakeLists.txt
@@ -17,5 +17,4 @@
add_atsplugin(regex_revalidate regex_revalidate.cc)
-target_link_libraries(regex_revalidate PRIVATE PCRE::PCRE)
verify_global_plugin(regex_revalidate)
diff --git a/plugins/regex_revalidate/regex_revalidate.cc b/plugins/regex_revalidate/regex_revalidate.cc
index 470cd95b1b..5e8d8cf2f7 100644
--- a/plugins/regex_revalidate/regex_revalidate.cc
+++ b/plugins/regex_revalidate/regex_revalidate.cc
@@ -33,11 +33,7 @@
#include <fcntl.h>
#include <unistd.h>
-#ifdef HAVE_PCRE_PCRE_H
-#include <pcre/pcre.h>
-#else
-#include <pcre.h>
-#endif
+#include "tsutil/Regex.h"
#define CONFIG_TMOUT 60000
#define FREE_TMOUT 300000
@@ -115,9 +111,8 @@ strForResult(TSCacheLookupResult const result)
}
typedef struct invalidate_t {
- const char *regex_text;
- pcre *regex;
- pcre_extra *regex_extra;
+ char *regex_text;
+ Regex *regex;
time_t epoch;
time_t expiry;
TSCacheLookupResult new_result;
@@ -136,31 +131,25 @@ typedef struct {
static invalidate_t *
init_invalidate_t(invalidate_t *i)
{
- i->regex_text = nullptr;
- i->regex = nullptr;
- i->regex_extra = nullptr;
- i->epoch = 0;
- i->expiry = 0;
- i->new_result = TS_CACHE_LOOKUP_HIT_STALE;
- i->next = nullptr;
+ i->regex_text = nullptr;
+ i->regex = nullptr;
+ i->epoch = 0;
+ i->expiry = 0;
+ i->new_result = TS_CACHE_LOOKUP_HIT_STALE;
+ i->next = nullptr;
return i;
}
static void
free_invalidate_t(invalidate_t *i)
{
- if (i->regex_extra) {
-#ifndef PCRE_STUDY_JIT_COMPILE
- pcre_free(i->regex_extra);
-#else
- pcre_free_study(i->regex_extra);
-#endif
+ if (nullptr != i->regex) {
+ delete i->regex;
+ i->regex = nullptr;
}
- if (i->regex) {
- pcre_free(i->regex);
- }
- if (i->regex_text) {
- pcre_free_substring(i->regex_text);
+ if (nullptr != i->regex_text) {
+ TSfree(i->regex_text);
+ i->regex_text = nullptr;
}
TSfree(i);
}
@@ -212,17 +201,18 @@ static invalidate_t *
copy_invalidate_t(invalidate_t *i)
{
invalidate_t *iptr;
- const char *errptr;
- int erroffset;
-
- iptr = (invalidate_t *)TSmalloc(sizeof(invalidate_t));
- iptr->regex_text = TSstrdup(i->regex_text);
- iptr->regex = pcre_compile(iptr->regex_text, 0, &errptr, &erroffset,
nullptr); // There is no pcre_copy :-(
- iptr->regex_extra = pcre_study(iptr->regex, 0, &errptr); // Assuming no
errors since this worked before :-/
- iptr->epoch = i->epoch;
- iptr->expiry = i->expiry;
- iptr->new_result = i->new_result;
- iptr->next = nullptr;
+
+ iptr = (invalidate_t *)TSmalloc(sizeof(invalidate_t));
+ iptr->regex_text = TSstrdup(i->regex_text);
+
+ // assume this works since the source exists.
+ iptr->regex = new Regex;
+ iptr->regex->compile(iptr->regex_text);
+
+ iptr->epoch = i->epoch;
+ iptr->expiry = i->expiry;
+ iptr->new_result = i->new_result;
+ iptr->next = nullptr;
return iptr;
}
@@ -296,26 +286,30 @@ load_state(plugin_state_t *pstate, invalidate_t **ilist)
time_t const now = time(nullptr);
- const char *errptr;
- int erroffset;
- int ovector[OVECTOR_SIZE];
- pcre *const config_re =
pcre_compile("^([^#].+?)\\s+(\\d+)\\s+(\\d+)\\s+(\\w+)\\s*$", 0, &errptr,
&erroffset, nullptr);
- TSReleaseAssert(nullptr != config_re);
+ Regex config_re;
+ bool const re_stat =
config_re.compile("^([^#].+?)\\s+(\\d+)\\s+(\\d+)\\s+(\\w+)\\s*$");
+ TSReleaseAssert(true == re_stat);
char line[LINE_MAX];
int ln = 0;
while (fgets(line, LINE_MAX, fs) != nullptr) {
Dbg(dbg_ctl, "state: processing: %d %s", ln, line);
++ln;
- int const rc = pcre_exec(config_re, nullptr, line, strlen(line), 0, 0,
ovector, OVECTOR_SIZE);
+
+ RegexMatches matches;
+ int const rc = config_re.exec(line, matches);
if (5 == rc) {
invalidate_t *const inv = (invalidate_t *)TSmalloc(sizeof(invalidate_t));
init_invalidate_t(inv);
- pcre_get_substring(line, ovector, rc, 1, &(inv->regex_text));
- inv->epoch = atoi(line + ovector[4]);
- inv->expiry = atoi(line + ovector[6]);
+ auto const regv = matches[1];
+ inv->regex_text = TSstrndup(regv.data(), regv.length());
+ Dbg(dbg_ctl, "regex_text: %s", inv->regex_text);
+
+ // atoi will terminate when whitespace/eol is reached
+ inv->epoch = atoi(matches[2].data());
+ inv->expiry = atoi(matches[3].data());
if (inv->expiry < now) {
Dbg(dbg_ctl, "state: skipping expired : '%s'", inv->regex_text);
@@ -323,16 +317,15 @@ load_state(plugin_state_t *pstate, invalidate_t **ilist)
continue;
}
- int const len = ovector[9] - ovector[8];
- char const *const type = line + ovector[8];
-
- if (0 == strncasecmp(type, RESULT_STALE, len)) {
+ auto const type = matches[4];
+ if (0 == strncasecmp(type.data(), RESULT_STALE, type.length())) {
Dbg(dbg_ctl, "state: regex line set to result type %s: '%s'",
RESULT_STALE, inv->regex_text);
- } else if (0 == strncasecmp(type, RESULT_MISS, len)) {
+ } else if (0 == strncasecmp(type.data(), RESULT_MISS, type.length())) {
Dbg(dbg_ctl, "state: regex line set to result type %s: '%s'",
RESULT_MISS, inv->regex_text);
inv->new_result = TS_CACHE_LOOKUP_MISS;
} else {
- Dbg(dbg_ctl, "state: unknown regex line result type '%.*s', skipping
'%s'", len, type, inv->regex_text);
+ Dbg(dbg_ctl, "state: unknown regex line result type '%.*s', skipping
'%s'", (int)type.length(), type.data(),
+ inv->regex_text);
}
// iterate through the loaded config and try to merge
@@ -358,7 +351,6 @@ load_state(plugin_state_t *pstate, invalidate_t **ilist)
}
}
- pcre_free(config_re);
fclose(fs);
return true;
}
@@ -402,11 +394,10 @@ load_config(plugin_state_t *pstate, invalidate_t **ilist)
}
Dbg(dbg_ctl, "Attempting to load rules from: '%s'", path);
- const char *errptr;
- int erroffset;
- int ovector[OVECTOR_SIZE];
- pcre *const config_re =
pcre_compile("^([^#].+?)\\s+(\\d+)(\\s+(\\w+))?\\s*$", 0, &errptr, &erroffset,
nullptr);
- TSReleaseAssert(nullptr != config_re);
+
+ Regex config_re;
+ bool const regstat =
config_re.compile("^([^#].+?)\\s+(\\d+)(\\s+(\\w+))?\\s*$");
+ TSReleaseAssert(true == regstat);
char line[LINE_MAX];
int ln = 0;
@@ -415,25 +406,40 @@ load_config(plugin_state_t *pstate, invalidate_t **ilist)
while (fgets(line, LINE_MAX, fs) != nullptr) {
Dbg(dbg_ctl, "Processing: %d %s", ln, line);
++ln;
- int const rc = pcre_exec(config_re, nullptr, line, strlen(line), 0, 0,
ovector, OVECTOR_SIZE);
+ RegexMatches matches;
+ int const rc = config_re.exec(line, matches);
if (3 <= rc) {
i = (invalidate_t *)TSmalloc(sizeof(invalidate_t));
init_invalidate_t(i);
- pcre_get_substring(line, ovector, rc, 1, &i->regex_text);
- i->regex = pcre_compile(i->regex_text, 0, &errptr, &erroffset,
nullptr);
- i->epoch = now;
- i->expiry = atoi(line + ovector[4]);
+ auto const regv = matches[1];
+
+ i->regex = new Regex;
+ std::string error;
+ int erroff = 0;
+ bool rstat = i->regex->compile(regv, error, erroff);
+ if (!rstat) {
+ Dbg(dbg_ctl, "Invalid rule regex!, message: %s, offset: %d",
error.c_str(), erroff);
+ free_invalidate_t(i);
+ i = nullptr;
+ continue;
+ }
+
+ i->regex_text = TSstrndup(regv.data(), regv.length());
+ Dbg(dbg_ctl, "regex_tex: %s", i->regex_text);
+ i->epoch = now;
+ // atoi will terminate when whitespace/eol is reached
+ i->expiry = atoi(matches[2].data());
if (5 == rc) {
- int const len = ovector[9] - ovector[8];
- char const *const type = line + ovector[8];
- if (0 == strncasecmp(type, RESULT_MISS, len)) {
+ auto const type = matches[4];
+ if (0 == strncasecmp(type.data(), RESULT_MISS, type.length())) {
Dbg(dbg_ctl, "Regex line set to result type %s: '%s'",
RESULT_MISS, i->regex_text);
i->new_result = TS_CACHE_LOOKUP_MISS;
- } else if (0 != strncasecmp(type, RESULT_STALE, len)) {
- Dbg(dbg_ctl, "Unknown regex line result type '%s', using default
'%s' '%s'", type, RESULT_STALE, i->regex_text);
+ } else if (0 != strncasecmp(type.data(), RESULT_STALE,
type.length())) {
+ Dbg(dbg_ctl, "Unknown regex line result type '%.*s', using default
'%s' '%s'", (int)type.length(), type.data(),
+ RESULT_STALE, i->regex_text);
}
}
@@ -446,7 +452,6 @@ load_config(plugin_state_t *pstate, invalidate_t **ilist)
free_invalidate_t(i);
i = nullptr;
} else {
- i->regex_extra = pcre_study(i->regex, 0, &errptr);
if (!*ilist) {
*ilist = i;
Dbg(dbg_ctl, "Created new list and Loaded %s %jd %jd %s",
i->regex_text, (intmax_t)i->epoch, (intmax_t)i->expiry,
@@ -485,7 +490,6 @@ load_config(plugin_state_t *pstate, invalidate_t **ilist)
Dbg(dbg_ctl, "Skipping line %d, too few fields", ln);
}
}
- pcre_free(config_re);
fclose(fs);
pstate->last_load = s.st_mtime;
return true;
@@ -695,11 +699,14 @@ main_handler(TSCont cont, TSEvent event, void *edata)
now = time(nullptr);
}
if (date <= iptr->epoch && now < iptr->expiry) {
- if (!url) {
+ if (nullptr == url) {
url = TSHttpTxnEffectiveUrlStringGet(txn, &url_len);
Dbg(dbg_ctl, "Effective url is is '%.*s'", url_len, url);
}
- if (pcre_exec(iptr->regex, iptr->regex_extra, url, url_len, 0, 0,
nullptr, 0) >= 0) {
+ Dbg(dbg_ctl, "checking: %.*s, %s", url_len, url, iptr->regex_text);
+
+ std::string_view const urlv(url, url_len);
+ if (iptr->regex->exec(urlv)) {
Dbg(dbg_ctl, "Forced revalidate, Match with rule regex: '%s'
epoch: %jd, expiry: %jd, result: '%s'", iptr->regex_text,
intmax_t(iptr->epoch), intmax_t(iptr->expiry),
strForResult(iptr->new_result));
TSHttpTxnCacheLookupStatusSet(txn, iptr->new_result);
@@ -715,7 +722,7 @@ main_handler(TSCont cont, TSEvent event, void *edata)
iptr = iptr->next;
}
}
- if (url) {
+ if (nullptr != url) {
TSfree(url);
}
}
diff --git a/tests/gold_tests/pluginTest/regex_revalidate/regex_revalidate.test.py b/tests/gold_tests/pluginTest/regex_revalidate/regex_revalidate.test.py
index d497ecfc20..aac1205ca8 100644
--- a/tests/gold_tests/pluginTest/regex_revalidate/regex_revalidate.test.py
+++ b/tests/gold_tests/pluginTest/regex_revalidate/regex_revalidate.test.py
@@ -125,7 +125,6 @@ ts.Disk.records_config.update(
{
'proxy.config.diags.debug.enabled': 1,
'proxy.config.diags.debug.tags': 'regex_revalidate',
- # 'proxy.config.diags.debug.enabled': 0,
'proxy.config.http.insert_age_in_response': 0,
'proxy.config.http.response_via_str': 3,
})