fgerlits commented on code in PR #1387:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1387#discussion_r964051600
##########
libminifi/src/utils/RegexUtils.cpp:
##########
@@ -22,58 +22,86 @@
#include <vector>
#include "Exception.h"
+#include <regex.h>
+
+namespace org::apache::nifi::minifi::utils {
#ifndef NO_MORE_REGFREEE
-namespace {
-std::size_t getMaxGroupCountOfRegex(const std::string& regex) {
- return std::count(regex.begin(), regex.end(), '(') + 1;
+SMatch::SMatch(const SMatch& other) {
+ *this = other;
}
-} // namespace
-#endif
-
-namespace org::apache::nifi::minifi::utils {
+SMatch::SMatch(SMatch&& other) {
+ *this = std::move(other);
+}
-#ifndef NO_MORE_REGFREEE
-SMatch::SuffixWrapper SMatch::suffix() const {
- if ((size_t) matches_[0].match.rm_eo >= string_.size()) {
- return SuffixWrapper{std::string()};
- } else {
- return SuffixWrapper{string_.substr(matches_[0].match.rm_eo)};
+SMatch& SMatch::operator=(const SMatch& other) {
+ if (this == &other) {
+ return *this;
+ }
+ reset(other.string_);
+ matches_.reserve(other.matches_.size());
+ ready_ = other.ready_;
+ for (const auto& sub_match : other.matches_) {
+ size_t begin_off =
gsl::narrow<size_t>(std::distance(other.string_.begin(), sub_match.first));
+ size_t end_off = gsl::narrow<size_t>(std::distance(other.string_.begin(),
sub_match.second));
+ matches_.push_back(Regmatch{sub_match.matched, string_.begin() +
begin_off, string_.begin() + end_off});
}
+ return *this;
+}
+
+SMatch& SMatch::operator=(SMatch&& other) {
+ // trigger the copy assignment, we could optimize this (by moving the
string/matches)
+ // but we would need to maintain a separate offsets vector, as after the
move the original
+ // sub_matches' iterators are invalidated, if this turns out to be a
performance bottleneck
+ // revisit this
+ return *this = other;
+}
Review Comment:
This move had to be changed to a copy because we are storing iterators
instead of offsets now. What is the advantage of storing iterators? Could we
go back to storing offsets?
##########
libminifi/src/utils/RegexUtils.cpp:
##########
@@ -22,58 +22,86 @@
#include <vector>
#include "Exception.h"
+#include <regex.h>
Review Comment:
Is this `#include` necessary? `regex.h` is already included in the header
in the non-standard-lib case; also both MSVC and the linter dislike it.
##########
libminifi/include/utils/RegexUtils.h:
##########
@@ -150,18 +132,32 @@ class Regex {
int regex_mode_;
#endif
- friend bool regexMatch(const std::string &string, const Regex& regex);
- friend bool regexMatch(const std::string &string, SMatch& match, const
Regex& regex);
- friend bool regexSearch(const std::string &string, const Regex& regex);
- friend bool regexSearch(const std::string &string, SMatch& match, const
Regex& regex);
+ friend bool regexMatch(const char* str, CMatch& match, const Regex& regex);
+ friend bool regexSearch(const char* str, CMatch& match, const Regex& regex);
+
+ friend bool regexMatch(const std::string_view& str, SVMatch& match, const
Regex& regex);
+ friend bool regexSearch(const std::string_view& str, SVMatch& match, const
Regex& regex);
+
+ friend bool regexMatch(const std::string& str, SMatch& match, const Regex&
regex);
+ friend bool regexSearch(const std::string& str, SMatch& match, const Regex&
regex);
+
friend SMatch getLastRegexMatch(const std::string& string, const
utils::Regex& regex);
};
-bool regexMatch(const std::string &string, const Regex& regex);
-bool regexMatch(const std::string &string, SMatch& match, const Regex& regex);
+bool regexMatch(const char* str, const Regex& regex);
+bool regexMatch(const char* str, CMatch& match, const Regex& regex);
+bool regexSearch(const char* str, const Regex& regex);
+bool regexSearch(const char* str, CMatch& match, const Regex& regex);
+
+bool regexMatch(const std::string_view& str, const Regex& regex);
+bool regexMatch(const std::string_view& str, SVMatch& match, const Regex&
regex);
+bool regexSearch(const std::string_view& str, const Regex& regex);
+bool regexSearch(const std::string_view& str, SVMatch& match, const Regex&
regex);
Review Comment:
Can you add some unit tests for these new overloads, please?
##########
libminifi/src/utils/RegexUtils.cpp:
##########
@@ -22,58 +22,86 @@
#include <vector>
#include "Exception.h"
+#include <regex.h>
+
+namespace org::apache::nifi::minifi::utils {
#ifndef NO_MORE_REGFREEE
-namespace {
-std::size_t getMaxGroupCountOfRegex(const std::string& regex) {
- return std::count(regex.begin(), regex.end(), '(') + 1;
+SMatch::SMatch(const SMatch& other) {
+ *this = other;
}
-} // namespace
-#endif
-
-namespace org::apache::nifi::minifi::utils {
+SMatch::SMatch(SMatch&& other) {
+ *this = std::move(other);
+}
-#ifndef NO_MORE_REGFREEE
-SMatch::SuffixWrapper SMatch::suffix() const {
- if ((size_t) matches_[0].match.rm_eo >= string_.size()) {
- return SuffixWrapper{std::string()};
- } else {
- return SuffixWrapper{string_.substr(matches_[0].match.rm_eo)};
+SMatch& SMatch::operator=(const SMatch& other) {
+ if (this == &other) {
+ return *this;
+ }
+ reset(other.string_);
+ matches_.reserve(other.matches_.size());
+ ready_ = other.ready_;
+ for (const auto& sub_match : other.matches_) {
+ size_t begin_off =
gsl::narrow<size_t>(std::distance(other.string_.begin(), sub_match.first));
+ size_t end_off = gsl::narrow<size_t>(std::distance(other.string_.begin(),
sub_match.second));
+ matches_.push_back(Regmatch{sub_match.matched, string_.begin() +
begin_off, string_.begin() + end_off});
Review Comment:
`clang-tidy` wants this changed to `emplace_back`. Although it looks like
this check is not turned on yet, it will be eventually, so let's change it now
(also in a few other places in this file).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]