fgerlits commented on code in PR #1387:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1387#discussion_r964051600


##########
libminifi/src/utils/RegexUtils.cpp:
##########
@@ -22,58 +22,86 @@
 #include <vector>
 
 #include "Exception.h"
+#include <regex.h>
+
+namespace org::apache::nifi::minifi::utils {
 
 #ifndef NO_MORE_REGFREEE
-namespace {
 
-std::size_t getMaxGroupCountOfRegex(const std::string& regex) {
-  return std::count(regex.begin(), regex.end(), '(') + 1;
+SMatch::SMatch(const SMatch& other) {
+  *this = other;
 }
 
-}  // namespace
-#endif
-
-namespace org::apache::nifi::minifi::utils {
+SMatch::SMatch(SMatch&& other) {
+  *this = std::move(other);
+}
 
-#ifndef NO_MORE_REGFREEE
-SMatch::SuffixWrapper SMatch::suffix() const {
-  if ((size_t) matches_[0].match.rm_eo >= string_.size()) {
-    return SuffixWrapper{std::string()};
-  } else {
-    return SuffixWrapper{string_.substr(matches_[0].match.rm_eo)};
+SMatch& SMatch::operator=(const SMatch& other) {
+  if (this == &other) {
+    return *this;
+  }
+  reset(other.string_);
+  matches_.reserve(other.matches_.size());
+  ready_ = other.ready_;
+  for (const auto& sub_match : other.matches_) {
+    size_t begin_off = 
gsl::narrow<size_t>(std::distance(other.string_.begin(), sub_match.first));
+    size_t end_off = gsl::narrow<size_t>(std::distance(other.string_.begin(), 
sub_match.second));
+    matches_.push_back(Regmatch{sub_match.matched, string_.begin() + 
begin_off, string_.begin() + end_off});
   }
+  return *this;
+}
+
+SMatch& SMatch::operator=(SMatch&& other) {
+  // trigger the copy assignment, we could optimize this (by moving the 
string/matches)
+  // but we would need to maintain a separate offsets vector, as after the 
move the original
+  // sub_matches' iterators are invalidated, if this turns out to be a 
performance bottleneck
+  // revisit this
+  return *this = other;
+}

Review Comment:
   This move had to be changed to a copy because we are storing iterators 
instead of offsets now.  What is the advantage of storing iterators?  Could we 
go back to storing offsets?



##########
libminifi/src/utils/RegexUtils.cpp:
##########
@@ -22,58 +22,86 @@
 #include <vector>
 
 #include "Exception.h"
+#include <regex.h>

Review Comment:
   Is this `#include` necessary?  `regex.h` is already included in the header 
in the non-standard-lib case; also both MSVC and the linter dislike it.



##########
libminifi/include/utils/RegexUtils.h:
##########
@@ -150,18 +132,32 @@ class Regex {
   int regex_mode_;
 #endif
 
-  friend bool regexMatch(const std::string &string, const Regex& regex);
-  friend bool regexMatch(const std::string &string, SMatch& match, const 
Regex& regex);
-  friend bool regexSearch(const std::string &string, const Regex& regex);
-  friend bool regexSearch(const std::string &string, SMatch& match, const 
Regex& regex);
+  friend bool regexMatch(const char* str, CMatch& match, const Regex& regex);
+  friend bool regexSearch(const char* str, CMatch& match, const Regex& regex);
+
+  friend bool regexMatch(const std::string_view& str, SVMatch& match, const 
Regex& regex);
+  friend bool regexSearch(const std::string_view& str, SVMatch& match, const 
Regex& regex);
+
+  friend bool regexMatch(const std::string& str, SMatch& match, const Regex& 
regex);
+  friend bool regexSearch(const std::string& str, SMatch& match, const Regex& 
regex);
+
   friend SMatch getLastRegexMatch(const std::string& string, const 
utils::Regex& regex);
 };
 
-bool regexMatch(const std::string &string, const Regex& regex);
-bool regexMatch(const std::string &string, SMatch& match, const Regex& regex);
+bool regexMatch(const char* str, const Regex& regex);
+bool regexMatch(const char* str, CMatch& match, const Regex& regex);
+bool regexSearch(const char* str, const Regex& regex);
+bool regexSearch(const char* str, CMatch& match, const Regex& regex);
+
+bool regexMatch(const std::string_view& str, const Regex& regex);
+bool regexMatch(const std::string_view& str, SVMatch& match, const Regex& 
regex);
+bool regexSearch(const std::string_view& str, const Regex& regex);
+bool regexSearch(const std::string_view& str, SVMatch& match, const Regex& 
regex);

Review Comment:
   Can you add some unit tests for these new overloads, please?



##########
libminifi/src/utils/RegexUtils.cpp:
##########
@@ -22,58 +22,86 @@
 #include <vector>
 
 #include "Exception.h"
+#include <regex.h>
+
+namespace org::apache::nifi::minifi::utils {
 
 #ifndef NO_MORE_REGFREEE
-namespace {
 
-std::size_t getMaxGroupCountOfRegex(const std::string& regex) {
-  return std::count(regex.begin(), regex.end(), '(') + 1;
+SMatch::SMatch(const SMatch& other) {
+  *this = other;
 }
 
-}  // namespace
-#endif
-
-namespace org::apache::nifi::minifi::utils {
+SMatch::SMatch(SMatch&& other) {
+  *this = std::move(other);
+}
 
-#ifndef NO_MORE_REGFREEE
-SMatch::SuffixWrapper SMatch::suffix() const {
-  if ((size_t) matches_[0].match.rm_eo >= string_.size()) {
-    return SuffixWrapper{std::string()};
-  } else {
-    return SuffixWrapper{string_.substr(matches_[0].match.rm_eo)};
+SMatch& SMatch::operator=(const SMatch& other) {
+  if (this == &other) {
+    return *this;
+  }
+  reset(other.string_);
+  matches_.reserve(other.matches_.size());
+  ready_ = other.ready_;
+  for (const auto& sub_match : other.matches_) {
+    size_t begin_off = 
gsl::narrow<size_t>(std::distance(other.string_.begin(), sub_match.first));
+    size_t end_off = gsl::narrow<size_t>(std::distance(other.string_.begin(), 
sub_match.second));
+    matches_.push_back(Regmatch{sub_match.matched, string_.begin() + 
begin_off, string_.begin() + end_off});

Review Comment:
   `clang-tidy` wants this changed to `emplace_back`. Although it looks like 
this check is not turned on yet, it will be eventually, so let's change it now 
(also in a few other places in this file).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to