fgerlits commented on a change in pull request #1138:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1138#discussion_r679004197



##########
File path: libminifi/test/unit/FileMatcherTests.cpp
##########
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define CUSTOM_EXTENSION_LIST
+
+#include "../TestBase.h"
+#include "../Path.h"
+#include "utils/file/FileMatcher.h"
+
+struct FileMatcherTestAccessor {
+  using FilePattern = fileutils::FileMatcher::FilePattern;
+};
+
+using FilePattern = FileMatcherTestAccessor::FilePattern;
+using FileMatcher = fileutils::FileMatcher;
+
+TEST_CASE("Invalid paths") {
+  REQUIRE_FALSE(FilePattern::fromPattern(""));
+  REQUIRE_FALSE(FilePattern::fromPattern("."));
+  REQUIRE_FALSE(FilePattern::fromPattern(".."));
+  REQUIRE_FALSE(FilePattern::fromPattern("!"));
+  REQUIRE_FALSE(FilePattern::fromPattern("!."));
+  REQUIRE_FALSE(FilePattern::fromPattern("!.."));
+}
+
+TEST_CASE("Matching directories without globs") {
+#ifdef WIN32
+  utils::Path root{"C:\\"};
+#else
+  utils::Path root{"/"};
+#endif
+  auto pattern = FilePattern::fromPattern((root / "one" / "banana" / "file").str()).value();
+  REQUIRE(pattern.match((root / "one").str()));
+  REQUIRE(pattern.match((root / "one" / "banana").str()));
+  REQUIRE_FALSE(pattern.match((root / "two").str()));
+  REQUIRE_FALSE(pattern.match((root / "one" / "apple").str()));
+  REQUIRE_FALSE(pattern.match((root / "one" / "banana" / "file").str()));

Review comment:
       Why doesn't this match?  A comment could be useful.

##########
File path: libminifi/include/utils/file/FileUtils.h
##########
@@ -500,13 +500,26 @@ inline void addFilesMatchingExtension(const std::shared_ptr<logging::Logger> &lo
 #endif
 }
 
+inline std::string concat_path(const std::string& root, const std::string& child, bool force_posix = false) {
+  if (root.empty()) {
+    return child;
+  }
+  std::stringstream new_path;
+  if (root.back() == get_separator(force_posix)) {
+    new_path << root << child;
+  } else {
+    new_path << root << get_separator(force_posix) << child;
+  }
+  return new_path.str();
+}
+
 /*
  * Provides a platform-independent function to list a directory
  * Callback is called for every file found: first argument is the path of the directory, second is the filename
  * Return value of the callback is used to continue (true) or stop (false) listing
  */

Review comment:
       Please add a comment here about what `dir_callback` is for.

##########
File path: libminifi/test/script-tests/ExecutePythonProcessorTests.cpp
##########
@@ -119,7 +118,7 @@ class SimplePythonFlowFileTransferTest : public ExecutePythonProcessorTestBase {
     const std::string output_dir = createTempDir(testController_.get());
 
     auto executePythonProcessor = plan_->addProcessor("ExecutePythonProcessor", "executePythonProcessor");
-    plan_->setProperty(executePythonProcessor, org::apache::nifi::minifi::python::processors::ExecutePythonProcessor::ScriptFile.getName(), getScriptFullPath("stateful_processor.py"));
+    plan_->setProperty(executePythonProcessor, "Script File", getScriptFullPath("stateful_processor.py"));

Review comment:
       I can see that `ExecutePythonProcessor::ScriptFile` is hidden, so you 
need to do this, but why is it hidden?  Can we unhide it?

##########
File path: libminifi/src/utils/file/FileMatcher.cpp
##########
@@ -0,0 +1,273 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/file/FileMatcher.h"
+#include "utils/file/FileUtils.h"
+#include "utils/StringUtils.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace utils {
+namespace file {
+
+std::shared_ptr<core::logging::Logger> FileMatcher::FilePattern::logger_ = logging::LoggerFactory<FileMatcher::FilePattern>::getLogger();
+std::shared_ptr<core::logging::Logger> FileMatcher::logger_ = logging::LoggerFactory<FileMatcher>::getLogger();
+
+static bool isGlobPattern(const std::string& pattern) {
+  return pattern.find_first_of("?*") != std::string::npos;
+}
+
+static std::vector<std::string> split(const std::string& str, const std::vector<std::string>& delimiters) {
+  std::vector<std::string> result;
+
+  size_t prev_delim_end = 0;
+  size_t next_delim_begin = std::string::npos;
+  do {
+    for (const auto& delim : delimiters) {
+      next_delim_begin = str.find(delim, prev_delim_end);
+      if (next_delim_begin != std::string::npos) {
+        result.push_back(str.substr(prev_delim_end, next_delim_begin - prev_delim_end));
+        prev_delim_end = next_delim_begin + delim.length();
+        break;
+      }
+    }
+  } while (next_delim_begin != std::string::npos);
+  result.push_back(str.substr(prev_delim_end));
+  return result;
+}
+
+optional<FileMatcher::FilePattern> FileMatcher::FilePattern::fromPattern(std::string pattern, bool log_errors) {
+  bool excluding = false;
+  if (!pattern.empty() && pattern[0] == '!') {
+    excluding = true;
+    pattern = pattern.substr(1);
+  }
+  if (pattern.empty()) {
+    if (log_errors) logger_->log_error("Empty pattern");
+    return nullopt;
+  }
+  std::string exe_dir = get_executable_dir();
+  if (exe_dir.empty() && !isAbsolutePath(pattern.c_str())) {
+    if (log_errors) logger_->log_error("Couldn't determine executable dir, relative pattern '%s' not supported", pattern);
+    return nullopt;
+  }
+  pattern = resolve(exe_dir, pattern);
+  auto segments = split(pattern, {"/", "\\"});
+  gsl_Expects(!segments.empty());
+  auto file_pattern = segments.back();
+  if (file_pattern == "**") {
+    file_pattern = "*";
+  } else {
+    segments.pop_back();
+  }
+  if (file_pattern == "." || file_pattern == "..") {
+    if (log_errors) logger_->log_error("Invalid file pattern '%s'", file_pattern);
+    return nullopt;
+  }
+  return FilePattern(segments, file_pattern, excluding);
+}
+
+std::string FileMatcher::FilePattern::getBaseDirectory() const {
+  std::string base_dir;
+  for (const auto& segment : directory_segments_) {
+    // ignore segments at or after wildcards
+    if (isGlobPattern(segment)) {
+      break;
+    }
+    base_dir += segment + get_separator();
+  }
+  return base_dir;
+}
+
+FileMatcher::FileMatcher(const std::string &patterns) {
+  for (auto&& pattern : split(patterns, {","})) {
+    if (auto&& p = FilePattern::fromPattern(pattern)) {
+      patterns_.push_back(std::move(p.value()));
+    }
+  }
+}
+
+template<typename It>
+static bool advance_if_not_equal(It& it, const It& end) {
+  if (it == end) {
+    return false;
+  }
+  ++it;
+  return true;
+}
+
+static bool is_this_dir(const std::string& dir) {
+  return dir.empty() || dir == ".";
+}
+
+template<typename It, typename Fn>
+static void skip_if(It& it, const It& end, const Fn& fn) {
+  while (it != end && fn(*it)) {
+    ++it;
+  }
+}
+
+static bool matchGlob(std::string::const_iterator pattern_it, std::string::const_iterator pattern_end, std::string::const_iterator value_it, std::string::const_iterator value_end) {
+  // match * and ?
+  for (; pattern_it != pattern_end; ++pattern_it) {
+    if (*pattern_it == '*') {
+      do {
+        if (matchGlob(std::next(pattern_it), pattern_end, value_it, value_end)) {
+          return true;
+        }
+      } while (advance_if_not_equal(value_it, value_end));
+      return false;
+    }
+    if (value_it == value_end) {
+      return false;
+    }
+    if (*pattern_it != '?' && *pattern_it != *value_it) {
+      return false;
+    }
+    ++value_it;
+  }
+  return value_it == value_end;
+}
+
+FileMatcher::FilePattern::DirMatchResult FileMatcher::FilePattern::matchDirectory(DirIt pattern_it, DirIt pattern_end, DirIt value_it, DirIt value_end) {
+  for (; pattern_it != pattern_end; ++pattern_it) {
+    if (is_this_dir(*pattern_it)) {
+      continue;
+    }
+    if (*pattern_it == "**") {
+      if (std::next(pattern_it) == pattern_end) {
+        return DirMatchResult::TREE;
+      }
+      bool matched_parent = false;
+      // any number of nested directories
+      do {
+        skip_if(value_it, value_end, is_this_dir);
+        auto result = matchDirectory(std::next(pattern_it), pattern_end, value_it, value_end);
+        if (result == DirMatchResult::TREE || result == DirMatchResult::EXACT) {
+          return result;
+        }
+        if (result == DirMatchResult::PARENT) {
+          // even though we have a parent match, there may be a "better" (exact, tree) match
+          matched_parent = true;
+        }
+      } while (advance_if_not_equal(value_it, value_end));
+      if (matched_parent) {
+        return DirMatchResult::PARENT;
+      }
+      return DirMatchResult::NONE;
+    }
+    skip_if(value_it, value_end, is_this_dir);
+    if (value_it == value_end) {
+      // we used up all the value segments but there are still pattern segments
+      return DirMatchResult::PARENT;
+    }
+    if (!matchGlob(pattern_it->begin(), pattern_it->end(), value_it->begin(), value_it->end())) {
+      return DirMatchResult::NONE;
+    }
+    ++value_it;
+  }
+  skip_if(value_it, value_end, is_this_dir);
+  if (value_it == value_end) {
+    // used up all pattern and value segments
+    return DirMatchResult::EXACT;
+  } else {
+    // used up all pattern segments but we still have value segments
+    return DirMatchResult::NONE;
+  }
+}
+
+bool FileMatcher::FilePattern::match(const std::string& directory, const optional<std::string>& filename) const {
+  auto value = split(directory, {"/", "\\"});
+  auto result = matchDirectory(directory_segments_.begin(), directory_segments_.end(), value.begin(), value.end());
+  if (!filename) {
+    if (excluding_) {
+      if (result == DirMatchResult::TREE && file_pattern_ == "*") {
+        // all files are excluded in this directory
+        return true;

Review comment:
       I can see in the unit tests that this works, but I don't understand why 
we return `true` here.  Maybe the comment could be expanded?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to