This is an automated email from the ASF dual-hosted git repository. fgerlits pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit 287fe853822880c083728d8d03b00f9c5c1321ef Author: Gabor Gyimesi <[email protected]> AuthorDate: Wed Jun 29 17:00:57 2022 +0200 MINIFICPP-1830 Verify regex properties in processors in case it is set to empty Also: - Replace regex_search with regex_match when full match is expected - Fix default value for Authorized DN Pattern Signed-off-by: Ferenc Gerlits <[email protected]> This closes #1362 --- PROCESSORS.md | 2 +- extensions/aws/s3/S3Wrapper.cpp | 16 +++------------- extensions/aws/s3/S3Wrapper.h | 14 ++------------ extensions/civetweb/processors/ListenHTTP.cpp | 13 ++++++++----- extensions/civetweb/processors/ListenHTTP.h | 4 ++-- extensions/http-curl/client/HTTPClient.cpp | 11 ----------- extensions/http-curl/client/HTTPClient.h | 2 -- extensions/librdkafka/PublishKafka.cpp | 6 +++--- extensions/librdkafka/PublishKafka.h | 2 +- extensions/sftp/processors/ListSFTP.cpp | 22 ++++++---------------- extensions/sftp/processors/ListSFTP.h | 7 +++---- .../standard-processors/processors/GetFile.cpp | 4 ++-- .../standard-processors/processors/GetFile.h | 2 +- .../windows-event-log/wel/MetadataWalker.cpp | 4 ++-- extensions/windows-event-log/wel/MetadataWalker.h | 9 +++------ libminifi/include/utils/HTTPClient.h | 2 -- libminifi/include/utils/HTTPUtils.h | 6 +++--- 17 files changed, 40 insertions(+), 86 deletions(-) diff --git a/PROCESSORS.md b/PROCESSORS.md index 71a7221c4..20a393c57 100644 --- a/PROCESSORS.md +++ b/PROCESSORS.md @@ -938,7 +938,7 @@ In the list below, the names of required properties appear in bold. 
Any other pr | Name | Default Value | Allowable Values | Description | |------------------------|---------------|------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| | Batch Size | 10 | | The maximum number of files to pull in each iteration | -| File Filter | [^\.].* | | Only files whose names match the given regular expression will be picked up | +| File Filter | .* | | Only files whose names match the given regular expression will be picked up | | Ignore Hidden Files | true | | Indicates whether or not hidden files should be ignored | | **Input Directory** | | | The input directory from which to pull files<br/>**Supports Expression Language: true** | | Keep Source File | false | | If true, the file is not deleted after it has been copied to the Content Repository | diff --git a/extensions/aws/s3/S3Wrapper.cpp b/extensions/aws/s3/S3Wrapper.cpp index 757bf0210..6e76676ba 100644 --- a/extensions/aws/s3/S3Wrapper.cpp +++ b/extensions/aws/s3/S3Wrapper.cpp @@ -29,12 +29,7 @@ #include "utils/gsl.h" #include "utils/RegexUtils.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace aws { -namespace s3 { +namespace org::apache::nifi::minifi::aws::s3 { void HeadObjectResult::setFilePaths(const std::string& key) { absolute_path = key; @@ -58,7 +53,7 @@ void S3Wrapper::setCannedAcl(Aws::S3::Model::PutObjectRequest& request, const st Expiration S3Wrapper::getExpiration(const std::string& expiration) { minifi::utils::Regex expr("expiry-date=\"(.*)\", rule-id=\"(.*)\""); minifi::utils::SMatch matches; - const bool matched = minifi::utils::regexSearch(expiration, matches, expr); + const bool matched = minifi::utils::regexMatch(expiration, matches, expr); if (!matched || matches.size() < 3) return Expiration{}; return Expiration{matches[1], matches[2]}; @@ -301,9 +296,4 @@ FetchObjectResult 
S3Wrapper::fillFetchObjectResult(const GetObjectRequestParamet return result; } -} // namespace s3 -} // namespace aws -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +} // namespace org::apache::nifi::minifi::aws::s3 diff --git a/extensions/aws/s3/S3Wrapper.h b/extensions/aws/s3/S3Wrapper.h index 6e32ec428..5424319e1 100644 --- a/extensions/aws/s3/S3Wrapper.h +++ b/extensions/aws/s3/S3Wrapper.h @@ -43,12 +43,7 @@ #include "io/BaseStream.h" #include "S3RequestSender.h" -namespace org { -namespace apache { -namespace nifi { -namespace minifi { -namespace aws { -namespace s3 { +namespace org::apache::nifi::minifi::aws::s3 { static const std::unordered_map<std::string, Aws::S3::Model::StorageClass> STORAGE_CLASS_MAP { {"Standard", Aws::S3::Model::StorageClass::STANDARD}, @@ -241,9 +236,4 @@ class S3Wrapper { uint64_t last_bucket_list_timestamp_ = 0; }; -} // namespace s3 -} // namespace aws -} // namespace minifi -} // namespace nifi -} // namespace apache -} // namespace org +} // namespace org::apache::nifi::minifi::aws::s3 diff --git a/extensions/civetweb/processors/ListenHTTP.cpp b/extensions/civetweb/processors/ListenHTTP.cpp index 31d146852..6aa0fc750 100644 --- a/extensions/civetweb/processors/ListenHTTP.cpp +++ b/extensions/civetweb/processors/ListenHTTP.cpp @@ -109,9 +109,11 @@ void ListenHTTP::onSchedule(core::ProcessContext *context, core::ProcessSessionF bool randomPort = listeningPort == "0"; std::string authDNPattern; - if (context->getProperty(AuthorizedDNPattern.getName(), authDNPattern) && !authDNPattern.empty()) { logger_->log_debug("ListenHTTP using %s: %s", AuthorizedDNPattern.getName(), authDNPattern); + } else { + authDNPattern = ".*"; + logger_->log_debug("Authorized DN Pattern not set or invalid, using default '%s' pattern", authDNPattern); } std::string sslCertFile; @@ -201,7 +203,8 @@ void ListenHTTP::onSchedule(core::ProcessContext *context, core::ProcessSessionF 
context->getProperty(BatchSize.getName(), batch_size_); logger_->log_debug("ListenHTTP using %s: %zu", BatchSize.getName(), batch_size_); - handler_ = std::make_unique<Handler>(basePath, context, std::move(authDNPattern), std::move(headersAsAttributesPattern)); + handler_ = std::make_unique<Handler>(basePath, context, std::move(authDNPattern), + headersAsAttributesPattern.empty() ? std::nullopt : std::make_optional<utils::Regex>(headersAsAttributesPattern)); server_->addHandler(basePath, handler_.get()); if (randomPort) { @@ -275,10 +278,10 @@ void ListenHTTP::processRequestBuffer(core::ProcessSession *session) { logger_->log_debug("ListenHTTP transferred %zu flow files from HTTP request buffer", flow_file_count); } -ListenHTTP::Handler::Handler(std::string base_uri, core::ProcessContext *context, std::string &&auth_dn_regex, std::string &&header_as_attrs_regex) +ListenHTTP::Handler::Handler(std::string base_uri, core::ProcessContext *context, std::string &&auth_dn_regex, std::optional<utils::Regex> &&headers_as_attrs_regex) : base_uri_(std::move(base_uri)), auth_dn_regex_(std::move(auth_dn_regex)), - headers_as_attrs_regex_(std::move(header_as_attrs_regex)), + headers_as_attrs_regex_(std::move(headers_as_attrs_regex)), process_context_(context) { context->getProperty(BufferSize.getName(), buffer_size_); logger_->log_debug("ListenHTTP using %s: %zu", BufferSize.getName(), buffer_size_); @@ -303,7 +306,7 @@ void ListenHTTP::Handler::setHeaderAttributes(const mg_request_info *req_info, c if (strcmp("filename", header->name) == 0) { flow_file->setAttribute("filename", header->value); - } else if (utils::regexMatch(header->name, headers_as_attrs_regex_)) { + } else if (headers_as_attrs_regex_ && utils::regexMatch(header->name, *headers_as_attrs_regex_)) { flow_file->setAttribute(header->name, header->value); } } diff --git a/extensions/civetweb/processors/ListenHTTP.h b/extensions/civetweb/processors/ListenHTTP.h index a25ec4c73..bc3ad4b9d 100644 --- 
a/extensions/civetweb/processors/ListenHTTP.h +++ b/extensions/civetweb/processors/ListenHTTP.h @@ -110,7 +110,7 @@ class ListenHTTP : public core::Processor { Handler(std::string base_uri, core::ProcessContext *context, std::string &&auth_dn_regex, - std::string &&header_as_attrs_regex); + std::optional<utils::Regex> &&headers_as_attrs_regex); bool handlePost(CivetServer *server, struct mg_connection *conn) override; bool handleGet(CivetServer *server, struct mg_connection *conn) override; bool handleHead(CivetServer *server, struct mg_connection *conn) override; @@ -134,7 +134,7 @@ class ListenHTTP : public core::Processor { std::string base_uri_; utils::Regex auth_dn_regex_; - utils::Regex headers_as_attrs_regex_; + std::optional<utils::Regex> headers_as_attrs_regex_; core::ProcessContext *process_context_; std::shared_ptr<core::logging::Logger> logger_ = core::logging::LoggerFactory<ListenHTTP>::getLogger(); std::map<std::string, ResponseBody> response_uri_map_; diff --git a/extensions/http-curl/client/HTTPClient.cpp b/extensions/http-curl/client/HTTPClient.cpp index 42dc658d9..185593c51 100644 --- a/extensions/http-curl/client/HTTPClient.cpp +++ b/extensions/http-curl/client/HTTPClient.cpp @@ -387,17 +387,6 @@ int HTTPClient::onProgress(void *clientp, curl_off_t /*dltotal*/, curl_off_t dln return 0; } -bool HTTPClient::matches(const std::string &value, const std::string &sregex) { - if (sregex == ".*") - return true; - try { - utils::Regex rgx(sregex); - return utils::regexSearch(value, rgx); - } catch (const Exception &) { - return false; - } -} - void HTTPClient::configure_secure_connection(CURL *http_session) { logger_->log_debug("Using certificate file \"%s\"", ssl_context_service_->getCertificateFile()); logger_->log_debug("Using private key file \"%s\"", ssl_context_service_->getPrivateKeyFile()); diff --git a/extensions/http-curl/client/HTTPClient.h b/extensions/http-curl/client/HTTPClient.h index d2ef25e12..0ce64b882 100644 --- 
a/extensions/http-curl/client/HTTPClient.h +++ b/extensions/http-curl/client/HTTPClient.h @@ -256,8 +256,6 @@ class HTTPClient : public BaseHTTPClient, public core::Connectable { Progress progress_; protected: - inline bool matches(const std::string &value, const std::string &sregex) override; - static CURLcode configure_ssl_context(CURL* /*curl*/, void *ctx, void *param) { #ifdef OPENSSL_SUPPORT minifi::controllers::SSLContextService *ssl_context_service = static_cast<minifi::controllers::SSLContextService*>(param); diff --git a/extensions/librdkafka/PublishKafka.cpp b/extensions/librdkafka/PublishKafka.cpp index 066c32527..0c4a50a8c 100644 --- a/extensions/librdkafka/PublishKafka.cpp +++ b/extensions/librdkafka/PublishKafka.cpp @@ -184,12 +184,12 @@ class ReadCallback { }); } - static rd_kafka_headers_unique_ptr make_headers(const core::FlowFile& flow_file, utils::Regex& attribute_name_regex) { + static rd_kafka_headers_unique_ptr make_headers(const core::FlowFile& flow_file, const std::optional<utils::Regex>& attribute_name_regex) { const gsl::owner<rd_kafka_headers_t*> result{ rd_kafka_headers_new(8) }; if (!result) { throw std::bad_alloc{}; } for (const auto& kv : flow_file.getAttributes()) { - if (utils::regexSearch(kv.first, attribute_name_regex)) { + if (attribute_name_regex && utils::regexMatch(kv.first, *attribute_name_regex)) { rd_kafka_header_add(result, kv.first.c_str(), kv.first.size(), kv.second.c_str(), kv.second.size()); } } @@ -238,7 +238,7 @@ class ReadCallback { rd_kafka_topic_t* const rkt, rd_kafka_t* const rk, const core::FlowFile& flowFile, - utils::Regex& attributeNameRegex, + const std::optional<utils::Regex>& attributeNameRegex, std::shared_ptr<PublishKafka::Messages> messages, const size_t flow_file_index, const bool fail_empty_flow_files, diff --git a/extensions/librdkafka/PublishKafka.h b/extensions/librdkafka/PublishKafka.h index 57e5ccd75..7e8f218e7 100644 --- a/extensions/librdkafka/PublishKafka.h +++ 
b/extensions/librdkafka/PublishKafka.h @@ -148,7 +148,7 @@ class PublishKafka : public KafkaProcessorBase { uint32_t batch_size_{}; uint64_t target_batch_payload_size_{}; uint64_t max_flow_seg_size_{}; - utils::Regex attributeNameRegex_; + std::optional<utils::Regex> attributeNameRegex_; std::atomic<bool> interrupted_{false}; std::mutex messages_mutex_; // If both connection_mutex_ and messages_mutex_ are needed, always take connection_mutex_ first to avoid deadlock diff --git a/extensions/sftp/processors/ListSFTP.cpp b/extensions/sftp/processors/ListSFTP.cpp index 4603bf7a0..aa841fae4 100644 --- a/extensions/sftp/processors/ListSFTP.cpp +++ b/extensions/sftp/processors/ListSFTP.cpp @@ -78,8 +78,6 @@ ListSFTP::ListSFTP(const std::string& name, const utils::Identifier& uuid /*= ut : SFTPProcessorBase(name, uuid) , search_recursively_(false) , follow_symlink_(false) - , file_filter_regex_set_(false) - , path_filter_regex_set_(false) , ignore_dotted_files_(false) , minimum_file_age_(0U) , maximum_file_age_(0U) @@ -116,27 +114,19 @@ void ListSFTP::onSchedule(const std::shared_ptr<core::ProcessContext> &context, } else { follow_symlink_ = utils::StringUtils::toBool(value).value_or(false); } - if (context->getProperty(FileFilterRegex.getName(), file_filter_regex_)) { + if (context->getProperty(FileFilterRegex.getName(), file_filter_regex_) && !file_filter_regex_.empty()) { try { compiled_file_filter_regex_ = utils::Regex(file_filter_regex_); - file_filter_regex_set_ = true; } catch (const Exception &e) { logger_->log_error("Failed to compile File Filter Regex \"%s\"", file_filter_regex_.c_str()); - file_filter_regex_set_ = false; } - } else { - file_filter_regex_set_ = false; } - if (context->getProperty(PathFilterRegex.getName(), path_filter_regex_)) { + if (context->getProperty(PathFilterRegex.getName(), path_filter_regex_) && !path_filter_regex_.empty()) { try { compiled_path_filter_regex_ = utils::Regex(path_filter_regex_); - path_filter_regex_set_ = true; } catch 
(const Exception &e) { logger_->log_error("Failed to compile Path Filter Regex \"%s\"", path_filter_regex_.c_str()); - path_filter_regex_set_ = false; } - } else { - path_filter_regex_set_ = false; } if (!context->getProperty(IgnoreDottedFiles.getName(), value)) { logger_->log_error("Ignore Dotted Files attribute is missing or invalid"); @@ -278,9 +268,9 @@ bool ListSFTP::filterFile(const std::string& parent_path, const std::string& fil } /* File Filter Regex */ - if (file_filter_regex_set_) { + if (compiled_file_filter_regex_) { bool match = false; - match = utils::regexSearch(filename, compiled_file_filter_regex_); + match = utils::regexMatch(filename, *compiled_file_filter_regex_); if (!match) { logger_->log_debug(R"(Ignoring "%s/%s" because it did not match the File Filter Regex "%s")", parent_path.c_str(), @@ -299,10 +289,10 @@ bool ListSFTP::filterDirectory(const std::string& parent_path, const std::string } /* Path Filter Regex */ - if (path_filter_regex_set_) { + if (compiled_path_filter_regex_) { std::string dir_path = utils::file::FileUtils::concat_path(parent_path, filename, true /*force_posix*/); bool match = false; - match = utils::regexSearch(dir_path, compiled_path_filter_regex_); + match = utils::regexMatch(dir_path, *compiled_path_filter_regex_); if (!match) { logger_->log_debug(R"(Not recursing into "%s" because it did not match the Path Filter Regex "%s")", dir_path.c_str(), diff --git a/extensions/sftp/processors/ListSFTP.h b/extensions/sftp/processors/ListSFTP.h index 871aea98a..88c850502 100644 --- a/extensions/sftp/processors/ListSFTP.h +++ b/extensions/sftp/processors/ListSFTP.h @@ -25,6 +25,7 @@ #include <set> #include <tuple> #include <vector> +#include <optional> #include "SFTPProcessorBase.h" #include "core/Processor.h" @@ -124,10 +125,8 @@ class ListSFTP : public SFTPProcessorBase { bool follow_symlink_; std::string file_filter_regex_; std::string path_filter_regex_; - bool file_filter_regex_set_; - bool path_filter_regex_set_; - 
utils::Regex compiled_file_filter_regex_; - utils::Regex compiled_path_filter_regex_; + std::optional<utils::Regex> compiled_file_filter_regex_; + std::optional<utils::Regex> compiled_path_filter_regex_; bool ignore_dotted_files_; std::string target_system_timestamp_precision_; std::string entity_tracking_initial_listing_target_; diff --git a/extensions/standard-processors/processors/GetFile.cpp b/extensions/standard-processors/processors/GetFile.cpp index 2e62db146..c1451d917 100644 --- a/extensions/standard-processors/processors/GetFile.cpp +++ b/extensions/standard-processors/processors/GetFile.cpp @@ -81,7 +81,7 @@ const core::Property GetFile::Recurse( core::PropertyBuilder::createProperty("Recurse Subdirectories")->withDescription("Indicates whether or not to pull files from subdirectories")->withDefaultValue<bool>(true)->build()); const core::Property GetFile::FileFilter( - core::PropertyBuilder::createProperty("File Filter")->withDescription("Only files whose names match the given regular expression will be picked up")->withDefaultValue("[^\\.].*")->build()); + core::PropertyBuilder::createProperty("File Filter")->withDescription("Only files whose names match the given regular expression will be picked up")->withDefaultValue(".*")->build()); const core::Relationship GetFile::Success("success", "All files are routed to success"); @@ -244,7 +244,7 @@ bool GetFile::fileMatchesRequestCriteria(std::string fullName, std::string name, return false; utils::Regex rgx(request.fileFilter); - if (!utils::regexSearch(name, rgx)) { + if (!utils::regexMatch(name, rgx)) { return false; } diff --git a/extensions/standard-processors/processors/GetFile.h b/extensions/standard-processors/processors/GetFile.h index 0ef739d54..004427982 100644 --- a/extensions/standard-processors/processors/GetFile.h +++ b/extensions/standard-processors/processors/GetFile.h @@ -44,7 +44,7 @@ struct GetFileRequest { bool ignoreHiddenFile = true; std::chrono::milliseconds pollInterval{0}; uint64_t 
batchSize = 10; - std::string fileFilter = "[^\\.].*"; + std::string fileFilter = ".*"; std::string inputDirectory; }; diff --git a/extensions/windows-event-log/wel/MetadataWalker.cpp b/extensions/windows-event-log/wel/MetadataWalker.cpp index 9ac4f02ed..bf7eb6784 100644 --- a/extensions/windows-event-log/wel/MetadataWalker.cpp +++ b/extensions/windows-event-log/wel/MetadataWalker.cpp @@ -52,11 +52,11 @@ bool MetadataWalker::for_each(pugi::xml_node &node) { return input; }; - if (utils::regexMatch(attr.name(), regex_)) { + if (regex_ && utils::regexMatch(attr.name(), *regex_)) { updateText(node, attr.name(), idUpdate); } - if (utils::regexMatch(attr.value(), regex_)) { + if (regex_ && utils::regexMatch(attr.value(), *regex_)) { updateText(node, attr.value(), idUpdate); } } diff --git a/extensions/windows-event-log/wel/MetadataWalker.h b/extensions/windows-event-log/wel/MetadataWalker.h index f4938d7a9..05fc2ea61 100644 --- a/extensions/windows-event-log/wel/MetadataWalker.h +++ b/extensions/windows-event-log/wel/MetadataWalker.h @@ -28,6 +28,7 @@ #include <sstream> #include <string> #include <vector> +#include <optional> #include "core/Core.h" #include "core/Processor.h" @@ -46,8 +47,6 @@ namespace nifi { namespace minifi { namespace wel { - - /** * Defines a tree walker for the XML input * @@ -57,8 +56,7 @@ class MetadataWalker : public pugi::xml_tree_walker { MetadataWalker(const WindowsEventLogMetadata& windows_event_log_metadata, const std::string &log_name, bool update_xml, bool resolve, const std::string &regex = "") : windows_event_log_metadata_(windows_event_log_metadata), log_name_(log_name), - regex_(regex), - regex_str_(regex), + regex_(regex.empty() ? 
std::nullopt : std::make_optional(regex)), update_xml_(update_xml), resolve_(resolve) { } @@ -97,8 +95,7 @@ class MetadataWalker : public pugi::xml_tree_walker { const WindowsEventLogMetadata& windows_event_log_metadata_; std::string log_name_; - utils::Regex regex_; - std::string regex_str_; + std::optional<utils::Regex> regex_; bool update_xml_; bool resolve_; std::map<std::string, std::string> metadata_; diff --git a/libminifi/include/utils/HTTPClient.h b/libminifi/include/utils/HTTPClient.h index 4a171de65..469b5f71b 100644 --- a/libminifi/include/utils/HTTPClient.h +++ b/libminifi/include/utils/HTTPClient.h @@ -362,8 +362,6 @@ class BaseHTTPClient { std::vector<char> response_body_; std::vector<std::string> headers_; std::map<std::string, std::string> header_mapping_; - - virtual inline bool matches(const std::string &value, const std::string &sregex) = 0; }; std::string get_token(utils::BaseHTTPClient *client, std::string username, std::string password); diff --git a/libminifi/include/utils/HTTPUtils.h b/libminifi/include/utils/HTTPUtils.h index 8e62af8e3..708525e98 100644 --- a/libminifi/include/utils/HTTPUtils.h +++ b/libminifi/include/utils/HTTPUtils.h @@ -31,14 +31,14 @@ so we convert localhost to our local hostname. inline bool parse_http_components(const std::string &url, std::string &port, std::string &scheme, std::string &path) { #ifdef WIN32 auto hostname = (url.find(org::apache::nifi::minifi::io::Socket::getMyHostName()) != std::string::npos ? 
org::apache::nifi::minifi::io::Socket::getMyHostName() : "localhost"); - std::string regex_str = "^(http|https)://(" + hostname + ":)([0-9]+)?(/.*)$"; + std::string regex_str = "(http|https)://(" + hostname + ":)([0-9]+)?(/.*)"; #else - std::string regex_str = "^(http|https)://(localhost:)([0-9]+)?(/.*)$"; + std::string regex_str = "(http|https)://(localhost:)([0-9]+)?(/.*)"; #endif auto rgx = org::apache::nifi::minifi::utils::Regex(regex_str, {org::apache::nifi::minifi::utils::Regex::Mode::ICASE}); org::apache::nifi::minifi::utils::SMatch matches; - if (org::apache::nifi::minifi::utils::regexSearch(url, matches, rgx)) { + if (org::apache::nifi::minifi::utils::regexMatch(url, matches, rgx)) { if (matches.size() >= 5) { scheme = matches[1]; port = matches[3];
