This is an automated email from the ASF dual-hosted git repository. aboda pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit 870249f713219cce57f0719efa3e14c6e25e15bf Author: Ferenc Gerlits <[email protected]> AuthorDate: Tue Apr 6 12:51:29 2021 +0200 MINIFICPP-1032 Refactor parse_url Modernize the parse_url() function and add error checking. Signed-off-by: Arpad Boda <[email protected]> This closes #1046 --- libminifi/include/RemoteProcessorGroupPort.h | 22 ++-- libminifi/include/utils/HTTPClient.h | 24 ++++- libminifi/src/c2/C2Agent.cpp | 16 ++- libminifi/src/utils/HTTPClient.cpp | 155 ++++++++++++++++++--------- libminifi/test/unit/HTTPUtilTests.cpp | 97 ++++++++++------- 5 files changed, 198 insertions(+), 116 deletions(-) diff --git a/libminifi/include/RemoteProcessorGroupPort.h b/libminifi/include/RemoteProcessorGroupPort.h index 3c33706..8598103 100644 --- a/libminifi/include/RemoteProcessorGroupPort.h +++ b/libminifi/include/RemoteProcessorGroupPort.h @@ -146,23 +146,17 @@ class RemoteProcessorGroupPort : public core::Processor { */ void setURL(std::string val) { auto urls = utils::StringUtils::split(val, ","); - for (auto url : urls) { - logger_->log_trace("Parsing %s", url); - std::string host, protocol; - int port = -1; - url = utils::StringUtils::trim(url); - utils::parse_url(&url, &host, &port, &protocol); - logger_->log_trace("Parsed -%s- %s %s, %d", url, protocol, host, port); - if (port == -1) { - if (protocol.find("https") != std::string::npos) { - port = 443; - } else if (protocol.find("http") != std::string::npos) { - port = 80; - } + for (const auto& url : urls) { + utils::URL parsed_url{utils::StringUtils::trim(url)}; + if (parsed_url.isValid()) { + logger_->log_debug("Parsed RPG URL '%s' -> '%s'", url, parsed_url.hostPort()); + nifi_instances_.push_back({parsed_url.host(), parsed_url.port(), parsed_url.protocol()}); + } else { + logger_->log_error("Could not parse RPG URL '%s'", url); } - nifi_instances_.push_back({ host, port, protocol }); } } + void setHTTPProxy(const utils::HTTPProxy &proxy) { this->proxy_ = proxy; } diff --git a/libminifi/include/utils/HTTPClient.h b/libminifi/include/utils/HTTPClient.h index 82aa55b..425c716 100644 --- a/libminifi/include/utils/HTTPClient.h +++ b/libminifi/include/utils/HTTPClient.h @@ -36,6 +36,7 @@ #include "controllers/SSLContextService.h" #include "core/Deprecated.h" #include "utils/gsl.h" +#include "utils/OptionalUtils.h" namespace org { namespace apache { @@ -362,10 +363,27 @@ class BaseHTTPClient { virtual inline bool matches(const std::string &value, const std::string &sregex) = 0; }; -extern std::string get_token(utils::BaseHTTPClient *client, std::string username, std::string password); +std::string get_token(utils::BaseHTTPClient *client, std::string username, std::string password); + +class URL { + public: + explicit URL(const std::string& url_input); + bool isValid() const { return is_valid_; } + std::string protocol() const { return protocol_; } + std::string host() const { return host_; } + int port() const; + std::string hostPort() const; + std::string toString() const; + + private: + std::string protocol_; + std::string host_; + utils::optional<int> port_; + utils::optional<std::string> path_; + bool is_valid_ = false; + std::shared_ptr<logging::Logger> logger_ = logging::LoggerFactory<URL>::getLogger(); +}; -extern void parse_url(const std::string *url, std::string *host, int *port, std::string *protocol); -extern void parse_url(const std::string *url, std::string *host, int *port, std::string *protocol, std::string *path, std::string *query); } // namespace utils } // namespace minifi } // namespace nifi diff --git a/libminifi/src/c2/C2Agent.cpp b/libminifi/src/c2/C2Agent.cpp index 0914ed5..9c914cc 100644 --- a/libminifi/src/c2/C2Agent.cpp +++ b/libminifi/src/c2/C2Agent.cpp @@ -760,22 +760,20 @@ utils::optional<std::string> C2Agent::fetchFlow(const std::string& uri) const { std::stringstream adjusted_url; std::string base; if (configuration_->get(minifi::Configure::nifi_c2_flow_base_url, base)) { + base = utils::StringUtils::trim(base); adjusted_url << base; if (!utils::StringUtils::endsWith(base, "/")) { adjusted_url << "/"; } adjusted_url << uri; resolved_url = adjusted_url.str(); - } else if (configuration_->get("c2.rest.url", base)) { - std::string host, protocol; - int port = -1; - utils::parse_url(&base, &host, &port, &protocol); - adjusted_url << protocol << host; - if (port > 0) { - adjusted_url << ":" << port; + } else if (configuration_->get("nifi.c2.rest.url", "c2.rest.url", base)) { + utils::URL base_url{utils::StringUtils::trim(base)}; + if (!base_url.isValid()) { + logger_->log_error("Could not parse C2 REST URL '%s'", base); + return {}; } - adjusted_url << "/c2/api/" << uri; - resolved_url = adjusted_url.str(); + resolved_url = base_url.hostPort() + "/c2/api/" + uri; } } diff --git a/libminifi/src/utils/HTTPClient.cpp b/libminifi/src/utils/HTTPClient.cpp index 3328aa5..fe9d702 100644 --- a/libminifi/src/utils/HTTPClient.cpp +++ b/libminifi/src/utils/HTTPClient.cpp @@ -15,8 +15,43 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "utils/HTTPClient.h" +#include <algorithm> #include <string> + +#include "utils/HTTPClient.h" +#include "utils/StringUtils.h" + +namespace { + +constexpr const char* HTTP = "http://"; +constexpr const char* HTTPS = "https://"; + +utils::optional<std::string> parseProtocol(const std::string& url_input) { + if (utils::StringUtils::startsWith(url_input, HTTP)) { + return HTTP; + } else if (utils::StringUtils::startsWith(url_input, HTTPS)) { + return HTTPS; + } else { + return {}; + } +} + +utils::optional<int> parsePortNumber(const std::string& port_string) { + try { + size_t pos; + int port = std::stoi(port_string, &pos); + if (pos == port_string.size()) { + return port; + } + } catch (const std::invalid_argument&) { + } catch (const std::out_of_range&) { + } + + return {}; +} + +} // namespace + namespace org { namespace apache { namespace nifi { @@ -48,72 +83,86 @@ std::string get_token(utils::BaseHTTPClient *client, std::string username, std:: return token; } -void parse_url(const std::string *url, std::string *host, int *port, std::string *protocol) { - static std::string http("http://"); - static std::string https("https://"); - - if (url->compare(0, http.size(), http) == 0) - *protocol = http; - - if (url->compare(0, https.size(), https) == 0) - *protocol = https; - - if (!protocol->empty()) { - size_t pos = url->find_first_of(":", protocol->size()); +URL::URL(const std::string& url_input) { + const auto protocol = parseProtocol(url_input); + if (protocol) { + protocol_ = *protocol; + } else { + logger_->log_error("Unknown protocol in URL '%s'", url_input); + return; + } - if (pos == std::string::npos) { - pos = url->size(); - } + std::string::const_iterator current_pos = url_input.begin(); + std::advance(current_pos, protocol_.size()); - *host = url->substr(protocol->size(), pos - protocol->size()); - - if (pos < url->size() && (*url)[pos] == ':') { - size_t ppos = url->find_first_of("/", pos); - if (ppos == std::string::npos) { - ppos = url->size(); - } - std::string portStr(url->substr(pos + 1, ppos - pos - 1)); - if (portStr.size() > 0) { - *port = std::stoi(portStr); - } + constexpr const char HOST_TERMINATORS[] = ":/?#"; + std::string::const_iterator end_of_host = std::find_first_of(current_pos, url_input.end(), std::begin(HOST_TERMINATORS), std::end(HOST_TERMINATORS)); + host_ = std::string{current_pos, end_of_host}; + if (host_.empty()) { + logger_->log_error("No host found in URL '%s'", url_input); + return; + } + current_pos = end_of_host; + + if (current_pos != url_input.end() && *current_pos == ':') { + constexpr const char PORT_TERMINATORS[] = "/?#"; + ++current_pos; + std::string::const_iterator end_of_port = std::find_first_of(current_pos, url_input.end(), std::begin(PORT_TERMINATORS), std::end(PORT_TERMINATORS)); + const auto port_number = parsePortNumber(std::string{current_pos, end_of_port}); + if (port_number) { + port_ = *port_number; } else { - // In case the host contains no port, the first part is needed only - // For eg.: nifi.io/nifi - size_t ppos = host->find_first_of("/"); - if (ppos != std::string::npos) { - *host = host->substr(0, ppos); - } + logger_->log_error("Could not parse the port number in URL '%s'", url_input); + return; } + current_pos = end_of_port; + } + + if (current_pos != url_input.end()) { + path_ = std::string{current_pos, url_input.end()}; } -} -void parse_url(const std::string *url, std::string *host, int *port, std::string *protocol, std::string *path, std::string *query) { - int temp_port = -1; + is_valid_ = true; +} - parse_url(url, host, &temp_port, protocol); +int URL::port() const { + if (port_) { + return *port_; + } else if (protocol_ == HTTP) { + return 80; + } else if (protocol_ == HTTPS) { + return 443; + } else { + throw std::logic_error{"Undefined port in URL: " + toString()}; + } +} - if (host->empty() || protocol->empty()) { - return; +std::string URL::hostPort() const { + if (!isValid()) { + return "INVALID"; } - size_t base_len = host->size() + protocol->size(); - if (temp_port != -1) { - *port = temp_port; - base_len += std::to_string(temp_port).size() + 1; // +1 for the : + if (port_) { + return protocol_ + host_ + ':' + std::to_string(*port_); + } else { + return protocol_ + host_; } +} - auto query_loc = url->find_first_of("?", base_len); +std::string URL::toString() const { + if (!isValid()) { + return "INVALID"; + } - if (query_loc < url->size()) { - *path = url->substr(base_len + 1, query_loc - base_len - 1); - *query = url->substr(query_loc + 1, url->size() - query_loc - 1); + if (path_) { + return hostPort() + *path_; } else { - *path = url->substr(base_len + 1, url->size() - base_len - 1); + return hostPort(); } } -} /* namespace utils */ -} /* namespace minifi */ -} /* namespace nifi */ -} /* namespace apache */ -} /* namespace org */ +} // namespace utils +} // namespace minifi +} // namespace nifi +} // namespace apache +} // namespace org diff --git a/libminifi/test/unit/HTTPUtilTests.cpp b/libminifi/test/unit/HTTPUtilTests.cpp index e3b7c45..dfb04f4 100644 --- a/libminifi/test/unit/HTTPUtilTests.cpp +++ b/libminifi/test/unit/HTTPUtilTests.cpp @@ -17,51 +17,74 @@ */ #include <string> -#include <iostream> #include "../TestBase.h" #include "utils/HTTPClient.h" -TEST_CASE("TestHTTPUtils::simple", "[test parse no port]") { - std::string protocol, host; - int port = -1; - std::string url = "http://nifi.io/nifi"; - minifi::utils::parse_url(&url, &host, &port, &protocol); - REQUIRE(port == -1); - REQUIRE(host == "nifi.io"); - REQUIRE(protocol == "http://"); +TEST_CASE("The URL class can parse various URL strings", "[URL][parsing]") { + const auto canParseURL = [](const std::string& url_string) { return utils::URL{url_string}.toString() == url_string; }; + + REQUIRE(canParseURL("http://nifi.io")); + REQUIRE(canParseURL("http://nifi.io:777")); + REQUIRE(canParseURL("http://nifi.io/nifi")); + REQUIRE(canParseURL("https://nifi.somewhere.far.away:321/nifi")); + REQUIRE(canParseURL("http://nifi.io?what_is_love")); + REQUIRE(canParseURL("http://nifi.io:123?what_is_love")); + REQUIRE(canParseURL("https://nifi.io/nifi_path?what_is_love")); + REQUIRE(canParseURL("http://nifi.io:4321/nifi_path?what_is_love")); + REQUIRE(canParseURL("http://nifi.io#anchors_aweigh")); + REQUIRE(canParseURL("https://nifi.io:123#anchors_aweigh")); + REQUIRE(canParseURL("http://nifi.io/nifi_path#anchors_aweigh")); + REQUIRE(canParseURL("https://nifi.io:4321/nifi_path#anchors_aweigh")); } -TEST_CASE("TestHTTPUtils::urlwithport", "[test parse with port]") { - std::string protocol, host; - int port = -1; - std::string url = "https://nifi.somewhere.far.away:321/nifi"; - minifi::utils::parse_url(&url, &host, &port, &protocol); - REQUIRE(port == 321); - REQUIRE(host == "nifi.somewhere.far.away"); - REQUIRE(protocol == "https://"); +TEST_CASE("The URL class will fail to parse invalid URL strings", "[URL][parsing]") { + const auto failToParseURL = [](const std::string& url_string) { return utils::URL{url_string}.toString() == "INVALID"; }; + + REQUIRE(failToParseURL("mailto:[email protected]")); + REQUIRE(failToParseURL("http:nifi.io")); + REQUIRE(failToParseURL("http://")); + REQUIRE(failToParseURL("http://:123")); + REQUIRE(failToParseURL("http://nifi.io:0x50")); + REQUIRE(failToParseURL("http://nifi.io:port_number")); } -TEST_CASE("TestHTTPUtils::query", "[test parse query without port]") { - std::string protocol, host, path, query; - int port = -1; - std::string url = "https://nifi.io/nifi/path?what"; - minifi::utils::parse_url(&url, &host, &port, &protocol, &path, &query); - REQUIRE(port == -1); - REQUIRE(host == "nifi.io"); - REQUIRE(protocol == "https://"); - REQUIRE(path == "nifi/path"); - REQUIRE(query == "what"); +TEST_CASE("The URL class can extract the port from URL strings", "[URL][port]") { + REQUIRE(utils::URL{"http://nifi.io"}.port() == 80); + REQUIRE(utils::URL{"http://nifi.io/nifi"}.port() == 80); + REQUIRE(utils::URL{"https://nifi.io"}.port() == 443); + REQUIRE(utils::URL{"https://nifi.io/nifi"}.port() == 443); + REQUIRE(utils::URL{"http://nifi.io:123"}.port() == 123); + REQUIRE(utils::URL{"http://nifi.io:123/nifi"}.port() == 123); + REQUIRE(utils::URL{"https://nifi.io:456"}.port() == 456); + REQUIRE(utils::URL{"https://nifi.io:456/nifi"}.port() == 456); } -TEST_CASE("TestHTTPUtils::querywithport", "[test parse query with port]") { - std::string protocol, host, path, query; - int port = -1; - std::string url = "http://nifi.io:4321/nifi_path?what_is_love"; - minifi::utils::parse_url(&url, &host, &port, &protocol, &path, &query); - REQUIRE(port == 4321); - REQUIRE(host == "nifi.io"); - REQUIRE(protocol == "http://"); - REQUIRE(path == "nifi_path"); - REQUIRE(query == "what_is_love"); +TEST_CASE("The URL class can extract the host", "[URL][host]") { + REQUIRE(utils::URL{"http://nifi.io"}.host() == "nifi.io"); + REQUIRE(utils::URL{"http://nifi.io:777"}.host() == "nifi.io"); + REQUIRE(utils::URL{"http://nifi.io/nifi"}.host() == "nifi.io"); + REQUIRE(utils::URL{"https://nifi.somewhere.far.away:321/nifi"}.host() == "nifi.somewhere.far.away"); + REQUIRE(utils::URL{"http://nifi.io?what_is_love"}.host() == "nifi.io"); + REQUIRE(utils::URL{"http://nifi.io:123?what_is_love"}.host() == "nifi.io"); + REQUIRE(utils::URL{"https://nifi.io/nifi_path?what_is_love"}.host() == "nifi.io"); + REQUIRE(utils::URL{"http://nifi.io:4321/nifi_path?what_is_love"}.host() == "nifi.io"); + REQUIRE(utils::URL{"http://nifi.io#anchors_aweigh"}.host() == "nifi.io"); + REQUIRE(utils::URL{"https://nifi.io:123#anchors_aweigh"}.host() == "nifi.io"); + REQUIRE(utils::URL{"http://nifi.io/nifi_path#anchors_aweigh"}.host() == "nifi.io"); + REQUIRE(utils::URL{"https://nifi.io:4321/nifi_path#anchors_aweigh"}.host() == "nifi.io"); } +TEST_CASE("The URL class can extract the host and port", "[URL][hostPort]") { + REQUIRE(utils::URL{"http://nifi.io"}.hostPort() == "http://nifi.io"); + REQUIRE(utils::URL{"http://nifi.io:777"}.hostPort() == "http://nifi.io:777"); + REQUIRE(utils::URL{"http://nifi.io/nifi"}.hostPort() == "http://nifi.io"); + REQUIRE(utils::URL{"https://nifi.somewhere.far.away:321/nifi"}.hostPort() == "https://nifi.somewhere.far.away:321"); + REQUIRE(utils::URL{"http://nifi.io?what_is_love"}.hostPort() == "http://nifi.io"); + REQUIRE(utils::URL{"http://nifi.io:123?what_is_love"}.hostPort() == "http://nifi.io:123"); + REQUIRE(utils::URL{"https://nifi.io/nifi_path?what_is_love"}.hostPort() == "https://nifi.io"); + REQUIRE(utils::URL{"http://nifi.io:4321/nifi_path?what_is_love"}.hostPort() == "http://nifi.io:4321"); + REQUIRE(utils::URL{"http://nifi.io#anchors_aweigh"}.hostPort() == "http://nifi.io"); + REQUIRE(utils::URL{"https://nifi.io:123#anchors_aweigh"}.hostPort() == "https://nifi.io:123"); + REQUIRE(utils::URL{"http://nifi.io/nifi_path#anchors_aweigh"}.hostPort() == "http://nifi.io"); + REQUIRE(utils::URL{"https://nifi.io:4321/nifi_path#anchors_aweigh"}.hostPort() == "https://nifi.io:4321"); +}
