This is an automated email from the ASF dual-hosted git repository. szaszm pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit 47f3324df607c457b0a47b5f85e7de99efaf8f0f Author: Marton Szasz <[email protected]> AuthorDate: Fri Mar 28 15:44:35 2025 +0100 MINIFICPP-2550 invokehttp: retry with attributes in headers includes CRLF, ending HTTP header list. This change removes ASCII control characters (<32, space is allowed), and it trims the status message before writing it into flow file attributes, so CRLF doesn't end up in a flow file attribute. Closes #1954 Signed-off-by: Marton Szasz <[email protected]> --- .../standard-processors/processors/InvokeHTTP.cpp | 10 +++--- .../tests/integration/InvokeHTTPTests.cpp | 37 ++++++++++++++++++++++ utils/include/http/HTTPClient.h | 1 + utils/src/http/HTTPClient.cpp | 6 ++++ 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/extensions/standard-processors/processors/InvokeHTTP.cpp b/extensions/standard-processors/processors/InvokeHTTP.cpp index 19f0f4361..810568a5d 100644 --- a/extensions/standard-processors/processors/InvokeHTTP.cpp +++ b/extensions/standard-processors/processors/InvokeHTTP.cpp @@ -208,16 +208,16 @@ bool InvokeHTTP::appendHeaders(const core::FlowFile& flow_file, /*std::invocable switch (invalid_http_header_field_handling_strategy_) { case invoke_http::InvalidHTTPHeaderFieldHandlingOption::fail: if (ranges::any_of(matching_attributes, std::not_fn(&http::HTTPClient::isValidHttpHeaderField), key_fn)) return false; - for (const auto& header: matching_attributes) append_header(header.first, header.second); + for (const auto& header: matching_attributes) append_header(header.first, http::HTTPClient::removeInvalidCharactersFromHttpHeaderFieldBody(header.second)); return true; case invoke_http::InvalidHTTPHeaderFieldHandlingOption::drop: for (const auto& header: matching_attributes | ranges::views::filter(&http::HTTPClient::isValidHttpHeaderField, key_fn)) { - append_header(header.first, header.second); + append_header(header.first, http::HTTPClient::removeInvalidCharactersFromHttpHeaderFieldBody(header.second)); } return 
true; case invoke_http::InvalidHTTPHeaderFieldHandlingOption::transform: for (const auto& header: matching_attributes) { - append_header(http::HTTPClient::replaceInvalidCharactersInHttpHeaderFieldName(header.first), header.second); + append_header(http::HTTPClient::replaceInvalidCharactersInHttpHeaderFieldName(header.first), http::HTTPClient::removeInvalidCharactersFromHttpHeaderFieldBody(header.second)); } return true; } @@ -312,7 +312,7 @@ void InvokeHTTP::onTriggerWithClient(core::ProcessContext& context, core::Proces int64_t http_code = client.getResponseCode(); const char* content_type = client.getContentType(); flow_file->addAttribute(STATUS_CODE, std::to_string(http_code)); - if (!response_headers.empty()) { flow_file->addAttribute(STATUS_MESSAGE, response_headers.at(0)); } + if (!response_headers.empty()) { flow_file->addAttribute(STATUS_MESSAGE, utils::string::trim(response_headers.at(0))); } flow_file->addAttribute(REQUEST_URL, client.getURL()); flow_file->addAttribute(TRANSACTION_ID, transaction_id); @@ -329,7 +329,7 @@ void InvokeHTTP::onTriggerWithClient(core::ProcessContext& context, core::Proces // as per RFC 2046 -- 4.5.1 response_flow->addAttribute(core::SpecialFlowAttribute::MIME_TYPE, content_type ? 
std::string(content_type) : DefaultContentType); response_flow->addAttribute(STATUS_CODE, std::to_string(http_code)); - if (!response_headers.empty()) { response_flow->addAttribute(STATUS_MESSAGE, response_headers.at(0)); } + if (!response_headers.empty()) { response_flow->addAttribute(STATUS_MESSAGE, utils::string::trim(response_headers.at(0))); } response_flow->addAttribute(REQUEST_URL, client.getURL()); response_flow->addAttribute(TRANSACTION_ID, transaction_id); io::BufferStream stream(gsl::make_span(response_body).as_span<const std::byte>()); diff --git a/extensions/standard-processors/tests/integration/InvokeHTTPTests.cpp b/extensions/standard-processors/tests/integration/InvokeHTTPTests.cpp index b0872b8a1..e26956b9e 100644 --- a/extensions/standard-processors/tests/integration/InvokeHTTPTests.cpp +++ b/extensions/standard-processors/tests/integration/InvokeHTTPTests.cpp @@ -49,6 +49,7 @@ class TestHTTPServer { log_attribute_ = dynamic_cast<processors::LogAttribute*>(test_plan_->addProcessor("LogAttribute", "LogAttribute", core::Relationship("success", "description"), true)); REQUIRE(listen_http_); REQUIRE(log_attribute_); + log_attribute_->setProperty(processors::LogAttribute::LogPayload, "true"); test_plan_->setProperty(listen_http_, org::apache::nifi::minifi::processors::ListenHTTP::BasePath, "testytesttest"); test_plan_->setProperty(listen_http_, org::apache::nifi::minifi::processors::ListenHTTP::Port, "8681"); test_plan_->setProperty(listen_http_, org::apache::nifi::minifi::processors::ListenHTTP::HeadersAsAttributesRegex, ".*"); @@ -347,4 +348,40 @@ TEST_CASE("Test Keepalive", "[InvokeHTTP]") { CHECK(1 == connection_counting_server.getConnectionCounter()); } +TEST_CASE("InvokeHTTP: invalid characters are removed from outgoing HTTP headers", "[InvokeHTTP][http][attribute][header][sanitize]") { + using processors::InvokeHTTP; + constexpr std::string_view test_content = "flow file content"; + std::string_view test_attr_value_in; + std::string_view 
test_attr_value_out; + SECTION("HTTP status message: CR and LF removed") { + test_attr_value_in = "400 Bad Request\r\n"; + test_attr_value_out = "400 Bad Request"; + }; + SECTION("UTF-8 case 1: accented characters are kept") { + test_attr_value_in = "árvíztűrő tükörfúrógép"; + test_attr_value_out = test_attr_value_in; + }; + SECTION("UTF-8 case 2: chinese characters are kept") { + test_attr_value_in = "你知道吗?最近我开始注重健康饮了"; + test_attr_value_out = test_attr_value_in; + }; + + SingleProcessorTestController controller{std::make_unique<InvokeHTTP>("InvokeHTTP")}; + const TestHTTPServer http_server(controller); + auto* const invoke_http = controller.getProcessor<InvokeHTTP>(); + invoke_http->setProperty(InvokeHTTP::Method, "POST"); + invoke_http->setProperty(InvokeHTTP::URL, http_server.URL); + invoke_http->setProperty(InvokeHTTP::AttributesToSend, ".*"); + const auto result = controller.trigger(InputFlowFileData{.content = test_content, .attributes = { + {std::string{InvokeHTTP::STATUS_MESSAGE}, std::string{test_attr_value_in}}, + }}); + CHECK(result.at(InvokeHTTP::RelFailure).empty()); + CHECK(result.at(InvokeHTTP::RelNoRetry).empty()); + CHECK(result.at(InvokeHTTP::RelRetry).empty()); + CHECK(!result.at(InvokeHTTP::Success).empty()); + CHECK(!result.at(InvokeHTTP::RelResponse).empty()); + CHECK(utils::verifyLogLinePresenceInPollTime(1s, fmt::format("key:{} value:{}", InvokeHTTP::STATUS_MESSAGE, test_attr_value_out))); + CHECK(utils::verifyLogLinePresenceInPollTime(1s, fmt::format("Payload:\n{}\n----", test_content))); +} + } // namespace org::apache::nifi::minifi::test diff --git a/utils/include/http/HTTPClient.h b/utils/include/http/HTTPClient.h index 0cd92dfcb..e6dc56d61 100644 --- a/utils/include/http/HTTPClient.h +++ b/utils/include/http/HTTPClient.h @@ -194,6 +194,7 @@ class HTTPClient : public BaseHTTPClient, public core::ConnectableImpl { static bool isValidHttpHeaderField(std::string_view field_name); static std::string 
replaceInvalidCharactersInHttpHeaderFieldName(std::string field_name); + static std::string removeInvalidCharactersFromHttpHeaderFieldBody(std::string field_body); private: static int onProgress(void *client, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow); diff --git a/utils/src/http/HTTPClient.cpp b/utils/src/http/HTTPClient.cpp index 035284ca1..29691134a 100644 --- a/utils/src/http/HTTPClient.cpp +++ b/utils/src/http/HTTPClient.cpp @@ -522,6 +522,12 @@ std::string HTTPClient::replaceInvalidCharactersInHttpHeaderFieldName(std::strin return field_name; } +std::string HTTPClient::removeInvalidCharactersFromHttpHeaderFieldBody(std::string field_body) { + // removing invalid/control characters, to avoid making a mess in the text-based protocol + std::erase_if(field_body, [](const char ch) { return static_cast<unsigned char>(ch) < 32; }); + return field_body; +} + void HTTPClient::CurlEasyCleanup::operator()(CURL* curl) const { curl_easy_cleanup(curl); }
