This is an automated email from the ASF dual-hosted git repository.

szaszm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit 47f3324df607c457b0a47b5f85e7de99efaf8f0f
Author: Marton Szasz <[email protected]>
AuthorDate: Fri Mar 28 15:44:35 2025 +0100

    MINIFICPP-2550 invokehttp: retry with attributes in headers includes CRLF, ending HTTP header list.
    
    This change removes ASCII control characters (bytes below 32; the space
    character is kept) from outgoing HTTP header field values, and it trims
    the status message before writing it into flow file attributes, so CRLF
    doesn't end up in a flow file attribute.
    
    Closes #1954
    
    Signed-off-by: Marton Szasz <[email protected]>
---
 .../standard-processors/processors/InvokeHTTP.cpp  | 10 +++---
 .../tests/integration/InvokeHTTPTests.cpp          | 37 ++++++++++++++++++++++
 utils/include/http/HTTPClient.h                    |  1 +
 utils/src/http/HTTPClient.cpp                      |  6 ++++
 4 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/extensions/standard-processors/processors/InvokeHTTP.cpp 
b/extensions/standard-processors/processors/InvokeHTTP.cpp
index 19f0f4361..810568a5d 100644
--- a/extensions/standard-processors/processors/InvokeHTTP.cpp
+++ b/extensions/standard-processors/processors/InvokeHTTP.cpp
@@ -208,16 +208,16 @@ bool InvokeHTTP::appendHeaders(const core::FlowFile& 
flow_file, /*std::invocable
   switch (invalid_http_header_field_handling_strategy_) {
     case invoke_http::InvalidHTTPHeaderFieldHandlingOption::fail:
       if (ranges::any_of(matching_attributes, 
std::not_fn(&http::HTTPClient::isValidHttpHeaderField), key_fn)) return false;
-      for (const auto& header: matching_attributes) 
append_header(header.first, header.second);
+      for (const auto& header: matching_attributes) 
append_header(header.first, 
http::HTTPClient::removeInvalidCharactersFromHttpHeaderFieldBody(header.second));
       return true;
     case invoke_http::InvalidHTTPHeaderFieldHandlingOption::drop:
       for (const auto& header: matching_attributes | 
ranges::views::filter(&http::HTTPClient::isValidHttpHeaderField, key_fn)) {
-        append_header(header.first, header.second);
+        append_header(header.first, 
http::HTTPClient::removeInvalidCharactersFromHttpHeaderFieldBody(header.second));
       }
       return true;
     case invoke_http::InvalidHTTPHeaderFieldHandlingOption::transform:
       for (const auto& header: matching_attributes) {
-        
append_header(http::HTTPClient::replaceInvalidCharactersInHttpHeaderFieldName(header.first),
 header.second);
+        
append_header(http::HTTPClient::replaceInvalidCharactersInHttpHeaderFieldName(header.first),
 
http::HTTPClient::removeInvalidCharactersFromHttpHeaderFieldBody(header.second));
       }
       return true;
   }
@@ -312,7 +312,7 @@ void InvokeHTTP::onTriggerWithClient(core::ProcessContext& 
context, core::Proces
     int64_t http_code = client.getResponseCode();
     const char* content_type = client.getContentType();
     flow_file->addAttribute(STATUS_CODE, std::to_string(http_code));
-    if (!response_headers.empty()) { flow_file->addAttribute(STATUS_MESSAGE, 
response_headers.at(0)); }
+    if (!response_headers.empty()) { flow_file->addAttribute(STATUS_MESSAGE, 
utils::string::trim(response_headers.at(0))); }
     flow_file->addAttribute(REQUEST_URL, client.getURL());
     flow_file->addAttribute(TRANSACTION_ID, transaction_id);
 
@@ -329,7 +329,7 @@ void InvokeHTTP::onTriggerWithClient(core::ProcessContext& 
context, core::Proces
         // as per RFC 2046 -- 4.5.1
         response_flow->addAttribute(core::SpecialFlowAttribute::MIME_TYPE, 
content_type ? std::string(content_type) : DefaultContentType);
         response_flow->addAttribute(STATUS_CODE, std::to_string(http_code));
-        if (!response_headers.empty()) { 
response_flow->addAttribute(STATUS_MESSAGE, response_headers.at(0)); }
+        if (!response_headers.empty()) { 
response_flow->addAttribute(STATUS_MESSAGE, 
utils::string::trim(response_headers.at(0))); }
         response_flow->addAttribute(REQUEST_URL, client.getURL());
         response_flow->addAttribute(TRANSACTION_ID, transaction_id);
         io::BufferStream stream(gsl::make_span(response_body).as_span<const 
std::byte>());
diff --git 
a/extensions/standard-processors/tests/integration/InvokeHTTPTests.cpp 
b/extensions/standard-processors/tests/integration/InvokeHTTPTests.cpp
index b0872b8a1..e26956b9e 100644
--- a/extensions/standard-processors/tests/integration/InvokeHTTPTests.cpp
+++ b/extensions/standard-processors/tests/integration/InvokeHTTPTests.cpp
@@ -49,6 +49,7 @@ class TestHTTPServer {
     log_attribute_ = 
dynamic_cast<processors::LogAttribute*>(test_plan_->addProcessor("LogAttribute",
 "LogAttribute", core::Relationship("success", "description"), true));
     REQUIRE(listen_http_);
     REQUIRE(log_attribute_);
+    log_attribute_->setProperty(processors::LogAttribute::LogPayload, "true");
     test_plan_->setProperty(listen_http_, 
org::apache::nifi::minifi::processors::ListenHTTP::BasePath, "testytesttest");
     test_plan_->setProperty(listen_http_, 
org::apache::nifi::minifi::processors::ListenHTTP::Port, "8681");
     test_plan_->setProperty(listen_http_, 
org::apache::nifi::minifi::processors::ListenHTTP::HeadersAsAttributesRegex, 
".*");
@@ -347,4 +348,40 @@ TEST_CASE("Test Keepalive", "[InvokeHTTP]") {
   CHECK(1 == connection_counting_server.getConnectionCounter());
 }
 
+TEST_CASE("InvokeHTTP: invalid characters are removed from outgoing HTTP 
headers", "[InvokeHTTP][http][attribute][header][sanitize]") {
+  using processors::InvokeHTTP;
+  constexpr std::string_view test_content = "flow file content";
+  std::string_view test_attr_value_in;
+  std::string_view test_attr_value_out;
+  SECTION("HTTP status message: CR and LF removed") {
+    test_attr_value_in = "400 Bad Request\r\n";
+    test_attr_value_out = "400 Bad Request";
+  };
+  SECTION("UTF-8 case 1: accented characters are kept") {
+    test_attr_value_in = "árvíztűrő tükörfúrógép";
+    test_attr_value_out = test_attr_value_in;
+  };
+  SECTION("UTF-8 case 2: chinese characters are kept") {
+    test_attr_value_in = "你知道吗?最近我开始注重健康饮了";
+    test_attr_value_out = test_attr_value_in;
+  };
+
+  SingleProcessorTestController 
controller{std::make_unique<InvokeHTTP>("InvokeHTTP")};
+  const TestHTTPServer http_server(controller);
+  auto* const invoke_http = controller.getProcessor<InvokeHTTP>();
+  invoke_http->setProperty(InvokeHTTP::Method, "POST");
+  invoke_http->setProperty(InvokeHTTP::URL, http_server.URL);
+  invoke_http->setProperty(InvokeHTTP::AttributesToSend, ".*");
+  const auto result = controller.trigger(InputFlowFileData{.content = 
test_content, .attributes = {
+    {std::string{InvokeHTTP::STATUS_MESSAGE}, std::string{test_attr_value_in}},
+  }});
+  CHECK(result.at(InvokeHTTP::RelFailure).empty());
+  CHECK(result.at(InvokeHTTP::RelNoRetry).empty());
+  CHECK(result.at(InvokeHTTP::RelRetry).empty());
+  CHECK(!result.at(InvokeHTTP::Success).empty());
+  CHECK(!result.at(InvokeHTTP::RelResponse).empty());
+  CHECK(utils::verifyLogLinePresenceInPollTime(1s, fmt::format("key:{} 
value:{}", InvokeHTTP::STATUS_MESSAGE, test_attr_value_out)));
+  CHECK(utils::verifyLogLinePresenceInPollTime(1s, 
fmt::format("Payload:\n{}\n----", test_content)));
+}
+
 }  // namespace org::apache::nifi::minifi::test
diff --git a/utils/include/http/HTTPClient.h b/utils/include/http/HTTPClient.h
index 0cd92dfcb..e6dc56d61 100644
--- a/utils/include/http/HTTPClient.h
+++ b/utils/include/http/HTTPClient.h
@@ -194,6 +194,7 @@ class HTTPClient : public BaseHTTPClient, public 
core::ConnectableImpl {
 
   static bool isValidHttpHeaderField(std::string_view field_name);
   static std::string replaceInvalidCharactersInHttpHeaderFieldName(std::string 
field_name);
+  static std::string 
removeInvalidCharactersFromHttpHeaderFieldBody(std::string field_body);
 
  private:
   static int onProgress(void *client, curl_off_t dltotal, curl_off_t dlnow, 
curl_off_t ultotal, curl_off_t ulnow);
diff --git a/utils/src/http/HTTPClient.cpp b/utils/src/http/HTTPClient.cpp
index 035284ca1..29691134a 100644
--- a/utils/src/http/HTTPClient.cpp
+++ b/utils/src/http/HTTPClient.cpp
@@ -522,6 +522,12 @@ std::string 
HTTPClient::replaceInvalidCharactersInHttpHeaderFieldName(std::strin
   return field_name;
 }
 
+std::string 
HTTPClient::removeInvalidCharactersFromHttpHeaderFieldBody(std::string 
field_body) {
+  // removing invalid/control characters, to avoid making a mess in the 
text-based protocol
+  std::erase_if(field_body, [](const char ch) { return static_cast<unsigned 
char>(ch) < 32; });
+  return field_body;
+}
+
 void HTTPClient::CurlEasyCleanup::operator()(CURL* curl) const {
   curl_easy_cleanup(curl);
 }

Reply via email to