lordgamez commented on a change in pull request #1219:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1219#discussion_r777534222
##########
File path: docker/test/integration/MiNiFi_integration_test_driver.py
##########
@@ -60,6 +60,12 @@ def start_kafka_broker(self):
self.cluster.deploy('kafka-broker')
assert self.wait_for_container_startup_to_finish('kafka-broker')
+ def start_splunk(self):
+ self.cluster.acquire_container('splunk', 'splunk')
+ self.cluster.deploy('splunk')
+ assert self.wait_for_container_startup_to_finish('splunk')
+ assert self.cluster.enable_hec_indexer('splunk', 'splunk_hec_token')
Review comment:
From the test's point of view, is it necessary to start Splunk before the
MiNiFi process, or is it only started separately so that we can enable the
HEC indexer? In the latter case, enabling the HEC indexer could be made part
of the container's entrypoint (e.g. a single command that starts Splunk and
then enables the HEC indexer, or a starter script). Then it wouldn't be
necessary to start this container separately from all the other cluster
containers.
##########
File path: extensions/splunk/PutSplunkHTTP.cpp
##########
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "PutSplunkHTTP.h"
+
+#include <vector>
+#include <utility>
+
+#include "SplunkAttributes.h"
+
+#include "core/Resource.h"
+#include "utils/StringUtils.h"
+#include "client/HTTPClient.h"
+#include "utils/HTTPClient.h"
+#include "utils/TimeUtil.h"
+
+#include "rapidjson/document.h"
+
+
+namespace org::apache::nifi::minifi::extensions::splunk {
+
+const core::Property
PutSplunkHTTP::Source(core::PropertyBuilder::createProperty("Source")
+ ->withDescription("Basic field describing the source of the event. If
unspecified, the event will use the default defined in splunk.")
+ ->supportsExpressionLanguage(true)->build());
+
+const core::Property
PutSplunkHTTP::SourceType(core::PropertyBuilder::createProperty("Source Type")
+ ->withDescription("Basic field describing the source type of the event. If
unspecified, the event will use the default defined in splunk.")
+ ->supportsExpressionLanguage(true)->build());
+
+const core::Property
PutSplunkHTTP::Host(core::PropertyBuilder::createProperty("Host")
+ ->withDescription("Basic field describing the host of the event. If
unspecified, the event will use the default defined in splunk.")
+ ->supportsExpressionLanguage(true)->build());
+
+const core::Property
PutSplunkHTTP::Index(core::PropertyBuilder::createProperty("Index")
+ ->withDescription("Identifies the index where to send the event. If
unspecified, the event will use the default defined in splunk.")
+ ->supportsExpressionLanguage(true)->build());
+
+const core::Property
PutSplunkHTTP::ContentType(core::PropertyBuilder::createProperty("Content Type")
+ ->withDescription("The media type of the event sent to Splunk. If not set,
\"mime.type\" flow file attribute will be used. "
+ "In case of neither of them is specified, this
information will not be sent to the server.")
+ ->supportsExpressionLanguage(true)->build());
+
+
+const core::Relationship PutSplunkHTTP::Success("success", "FlowFiles that are
sent successfully to the destination are sent to this relationship.");
+const core::Relationship PutSplunkHTTP::Failure("failure", "FlowFiles that
failed to send to the destination are sent to this relationship.");
+
+void PutSplunkHTTP::initialize() {
+ setSupportedRelationships({Success, Failure});
+ setSupportedProperties({Hostname, Port, Token, SplunkRequestChannel, Source,
SourceType, Host, Index, ContentType});
+}
+
+void PutSplunkHTTP::onSchedule(const std::shared_ptr<core::ProcessContext>&
context, const std::shared_ptr<core::ProcessSessionFactory>& sessionFactory) {
+ SplunkHECProcessor::onSchedule(context, sessionFactory);
+}
+
+
+namespace {
+std::optional<std::string> getContentType(core::ProcessContext& context, const
core::FlowFile& flow_file) {
+ std::optional<std::string> content_type =
context.getProperty(PutSplunkHTTP::ContentType);
+ if (content_type.has_value())
+ return content_type;
+ return flow_file.getAttribute("mime.key");
+}
+
+
+std::string getEndpoint(core::ProcessContext& context, const
gsl::not_null<std::shared_ptr<core::FlowFile>>& flow_file) {
+ std::stringstream endpoint;
+ endpoint << "/services/collector/raw";
+ std::vector<std::string> parameters;
+ std::string prop_value;
+ if (context.getProperty(PutSplunkHTTP::SourceType, prop_value, flow_file)) {
+ parameters.push_back("sourcetype=" + prop_value);
+ }
+ if (context.getProperty(PutSplunkHTTP::Source, prop_value, flow_file)) {
+ parameters.push_back("source=" + prop_value);
+ }
+ if (context.getProperty(PutSplunkHTTP::Host, prop_value, flow_file)) {
+ parameters.push_back("host=" + prop_value);
+ }
+ if (context.getProperty(PutSplunkHTTP::Index, prop_value, flow_file)) {
+ parameters.push_back("index=" + prop_value);
+ }
+ if (!parameters.empty()) {
+ endpoint << "?" << utils::StringUtils::join("&", parameters);
+ }
+ return endpoint.str();
+}
+
+bool addAttributesFromClientResponse(core::FlowFile& flow_file,
utils::HTTPClient& client) {
+ rapidjson::Document response_json;
+ rapidjson::ParseResult parse_result =
response_json.Parse<rapidjson::kParseStopWhenDoneFlag>(client.getResponseBody().data());
+ bool result = true;
+ if (parse_result.IsError())
+ return false;
+
+ if (response_json.HasMember("code") && response_json["code"].IsInt())
+ flow_file.addAttribute(SPLUNK_RESPONSE_CODE,
std::to_string(response_json["code"].GetInt()));
+ else
+ result = false;
+
+ if (response_json.HasMember("ackId") && response_json["ackId"].IsUint64())
+ flow_file.addAttribute(SPLUNK_ACK_ID,
std::to_string(response_json["ackId"].GetUint64()));
+ else
+ result = false;
+
+ return result;
+}
+
+bool enrichFlowFileWithAttributes(core::FlowFile& flow_file,
utils::HTTPClient& client) {
+ flow_file.addAttribute(SPLUNK_STATUS_CODE,
std::to_string(client.getResponseCode()));
+ flow_file.addAttribute(SPLUNK_RESPONSE_TIME,
std::to_string(utils::timeutils::getTimestamp<std::chrono::milliseconds>(std::chrono::system_clock::now())));
+
+ return addAttributesFromClientResponse(flow_file, client) &&
client.getResponseCode() == 200;
+}
+
+void setFlowFileAsPayload(core::ProcessSession& session,
+ core::ProcessContext& context,
+ utils::HTTPClient& client,
+ const
gsl::not_null<std::shared_ptr<core::FlowFile>>& flow_file,
+ utils::ByteInputCallBack&
payload_callback,
+ utils::HTTPUploadCallback&
payload_callback_obj) {
Review comment:
Indentation is a bit odd here
##########
File path: extensions/splunk/SplunkHECProcessor.h
##########
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <string>
+#include <memory>
+
+#include "core/Processor.h"
+
+
+namespace org::apache::nifi::minifi::utils {
+class HTTPClient;
+}
+
+namespace org::apache::nifi::minifi::extensions::splunk {
+
+class SplunkHECProcessor : public core::Processor {
+ public:
+ EXTENSIONAPI static const core::Property Hostname;
+ EXTENSIONAPI static const core::Property Port;
+ EXTENSIONAPI static const core::Property Token;
+ EXTENSIONAPI static const core::Property SplunkRequestChannel;
+ EXTENSIONAPI static const core::Property SSLContext;
+
+ explicit SplunkHECProcessor(const std::string& name, const
utils::Identifier& uuid = {})
+ : Processor(name, uuid) {
+ }
+ ~SplunkHECProcessor() override = default;
+
+ void initialize() override;
+ void onSchedule(const std::shared_ptr<core::ProcessContext> &context, const
std::shared_ptr<core::ProcessSessionFactory> &sessionFactory) override;
+
+ bool isSingleThreaded() const override {
Review comment:
What's the reason for not supporting multithreading in this processor?
##########
File path: docker/test/integration/features/splunk.feature
##########
@@ -0,0 +1,24 @@
+Feature: Sending data to Splunk HEC using PutSplunkHTTP
+
+ Background:
+ Given the content of "/tmp/output" is monitored
+
+ Scenario: A MiNiFi instance transfers data to a Splunk HEC
Review comment:
Would it be possible to set up an SSL connection test as well with this
splunk environment?
##########
File path: extensions/splunk/QuerySplunkIndexingStatus.cpp
##########
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "QuerySplunkIndexingStatus.h"
+
+#include <unordered_map>
+#include <utility>
+
+#include "SplunkAttributes.h"
+
+#include "core/Resource.h"
+#include "client/HTTPClient.h"
+#include "utils/HTTPClient.h"
+#include "utils/TimeUtil.h"
+
+#include "rapidjson/document.h"
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
+
+namespace org::apache::nifi::minifi::extensions::splunk {
+
+const core::Property
QuerySplunkIndexingStatus::MaximumWaitingTime(core::PropertyBuilder::createProperty("Maximum
Waiting Time")
+ ->withDescription("The maximum time the processor tries to acquire
acknowledgement confirmation for an index, from the point of registration. "
+ "After the given amount of time, the processor considers
the index as not acknowledged and transfers the FlowFile to the
\"unacknowledged\" relationship.")
+ ->withDefaultValue("1 hour")->isRequired(true)->build());
+
+const core::Property
QuerySplunkIndexingStatus::MaxQuerySize(core::PropertyBuilder::createProperty("Maximum
Query Size")
+ ->withDescription("The maximum number of acknowledgement identifiers the
outgoing query contains in one batch. "
+ "It is recommended not to set it too low in order to
reduce network communication.")
+ ->withDefaultValue("1000")->isRequired(true)->build());
+
+const core::Relationship
QuerySplunkIndexingStatus::Acknowledged("acknowledged",
+ "A FlowFile is transferred to this relationship when the acknowledgement
was successful.");
+
+const core::Relationship
QuerySplunkIndexingStatus::Unacknowledged("unacknowledged",
+ "A FlowFile is transferred to this relationship when the acknowledgement
was not successful. "
+ "This can happen when the acknowledgement did not happened within the time
period set for Maximum Waiting Time. "
+ "FlowFiles with acknowledgement id unknown for the Splunk server will be
transferred to this relationship after the Maximum Waiting Time is reached.");
+
+const core::Relationship
QuerySplunkIndexingStatus::Undetermined("undetermined",
+ "A FlowFile is transferred to this relationship when the acknowledgement
state is not determined. "
+ "FlowFiles transferred to this relationship might be penalized. "
+ "This happens when Splunk returns with HTTP 200 but with false response
for the acknowledgement id in the flow file attribute.");
+
+const core::Relationship QuerySplunkIndexingStatus::Failure("failure",
+ "A FlowFile is transferred to this relationship when the acknowledgement
was not successful due to errors during the communication, "
+ "or if the flowfile was missing the acknowledgement id");
+
+void QuerySplunkIndexingStatus::initialize() {
+ setSupportedRelationships({Acknowledged, Unacknowledged, Undetermined,
Failure});
+ setSupportedProperties({Hostname, Port, Token, SplunkRequestChannel,
MaximumWaitingTime, MaxQuerySize});
+}
+
+void QuerySplunkIndexingStatus::onSchedule(const
std::shared_ptr<core::ProcessContext>& context, const
std::shared_ptr<core::ProcessSessionFactory>& sessionFactory) {
+ gsl_Expects(context && sessionFactory);
+ SplunkHECProcessor::onSchedule(context, sessionFactory);
+ std::string max_wait_time_str;
+ if (context->getProperty(MaximumWaitingTime.getName(), max_wait_time_str)) {
+ core::TimeUnit unit;
+ uint64_t max_wait_time;
+ if (core::Property::StringToTime(max_wait_time_str, max_wait_time, unit)
&& core::Property::ConvertTimeUnitToMS(max_wait_time, unit, max_wait_time)) {
+ max_age_ = std::chrono::milliseconds(max_wait_time);
+ }
+ }
+
+ context->getProperty(MaxQuerySize.getName(), batch_size_);
+}
+
+namespace {
+constexpr std::string_view getEndpoint() {
+ return "/services/collector/ack";
+}
+
+struct FlowFileWithIndexStatus {
+ explicit
FlowFileWithIndexStatus(gsl::not_null<std::shared_ptr<core::FlowFile>>&&
flow_file) : flow_file_(std::move(flow_file)) {}
+
+ gsl::not_null<std::shared_ptr<core::FlowFile>> flow_file_;
+ std::optional<bool> indexing_status_ = std::nullopt;
+};
+
+std::unordered_map<uint64_t, FlowFileWithIndexStatus>
getUndeterminedFlowFiles(core::ProcessSession& session, size_t batch_size) {
+ std::unordered_map<uint64_t, FlowFileWithIndexStatus>
undetermined_flow_files;
+ for (size_t i = 0; i < batch_size; ++i) {
+ auto flow = session.get();
+ if (flow == nullptr)
+ break;
+ std::optional<std::string> splunk_ack_id_str =
flow->getAttribute(SPLUNK_ACK_ID);
+ if (!splunk_ack_id_str.has_value()) {
+ session.transfer(flow, QuerySplunkIndexingStatus::Failure);
+ continue;
+ }
+ uint64_t splunk_ack_id = std::stoull(splunk_ack_id_str.value());
+ undetermined_flow_files.emplace(std::make_pair(splunk_ack_id,
gsl::not_null(std::move(flow))));
+ }
+ return undetermined_flow_files;
+}
+
+std::string getAckIdsAsPayload(const std::unordered_map<uint64_t,
FlowFileWithIndexStatus>& undetermined_flow_files) {
+ rapidjson::Document payload = rapidjson::Document(rapidjson::kObjectType);
+ payload.AddMember("acks", rapidjson::kArrayType, payload.GetAllocator());
+ for (const auto& [ack_id, ff_status] : undetermined_flow_files) {
+ payload["acks"].PushBack(ack_id, payload.GetAllocator());
+ }
+ rapidjson::StringBuffer buffer;
+ rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+ payload.Accept(writer);
+ return buffer.GetString();
+}
+
+void getIndexingStatusFromSplunk(utils::HTTPClient& client,
std::unordered_map<uint64_t, FlowFileWithIndexStatus>& undetermined_flow_files)
{
+ rapidjson::Document response;
+ if (client.submit()) {
Review comment:
I would go for an early return here, I just don't like long ifs :)
##########
File path: extensions/splunk/tests/QuerySplunkIndexingStatusTests.cpp
##########
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <chrono>
+
+#include "QuerySplunkIndexingStatus.h"
+#include "MockSplunkHEC.h"
+#include "SplunkAttributes.h"
+#include "TestBase.h"
+#include "processors/UpdateAttribute.h"
+#include "ReadFromFlowFileTestProcessor.h"
+#include "WriteToFlowFileTestProcessor.h"
+#include "utils/TimeUtil.h"
+
+using QuerySplunkIndexingStatus =
org::apache::nifi::minifi::extensions::splunk::QuerySplunkIndexingStatus;
+using ReadFromFlowFileTestProcessor =
org::apache::nifi::minifi::processors::ReadFromFlowFileTestProcessor;
+using WriteToFlowFileTestProcessor =
org::apache::nifi::minifi::processors::WriteToFlowFileTestProcessor;
+using UpdateAttribute = org::apache::nifi::minifi::processors::UpdateAttribute;
+using namespace std::chrono_literals; // NOLINT(build/namespaces)
Review comment:
Can be replaced with `using namespace std::literals::chrono_literals` to
remove the NOLINT marker
##########
File path: extensions/splunk/tests/QuerySplunkIndexingStatusTests.cpp
##########
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <chrono>
+
+#include "QuerySplunkIndexingStatus.h"
+#include "MockSplunkHEC.h"
+#include "SplunkAttributes.h"
+#include "TestBase.h"
+#include "processors/UpdateAttribute.h"
+#include "ReadFromFlowFileTestProcessor.h"
+#include "WriteToFlowFileTestProcessor.h"
+#include "utils/TimeUtil.h"
+
+using QuerySplunkIndexingStatus =
org::apache::nifi::minifi::extensions::splunk::QuerySplunkIndexingStatus;
+using ReadFromFlowFileTestProcessor =
org::apache::nifi::minifi::processors::ReadFromFlowFileTestProcessor;
+using WriteToFlowFileTestProcessor =
org::apache::nifi::minifi::processors::WriteToFlowFileTestProcessor;
+using UpdateAttribute = org::apache::nifi::minifi::processors::UpdateAttribute;
+using namespace std::chrono_literals; // NOLINT(build/namespaces)
+
+TEST_CASE("QuerySplunkIndexingStatus tests", "[querysplunkindexingstatus]") {
+ MockSplunkHEC mock_splunk_hec("10132");
+
+ TestController test_controller;
+ auto plan = test_controller.createPlan();
+ auto write_to_flow_file =
std::dynamic_pointer_cast<WriteToFlowFileTestProcessor>(plan->addProcessor("WriteToFlowFileTestProcessor",
"write_to_flow_file"));
+ auto update_attribute =
std::dynamic_pointer_cast<UpdateAttribute>(plan->addProcessor("UpdateAttribute",
"update_attribute"));
+ auto query_splunk_indexing_status =
std::dynamic_pointer_cast<QuerySplunkIndexingStatus>(plan->addProcessor("QuerySplunkIndexingStatus",
"query_splunk_indexing_status"));
+ auto read_from_acknowledged =
std::dynamic_pointer_cast<ReadFromFlowFileTestProcessor>(plan->addProcessor("ReadFromFlowFileTestProcessor",
"read_from_acknowledged"));
+ auto read_from_undetermined =
std::dynamic_pointer_cast<ReadFromFlowFileTestProcessor>(plan->addProcessor("ReadFromFlowFileTestProcessor",
"read_from_undetermined"));
+ auto read_from_unacknowledged =
std::dynamic_pointer_cast<ReadFromFlowFileTestProcessor>(plan->addProcessor("ReadFromFlowFileTestProcessor",
"read_from_unacknowledged"));
+ auto read_from_failure =
std::dynamic_pointer_cast<ReadFromFlowFileTestProcessor>(plan->addProcessor("ReadFromFlowFileTestProcessor",
"read_from_failure"));
+
+ plan->addConnection(write_to_flow_file,
WriteToFlowFileTestProcessor::Success, update_attribute);
+ plan->addConnection(update_attribute, UpdateAttribute ::Success,
query_splunk_indexing_status);
+ plan->addConnection(query_splunk_indexing_status,
QuerySplunkIndexingStatus::Acknowledged, read_from_acknowledged);
+ plan->addConnection(query_splunk_indexing_status,
QuerySplunkIndexingStatus::Undetermined, read_from_undetermined);
+ plan->addConnection(query_splunk_indexing_status,
QuerySplunkIndexingStatus::Unacknowledged, read_from_unacknowledged);
+ plan->addConnection(query_splunk_indexing_status,
QuerySplunkIndexingStatus::Failure, read_from_failure);
+
+
read_from_acknowledged->setAutoTerminatedRelationships({ReadFromFlowFileTestProcessor::Success});
+
read_from_undetermined->setAutoTerminatedRelationships({ReadFromFlowFileTestProcessor::Success});
+
read_from_unacknowledged->setAutoTerminatedRelationships({ReadFromFlowFileTestProcessor::Success});
+
read_from_failure->setAutoTerminatedRelationships({ReadFromFlowFileTestProcessor::Success});
+
+ plan->setProperty(query_splunk_indexing_status,
QuerySplunkIndexingStatus::Hostname.getName(), "localhost");
+ plan->setProperty(query_splunk_indexing_status,
QuerySplunkIndexingStatus::Port.getName(), mock_splunk_hec.getPort());
+ plan->setProperty(query_splunk_indexing_status,
QuerySplunkIndexingStatus::Token.getName(), MockSplunkHEC::TOKEN);
+ plan->setProperty(query_splunk_indexing_status,
QuerySplunkIndexingStatus::SplunkRequestChannel.getName(),
"a12254b4-f481-435d-896d-3b6033eabe58");
+
+ auto response_timestamp =
std::to_string(utils::timeutils::getTimestamp<std::chrono::milliseconds>(std::chrono::system_clock::now()));
+ plan->setProperty(update_attribute,
org::apache::nifi::minifi::extensions::splunk::SPLUNK_RESPONSE_TIME,
response_timestamp, true);
+
+ write_to_flow_file->setContent("foobar");
+
+ SECTION("Querying indexed id") {
+ plan->setProperty(update_attribute,
org::apache::nifi::minifi::extensions::splunk::SPLUNK_ACK_ID,
std::to_string(MockSplunkHEC::indexed_events[0]), true);
+ test_controller.runSession(plan);
+ CHECK(read_from_failure->numberOfFlowFilesRead() == 0);
+ CHECK(read_from_undetermined->numberOfFlowFilesRead() == 0);
+ CHECK(read_from_unacknowledged->numberOfFlowFilesRead() == 0);
+ CHECK(read_from_acknowledged->numberOfFlowFilesRead() == 1);
+ }
+
+ SECTION("Querying not indexed id") {
+ plan->setProperty(update_attribute,
org::apache::nifi::minifi::extensions::splunk::SPLUNK_ACK_ID, "100", true);
+ query_splunk_indexing_status->setPenalizationPeriod(50ms);
+ test_controller.runSession(plan);
+ CHECK(read_from_failure->numberOfFlowFilesRead() == 0);
+ CHECK(read_from_undetermined->numberOfFlowFilesRead() == 0); // result
penalized
+ CHECK(read_from_unacknowledged->numberOfFlowFilesRead() == 0);
+ CHECK(read_from_acknowledged->numberOfFlowFilesRead() == 0);
+
+ write_to_flow_file->setContent("");
+ plan->reset();
+ std::this_thread::sleep_for(std::chrono::milliseconds(100ms));
+ test_controller.runSession(plan);
+
+ CHECK(read_from_failure->numberOfFlowFilesRead() == 0);
+ CHECK(read_from_undetermined->numberOfFlowFilesRead() == 1);
+ CHECK(read_from_unacknowledged->numberOfFlowFilesRead() == 0);
+ CHECK(read_from_acknowledged->numberOfFlowFilesRead() == 0);
+ }
+
+ SECTION("Querying not indexed old id") {
Review comment:
Could we add a test for verifying the MaxQuerySize?
##########
File path: extensions/splunk/tests/PutSplunkHTTPTests.cpp
##########
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PutSplunkHTTP.h"
+#include "SplunkAttributes.h"
+#include "TestBase.h"
+#include "ReadFromFlowFileTestProcessor.h"
+#include "WriteToFlowFileTestProcessor.h"
+#include "MockSplunkHEC.h"
+
+using PutSplunkHTTP =
org::apache::nifi::minifi::extensions::splunk::PutSplunkHTTP;
+using ReadFromFlowFileTestProcessor =
org::apache::nifi::minifi::processors::ReadFromFlowFileTestProcessor;
+using WriteToFlowFileTestProcessor =
org::apache::nifi::minifi::processors::WriteToFlowFileTestProcessor;
+
+
+TEST_CASE("PutSplunkHTTP tests", "[putsplunkhttp]") {
+ MockSplunkHEC mock_splunk_hec("10133");
+
+ TestController test_controller;
+ auto plan = test_controller.createPlan();
+ auto write_to_flow_file =
std::dynamic_pointer_cast<WriteToFlowFileTestProcessor>(plan->addProcessor("WriteToFlowFileTestProcessor",
"write_to_flow_file"));
+ auto put_splunk_http =
std::dynamic_pointer_cast<PutSplunkHTTP>(plan->addProcessor("PutSplunkHTTP",
"put_splunk_http"));
+ auto read_from_success =
std::dynamic_pointer_cast<ReadFromFlowFileTestProcessor>(plan->addProcessor("ReadFromFlowFileTestProcessor",
"read_from_success"));
+ auto read_from_failure =
std::dynamic_pointer_cast<ReadFromFlowFileTestProcessor>(plan->addProcessor("ReadFromFlowFileTestProcessor",
"read_from_failure"));
+
+ plan->addConnection(write_to_flow_file,
WriteToFlowFileTestProcessor::Success, put_splunk_http);
+ plan->addConnection(put_splunk_http, PutSplunkHTTP::Success,
read_from_success);
+ plan->addConnection(put_splunk_http, PutSplunkHTTP::Failure,
read_from_failure);
+
+
read_from_success->setAutoTerminatedRelationships({ReadFromFlowFileTestProcessor::Success});
+
read_from_failure->setAutoTerminatedRelationships({ReadFromFlowFileTestProcessor::Success});
+
+ plan->setProperty(put_splunk_http, PutSplunkHTTP::Hostname.getName(),
"localhost");
+ plan->setProperty(put_splunk_http, PutSplunkHTTP::Port.getName(),
mock_splunk_hec.getPort());
+ plan->setProperty(put_splunk_http, PutSplunkHTTP::Token.getName(),
MockSplunkHEC::TOKEN);
+ plan->setProperty(put_splunk_http,
PutSplunkHTTP::SplunkRequestChannel.getName(),
"a12254b4-f481-435d-896d-3b6033eabe58");
+
+ write_to_flow_file->setContent("foobar");
+
+ SECTION("Happy path") {
Review comment:
Could we add some additional tests for checking non-default endpoint
parameters? It would also be good to check that the content type is set
correctly with both the default and a non-default value. I think with this
great MockSplunkHEC we have a lot of options :)
##########
File path: extensions/splunk/PutSplunkHTTP.cpp
##########
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "PutSplunkHTTP.h"
+
+#include <vector>
+#include <utility>
+
+#include "SplunkAttributes.h"
+
+#include "core/Resource.h"
+#include "utils/StringUtils.h"
+#include "client/HTTPClient.h"
+#include "utils/HTTPClient.h"
+#include "utils/TimeUtil.h"
+
+#include "rapidjson/document.h"
+
+
+namespace org::apache::nifi::minifi::extensions::splunk {
+
+const core::Property
PutSplunkHTTP::Source(core::PropertyBuilder::createProperty("Source")
+ ->withDescription("Basic field describing the source of the event. If
unspecified, the event will use the default defined in splunk.")
+ ->supportsExpressionLanguage(true)->build());
+
+const core::Property
PutSplunkHTTP::SourceType(core::PropertyBuilder::createProperty("Source Type")
+ ->withDescription("Basic field describing the source type of the event. If
unspecified, the event will use the default defined in splunk.")
+ ->supportsExpressionLanguage(true)->build());
+
+const core::Property
PutSplunkHTTP::Host(core::PropertyBuilder::createProperty("Host")
+ ->withDescription("Basic field describing the host of the event. If
unspecified, the event will use the default defined in splunk.")
+ ->supportsExpressionLanguage(true)->build());
+
+const core::Property
PutSplunkHTTP::Index(core::PropertyBuilder::createProperty("Index")
+ ->withDescription("Identifies the index where to send the event. If
unspecified, the event will use the default defined in splunk.")
+ ->supportsExpressionLanguage(true)->build());
+
+const core::Property
PutSplunkHTTP::ContentType(core::PropertyBuilder::createProperty("Content Type")
+ ->withDescription("The media type of the event sent to Splunk. If not set,
\"mime.type\" flow file attribute will be used. "
+ "In case of neither of them is specified, this
information will not be sent to the server.")
+ ->supportsExpressionLanguage(true)->build());
+
+
+const core::Relationship PutSplunkHTTP::Success("success", "FlowFiles that are
sent successfully to the destination are sent to this relationship.");
+const core::Relationship PutSplunkHTTP::Failure("failure", "FlowFiles that
failed to send to the destination are sent to this relationship.");
+
+void PutSplunkHTTP::initialize() {
+ setSupportedRelationships({Success, Failure});
+ setSupportedProperties({Hostname, Port, Token, SplunkRequestChannel, Source,
SourceType, Host, Index, ContentType});
+}
+
+void PutSplunkHTTP::onSchedule(const std::shared_ptr<core::ProcessContext>&
context, const std::shared_ptr<core::ProcessSessionFactory>& sessionFactory) {
+ SplunkHECProcessor::onSchedule(context, sessionFactory);
+}
+
+
+namespace {
+std::optional<std::string> getContentType(core::ProcessContext& context, const
core::FlowFile& flow_file) {
+ std::optional<std::string> content_type =
context.getProperty(PutSplunkHTTP::ContentType);
+ if (content_type.has_value())
+ return content_type;
+ return flow_file.getAttribute("mime.key");
+}
+
+
+std::string getEndpoint(core::ProcessContext& context, const
gsl::not_null<std::shared_ptr<core::FlowFile>>& flow_file) {
+ std::stringstream endpoint;
+ endpoint << "/services/collector/raw";
+ std::vector<std::string> parameters;
+ std::string prop_value;
+ if (context.getProperty(PutSplunkHTTP::SourceType, prop_value, flow_file)) {
+ parameters.push_back("sourcetype=" + prop_value);
+ }
+ if (context.getProperty(PutSplunkHTTP::Source, prop_value, flow_file)) {
+ parameters.push_back("source=" + prop_value);
+ }
+ if (context.getProperty(PutSplunkHTTP::Host, prop_value, flow_file)) {
+ parameters.push_back("host=" + prop_value);
+ }
+ if (context.getProperty(PutSplunkHTTP::Index, prop_value, flow_file)) {
+ parameters.push_back("index=" + prop_value);
+ }
+ if (!parameters.empty()) {
+ endpoint << "?" << utils::StringUtils::join("&", parameters);
+ }
+ return endpoint.str();
+}
+
+bool addAttributesFromClientResponse(core::FlowFile& flow_file,
utils::HTTPClient& client) {
+ rapidjson::Document response_json;
+ rapidjson::ParseResult parse_result =
response_json.Parse<rapidjson::kParseStopWhenDoneFlag>(client.getResponseBody().data());
+ bool result = true;
+ if (parse_result.IsError())
+ return false;
+
+ if (response_json.HasMember("code") && response_json["code"].IsInt())
+ flow_file.addAttribute(SPLUNK_RESPONSE_CODE,
std::to_string(response_json["code"].GetInt()));
+ else
+ result = false;
+
+ if (response_json.HasMember("ackId") && response_json["ackId"].IsUint64())
+ flow_file.addAttribute(SPLUNK_ACK_ID,
std::to_string(response_json["ackId"].GetUint64()));
+ else
+ result = false;
+
+ return result;
+}
+
+bool enrichFlowFileWithAttributes(core::FlowFile& flow_file,
utils::HTTPClient& client) {
+ flow_file.addAttribute(SPLUNK_STATUS_CODE,
std::to_string(client.getResponseCode()));
+ flow_file.addAttribute(SPLUNK_RESPONSE_TIME,
std::to_string(utils::timeutils::getTimestamp<std::chrono::milliseconds>(std::chrono::system_clock::now())));
+
+ return addAttributesFromClientResponse(flow_file, client) &&
client.getResponseCode() == 200;
+}
+
+void setFlowFileAsPayload(core::ProcessSession& session,
+ core::ProcessContext& context,
+ utils::HTTPClient& client,
+ const
gsl::not_null<std::shared_ptr<core::FlowFile>>& flow_file,
+ utils::ByteInputCallBack&
payload_callback,
+ utils::HTTPUploadCallback&
payload_callback_obj) {
+ session.read(flow_file, &payload_callback);
+ payload_callback_obj.ptr = &payload_callback;
+ payload_callback_obj.pos = 0;
+ client.appendHeader("Content-Length", std::to_string(flow_file->getSize()));
+
+ client.setUploadCallback(&payload_callback_obj);
+ client.setSeekFunction(&payload_callback_obj);
+
+ auto content_type = getContentType(context, *flow_file);
+ if (content_type.has_value())
+ client.setContentType(content_type.value());
+}
+} // namespace
+
+void PutSplunkHTTP::onTrigger(const std::shared_ptr<core::ProcessContext>&
context, const std::shared_ptr<core::ProcessSession>& session) {
+ gsl_Expects(context && session);
+
+ auto ff = session->get();
+ if (!ff) {
+ context->yield();
+ return;
+ }
+ auto flow_file = gsl::not_null(std::move(ff));
+
+ utils::HTTPClient client(getNetworkLocation() + getEndpoint(*context,
flow_file), getSSLContextService(*context));
+ setHeaders(client);
+
+ auto payload_callback = std::make_unique<utils::ByteInputCallBack>();
+ auto payload_callback_obj = std::make_unique<utils::HTTPUploadCallback>();
Review comment:
What's the benefit of allocating the callbacks on the heap?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]