martinzink commented on a change in pull request #1219: URL: https://github.com/apache/nifi-minifi-cpp/pull/1219#discussion_r781271742
########## File path: extensions/splunk/QuerySplunkIndexingStatus.cpp ########## @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "QuerySplunkIndexingStatus.h" + +#include <unordered_map> +#include <utility> + +#include "SplunkAttributes.h" + +#include "core/Resource.h" +#include "client/HTTPClient.h" +#include "utils/HTTPClient.h" + +#include "rapidjson/document.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" + +namespace org::apache::nifi::minifi::extensions::splunk { + +const core::Property QuerySplunkIndexingStatus::MaximumWaitingTime(core::PropertyBuilder::createProperty("Maximum Waiting Time") + ->withDescription("The maximum time the processor tries to acquire acknowledgement confirmation for an index, from the point of registration. 
" + "After the given amount of time, the processor considers the index as not acknowledged and transfers the FlowFile to the \"unacknowledged\" relationship.") + ->withDefaultValue("1 hour")->isRequired(true)->build()); + +const core::Property QuerySplunkIndexingStatus::MaxQuerySize(core::PropertyBuilder::createProperty("Maximum Query Size") + ->withDescription("The maximum number of acknowledgement identifiers the outgoing query contains in one batch. " + "It is recommended not to set it too low in order to reduce network communication.") + ->withDefaultValue("1000")->isRequired(true)->build()); + +const core::Relationship QuerySplunkIndexingStatus::Acknowledged("acknowledged", + "A FlowFile is transferred to this relationship when the acknowledgement was successful."); + +const core::Relationship QuerySplunkIndexingStatus::Unacknowledged("unacknowledged", + "A FlowFile is transferred to this relationship when the acknowledgement was not successful. " + "This can happen when the acknowledgement did not happened within the time period set for Maximum Waiting Time. " + "FlowFiles with acknowledgement id unknown for the Splunk server will be transferred to this relationship after the Maximum Waiting Time is reached."); + +const core::Relationship QuerySplunkIndexingStatus::Undetermined("undetermined", + "A FlowFile is transferred to this relationship when the acknowledgement state is not determined. " + "FlowFiles transferred to this relationship might be penalized. 
" + "This happens when Splunk returns with HTTP 200 but with false response for the acknowledgement id in the flow file attribute."); + +const core::Relationship QuerySplunkIndexingStatus::Failure("failure", + "A FlowFile is transferred to this relationship when the acknowledgement was not successful due to errors during the communication, " + "or if the flowfile was missing the acknowledgement id"); + +void QuerySplunkIndexingStatus::initialize() { + setSupportedRelationships({Acknowledged, Unacknowledged, Undetermined, Failure}); + setSupportedProperties({Hostname, Port, Token, SplunkRequestChannel, SSLContext, MaximumWaitingTime, MaxQuerySize}); +} + +void QuerySplunkIndexingStatus::onSchedule(const std::shared_ptr<core::ProcessContext>& context, const std::shared_ptr<core::ProcessSessionFactory>& sessionFactory) { + gsl_Expects(context && sessionFactory); + SplunkHECProcessor::onSchedule(context, sessionFactory); + std::string max_wait_time_str; + if (context->getProperty(MaximumWaitingTime.getName(), max_wait_time_str)) { + core::TimeUnit unit; + uint64_t max_wait_time; + if (core::Property::StringToTime(max_wait_time_str, max_wait_time, unit) && core::Property::ConvertTimeUnitToMS(max_wait_time, unit, max_wait_time)) { + max_age_ = std::chrono::milliseconds(max_wait_time); + } + } + + context->getProperty(MaxQuerySize.getName(), batch_size_); +} + +namespace { +constexpr std::string_view getEndpoint() { + return "/services/collector/ack"; +} + +struct FlowFileWithIndexStatus { + explicit FlowFileWithIndexStatus(gsl::not_null<std::shared_ptr<core::FlowFile>>&& flow_file) : flow_file_(std::move(flow_file)) {} + + gsl::not_null<std::shared_ptr<core::FlowFile>> flow_file_; + std::optional<bool> indexing_status_ = std::nullopt; +}; + +std::unordered_map<uint64_t, FlowFileWithIndexStatus> getUndeterminedFlowFiles(core::ProcessSession& session, size_t batch_size) { + std::unordered_map<uint64_t, FlowFileWithIndexStatus> undetermined_flow_files; + for (size_t i = 0; 
i < batch_size; ++i) { + auto flow = session.get(); + if (flow == nullptr) + break; + std::optional<std::string> splunk_ack_id_str = flow->getAttribute(SPLUNK_ACK_ID); + if (!splunk_ack_id_str.has_value()) { + session.transfer(flow, QuerySplunkIndexingStatus::Failure); + continue; + } + uint64_t splunk_ack_id = std::stoull(splunk_ack_id_str.value()); + undetermined_flow_files.emplace(std::make_pair(splunk_ack_id, gsl::not_null(std::move(flow)))); Review comment: That's a valid point; in that case I think every flowfile with a duplicated ack_id should be transferred to failure (because even if the server acknowledges, we can't be sure which one it is) https://github.com/apache/nifi-minifi-cpp/pull/1219/commits/412fa041455d782eedc52bdfb24c680ae7e56808#diff-a2db2ff59dd1ebf5f1e3e053781c55708df909e3009ee21015da3fe04218def4R109-R119 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
