martinzink commented on a change in pull request #1044:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1044#discussion_r632507305



##########
File path: extensions/systemd/ConsumeJournald.cpp
##########
@@ -0,0 +1,262 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConsumeJournald.h"
+
+#include <algorithm>
+
+#include <date/date.h>
+#include "spdlog/spdlog.h"  // TODO(szaszm): make fmt directly available
+#include "utils/GeneralUtils.h"
+
+namespace org { namespace apache { namespace nifi { namespace minifi { 
namespace extensions { namespace systemd {
+
+constexpr const char* ConsumeJournald::CURSOR_KEY;
+const core::Relationship ConsumeJournald::Success("success", "Successfully 
consumed journal messages.");
+
+const core::Property ConsumeJournald::BatchSize = 
core::PropertyBuilder::createProperty("Batch Size")
+    ->withDescription("The maximum number of entries processed in a single 
execution.")
+    ->withDefaultValue<size_t>(1000)
+    ->isRequired(true)
+    ->build();
+
+const core::Property ConsumeJournald::PayloadFormat = 
core::PropertyBuilder::createProperty("Payload Format")
+    ->withDescription("Configures flow file content formatting. Raw: only the 
message. Syslog: similar to syslog or journalctl output.")
+    ->withDefaultValue<std::string>(PAYLOAD_FORMAT_SYSLOG)
+    ->withAllowableValues<std::string>({PAYLOAD_FORMAT_RAW, 
PAYLOAD_FORMAT_SYSLOG})
+    ->isRequired(true)
+    ->build();
+
+const core::Property ConsumeJournald::IncludeTimestamp = 
core::PropertyBuilder::createProperty("Include Timestamp")
+    ->withDescription("Include message timestamp in the 'timestamp' 
attribute.")
+    ->withDefaultValue<bool>(true)
+    ->isRequired(true)
+    ->build();
+
+const core::Property ConsumeJournald::JournalType = 
core::PropertyBuilder::createProperty("Journal Type")
+    ->withDescription("Type of journal to consume.")
+    ->withDefaultValue<std::string>(JOURNAL_TYPE_SYSTEM)
+    ->withAllowableValues<std::string>({JOURNAL_TYPE_USER, 
JOURNAL_TYPE_SYSTEM, JOURNAL_TYPE_BOTH})
+    ->isRequired(true)
+    ->build();
+
+const core::Property ConsumeJournald::ProcessOldMessages = 
core::PropertyBuilder::createProperty("Process Old Messages")
+    ->withDescription("Process events created before the first usage 
(schedule) of the processor instance.")
+    ->withDefaultValue<bool>(false)
+    ->isRequired(true)
+    ->build();
+
+const core::Property ConsumeJournald::TimestampFormat = 
core::PropertyBuilder::createProperty("Timestamp Format")
+    ->withDescription("Format string to use when creating the timestamp 
attribute or writing messages in the syslog format.")
+    ->withDefaultValue("%x %X %Z")
+    ->isRequired(true)
+    ->build();
+
+ConsumeJournald::ConsumeJournald(const std::string &name, const 
utils::Identifier &id, std::unique_ptr<libwrapper::LibWrapper>&& libwrapper)
+    :core::Processor{name, id}, libwrapper_{std::move(libwrapper)}
+{}
+
+void ConsumeJournald::initialize() {
+  setSupportedProperties({BatchSize, PayloadFormat, IncludeTimestamp, 
JournalType, ProcessOldMessages, TimestampFormat});
+  setSupportedRelationships({Success});
+
+  worker_ = utils::make_unique<Worker>();
+}
+
+void ConsumeJournald::notifyStop() {
+  bool running = true;
+  if (!running_.compare_exchange_strong(running, false, 
std::memory_order_acq_rel) || !journal_) return;
+  worker_->enqueue([this] {
+    journal_ = nullptr;
+  }).get();
+  worker_ = nullptr;
+}
+
+void ConsumeJournald::onSchedule(core::ProcessContext* const context, 
core::ProcessSessionFactory* const sessionFactory) {
+  gsl_Expects(context && sessionFactory && !running_ && worker_);
+  using JournalTypeEnum = systemd::JournalType;
+
+  const auto parse_payload_format = [](const std::string& property_value) -> 
utils::optional<systemd::PayloadFormat> {
+    if (utils::StringUtils::equalsIgnoreCase(property_value, 
PAYLOAD_FORMAT_RAW)) return systemd::PayloadFormat::Raw;
+    if (utils::StringUtils::equalsIgnoreCase(property_value, 
PAYLOAD_FORMAT_SYSLOG)) return systemd::PayloadFormat::Syslog;
+    return utils::nullopt;
+  };
+  const auto parse_journal_type = [](const std::string& property_value) -> 
utils::optional<JournalTypeEnum> {
+    if (utils::StringUtils::equalsIgnoreCase(property_value, 
JOURNAL_TYPE_USER)) return JournalTypeEnum::User;
+    if (utils::StringUtils::equalsIgnoreCase(property_value, 
JOURNAL_TYPE_SYSTEM)) return JournalTypeEnum::System;
+    if (utils::StringUtils::equalsIgnoreCase(property_value, 
JOURNAL_TYPE_BOTH)) return JournalTypeEnum::Both;
+    return utils::nullopt;
+  };
+  batch_size_ = context->getProperty<size_t>(BatchSize).value_or(10);
+  payload_format_ = (context->getProperty(PayloadFormat) | 
utils::flatMap(parse_payload_format)).value_or(systemd::PayloadFormat::Syslog);
+  include_timestamp_ = 
context->getProperty<bool>(IncludeTimestamp).value_or(true);
+  const auto journal_type = (context->getProperty(JournalType) | 
utils::flatMap(parse_journal_type)).value_or(JournalTypeEnum::System);
+  const auto process_old_messages = 
context->getProperty<bool>(ProcessOldMessages).value_or(false);
+  timestamp_format_ = [&context] {
+    auto tf_prop = 
context->getProperty(TimestampFormat).value_or(TimestampFormat.getDefaultValue());
+    if (tf_prop == "ISO" || tf_prop == "ISO 8601" || tf_prop == "ISO8601") 
return std::string{"%FT%T%Ez"};
+    return tf_prop;
+  }();
+
+  state_manager_ = context->getStateManager();
+  // All journal-related API calls are thread-agnostic, meaning they need to 
be called from the same thread. In our environment,
+  // where a processor can easily be scheduled on different threads, we ensure 
this by executing all library calls on a dedicated
+  // worker thread. This is why all such operations are dispatched to a thread 
and immediately waited for in the initiating thread.
+  journal_ = worker_->enqueue([this, journal_type]{ return 
libwrapper_->openJournal(journal_type); }).get();
+  const auto seek_default = [this, process_old_messages](libwrapper::Journal& 
journal) {
+    if (process_old_messages) journal.seekHead();
+    else journal.seekTail();
+    state_manager_->set({{"cursor", getCursor()}});

Review comment:
       When I tried to run the processor locally (both on Ubuntu 18.04 and 
Manjaro), onSchedule failed because this getCursor call returned an error:
   sd_journal_get_cursor: Cannot assign requested address
   
   From the man page, I think we are missing an sd_journal_next call (I hacked it in 
here, and now it seems to work).
   
   From the man page:
   ```
   Note that sd_journal_get_cursor() will not work before
         sd_journal_next(3) (or related call) has been called at least
         once, in order to position the read pointer at a valid entry.
   
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to