GitHub user phrocker commented on a diff in the pull request:
https://github.com/apache/nifi-minifi-cpp/pull/133#discussion_r137021454
--- Diff: libminifi/include/processors/BinFiles.h ---
@@ -0,0 +1,295 @@
+/**
+ * @file BinFiles.h
+ * BinFiles class declaration
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __BIN_FILES_H__
+#define __BIN_FILES_H__
+
+#include <climits>
+#include <deque>
+#include <map>
+#include "FlowFileRecord.h"
+#include "core/Processor.h"
+#include "core/ProcessSession.h"
+#include "core/Core.h"
+#include "core/Resource.h"
+#include "core/logging/LoggerConfiguration.h"
+#include "utils/Id.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace processors {
+
+// Bin Class
+/**
+ * A Bin queues the flow files that belong to one group. It is bounded by a
+ * total byte size and by an entry count; each bound has a minimum (the bin is
+ * "full enough") and a maximum (the bin is "full").
+ *
+ * The class performs no locking of its own.
+ */
+class Bin {
+ public:
+  // Constructor
+  /*!
+   * Create a new Bin
+   * @param minSize    minimum queued bytes for the bin to be full enough
+   * @param maxSize    maximum queued bytes the bin accepts
+   * @param minEntries minimum number of flow files for the bin to be full enough
+   * @param maxEntries maximum number of flow files the bin accepts
+   * @param fileCount  name of a flow file attribute holding an entry count; when
+   *                   non-empty and present on an offered flow file, its value
+   *                   replaces both minEntries and maxEntries (defragment case)
+   * @param groupId    identifier of the group this bin belongs to
+   */
+  explicit Bin(uint64_t minSize, uint64_t maxSize, int minEntries, int maxEntries, std::string fileCount, std::string groupId)
+      : minSize_(minSize),
+        maxSize_(maxSize),
+        maxEntries_(maxEntries),
+        minEntries_(minEntries),
+        queued_data_size_(0),
+        creation_dated_(getTimeMillis()),
+        fileCount_(fileCount),
+        groupId_(groupId),
+        logger_(logging::LoggerFactory<Bin>::getLogger()) {
+    std::shared_ptr<utils::IdGenerator> id_generator = utils::IdGenerator::getIdGenerator();
+    // 36 characters of textual UUID plus the terminating NUL
+    char uuidStr[37] = { 0 };
+    id_generator->generate(uuid_);
+    uuid_unparse_lower(uuid_, uuidStr);
+    uuid_str_ = uuidStr;
+    logger_->log_info("Bin %s for group %s created", uuid_str_, groupId_);
+  }
+  virtual ~Bin() {
+    logger_->log_info("Bin %s for group %s destroyed", uuid_str_, groupId_);
+  }
+  // check whether the bin is full, i.e. it reached the maximum size or the maximum number of entries
+  bool isFull() const {
+    return queued_data_size_ >= maxSize_ || queue_.size() >= static_cast<size_t>(maxEntries_);
+  }
+  // check whether the bin meet the min required size and entries (a full bin always qualifies)
+  bool isFullEnough() const {
+    return isFull() || (queued_data_size_ >= minSize_ && queue_.size() >= static_cast<size_t>(minEntries_));
+  }
+  // check whether the bin is older than the time specified in msec
+  bool isOlderThan(uint64_t duration) const {
+    uint64_t currentTime = getTimeMillis();
+    // Compare the elapsed time against the duration rather than computing
+    // creation_dated_ + duration: that sum wraps around for large durations
+    // (BinManager defaults its bin age to ULLONG_MAX) and would report a
+    // freshly created bin as expired.
+    if (currentTime <= creation_dated_)
+      return false;
+    return (currentTime - creation_dated_) > duration;
+  }
+  // direct, mutable access to the queued flow files
+  std::deque<std::shared_ptr<core::FlowFile>> & getFlowFile() {
+    return queue_;
+  }
+  // offer the flowfile to the bin
+  // returns true when the flow file was queued, false when accepting it would
+  // exceed the maximum size or the maximum number of entries
+  bool offer(std::shared_ptr<core::FlowFile> flow) {
+    if (!fileCount_.empty()) {
+      std::string value;
+      if (flow->getAttribute(fileCount_, value)) {
+        try {
+          // for defrag case using the identification
+          int count = std::stoi(value);
+          if (count >= 0) {
+            maxEntries_ = count;
+            minEntries_ = count;
+          } else {
+            // A negative count would be converted to a huge unsigned value in
+            // the comparisons below and silently remove the entry limit.
+            logger_->log_info("Bin %s for group %s ignoring negative entry count %d from attribute %s", uuid_str_, groupId_, count, fileCount_);
+          }
+        } catch (...) {
+          // std::stoi could not parse the attribute; keep the current limits
+          logger_->log_info("Bin %s for group %s ignoring unparsable entry count '%s' from attribute %s", uuid_str_, groupId_, value, fileCount_);
+        }
+      }
+    }
+
+    if ((queued_data_size_ + flow->getSize()) > maxSize_ || (queue_.size() + 1) > static_cast<size_t>(maxEntries_))
+      return false;
+
+    queue_.push_back(flow);
+    queued_data_size_ += flow->getSize();
+    // %zu / %llu match the size_t and 64-bit arguments; %d did not
+    logger_->log_info("Bin %s for group %s offer size %zu byte %llu min_entry %d max_entry %d",
+                      uuid_str_, groupId_, queue_.size(), static_cast<unsigned long long>(queued_data_size_), minEntries_, maxEntries_);
+
+    return true;
+  }
+  // getBinAge
+  // NOTE: despite the name this returns the creation timestamp taken from
+  // getTimeMillis() in the constructor, not the elapsed time since creation.
+  uint64_t getBinAge() const {
+    return creation_dated_;
+  }
+  // number of flow files currently queued
+  int getSize() const {
+    return static_cast<int>(queue_.size());
+  }
+  // Get the UUID as string
+  std::string getUUIDStr() const {
+    return uuid_str_;
+  }
+  // Get the id of the group this bin was created for
+  std::string getGroupId() const {
+    return groupId_;
+  }
+
+ protected:
+
+ private:
+  // Minimum / maximum total size of the queued flow files
+  uint64_t minSize_;
+  uint64_t maxSize_;
+  // Maximum / minimum number of queued flow files; both are overwritten by
+  // offer() when the fileCount_ attribute is present on the offered flow file
+  int maxEntries_;
+  int minEntries_;
+  // Queued data size
+  uint64_t queued_data_size_;
+  // Queue for the Flow File
+  std::deque<std::shared_ptr<core::FlowFile>> queue_;
+  // Value of getTimeMillis() when the bin was constructed
+  uint64_t creation_dated_;
+  // Name of the flow file attribute that carries the entry count (may be empty)
+  std::string fileCount_;
+  std::string groupId_;
+  std::shared_ptr<logging::Logger> logger_;
+  // A global unique identifier
+  uuid_t uuid_;
+  // UUID string
+  std::string uuid_str_;
+};
+
+// BinManager Class
+/**
+ * Keeps the binning limits and the bins themselves: a map from group id to a
+ * queue of bins, plus a separate queue of ready bins.
+ *
+ * A default constructed manager has no effective limits: minimum size 0,
+ * maximum size ULLONG_MAX, minimum entries 1, maximum entries INT_MAX and a
+ * bin age of ULLONG_MAX.
+ */
+class BinManager {
+ public:
+  // Constructor
+  /*!
+   * Create a new BinManager
+   */
+  explicit BinManager()
+      : logger_(logging::LoggerFactory<BinManager>::getLogger()) {
+  }
+  // Drops every bin held in the group map (see purge) on destruction
+  virtual ~BinManager() {
+    purge();
+  }
+
+  // --- limits; the setters and getBinCount take no lock ---
+  // minimum total size of a bin
+  void setMinSize(uint64_t size) {
+    minSize_ = size;
+  }
+  // maximum total size of a bin
+  void setMaxSize(uint64_t size) {
+    maxSize_ = size;
+  }
+  // maximum number of flow files in a bin
+  void setMaxEntries(int entries) {
+    maxEntries_ = entries;
+  }
+  // minimum number of flow files in a bin
+  void setMinEntries(int entries) {
+    minEntries_ = entries;
+  }
+  // bin age in msec
+  void setBinAge(uint64_t age) {
+    binAge_ = age;
+  }
+  // current value of the bin counter
+  int getBinCount() {
+    return binCount_;
+  }
+  // file count setting stored for later use
+  // (presumably the attribute name handed to each Bin - confirm in the .cpp)
+  void setFileCount(std::string value) {
+    fileCount_ = value;
+  }
+
+  // Clears the group map and resets the bin counter while holding the mutex.
+  // The ready queue is not touched here.
+  void purge() {
+    std::lock_guard<std::mutex> guard(mutex_);
+    groupBinMap_.clear();
+    binCount_ = 0;
+  }
+  // Adds the given flowFile to the first available bin in which it fits for the given group
+  // or creates a new bin in the specified group if necessary.
+  bool offer(std::string group, std::shared_ptr<core::FlowFile> flow);
+  // gather ready bins once the bin are full enough or exceed bin age
+  void gatherReadyBins();
+  // remove oldest bin
+  void removeOldestBin();
+  // get ready bin from binManager
+  void getReadyBin(std::deque<std::unique_ptr<Bin>> &retBins);
+
+ protected:
+
+ private:
+  // Taken by purge()
+  std::mutex mutex_;
+  uint64_t minSize_ = 0;
+  uint64_t maxSize_ = ULLONG_MAX;
+  int maxEntries_ = INT_MAX;
+  int minEntries_ = 1;
+  std::string fileCount_;
+  // Bin Age in msec
+  uint64_t binAge_ = ULLONG_MAX;
+  // Bins keyed by group id
+  std::map<std::string, std::unique_ptr<std::deque<std::unique_ptr<Bin>>>> groupBinMap_;
+  // Queue of ready bins
+  std::deque<std::unique_ptr<Bin>> readyBin_;
+  int binCount_ = 0;
+  std::shared_ptr<logging::Logger> logger_;
+};
+
+// BinFiles Class
--- End diff --
I think we should break this header file up for readability.
---