szaszm commented on a change in pull request #900:
URL: https://github.com/apache/nifi-minifi-cpp/pull/900#discussion_r496800050
##########
File path: libminifi/CMakeLists.txt
##########
@@ -89,7 +89,7 @@ if (NOT OPENSSL_OFF)
set(TLS_SOURCES "src/io/tls/*.cpp")
endif()
-file(GLOB SOURCES "src/utils/file/*.cpp" "src/sitetosite/*.cpp"
"src/core/logging/*.cpp" "src/core/state/*.cpp" "src/core/state/nodes/*.cpp"
"src/c2/protocols/*.cpp" "src/c2/triggers/*.cpp" "src/c2/*.cpp" "src/io/*.cpp"
${SOCKET_SOURCES} ${TLS_SOURCES} "src/core/controller/*.cpp"
"src/controllers/*.cpp" "src/controllers/keyvalue/*.cpp" "src/core/*.cpp"
"src/core/repository/*.cpp" "src/core/yaml/*.cpp" "src/core/reporting/*.cpp"
"src/provenance/*.cpp" "src/utils/*.cpp" "src/*.cpp")
+file(GLOB SOURCES "src/utils/file/*.cpp" "src/sitetosite/*.cpp"
"src/core/logging/*.cpp" "src/core/state/*.cpp" "src/core/state/nodes/*.cpp"
"src/c2/protocols/*.cpp" "src/c2/triggers/*.cpp" "src/c2/*.cpp" "src/io/*.cpp"
${SOCKET_SOURCES} ${TLS_SOURCES} "src/core/controller/*.cpp"
"src/controllers/*.cpp" "src/controllers/keyvalue/*.cpp" "src/core/*.cpp"
"src/core/repository/*.cpp" "src/core/yaml/*.cpp" "src/core/reporting/*.cpp"
"src/serialization/*.cpp" "src/provenance/*.cpp" "src/utils/*.cpp" "src/*.cpp")
Review comment:
Despite it being common in the codebase, I recommend against
using globs to match source files. Rationale: see note at
https://cmake.org/cmake/help/latest/command/file.html#glob
```suggestion
file(GLOB SOURCES "src/utils/file/*.cpp" "src/sitetosite/*.cpp"
"src/core/logging/*.cpp" "src/core/state/*.cpp" "src/core/state/nodes/*.cpp"
"src/c2/protocols/*.cpp" "src/c2/triggers/*.cpp" "src/c2/*.cpp" "src/io/*.cpp"
${SOCKET_SOURCES} ${TLS_SOURCES} "src/core/controller/*.cpp"
"src/controllers/*.cpp" "src/controllers/keyvalue/*.cpp" "src/core/*.cpp"
"src/core/repository/*.cpp" "src/core/yaml/*.cpp" "src/core/reporting/*.cpp"
"src/provenance/*.cpp" "src/utils/*.cpp" "src/*.cpp")
list(APPEND SOURCES "src/serialization/FlowFileV3Serializer.cpp"
"src/serialization/PayloadSerializer.cpp")
```
##########
File path: libminifi/test/archive-tests/MergeFileTests.cpp
##########
@@ -973,3 +975,95 @@ TEST_CASE("Test Merge File Attributes Keeping All Unique
Attributes", "[testMerg
LogTestController::getInstance().reset();
}
+
+void writeString(const std::string& str, const
std::shared_ptr<minifi::io::BaseStream>& out) {
+ out->write(const_cast<uint8_t*>(reinterpret_cast<const
uint8_t*>(str.data())), str.length());
+}
+
+TEST_CASE("FlowFile serialization", "[testFlowFileSerialization]") {
+ MergeTestController testController;
+ auto context = testController.context;
+ auto processor = testController.processor;
+ auto input = testController.input;
+ auto output = testController.output;
+
+ const std::string header = "BEGIN{";
+ const std::string footer = "}END";
+ const std::string demarcator = "_";
+
+ core::ProcessSession session(context);
+
+ minifi::PayloadSerializer payloadSerializer([&] (const
std::shared_ptr<core::FlowFile>& ff, minifi::InputStreamCallback* cb) {
+ return session.read(ff, cb);
+ });
+ minifi::FlowFileV3Serializer ffV3Serializer([&] (const
std::shared_ptr<core::FlowFile>& ff, minifi::InputStreamCallback* cb) {
+ return session.read(ff, cb);
+ });
+
+ minifi::FlowFileSerializer* usedSerializer;
+
+ std::vector<std::shared_ptr<core::FlowFile>> files;
+
+ for (const auto& content : std::vector<std::string>{"first ff content",
"second ff content", "some other data"}) {
+ minifi::io::DataStream contentStream{reinterpret_cast<const
uint8_t*>(content.data()), static_cast<int>(content.length())};
+ auto ff = session.create();
+ session.importFrom(contentStream, ff);
+ ff->addAttribute("one", "banana");
+ ff->addAttribute("two", "seven");
Review comment:
1. 🍌
2. 7️⃣
##########
File path: libminifi/include/FlowFileRecord.h
##########
@@ -39,6 +39,7 @@
#include "ResourceClaim.h"
#include "Connection.h"
#include "io/OutputStream.h"
+#include "io/StreamPipe.h"
Review comment:
Is this a leftover?
##########
File path: libminifi/include/io/StreamPipe.h
##########
@@ -0,0 +1,110 @@
+/**
+ * @file FlowFileRecord.h
+ * Flow file record class declaration
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include "BaseStream.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+
Review comment:
I'd expect these to be in the `io` namespace like everything else in the
io directory.
##########
File path: libminifi/include/io/StreamPipe.h
##########
@@ -0,0 +1,110 @@
+/**
+ * @file FlowFileRecord.h
+ * Flow file record class declaration
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <memory>
+#include <utility>
+#include "BaseStream.h"
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+
+// FlowFile IO Callback functions for input and output
+// throw exception for error
+class InputStreamCallback {
+ public:
+ virtual ~InputStreamCallback() = default;
+
+ virtual int64_t process(const std::shared_ptr<io::BaseStream>& stream) = 0;
+};
+class OutputStreamCallback {
+ public:
+ virtual ~OutputStreamCallback() = default;
+ virtual int64_t process(const std::shared_ptr<io::BaseStream>& stream) = 0;
+};
+
+namespace internal {
+
+inline int64_t pipe(const std::shared_ptr<io::BaseStream>& src, const
std::shared_ptr<io::BaseStream>& dst) {
+ uint8_t buffer[4096U];
+ int64_t totalTransferred = 0;
+ while (true) {
+ int readRet = src->read(buffer, sizeof(buffer));
+ if (readRet < 0) {
+ return readRet;
+ }
+ if (readRet == 0) {
+ break;
+ }
+ int remaining = readRet;
+ int transferred = 0;
+ while (remaining > 0) {
+ int writeRet = dst->write(buffer + transferred, remaining);
+ // TODO(adebreceni):
+ // write might return 0, e.g. in case of a congested server
+ // what should we return then?
+ // - the number of bytes read or
+ // - the number of bytes wrote
Review comment:
The number of bytes written. The copy is successful if the data was
successfully read from the source and written to the destination stream. So the
number of successfully copied bytes is the number of bytes written to the
destination stream. This approach also leaves some room for a robust user to
restore the data that was read from the source but not copied, before
retrying.
##########
File path: extensions/libarchive/MergeContent.cpp
##########
@@ -72,10 +74,12 @@ core::Property MergeContent::AttributeStrategy(
"only the attributes that exist on all FlowFiles in the
bundle, with the same value, will be preserved.")
->withAllowableValues<std::string>({merge_content_options::ATTRIBUTE_STRATEGY_KEEP_COMMON,
merge_content_options::ATTRIBUTE_STRATEGY_KEEP_ALL_UNIQUE})
->withDefaultValue(merge_content_options::ATTRIBUTE_STRATEGY_KEEP_COMMON)->build());
+core::Property MergeContent::FlowFileSerializer(
+ core::PropertyBuilder::createProperty("Flow File Serializer")
+ ->withDescription("Determines how to flow files should be serialized before
merging")
+
->withAllowableValues<std::string>({merge_content_options::SERIALIZER_PAYLOAD,
merge_content_options::SERIALIZER_FLOW_FILE_V3})
+ ->withDefaultValue(merge_content_options::SERIALIZER_PAYLOAD)->build());
Review comment:
NiFi uses the merge format property for this purpose, with the new
serializer bound to the equivalent of `BinaryConcatenationMerge`. Why is it
separate here?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]