fgerlits commented on code in PR #1682:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1682#discussion_r1395500165


##########
extensions/standard-processors/tests/unit/SplitTextTests.cpp:
##########
@@ -0,0 +1,860 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FlowFileRecord.h"
+#include "TestBase.h"
+#include "Catch.h"
+#include "processors/SplitText.h"
+#include "SingleProcessorTestController.h"
+#include "io/BufferStream.h"
+
+namespace org::apache::nifi::minifi::test {
+
+TEST_CASE("Test LineReader with nullptr") {
+  processors::detail::LineReader reader{nullptr};
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+TEST_CASE("Test LineReader with empty stream") {
+  auto stream = std::make_shared<io::BufferStream>();
+  processors::detail::LineReader reader{nullptr};
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+TEST_CASE("Test LineReader with trailing endline") {
+  auto stream = std::make_shared<io::BufferStream>();
+  std::string input = "this is a new line\nand another line\r\nthirdline\n";
+  stream->write(reinterpret_cast<const uint8_t*>(input.data()), input.size());
+  processors::detail::LineReader reader{stream};
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 0, .size = 19, .endline_size 
= 1});
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 19, .size = 18, 
.endline_size = 2});
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 37, .size = 10, 
.endline_size = 1});
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+TEST_CASE("Test LineReader without trailing endlines") {
+  auto stream = std::make_shared<io::BufferStream>();
+  std::string input = "this is a new line\nand another line\r\nthirdline";
+  stream->write(reinterpret_cast<const uint8_t*>(input.data()), input.size());
+  processors::detail::LineReader reader{stream};
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 0, .size = 19, .endline_size 
= 1});
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 19, .size = 18, 
.endline_size = 2});
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 37, .size = 9, .endline_size 
= 0});
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+TEST_CASE("Test LineReader with input larger than buffer length") {
+  auto stream = std::make_shared<io::BufferStream>();
+  const auto first_line_size = 
static_cast<size_t>(processors::detail::SPLIT_TEXT_BUFFER_SIZE * 1.5);
+  const auto second_line_size = 
static_cast<size_t>(processors::detail::SPLIT_TEXT_BUFFER_SIZE * 1.7);
+  std::string input = std::string(first_line_size, 'a') + "\n" + 
std::string(second_line_size, 'b') + "\n";
+  stream->write(reinterpret_cast<const uint8_t*>(input.data()), input.size());
+  processors::detail::LineReader reader{stream};
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 0, .size = first_line_size + 
1, .endline_size = 1});
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = first_line_size +1 , .size = 
second_line_size + 1, .endline_size = 1});
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+TEST_CASE("Test LineReader with input of same size as buffer length") {
+  auto stream = std::make_shared<io::BufferStream>();
+  std::string input = std::string(processors::detail::SPLIT_TEXT_BUFFER_SIZE - 
1, 'a') + "\n" + std::string(processors::detail::SPLIT_TEXT_BUFFER_SIZE * 2 - 
1, 'b') + "\n";
+  stream->write(reinterpret_cast<const uint8_t*>(input.data()), input.size());
+  processors::detail::LineReader reader{stream};
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 0, .size = 
processors::detail::SPLIT_TEXT_BUFFER_SIZE, .endline_size = 1});
+  CHECK(*reader.readNextLine() ==
+    processors::detail::LineReader::LineInfo{.offset = 
processors::detail::SPLIT_TEXT_BUFFER_SIZE, .size = 
processors::detail::SPLIT_TEXT_BUFFER_SIZE * 2, .endline_size = 1});
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+TEST_CASE("Test LineReader with input larger than buffer length without 
trailing endline") {
+  auto stream = std::make_shared<io::BufferStream>();
+  const auto first_line_size = 
static_cast<size_t>(processors::detail::SPLIT_TEXT_BUFFER_SIZE * 1.5);
+  const auto second_line_size = 
static_cast<size_t>(processors::detail::SPLIT_TEXT_BUFFER_SIZE * 1.7);
+  std::string input = std::string(first_line_size, 'a') + "\n" + 
std::string(second_line_size, 'b');
+  stream->write(reinterpret_cast<const uint8_t*>(input.data()), input.size());
+  processors::detail::LineReader reader{stream};
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 0, .size = first_line_size + 
1, .endline_size = 1});
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = first_line_size + 1, .size = 
second_line_size, .endline_size = 0});
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+TEST_CASE("Test LineReader with input of same size as buffer length without 
trailing endline") {
+  auto stream = std::make_shared<io::BufferStream>();
+  std::string input = std::string(processors::detail::SPLIT_TEXT_BUFFER_SIZE - 
1, 'a') + "\n" + std::string(processors::detail::SPLIT_TEXT_BUFFER_SIZE * 2, 
'b');
+  stream->write(reinterpret_cast<const uint8_t*>(input.data()), input.size());
+  processors::detail::LineReader reader{stream};
+  CHECK(*reader.readNextLine() == 
processors::detail::LineReader::LineInfo{.offset = 0, .size = 
processors::detail::SPLIT_TEXT_BUFFER_SIZE, .endline_size = 1});
+  CHECK(*reader.readNextLine() ==
+    processors::detail::LineReader::LineInfo{.offset = 
processors::detail::SPLIT_TEXT_BUFFER_SIZE, .size = 
processors::detail::SPLIT_TEXT_BUFFER_SIZE * 2, .endline_size = 0});
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+TEST_CASE("Test LineReader with starts with filter") {
+  auto stream = std::make_shared<io::BufferStream>();
+  std::string input = "header this is a new line\nheader and another 
line\r\nthirdline\nheader line\n";
+  stream->write(reinterpret_cast<const uint8_t*>(input.data()), input.size());
+  processors::detail::LineReader reader{stream};
+  CHECK(*reader.readNextLine("header") == 
processors::detail::LineReader::LineInfo{.offset = 0, .size = 26, .endline_size 
= 1, .matches_starts_with = true});
+  CHECK(*reader.readNextLine("header") == 
processors::detail::LineReader::LineInfo{.offset = 26, .size = 25, 
.endline_size = 2, .matches_starts_with = true});
+  CHECK(*reader.readNextLine("header") == 
processors::detail::LineReader::LineInfo{.offset = 51, .size = 10, 
.endline_size = 1, .matches_starts_with = false});
+  CHECK(*reader.readNextLine("header") == 
processors::detail::LineReader::LineInfo{.offset = 61, .size = 12, 
.endline_size = 1, .matches_starts_with = true});
+  CHECK(reader.readNextLine() == std::nullopt);
+  CHECK(reader.getState() == processors::detail::StreamReadState::EndOfStream);
+}
+
+struct ExpectedSplitTextResult {
+  std::string content;
+  uint64_t fragment_index = 0;
+  uint64_t fragment_count = 0;
+  uint64_t text_line_count = 0;
+};
+
+struct SplitTextProperties {
+  uint64_t line_split_count = 0;
+  std::optional<bool> trim_trailing_newlines;
+  std::optional<uint64_t> maximum_fragment_size;
+  std::optional<uint64_t> header_line_count;
+  std::optional<std::string> header_line_marker_characters;
+};

Review Comment:
   OK, that makes sense



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@nifi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to