phrocker commented on a change in pull request #590: MINIFICPP-621 Nanofi Tailfile example URL: https://github.com/apache/nifi-minifi-cpp/pull/590#discussion_r294907911
########## File path: nanofi/tests/CTailFileTests.cpp ########## @@ -0,0 +1,365 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "catch.hpp" + +#include <vector> +#include <string> +#include <fstream> +#include <numeric> +#include <algorithm> +#include <assert.h> +#include <unistd.h> +#include <string.h> +#include <sys/stat.h> +#include "core/string_utils.h" +#include "core/file_utils.h" + + +void test_lists_equal(token_list * tknlist, const std::vector<std::string>& sv) { + REQUIRE(tknlist != NULL); + if (sv.empty()) { + REQUIRE(tknlist->head == NULL); + REQUIRE(tknlist->size == 0); + return; + } + REQUIRE(tknlist->size == sv.size()); + for (const auto& s : sv) { + if (tknlist->head) { + REQUIRE(strcmp(s.c_str(), tknlist->head->data) == 0); + tknlist->head = tknlist->head->next; + } + } +} + +std::string join_strings(const std::vector<std::string>& strings, const std::string& token) { + if (strings.size() == 1) { + return strings.at(0); + } + const auto& initialValue = strings.at(0); + return std::accumulate(strings.begin()+1, strings.end(), initialValue, + [token,initialValue](const std::string& s1, const std::string& s2) { + return s1 + token + s2; + }); +} + +/**** + * ################################################################## + * CTAILFILE STRING TOKENIZER TESTS + * ################################################################## + */ + +TEST_CASE("Test string tokenizer empty string", "[stringTokenizerEmptyString]") { + std::string delimitedString = ""; + char delim = '-'; + struct token_list tokens = tokenize_string(delimitedString.c_str(), delim); + REQUIRE(tokens.size == 0); +} + +TEST_CASE("Test string tokenizer normal delimited string", "[stringTokenizerDelimitedString]") { + std::vector<std::string> slist = {"this", "is a", "delimited", "string"}; + std::string delimitedString = join_strings(slist, "-"); + char delim = '-'; + struct token_list tokens = tokenize_string(delimitedString.c_str(), delim); + REQUIRE(tokens.size == 4); + REQUIRE(validate_list(&tokens) == 1); + test_lists_equal(&tokens, slist); +} + +TEST_CASE("Test string tokenizer delimiter started string", "[stringTokenizerDelimiterStartedString]") { + std::vector<std::string> slist = {"", "this", "is", "a test"}; + std::string delimitedString = join_strings(slist, "--"); + char delim = '-'; + struct token_list tokens = tokenize_string(delimitedString.c_str(), delim); + REQUIRE(tokens.size == 3); + REQUIRE(validate_list(&tokens) == 1); + slist.erase(std::remove(slist.begin(), slist.end(), ""), slist.end()); + test_lists_equal(&tokens, slist); +} + +TEST_CASE("Test string tokenizer only delimiter character string", "[stringTokenizerDelimiterOnlyString]") { + std::string delimitedString = "--------"; + char delim = '-'; + struct token_list tokens = tokenize_string(delimitedString.c_str(), delim); + REQUIRE(tokens.size == 0); +} + +/**** + * ################################################################## + * CTAILFILE TAIL FILE BEHAVED DELIMITED STRING TOKENIZER TESTS + * ################################################################## + */ + +TEST_CASE("Test string tokenizer for a delimited string less than 4096 bytes", "[testDelimitedStringTokenizer]") { + + std::vector<std::string> slist = {"this", "is a", "delimited", "string", ""}; + std::string delimitedString = join_strings(slist, "-"); + int len = strlen(delimitedString.c_str()); + char delim = '-'; + struct token_list tokens = tokenize_string_tailfile(delimitedString.c_str(), delim); + REQUIRE(tokens.has_non_delimited_token == 0); + REQUIRE(tokens.size == 4); + REQUIRE(validate_list(&tokens) == 1); + slist.pop_back(); + test_lists_equal(&tokens, slist); +} + +TEST_CASE("Test string tokenizer for a non-delimited string less than 4096 bytes", "[testNonDelimitedStringTokenizer]") { + std::string nonDelimitedString = "this is a non delimited string"; + char delim = '-'; + struct token_list tokens = tokenize_string_tailfile(nonDelimitedString.c_str(), delim); + REQUIRE(tokens.has_non_delimited_token == 0); + REQUIRE(tokens.size == 0); + test_lists_equal(&tokens, {}); +} + +TEST_CASE("Test string tokenizer for empty string", "[testEmptyStringTokenizer]") { + const std::string emptyString = ""; + char delim = '-'; + struct token_list tokens = tokenize_string_tailfile(emptyString.c_str(), delim); + REQUIRE(tokens.has_non_delimited_token == 0); + REQUIRE(tokens.size == 0); + test_lists_equal(&tokens, {}); +} + +TEST_CASE("Test string tokenizer for string containing only delimited characters", "[testDelimiterCharOnlyStringTokenizer]") { + const std::string str = "----"; + char delim = '-'; + struct token_list tokens = tokenize_string_tailfile(str.c_str(), delim); + REQUIRE(tokens.has_non_delimited_token == 0); + REQUIRE(tokens.size == 0); + test_lists_equal(&tokens, {}); +} + +TEST_CASE("Test string tokenizer for string starting with delimited characters", "[testDelimitedStartingStringTokenizer]") { + const std::string str = "----mystring"; + char delim = '-'; + struct token_list tokens = tokenize_string_tailfile(str.c_str(), delim); + REQUIRE(tokens.has_non_delimited_token == 0); + REQUIRE(tokens.size == 0); + test_lists_equal(&tokens, {}); +} + +TEST_CASE("Test string tokenizer for string starting and ending with delimited characters", "[testDelimitedStartingEndingStringTokenizer]") { + const std::string str = "---token1---token2---token3"; + char delim = '-'; + struct token_list tokens = tokenize_string_tailfile(str.c_str(), delim); + REQUIRE(tokens.has_non_delimited_token == 0); + REQUIRE(tokens.size == 2); + test_lists_equal(&tokens, std::vector<std::string>{"token1", "token2"}); +} + +/**** + * ################################################################## + * CTAILFILE TAIL TESTS + * ################################################################## + */ + +class FileManager { +public: + FileManager(const std::string& filePath) { + assert(!filePath.empty() && "filePath provided cannot be empty!"); + filePath_ = filePath; + outputStream_.open(filePath_, std::ios::binary); + } + + ~FileManager() { + std::ifstream ifs(filePath_); + if (ifs.good()) { + remove(filePath_.c_str()); + } + } + + void Write(const std::string& str) { + outputStream_ << str; + } + + std::string WriteNChars(uint64_t n, char c) { + std::string s(n, c); + outputStream_ << s; + return s; + } + + std::string getFilePath() const { + return filePath_; + } + + void CloseStream() { + outputStream_.flush(); + outputStream_.close(); + } + + uint64_t GetFileSize() { + CloseStream(); + struct stat buff; + if (stat(filePath_.c_str(), &buff) == 0) { + return buff.st_size; + } + return 0; + } + +private: + std::string filePath_; + std::ofstream outputStream_; +}; + +TEST_CASE("Simple tail file test", "[testTailFile]") { + + FileManager fm("test.txt"); + fm.Write("hello world"); + fm.CloseStream(); + + const char * file = fm.getFilePath().c_str(); + struct token_list tkn_list = tail_file(file, ';', 0); + REQUIRE(tkn_list.size == 0); + REQUIRE(tkn_list.head == NULL); + REQUIRE(tkn_list.total_bytes == 0); +} + +TEST_CASE("Empty file tail test", "[testEmptyFileTail]") { + FileManager fm("test.txt"); + fm.CloseStream(); + + const char * file = fm.getFilePath().c_str(); + struct token_list tkn_list = tail_file(file, ';', 0); + REQUIRE(tkn_list.size == 0); + REQUIRE(tkn_list.head == NULL); + REQUIRE(tkn_list.total_bytes == 0); +} + +TEST_CASE("File containing only delimiters tail test", "[testDelimiterOnlyFileTail]") { + FileManager fm("test.txt"); + fm.Write("----"); + fm.CloseStream(); + + const char * file = fm.getFilePath().c_str(); + struct token_list tkn_list = tail_file(file, '-', 0); + REQUIRE(tkn_list.size == 0); + REQUIRE(tkn_list.head == NULL); + REQUIRE(tkn_list.total_bytes == 4); +} + +TEST_CASE("File tail test string starting with delimiter", "[testDelimiterOnlyFileTail]") { + FileManager fm("test.txt"); + fm.Write("----hello"); + fm.CloseStream(); + + const char * file = fm.getFilePath().c_str(); + struct token_list tkn_list = tail_file(file, '-', 0); + REQUIRE(tkn_list.size == 0); + REQUIRE(tkn_list.head == NULL); + REQUIRE(tkn_list.total_bytes == 4); +} + +TEST_CASE("Test tail file with less than 4096 delimited chars", "[testTailFileDelimitedString]") { + + const std::string delimitedString = "token1--token2--token3"; + FileManager fm("test.txt"); + fm.Write(delimitedString); + const std::string filePath = fm.getFilePath(); + fm.CloseStream(); + + struct token_list tokens = tail_file(filePath.c_str(), '-', 0); + test_lists_equal(&tokens, std::vector<std::string>{"token1", "token2"}); +} + +// Although there is no delimiter within the string that is at least 4096 bytes long, +// tail_file still creates a flow file for the first 4096 bytes. +TEST_CASE("Test tail file having 4096 bytes without delimiter", "[testTailFile4096Chars]") { + + FileManager fm("test.txt"); + const std::string s = std::move(fm.WriteNChars(4096, 'a')); + const std::string filePath = fm.getFilePath(); + fm.CloseStream(); + + struct token_list tokens = tail_file(filePath.c_str(), '-', 0); + test_lists_equal(&tokens, std::vector<std::string>{std::move(s)}); +} + +// Although there is no delimiter within the string that is equal to 4096 bytes or longer +// tail_file creates a flow file for each subsequent 4096 byte chunk. It leaves the last chunk +// if it is smaller than 4096 bytes and not delimited +TEST_CASE("Test tail file having more than 4096 bytes without delimiter", "[testTailFileMoreThan4096Chars]") { + + FileManager fm("test.txt"); + const std::string s1 = std::move(fm.WriteNChars(4096, 'a')); + const std::string s2 = std::move(fm.WriteNChars(4096, 'b')); + const std::string s3 = "helloworld"; + fm.Write(s3); + fm.CloseStream(); + + const uint64_t totalStringsSize = s1.size() + s2.size() + s3.size(); + const std::string filePath = fm.getFilePath(); + const uint64_t bytesWrittenToStream = fm.GetFileSize(); + REQUIRE(bytesWrittenToStream == totalStringsSize); + + struct token_list tokens = tail_file(filePath.c_str(), '-', 0); + test_lists_equal(&tokens, std::vector<std::string>{std::move(s1), std::move(s2)}); +} + +TEST_CASE("Test tail file having more than 4096 bytes with delimiter", "[testTailFileWithDelimitedString]") { + + FileManager fm("test.txt"); + const std::string s1 = std::move(fm.WriteNChars(4096, 'a')); + const std::string d1 = std::move(fm.WriteNChars(2, '-')); + const std::string s2 = std::move(fm.WriteNChars(4096, 'b')); + fm.CloseStream(); + + const uint64_t totalStringsSize = s1.size() + s2.size() + d1.size(); + const std::string filePath = fm.getFilePath(); + const uint64_t bytesWrittenToStream = fm.GetFileSize(); + REQUIRE(bytesWrittenToStream == totalStringsSize); + + struct token_list tokens = tail_file(filePath.c_str(), '-', 0); + test_lists_equal(&tokens, std::vector<std::string>{std::move(s1), std::move(s2)}); +} + +TEST_CASE("Test tail file having more than 4096 bytes with delimiter and second chunk less than 4096", "[testTailFileWithDelimitedString]") { Review comment: Taking another casual look to make it look like I'm useful. I like the breadth of tests! Nice! ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
