szaszm commented on a change in pull request #1073: URL: https://github.com/apache/nifi-minifi-cpp/pull/1073#discussion_r644587782
########## File path: libminifi/test/sql-tests/mocks/MockConnectors.cpp ########## @@ -0,0 +1,386 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MockConnectors.h" + +#include <fstream> +#include <algorithm> +#include <utility> +#include <string> +#include <memory> + +#include "utils/GeneralUtils.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace sql { + +std::size_t MockRow::size() const { + return column_names_.size(); +} + +std::string MockRow::getColumnName(std::size_t index) const { + return column_names_.at(index); +} + +bool MockRow::isNull(std::size_t index) const { + return column_values_.at(index) == "NULL"; +} + +DataType MockRow::getDataType(std::size_t index) const { + return column_types_.at(index); +} + +std::string MockRow::getString(std::size_t index) const { + return column_values_.at(index); +} + +double MockRow::getDouble(std::size_t index) const { + return std::atof(column_values_.at(index).c_str()); +} + +int MockRow::getInteger(std::size_t index) const { + return std::atoi(column_values_.at(index).c_str()); +} + +long long MockRow::getLongLong(std::size_t index) const { // NOLINT + return std::atoll(column_values_.at(index).c_str()); +} + +unsigned long long MockRow::getUnsignedLongLong(std::size_t index) const { // NOLINT + return static_cast<unsigned long long>(std::atoll(column_values_.at(index).c_str())); // NOLINT +} + +std::tm MockRow::getDate(std::size_t /*index*/) const { + throw std::runtime_error("date not implemented"); +} + +std::vector<std::string> MockRow::getValues() const { + return column_values_; +} + +std::string MockRow::getValue(const std::string& col_name) const { + auto it = std::find(column_names_.begin(), column_names_.end(), col_name); + if (it != column_names_.end()) { + return column_values_.at(it-column_names_.begin()); + } + throw std::runtime_error("Unknown column name for getting value"); +} + +void MockRowset::addRow(const std::vector<std::string>& column_values) { + rows_.emplace_back(column_names_, column_types_, column_values); +} + +void MockRowset::reset() { + current_row_ = rows_.begin(); +} + +bool MockRowset::is_done() { + return current_row_ == rows_.end(); +} + +Row& MockRowset::getCurrent() { + return *current_row_; +} + +void MockRowset::next() { + ++current_row_; +} + +std::vector<std::string> MockRowset::getColumnNames() const { + return column_names_; +} + +std::vector<DataType> MockRowset::getColumnTypes() const { + return column_types_; +} + +std::vector<MockRow> MockRowset::getRows() const { + return rows_; +} + +std::size_t MockRowset::getColumnIndex(const std::string& col_name) const { + auto it = std::find(column_names_.begin(), column_names_.end(), col_name); + if (it != column_names_.end()) { + return it-column_names_.begin(); + } + throw std::runtime_error("Unknown column name for getting index"); +} + +void MockRowset::sort(const std::string& order_by_col, bool order_ascending) { + std::sort(rows_.begin(), rows_.end(), [&](const MockRow& first, const MockRow& second) { + if (order_ascending) { + return first.getValue(order_by_col) < second.getValue(order_by_col); + } else { + return first.getValue(order_by_col) > second.getValue(order_by_col); + } + }); +} + +std::unique_ptr<MockRowset> MockRowset::select(const std::vector<std::string>& cols, const std::function<bool(const MockRow&)>& condition, const std::string& order_by_col, bool order_ascending) { + if (!order_by_col.empty()) { + sort(order_by_col, order_ascending); + } + + std::unique_ptr<MockRowset> rowset; + if (cols.empty()) { + rowset = utils::make_unique<MockRowset>(column_names_, column_types_); + } else { + std::vector<DataType> col_types; + for (const auto& col : cols) { + col_types.push_back(column_types_.at(getColumnIndex(col))); + } + rowset = utils::make_unique<MockRowset>(cols, col_types); + } + + std::vector<std::string> used_cols = cols.empty() ? column_names_ : cols; + for (const auto& row : rows_) { + if (condition(row)) { + std::vector<std::string> values; + for (const auto& col : used_cols) { + values.push_back(row.getValue(col)); + } + rowset->addRow(values); + } + } + + return rowset; +} + +std::unique_ptr<Rowset> MockDB::execute(const std::string& query, const std::vector<std::string>& args) { + if (minifi::utils::StringUtils::startsWith(query, "create table")) { + createTable(query); + } else if (minifi::utils::StringUtils::startsWith(query, "insert into")) { + insertInto(query, args); + } else if (minifi::utils::StringUtils::startsWith(query, "select")) { + return select(query, args); + } else { + throw std::runtime_error("Unknown query type"); + } + + return nullptr; +} + +void MockDB::createTable(const std::string& query) { + std::smatch match; + std::regex expr("create table (\\w+)\\s*\\((.*)\\);"); + std::regex_search(query, match, expr); + std::string table_name = match[1]; + auto columns_with_type = minifi::utils::StringUtils::splitAndTrim(match[2], ","); + std::vector<std::string> col_names; + std::vector<DataType> col_types; + for (const auto& col_with_type : columns_with_type) { + auto splitted = minifi::utils::StringUtils::splitAndTrim(col_with_type, " "); + col_names.push_back(splitted[0]); + col_types.push_back(stringToDataType(splitted[1])); + } + tables_.emplace(table_name, MockRowset{col_names, col_types}); + storeDb(); +} + +void MockDB::insertInto(const std::string& query, const std::vector<std::string>& args) { + std::string replaced_query = query; + for (const auto& arg : args) { + replaced_query = minifi::utils::StringUtils::replaceOne(replaced_query, "?", arg); + } + + std::smatch match; + std::regex expr("insert into (\\w+)\\s*(\\((.*)\\))*\\s*values\\s*\\((.+)\\)"); + std::regex_search(replaced_query, match, expr); + std::string table_name = match[1]; + std::vector<std::string> values = minifi::utils::StringUtils::splitAndTrim(match[4], ","); + for (auto& value : values) { + value = minifi::utils::StringUtils::removeFramingCharacters(value, '\''); + } + auto insert_col_names = minifi::utils::StringUtils::splitAndTrim(match[3], ","); + if (!insert_col_names.empty()) { + auto col_names = tables_.at(table_name).getColumnNames(); + std::vector<std::string> row; + for (const auto& col_name : col_names) { + auto it = std::find(insert_col_names.begin(), insert_col_names.end(), col_name); + if (it != insert_col_names.end()) { + row.push_back(values.at(it-insert_col_names.begin())); + } else { + row.push_back("NULL"); + } + } + tables_.at(table_name).addRow(row); + } else { + tables_.at(table_name).addRow(values); + } + + storeDb(); +} + +std::unique_ptr<Rowset> MockDB::select(const std::string& query, const std::vector<std::string>& args) { + std::string replaced_query = query; + for (const auto& arg : args) { + replaced_query = minifi::utils::StringUtils::replaceOne(replaced_query, "?", arg); + } + + std::smatch match; + std::regex expr("select\\s+(.+)\\s+from\\s+(\\w+)\\s*(where ((.+(?= order by))|.+$))*\\s*(order by (.+))*"); + std::regex_search(replaced_query, match, expr); + auto cols = minifi::utils::StringUtils::splitAndTrim(match[1], ","); + if (cols[0] == "*") { + cols = {}; + } + std::string table_name = match[2]; + std::string condition_str = match[4]; + std::function<bool(const MockRow&)> condition; + if (!condition_str.empty()) { + if (condition_str == "int_col > 103") { Review comment: Two problems with reusing Expression.cpp: 1. It's not available when compiling with expression language support disabled 2. It's parsing nifi expression language, but this looks like SQL to me Not sure what would be the best way to solve this. ########## File path: libminifi/test/sql-tests/mocks/MockConnectors.h ########## @@ -0,0 +1,175 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <regex> +#include <map> +#include <vector> +#include <algorithm> +#include <memory> +#include <string> + +#include "data/DatabaseConnectors.h" +#include "utils/StringUtils.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace sql { + +class MockRow : public Row { + public: + MockRow(std::vector<std::string>& column_names, std::vector<DataType>& column_types, const std::vector<std::string>& column_values) + : column_names_(column_names), column_types_(column_types), column_values_(column_values) { + } + + MockRow(MockRow&& other) = default; + MockRow(const MockRow& other) = default; + MockRow& operator=(MockRow&& other) { + column_names_ = other.column_names_; + column_types_ = other.column_types_; Review comment: I'd say just use pointers. Whether you write `*column_names_` or `column_names_.get()` doesn't really matter. Raw pointers are not evil as observer pointers. ########## File path: libminifi/test/sql-tests/mocks/MockConnectors.cpp ########## @@ -0,0 +1,386 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MockConnectors.h" + +#include <fstream> +#include <algorithm> +#include <utility> +#include <string> +#include <memory> + +#include "utils/GeneralUtils.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace sql { + +std::size_t MockRow::size() const { + return column_names_.size(); +} + +std::string MockRow::getColumnName(std::size_t index) const { + return column_names_.at(index); +} + +bool MockRow::isNull(std::size_t index) const { + return column_values_.at(index) == "NULL"; +} + +DataType MockRow::getDataType(std::size_t index) const { + return column_types_.at(index); +} + +std::string MockRow::getString(std::size_t index) const { + return column_values_.at(index); +} + +double MockRow::getDouble(std::size_t index) const { + return std::atof(column_values_.at(index).c_str()); +} + +int MockRow::getInteger(std::size_t index) const { + return std::atoi(column_values_.at(index).c_str()); +} + +long long MockRow::getLongLong(std::size_t index) const { // NOLINT + return std::atoll(column_values_.at(index).c_str()); +} + +unsigned long long MockRow::getUnsignedLongLong(std::size_t index) const { // NOLINT + return static_cast<unsigned long long>(std::atoll(column_values_.at(index).c_str())); // NOLINT +} + +std::tm MockRow::getDate(std::size_t /*index*/) const { + throw std::runtime_error("date not implemented"); +} + +std::vector<std::string> MockRow::getValues() const { + return column_values_; +} + +std::string MockRow::getValue(const std::string& col_name) const { + auto it = std::find(column_names_.begin(), column_names_.end(), col_name); + if (it != column_names_.end()) { + return column_values_.at(it-column_names_.begin()); + } + throw std::runtime_error("Unknown column name for getting value"); +} + +void MockRowset::addRow(const std::vector<std::string>& column_values) { + rows_.emplace_back(column_names_, column_types_, column_values); +} + +void MockRowset::reset() { + current_row_ = rows_.begin(); +} + +bool MockRowset::is_done() { + return current_row_ == rows_.end(); +} + +Row& MockRowset::getCurrent() { + return *current_row_; +} + +void MockRowset::next() { + ++current_row_; +} + +std::vector<std::string> MockRowset::getColumnNames() const { + return column_names_; +} + +std::vector<DataType> MockRowset::getColumnTypes() const { + return column_types_; +} + +std::vector<MockRow> MockRowset::getRows() const { + return rows_; +} + +std::size_t MockRowset::getColumnIndex(const std::string& col_name) const { + auto it = std::find(column_names_.begin(), column_names_.end(), col_name); + if (it != column_names_.end()) { + return it-column_names_.begin(); + } + throw std::runtime_error("Unknown column name for getting index"); +} + +void MockRowset::sort(const std::string& order_by_col, bool order_ascending) { + std::sort(rows_.begin(), rows_.end(), [&](const MockRow& first, const MockRow& second) { + if (order_ascending) { + return first.getValue(order_by_col) < second.getValue(order_by_col); + } else { + return first.getValue(order_by_col) > second.getValue(order_by_col); + } + }); +} + +std::unique_ptr<MockRowset> MockRowset::select(const std::vector<std::string>& cols, const std::function<bool(const MockRow&)>& condition, const std::string& order_by_col, bool order_ascending) { + if (!order_by_col.empty()) { + sort(order_by_col, order_ascending); + } + + std::unique_ptr<MockRowset> rowset; + if (cols.empty()) { + rowset = utils::make_unique<MockRowset>(column_names_, column_types_); + } else { + std::vector<DataType> col_types; + for (const auto& col : cols) { + col_types.push_back(column_types_.at(getColumnIndex(col))); + } + rowset = utils::make_unique<MockRowset>(cols, col_types); + } + + std::vector<std::string> used_cols = cols.empty() ? column_names_ : cols; + for (const auto& row : rows_) { + if (condition(row)) { + std::vector<std::string> values; + for (const auto& col : used_cols) { + values.push_back(row.getValue(col)); + } + rowset->addRow(values); + } + } + + return rowset; +} + +std::unique_ptr<Rowset> MockDB::execute(const std::string& query, const std::vector<std::string>& args) { + if (minifi::utils::StringUtils::startsWith(query, "create table")) { + createTable(query); + } else if (minifi::utils::StringUtils::startsWith(query, "insert into")) { + insertInto(query, args); + } else if (minifi::utils::StringUtils::startsWith(query, "select")) { + return select(query, args); + } else { + throw std::runtime_error("Unknown query type"); + } + + return nullptr; +} + +void MockDB::createTable(const std::string& query) { + std::smatch match; + std::regex expr("create table (\\w+)\\s*\\((.*)\\);"); + std::regex_search(query, match, expr); + std::string table_name = match[1]; + auto columns_with_type = minifi::utils::StringUtils::splitAndTrim(match[2], ","); + std::vector<std::string> col_names; + std::vector<DataType> col_types; + for (const auto& col_with_type : columns_with_type) { + auto splitted = minifi::utils::StringUtils::splitAndTrim(col_with_type, " "); + col_names.push_back(splitted[0]); + col_types.push_back(stringToDataType(splitted[1])); + } + tables_.emplace(table_name, MockRowset{col_names, col_types}); + storeDb(); +} + +void MockDB::insertInto(const std::string& query, const std::vector<std::string>& args) { + std::string replaced_query = query; + for (const auto& arg : args) { + replaced_query = minifi::utils::StringUtils::replaceOne(replaced_query, "?", arg); + } + + std::smatch match; + std::regex expr("insert into (\\w+)\\s*(\\((.*)\\))*\\s*values\\s*\\((.+)\\)"); + std::regex_search(replaced_query, match, expr); + std::string table_name = match[1]; + std::vector<std::string> values = minifi::utils::StringUtils::splitAndTrim(match[4], ","); + for (auto& value : values) { + value = minifi::utils::StringUtils::removeFramingCharacters(value, '\''); + } + auto insert_col_names = minifi::utils::StringUtils::splitAndTrim(match[3], ","); + if (!insert_col_names.empty()) { + auto col_names = tables_.at(table_name).getColumnNames(); + std::vector<std::string> row; + for (const auto& col_name : col_names) { + auto it = std::find(insert_col_names.begin(), insert_col_names.end(), col_name); + if (it != insert_col_names.end()) { + row.push_back(values.at(it-insert_col_names.begin())); + } else { + row.push_back("NULL"); + } + } + tables_.at(table_name).addRow(row); + } else { + tables_.at(table_name).addRow(values); + } + + storeDb(); +} + +std::unique_ptr<Rowset> MockDB::select(const std::string& query, const std::vector<std::string>& args) { + std::string replaced_query = query; + for (const auto& arg : args) { + replaced_query = minifi::utils::StringUtils::replaceOne(replaced_query, "?", arg); + } + + std::smatch match; + std::regex expr("select\\s+(.+)\\s+from\\s+(\\w+)\\s*(where ((.+(?= order by))|.+$))*\\s*(order by (.+))*"); + std::regex_search(replaced_query, match, expr); + auto cols = minifi::utils::StringUtils::splitAndTrim(match[1], ","); + if (cols[0] == "*") { + cols = {}; + } + std::string table_name = match[2]; + std::string condition_str = match[4]; + std::function<bool(const MockRow&)> condition; + if (!condition_str.empty()) { + if (condition_str == "int_col > 103") { + condition = [&](const MockRow& row){ return std::atoi(row.getValue("int_col").c_str()) > 103; }; + } else if (condition_str == "int_col > 102") { + condition = [&](const MockRow& row){ return std::atoi(row.getValue("int_col").c_str()) > 102; }; + } else if (condition_str == "int_col = 11") { + condition = [&](const MockRow& row){ return std::atoi(row.getValue("int_col").c_str()) == 11; }; + } else if (condition_str == "int_col = 11 and text_col = banana") { Review comment: Shouldn't "banana" be in apostrophes here? Unless it's another column. ########## File path: extensions/sql/data/DatabaseConnectors.h ########## @@ -20,72 +20,68 @@ #include <memory> #include <string> - -#include <soci/soci.h> - -#include "Utils.h" +#include <vector> +#include <ctime> namespace org { namespace apache { namespace nifi { namespace minifi { namespace sql { -/** - * We do not intend to create an abstract facade here. We know that SOCI is the underlying - * SQL library. We only wish to abstract ODBC specific information - */ +enum class DataType { + STRING, + DOUBLE, + INTEGER, + LONG_LONG, + UNSIGNED_LONG_LONG, + DATE +}; -class Statement { +class Row { public: + virtual ~Row() = default; + virtual std::size_t size() const = 0; + virtual std::string getColumnName(std::size_t index) const = 0; + virtual bool isNull(std::size_t index) const = 0; + virtual DataType getDataType(std::size_t index) const = 0; + virtual std::string getString(std::size_t index) const = 0; + virtual double getDouble(std::size_t index) const = 0; + virtual int getInteger(std::size_t index) const = 0; + virtual long long getLongLong(std::size_t index) const = 0; + virtual unsigned long long getUnsignedLongLong(std::size_t index) const = 0; + virtual std::tm getDate(std::size_t index) const = 0; Review comment: This could be a `std::chrono::time_point` or if you only need the date part, then `date::year_month_day`. These types are easier to work with. ########## File path: libminifi/test/sql-tests/mocks/MockConnectors.h ########## @@ -0,0 +1,175 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <regex> +#include <map> +#include <vector> +#include <algorithm> +#include <memory> +#include <string> + +#include "data/DatabaseConnectors.h" +#include "utils/StringUtils.h" + +namespace org { +namespace apache { +namespace nifi { +namespace minifi { +namespace sql { + +class MockRow : public Row { + public: + MockRow(std::vector<std::string>& column_names, std::vector<DataType>& column_types, const std::vector<std::string>& column_values) + : column_names_(column_names), column_types_(column_types), column_values_(column_values) { + } + + MockRow(MockRow&& other) = default; + MockRow(const MockRow& other) = default; + MockRow& operator=(MockRow&& other) { + column_names_ = other.column_names_; + column_types_ = other.column_types_; Review comment: I'd say just use pointers. Whether you write `*column_names_` or `column_names_.get()` doesn't really matter. Raw pointers are not evil as observer pointers. edit: looks like I was late with this reply :) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
