fgerlits commented on a change in pull request #1004:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1004#discussion_r601685853
##########
File path: extensions/sql/processors/QueryDatabaseTable.cpp
##########
@@ -75,361 +72,233 @@ const core::Property
QueryDatabaseTable::s_maxValueColumnNames(
"If no columns are provided, all rows from the table will be considered,
which could have a performance impact. "
"NOTE: It is important to use consistent max-value column names for a
given table for incremental fetch to work properly. "
"NOTE: Because of a limitation of database access library 'soci', which
doesn't support milliseconds in it's 'dt_date', "
- "there is a possibility that flowfiles might have duplicated records, if a
max-value column with 'dt_date' type has value with milliseconds.")->
- supportsExpressionLanguage(true)->build());
+ "there is a possibility that flowfiles might have duplicated records, if a
max-value column with 'dt_date' type has value with milliseconds.")
+ ->supportsExpressionLanguage(true)->build());
-const core::Property QueryDatabaseTable::s_whereClause(
-
core::PropertyBuilder::createProperty("db-fetch-where-clause")->isRequired(false)->withDescription(
- "A custom clause to be added in the WHERE condition when building SQL
queries.")->supportsExpressionLanguage(true)->build());
+const core::Property QueryDatabaseTable::WhereClause(
+ core::PropertyBuilder::createProperty("Where Clause")
+ ->isRequired(false)
+ ->withDescription("A custom clause to be added in the WHERE condition when
building SQL queries.")
+ ->supportsExpressionLanguage(true)->build());
-const core::Property QueryDatabaseTable::s_sqlQuery(
-
core::PropertyBuilder::createProperty("db-fetch-sql-query")->isRequired(false)->withDescription(
- "A custom SQL query used to retrieve data. Instead of building a SQL query
from other properties, this query will be wrapped as a sub-query. "
- "Query must have no ORDER BY
statement.")->supportsExpressionLanguage(true)->build());
+const std::string
QueryDatabaseTable::InitialMaxValueDynamicPropertyPrefix("initial.maxvalue.");
-const core::Property QueryDatabaseTable::s_maxRowsPerFlowFile(
-
core::PropertyBuilder::createProperty("qdbt-max-rows")->isRequired(true)->withDefaultValue<int>(0)->withDescription(
- "The maximum number of result rows that will be included in a single
FlowFile. This will allow you to break up very large result sets into multiple
FlowFiles. "
- "If the value specified is zero, then all rows are returned in a single
FlowFile.")->supportsExpressionLanguage(true)->build());
+const core::Relationship QueryDatabaseTable::Success("success", "Successfully
created FlowFile from SQL query result set.");
-const core::Property QueryDatabaseTable::s_stateDirectory(
- core::PropertyBuilder::createProperty("State
Directory")->isRequired(false)->withDefaultValue("QDTState")->withDescription("DEPRECATED.
Only use it for state migration from the state file, supplying the legacy
state directory.")->build());
+const std::string QueryDatabaseTable::RESULT_TABLE_NAME = "tablename";
+const std::string QueryDatabaseTable::RESULT_ROW_COUNT =
"querydbtable.row.count";
-const std::string
QueryDatabaseTable::s_initialMaxValueDynamicPropertyPrefix("initial.maxvalue.");
-
-const core::Relationship QueryDatabaseTable::s_success("success",
"Successfully created FlowFile from SQL query result set.");
-
-static const std::string ResultTableName = "tablename";
-static const std::string ResultRowCount = "querydbtable.row.count";
-
-static const std::string TABLENAME_KEY = "tablename";
-static const std::string MAXVALUE_KEY_PREFIX = "maxvalue.";
-
-// State
-class LegacyState {
- public:
- LegacyState(const std::string& tableName, const std::string& stateDir, const
std::string& uuid, std::shared_ptr<logging::Logger> logger)
- :tableName_(tableName), logger_(logger) {
-
- filePath_ = utils::file::FileUtils::concat_path(
- utils::file::FileUtils::concat_path(
- utils::file::FileUtils::concat_path(stateDir, "uuid"), uuid),
"State.txt");
-
- if (!getStateFromFile())
- return;
-
- ok_ = true;
- }
-
- explicit operator bool() const {
- return ok_;
- }
-
- const std::unordered_map<std::string, std::string>& getStateMap() const {
- return mapState_;
- }
-
- bool moveStateFileToMigrated() {
- if (!ok_) {
- return false;
- }
- return rename(filePath_.c_str(), (filePath_ + "-migrated").c_str()) == 0;
- }
-
- private:
- static const std::string separator_;
-
- bool getStateFromFile() {
- std::string state;
-
- std::ifstream file(filePath_);
- if (!file) {
- return false;
- }
-
- std::stringstream ss;
- ss << file.rdbuf();
-
- state = ss.str();
-
- file.close();
-
- std::vector<std::string> listColumnNameValue;
-
- size_t pos = state.find(separator_, 0);
- if (pos == std::string::npos) {
- logger_->log_error("Invalid data in '%s' file.", filePath_.c_str());
- mapState_.clear();
- return false;
- }
-
- auto tableName = state.substr(0, pos);
- if (tableName != tableName_) {
- logger_->log_warn("tableName is changed - now: '%s', in State.txt:
'%s'.", tableName_.c_str(), tableName.c_str());
- mapState_.clear();
-
- return false;
- }
-
- pos += separator_.size();
-
- while (true) {
- auto newPos = state.find(separator_, pos);
- if (newPos == std::string::npos)
- break;
-
- const std::string& columnNameValue = state.substr(pos, newPos - pos);
- listColumnNameValue.emplace_back(columnNameValue);
-
- pos = newPos + separator_.size();
- }
-
- for (const auto& columnNameValue : listColumnNameValue) {
- const auto posEQ = columnNameValue.find('=');
- if (posEQ == std::string::npos) {
- logger_->log_error("Invalid data in '%s' file.", filePath_.c_str());
- mapState_.clear();
- return false;
- }
-
- const auto& name = columnNameValue.substr(0, posEQ);
- const auto& value = columnNameValue.substr(posEQ + 1);
-
- mapState_.insert({ name, value });
- }
-
- return true;
- }
-
- private:
- std::unordered_map<std::string, std::string> mapState_;
- std::string filePath_;
- std::string tableName_;
- std::shared_ptr<logging::Logger> logger_;
- bool ok_{};
-};
-
-const std::string LegacyState::separator_ = "@!qdt!@";
+const std::string QueryDatabaseTable::TABLENAME_KEY = "tablename";
+const std::string QueryDatabaseTable::MAXVALUE_KEY_PREFIX = "maxvalue.";
// QueryDatabaseTable
QueryDatabaseTable::QueryDatabaseTable(const std::string& name,
utils::Identifier uuid)
- : SQLProcessor(name, uuid) {
+ : SQLProcessor(name, uuid,
logging::LoggerFactory<QueryDatabaseTable>::getLogger()) {
}
-QueryDatabaseTable::~QueryDatabaseTable() = default;
-
void QueryDatabaseTable::initialize() {
//! Set the supported properties
- setSupportedProperties({ dbControllerService(), outputFormat(), s_tableName,
s_columnNames, s_maxValueColumnNames, s_whereClause, s_sqlQuery,
s_maxRowsPerFlowFile, s_stateDirectory});
+ setSupportedProperties({
+ DBControllerService, OutputFormat, TableName, ColumnNames,
+ MaxValueColumnNames, WhereClause, MaxRowsPerFlowFile});
//! Set the supported relationships
- setSupportedRelationships({ s_success });
+ setSupportedRelationships({ Success });
}
-void QueryDatabaseTable::processOnSchedule(core::ProcessContext &context) {
- initOutputFormat(context);
+void QueryDatabaseTable::processOnSchedule(core::ProcessContext& context) {
+ context.getProperty(OutputFormat.getName(), output_format_);
+ max_rows_ = [&] {
+ uint64_t max_rows;
+ context.getProperty(MaxRowsPerFlowFile.getName(), max_rows);
+ return gsl::narrow<size_t>(max_rows);
+ }();
- context.getProperty(s_tableName.getName(), tableName_);
- context.getProperty(s_columnNames.getName(), columnNames_);
+ state_manager_ = context.getStateManager();
+ if (state_manager_ == nullptr) {
+ throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+ }
+
+ context.getProperty(TableName.getName(), table_name_);
+ context.getProperty(WhereClause.getName(), extra_where_clause_);
+ max_value_columns_ = [&] {
+ std::string max_value_columns_str;
+ context.getProperty(MaxValueColumnNames.getName(), max_value_columns_str);
+ return utils::inputStringToList(max_value_columns_str);
+ }();
+ return_columns_ = [&] {
+ std::string return_columns_str;
+ context.getProperty(ColumnNames.getName(), return_columns_str);
+ return utils::inputStringToList(return_columns_str);
+ }();
+ queried_columns_ = utils::StringUtils::join(", ", return_columns_);
+ if (!queried_columns_.empty() && !max_value_columns_.empty()) {
+ // columns will be explicitly enumerated, we need to add the max value
columns
+ queried_columns_ = queried_columns_ + ", " + utils::StringUtils::join(",
", max_value_columns_);
+ }
- context.getProperty(s_maxValueColumnNames.getName(), maxValueColumnNames_);
- listMaxValueColumnName_ = utils::inputStringToList(maxValueColumnNames_);
+ initializeMaxValues(context);
+}
- context.getProperty(s_whereClause.getName(), whereClause_);
- context.getProperty(s_sqlQuery.getName(), sqlQuery_);
- context.getProperty(s_maxRowsPerFlowFile.getName(), maxRowsPerFlowFile_);
+void QueryDatabaseTable::processOnTrigger(core::ProcessContext& /*context*/,
core::ProcessSession& session) {
+ const auto& selectQuery = buildSelectQuery();
- mapState_.clear();
+ logger_->log_info("QueryDatabaseTable: selectQuery: '%s'",
selectQuery.c_str());
- state_manager_ = context.getStateManager();
- if (state_manager_ == nullptr) {
- throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+ auto statement = connection_->prepareStatement(selectQuery);
+
+ auto rowset = statement->execute();
+
+ std::unordered_map<std::string, std::string> new_max_values = max_values_;
+ sql::MaxCollector maxCollector{selectQuery, new_max_values};
+ auto column_filter = [&] (const std::string& column_name) {
+ return return_columns_.empty()
+ || std::find(return_columns_.begin(), return_columns_.end(),
column_name) != return_columns_.end();
+ };
+ sql::JSONSQLWriter sqlWriter{output_format_ == OutputType::JSONPretty,
column_filter};
Review comment:
This variable (`sqlWriter`) could be renamed to `json_writer`, too.
##########
File path: extensions/sql/processors/QueryDatabaseTable.cpp
##########
@@ -75,361 +72,233 @@ const core::Property
QueryDatabaseTable::s_maxValueColumnNames(
"If no columns are provided, all rows from the table will be considered,
which could have a performance impact. "
"NOTE: It is important to use consistent max-value column names for a
given table for incremental fetch to work properly. "
"NOTE: Because of a limitation of database access library 'soci', which
doesn't support milliseconds in it's 'dt_date', "
- "there is a possibility that flowfiles might have duplicated records, if a
max-value column with 'dt_date' type has value with milliseconds.")->
- supportsExpressionLanguage(true)->build());
+ "there is a possibility that flowfiles might have duplicated records, if a
max-value column with 'dt_date' type has value with milliseconds.")
+ ->supportsExpressionLanguage(true)->build());
-const core::Property QueryDatabaseTable::s_whereClause(
-
core::PropertyBuilder::createProperty("db-fetch-where-clause")->isRequired(false)->withDescription(
- "A custom clause to be added in the WHERE condition when building SQL
queries.")->supportsExpressionLanguage(true)->build());
+const core::Property QueryDatabaseTable::WhereClause(
+ core::PropertyBuilder::createProperty("Where Clause")
+ ->isRequired(false)
+ ->withDescription("A custom clause to be added in the WHERE condition when
building SQL queries.")
+ ->supportsExpressionLanguage(true)->build());
-const core::Property QueryDatabaseTable::s_sqlQuery(
-
core::PropertyBuilder::createProperty("db-fetch-sql-query")->isRequired(false)->withDescription(
- "A custom SQL query used to retrieve data. Instead of building a SQL query
from other properties, this query will be wrapped as a sub-query. "
- "Query must have no ORDER BY
statement.")->supportsExpressionLanguage(true)->build());
+const std::string
QueryDatabaseTable::InitialMaxValueDynamicPropertyPrefix("initial.maxvalue.");
-const core::Property QueryDatabaseTable::s_maxRowsPerFlowFile(
-
core::PropertyBuilder::createProperty("qdbt-max-rows")->isRequired(true)->withDefaultValue<int>(0)->withDescription(
- "The maximum number of result rows that will be included in a single
FlowFile. This will allow you to break up very large result sets into multiple
FlowFiles. "
- "If the value specified is zero, then all rows are returned in a single
FlowFile.")->supportsExpressionLanguage(true)->build());
+const core::Relationship QueryDatabaseTable::Success("success", "Successfully
created FlowFile from SQL query result set.");
-const core::Property QueryDatabaseTable::s_stateDirectory(
- core::PropertyBuilder::createProperty("State
Directory")->isRequired(false)->withDefaultValue("QDTState")->withDescription("DEPRECATED.
Only use it for state migration from the state file, supplying the legacy
state directory.")->build());
+const std::string QueryDatabaseTable::RESULT_TABLE_NAME = "tablename";
+const std::string QueryDatabaseTable::RESULT_ROW_COUNT =
"querydbtable.row.count";
-const std::string
QueryDatabaseTable::s_initialMaxValueDynamicPropertyPrefix("initial.maxvalue.");
-
-const core::Relationship QueryDatabaseTable::s_success("success",
"Successfully created FlowFile from SQL query result set.");
-
-static const std::string ResultTableName = "tablename";
-static const std::string ResultRowCount = "querydbtable.row.count";
-
-static const std::string TABLENAME_KEY = "tablename";
-static const std::string MAXVALUE_KEY_PREFIX = "maxvalue.";
-
-// State
-class LegacyState {
- public:
- LegacyState(const std::string& tableName, const std::string& stateDir, const
std::string& uuid, std::shared_ptr<logging::Logger> logger)
- :tableName_(tableName), logger_(logger) {
-
- filePath_ = utils::file::FileUtils::concat_path(
- utils::file::FileUtils::concat_path(
- utils::file::FileUtils::concat_path(stateDir, "uuid"), uuid),
"State.txt");
-
- if (!getStateFromFile())
- return;
-
- ok_ = true;
- }
-
- explicit operator bool() const {
- return ok_;
- }
-
- const std::unordered_map<std::string, std::string>& getStateMap() const {
- return mapState_;
- }
-
- bool moveStateFileToMigrated() {
- if (!ok_) {
- return false;
- }
- return rename(filePath_.c_str(), (filePath_ + "-migrated").c_str()) == 0;
- }
-
- private:
- static const std::string separator_;
-
- bool getStateFromFile() {
- std::string state;
-
- std::ifstream file(filePath_);
- if (!file) {
- return false;
- }
-
- std::stringstream ss;
- ss << file.rdbuf();
-
- state = ss.str();
-
- file.close();
-
- std::vector<std::string> listColumnNameValue;
-
- size_t pos = state.find(separator_, 0);
- if (pos == std::string::npos) {
- logger_->log_error("Invalid data in '%s' file.", filePath_.c_str());
- mapState_.clear();
- return false;
- }
-
- auto tableName = state.substr(0, pos);
- if (tableName != tableName_) {
- logger_->log_warn("tableName is changed - now: '%s', in State.txt:
'%s'.", tableName_.c_str(), tableName.c_str());
- mapState_.clear();
-
- return false;
- }
-
- pos += separator_.size();
-
- while (true) {
- auto newPos = state.find(separator_, pos);
- if (newPos == std::string::npos)
- break;
-
- const std::string& columnNameValue = state.substr(pos, newPos - pos);
- listColumnNameValue.emplace_back(columnNameValue);
-
- pos = newPos + separator_.size();
- }
-
- for (const auto& columnNameValue : listColumnNameValue) {
- const auto posEQ = columnNameValue.find('=');
- if (posEQ == std::string::npos) {
- logger_->log_error("Invalid data in '%s' file.", filePath_.c_str());
- mapState_.clear();
- return false;
- }
-
- const auto& name = columnNameValue.substr(0, posEQ);
- const auto& value = columnNameValue.substr(posEQ + 1);
-
- mapState_.insert({ name, value });
- }
-
- return true;
- }
-
- private:
- std::unordered_map<std::string, std::string> mapState_;
- std::string filePath_;
- std::string tableName_;
- std::shared_ptr<logging::Logger> logger_;
- bool ok_{};
-};
-
-const std::string LegacyState::separator_ = "@!qdt!@";
+const std::string QueryDatabaseTable::TABLENAME_KEY = "tablename";
+const std::string QueryDatabaseTable::MAXVALUE_KEY_PREFIX = "maxvalue.";
// QueryDatabaseTable
QueryDatabaseTable::QueryDatabaseTable(const std::string& name,
utils::Identifier uuid)
- : SQLProcessor(name, uuid) {
+ : SQLProcessor(name, uuid,
logging::LoggerFactory<QueryDatabaseTable>::getLogger()) {
}
-QueryDatabaseTable::~QueryDatabaseTable() = default;
-
void QueryDatabaseTable::initialize() {
//! Set the supported properties
- setSupportedProperties({ dbControllerService(), outputFormat(), s_tableName,
s_columnNames, s_maxValueColumnNames, s_whereClause, s_sqlQuery,
s_maxRowsPerFlowFile, s_stateDirectory});
+ setSupportedProperties({
+ DBControllerService, OutputFormat, TableName, ColumnNames,
+ MaxValueColumnNames, WhereClause, MaxRowsPerFlowFile});
//! Set the supported relationships
- setSupportedRelationships({ s_success });
+ setSupportedRelationships({ Success });
}
-void QueryDatabaseTable::processOnSchedule(core::ProcessContext &context) {
- initOutputFormat(context);
+void QueryDatabaseTable::processOnSchedule(core::ProcessContext& context) {
+ context.getProperty(OutputFormat.getName(), output_format_);
+ max_rows_ = [&] {
+ uint64_t max_rows;
+ context.getProperty(MaxRowsPerFlowFile.getName(), max_rows);
+ return gsl::narrow<size_t>(max_rows);
+ }();
- context.getProperty(s_tableName.getName(), tableName_);
- context.getProperty(s_columnNames.getName(), columnNames_);
+ state_manager_ = context.getStateManager();
+ if (state_manager_ == nullptr) {
+ throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+ }
+
+ context.getProperty(TableName.getName(), table_name_);
+ context.getProperty(WhereClause.getName(), extra_where_clause_);
+ max_value_columns_ = [&] {
+ std::string max_value_columns_str;
+ context.getProperty(MaxValueColumnNames.getName(), max_value_columns_str);
+ return utils::inputStringToList(max_value_columns_str);
+ }();
+ return_columns_ = [&] {
+ std::string return_columns_str;
+ context.getProperty(ColumnNames.getName(), return_columns_str);
+ return utils::inputStringToList(return_columns_str);
+ }();
+ queried_columns_ = utils::StringUtils::join(", ", return_columns_);
+ if (!queried_columns_.empty() && !max_value_columns_.empty()) {
+ // columns will be explicitly enumerated, we need to add the max value
columns
+ queried_columns_ = queried_columns_ + ", " + utils::StringUtils::join(",
", max_value_columns_);
+ }
- context.getProperty(s_maxValueColumnNames.getName(), maxValueColumnNames_);
- listMaxValueColumnName_ = utils::inputStringToList(maxValueColumnNames_);
+ initializeMaxValues(context);
+}
- context.getProperty(s_whereClause.getName(), whereClause_);
- context.getProperty(s_sqlQuery.getName(), sqlQuery_);
- context.getProperty(s_maxRowsPerFlowFile.getName(), maxRowsPerFlowFile_);
+void QueryDatabaseTable::processOnTrigger(core::ProcessContext& /*context*/,
core::ProcessSession& session) {
+ const auto& selectQuery = buildSelectQuery();
- mapState_.clear();
+ logger_->log_info("QueryDatabaseTable: selectQuery: '%s'",
selectQuery.c_str());
- state_manager_ = context.getStateManager();
- if (state_manager_ == nullptr) {
- throw Exception(PROCESSOR_EXCEPTION, "Failed to get StateManager");
+ auto statement = connection_->prepareStatement(selectQuery);
+
+ auto rowset = statement->execute();
+
+ std::unordered_map<std::string, std::string> new_max_values = max_values_;
+ sql::MaxCollector maxCollector{selectQuery, new_max_values};
+ auto column_filter = [&] (const std::string& column_name) {
+ return return_columns_.empty()
+ || std::find(return_columns_.begin(), return_columns_.end(),
column_name) != return_columns_.end();
+ };
+ sql::JSONSQLWriter sqlWriter{output_format_ == OutputType::JSONPretty,
column_filter};
+ FlowFileGenerator flow_file_creator{session, sqlWriter};
+ sql::SQLRowsetProcessor sqlRowsetProcessor(rowset, {sqlWriter, maxCollector,
flow_file_creator});
+
+ while (size_t row_count = sqlRowsetProcessor.process(max_rows_)) {
+ auto new_file = flow_file_creator.getLastFlowFile();
+ new_file->addAttribute(RESULT_ROW_COUNT, std::to_string(row_count));
Review comment:
A `gsl_Expects(new_file)` check before `new_file` is dereferenced would be useful here, too.
##########
File path: extensions/sql/processors/SQLProcessor.cpp
##########
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SQLProcessor.h"
+
+#include <vector>
+#include <memory>
+
+#include "core/FlowFile.h"
+#include "core/ProcessContext.h"
+#include "core/ProcessSession.h"
+#include "Exception.h"
+
+#include <soci/error.h>
+
+namespace org {
+namespace apache {
+namespace nifi {
+namespace minifi {
+namespace processors {
+
+const core::Property SQLProcessor::DBControllerService(
+ core::PropertyBuilder::createProperty("DB Controller Service")
+ ->isRequired(true)
+ ->withDescription("Database Controller Service.")
+ ->supportsExpressionLanguage(true)->build());
+
+void SQLProcessor::onSchedule(const std::shared_ptr<core::ProcessContext>&
context, const std::shared_ptr<core::ProcessSessionFactory>&
/*sessionFactory*/) {
+ std::string controllerService;
+ context->getProperty(DBControllerService.getName(), controllerService);
+
+ db_service_ =
std::dynamic_pointer_cast<sql::controllers::DatabaseService>(context->getControllerService(controllerService));
+ if (!db_service_) {
+ throw minifi::Exception(PROCESSOR_EXCEPTION, "'DB Controller Service' must
be defined");
Review comment:
Using `DBControllerService.getName()` instead of the hard-coded 'DB Controller Service' string in this exception message would be better.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org