Repository: incubator-quickstep Updated Branches: refs/heads/master 612e103cb -> cc9ab901f
Added the an optimization rule that eliminates a Physical node with an empty TableReference into a Selection w/ an temp TableReference. Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/cc9ab901 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/cc9ab901 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/cc9ab901 Branch: refs/heads/master Commit: cc9ab901f92880f4dd33c46797381e081241c281 Parents: 612e103 Author: Zuyu Zhang <z...@cs.wisc.edu> Authored: Wed Apr 18 17:15:34 2018 -0500 Committer: Zuyu Zhang <z...@cs.wisc.edu> Committed: Thu Apr 26 00:24:29 2018 -0500 ---------------------------------------------------------------------- cli/tests/CMakeLists.txt | 6 +- cli/tests/CommandExecutorTest.cpp | 2 - cli/tests/CommandExecutorTestRunner.cpp | 31 +- cli/tests/CommandExecutorTestRunner.hpp | 35 +- cli/tests/command_executor/Analyze.test | 136 +++++++ cli/tests/command_executor/CMakeLists.txt | 6 + cli/tests/command_executor/D.test | 1 - cli/tests/command_executor/Dt.test | 1 - query_optimizer/CMakeLists.txt | 1 + query_optimizer/ExecutionGenerator.cpp | 18 + query_optimizer/Optimizer.cpp | 3 +- query_optimizer/PhysicalGenerator.cpp | 15 +- query_optimizer/PhysicalGenerator.hpp | 10 +- query_optimizer/physical/Aggregate.cpp | 20 ++ query_optimizer/physical/Aggregate.hpp | 3 + query_optimizer/physical/CMakeLists.txt | 2 + query_optimizer/physical/HashJoin.cpp | 10 + query_optimizer/physical/HashJoin.hpp | 3 + query_optimizer/physical/NestedLoopsJoin.cpp | 9 + query_optimizer/physical/NestedLoopsJoin.hpp | 3 + query_optimizer/physical/PatternMatcher.hpp | 2 + query_optimizer/physical/Physical.hpp | 6 + query_optimizer/physical/Selection.cpp | 7 + query_optimizer/physical/Selection.hpp | 3 + query_optimizer/physical/UnionAll.hpp | 4 + query_optimizer/rules/CMakeLists.txt | 24 ++ query_optimizer/rules/EliminateEmptyNode.cpp | 358 +++++++++++++++++++ query_optimizer/rules/EliminateEmptyNode.hpp | 70 ++++ .../tests/OptimizerTextTestRunner.cpp | 3 +- query_optimizer/tests/TestDatabaseLoader.cpp | 2 +- 30 files changed, 741 insertions(+), 53 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/cli/tests/CMakeLists.txt b/cli/tests/CMakeLists.txt index f402ac0..a59491e 100644 --- a/cli/tests/CMakeLists.txt +++ b/cli/tests/CMakeLists.txt @@ -49,6 +49,7 @@ target_link_libraries(quickstep_cli_tests_CommandExecutorTest gtest quickstep_catalog_CatalogDatabase quickstep_cli_CommandExecutor + quickstep_cli_DefaultsConfigurator quickstep_cli_DropRelation quickstep_cli_PrintToScreen quickstep_parser_ParseStatement @@ -59,10 +60,9 @@ target_link_libraries(quickstep_cli_tests_CommandExecutorTest quickstep_queryexecution_QueryExecutionUtil quickstep_queryexecution_Worker quickstep_queryexecution_WorkerDirectory - quickstep_queryoptimizer_Optimizer - quickstep_queryoptimizer_OptimizerContext quickstep_queryoptimizer_QueryHandle - quickstep_queryoptimizer_tests_TestDatabaseLoader + quickstep_queryoptimizer_QueryProcessor + quickstep_storage_StorageConstants quickstep_utility_Macros quickstep_utility_MemStream quickstep_utility_SqlError http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/CommandExecutorTest.cpp ---------------------------------------------------------------------- diff --git a/cli/tests/CommandExecutorTest.cpp b/cli/tests/CommandExecutorTest.cpp index 6ad2183..a9c64dd 100644 --- a/cli/tests/CommandExecutorTest.cpp +++ b/cli/tests/CommandExecutorTest.cpp @@ -45,8 +45,6 @@ int main(int argc, char** argv) { new quickstep::CommandExecutorTestRunner(argv[3])); test_driver.reset( new quickstep::TextBasedTestDriver(&input_file, test_runner.get())); - test_driver->registerOption( - quickstep::CommandExecutorTestRunner::kResetOption); ::testing::InitGoogleTest(&argc, argv); const int success = RUN_ALL_TESTS(); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/CommandExecutorTestRunner.cpp ---------------------------------------------------------------------- diff --git a/cli/tests/CommandExecutorTestRunner.cpp b/cli/tests/CommandExecutorTestRunner.cpp index e352b24..4ab94d8 100644 --- a/cli/tests/CommandExecutorTestRunner.cpp +++ b/cli/tests/CommandExecutorTestRunner.cpp @@ -32,9 +32,8 @@ #include "query_execution/AdmitRequestMessage.hpp" #include "query_execution/ForemanSingleNode.hpp" #include "query_execution/QueryExecutionTypedefs.hpp" -#include "query_optimizer/Optimizer.hpp" -#include "query_optimizer/OptimizerContext.hpp" #include "query_optimizer/QueryHandle.hpp" +#include "query_optimizer/QueryProcessor.hpp" #include "utility/MemStream.hpp" #include "utility/SqlError.hpp" @@ -48,9 +47,6 @@ class CatalogRelation; namespace O = ::quickstep::optimizer; -const char CommandExecutorTestRunner::kResetOption[] = - "reset_before_execution"; - void CommandExecutorTestRunner::runTestCase( const std::string &input, const std::set<std::string> &options, std::string *output) { @@ -58,12 +54,6 @@ void CommandExecutorTestRunner::runTestCase( VLOG(4) << "Test SQL(s): " << input; - if (options.find(kResetOption) != options.end()) { - test_database_loader_.clear(); - test_database_loader_.createTestRelation(false /* allow_vchar */); - test_database_loader_.loadTestRelation(); - } - MemStream output_stream; sql_parser_.feedNextBuffer(new std::string(input)); @@ -81,23 +71,18 @@ void CommandExecutorTestRunner::runTestCase( if (parse_statement.getStatementType() == ParseStatement::kCommand) { quickstep::cli::executeCommand( *result.parsed_statement, - *(test_database_loader_.catalog_database()), + *query_processor_->getDefaultDatabase(), main_thread_client_id_, foreman_->getBusClientID(), &bus_, - test_database_loader_.storage_manager(), - nullptr, + &storage_manager_, + query_processor_.get(), output_stream.file()); } else { const CatalogRelation *query_result_relation = nullptr; { auto query_handle = std::make_unique<QueryHandle>(0 /* query_id */, main_thread_client_id_); - O::OptimizerContext optimizer_context; - - optimizer_.generateQueryHandle(parse_statement, - test_database_loader_.catalog_database(), - &optimizer_context, - query_handle.get()); + query_processor_->generateQueryHandle(parse_statement, query_handle.get()); query_result_relation = query_handle->getQueryResultRelation(); QueryExecutionUtil::ConstructAndSendAdmitRequestMessage( @@ -111,11 +96,11 @@ void CommandExecutorTestRunner::runTestCase( DCHECK_EQ(kWorkloadCompletionMessage, tagged_message.message_type()); if (query_result_relation) { PrintToScreen::PrintRelation(*query_result_relation, - test_database_loader_.storage_manager(), + &storage_manager_, output_stream.file()); DropRelation::Drop(*query_result_relation, - test_database_loader_.catalog_database(), - test_database_loader_.storage_manager()); + query_processor_->getDefaultDatabase(), + &storage_manager_); } } } catch (const SqlError &error) { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/CommandExecutorTestRunner.hpp ---------------------------------------------------------------------- diff --git a/cli/tests/CommandExecutorTestRunner.hpp b/cli/tests/CommandExecutorTestRunner.hpp index 83c5a9a..fb65498 100644 --- a/cli/tests/CommandExecutorTestRunner.hpp +++ b/cli/tests/CommandExecutorTestRunner.hpp @@ -20,12 +20,14 @@ #ifndef QUICKSTEP_CLI_TESTS_COMMAND_EXECUTOR_TEST_RUNNER_HPP_ #define QUICKSTEP_CLI_TESTS_COMMAND_EXECUTOR_TEST_RUNNER_HPP_ +#include <cstdlib> #include <memory> #include <set> #include <string> #include <utility> #include <vector> +#include "cli/DefaultsConfigurator.hpp" #include "parser/SqlParserWrapper.hpp" #include "query_execution/ForemanSingleNode.hpp" #include "query_execution/QueryExecutionTypedefs.hpp" @@ -33,14 +35,16 @@ #include "query_execution/Worker.hpp" #include "query_execution/WorkerDirectory.hpp" #include "query_execution/WorkerMessage.hpp" -#include "query_optimizer/Optimizer.hpp" -#include "query_optimizer/tests/TestDatabaseLoader.hpp" +#include "query_optimizer/QueryProcessor.hpp" +#include "storage/StorageConstants.hpp" #include "utility/Macros.hpp" #include "utility/textbased_test/TextBasedTestDriver.hpp" #include "tmb/id_typedefs.h" #include "tmb/message_bus.h" +#include "glog/logging.h" + namespace quickstep { /** @@ -49,18 +53,13 @@ namespace quickstep { class CommandExecutorTestRunner : public TextBasedTestRunner { public: /** - * @brief If this option is enabled, recreate the entire database and - * repopulate the data before every test. - */ - static const char kResetOption[]; - - /** * @brief Constructor. */ explicit CommandExecutorTestRunner(const std::string &storage_path) - : test_database_loader_(storage_path) { - test_database_loader_.createTestRelation(false /* allow_vchar */); - test_database_loader_.loadTestRelation(); + : catalog_path_(storage_path + kCatalogFilename), + storage_manager_(storage_path) { + DefaultsConfigurator::InitializeDefaultDatabase(storage_path, catalog_path_); + query_processor_ = std::make_unique<QueryProcessor>(std::string(catalog_path_)); bus_.Initialize(); @@ -84,8 +83,8 @@ class CommandExecutorTestRunner : public TextBasedTestRunner { new ForemanSingleNode(main_thread_client_id_, workers_.get(), &bus_, - test_database_loader_.catalog_database(), - test_database_loader_.storage_manager())); + query_processor_->getDefaultDatabase(), + &storage_manager_)); foreman_->start(); worker_->start(); @@ -95,6 +94,10 @@ class CommandExecutorTestRunner : public TextBasedTestRunner { QueryExecutionUtil::BroadcastPoisonMessage(main_thread_client_id_, &bus_); worker_->join(); foreman_->join(); + + const std::string command = "rm -f " + catalog_path_; + CHECK(!std::system(command.c_str())) + << "Failed when attempting to remove catalog proto file: " << catalog_path_; } void runTestCase(const std::string &input, @@ -102,9 +105,11 @@ class CommandExecutorTestRunner : public TextBasedTestRunner { std::string *output) override; private: + const std::string catalog_path_; + SqlParserWrapper sql_parser_; - optimizer::TestDatabaseLoader test_database_loader_; - optimizer::Optimizer optimizer_; + StorageManager storage_manager_; + std::unique_ptr<QueryProcessor> query_processor_; tmb::client_id main_thread_client_id_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/command_executor/Analyze.test ---------------------------------------------------------------------- diff --git a/cli/tests/command_executor/Analyze.test b/cli/tests/command_executor/Analyze.test new file mode 100644 index 0000000..57b8312 --- /dev/null +++ b/cli/tests/command_executor/Analyze.test @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +CREATE TABLE r (src INT, dst INT); +CREATE TABLE s (src INT, dst INT); +CREATE TABLE t (src INT, dst INT); + +INSERT INTO s VALUES(0, 0); +INSERT INTO s VALUES(1, 5); + +INSERT INTO t VALUES(0, 0); +INSERT INTO t VALUES(0, 0); +-- +== + +\analyze +-- +Analyzing r ... done +Analyzing s ... done +Analyzing t ... done +== + +SELECT r.src, r.dst FROM r; +-- ++-----------+-----------+ +|src |dst | ++-----------+-----------+ ++-----------+-----------+ +== + +# One side of InnerJoin is empty. +SELECT r.src, r.dst +FROM r, t +WHERE r.src = t.src AND r.dst = t.dst; +-- ++-----------+-----------+ +|src |dst | ++-----------+-----------+ ++-----------+-----------+ +== + +# One side of LeftSemiJoin is empty. +SELECT s.src, s.dst +FROM s +WHERE EXISTS( + SELECT r.src, r.dst FROM r WHERE s.src=r.src AND s.dst=r.dst); +-- ++-----------+-----------+ +|src |dst | ++-----------+-----------+ ++-----------+-----------+ +== + +# One side of LeftAntiJoin is empty. +SELECT s.src, s.dst +FROM s +WHERE NOT EXISTS( + SELECT r.src, r.dst FROM r WHERE s.src=r.src AND s.dst=r.dst); +-- ++-----------+-----------+ +|src |dst | ++-----------+-----------+ +| 0| 0| +| 1| 5| ++-----------+-----------+ +== + +# Union between an empty relation and a non-empty. +SELECT r.src, r.dst FROM r +UNION +SELECT t.src, t.dst FROM t; +-- ++-----------+-----------+ +|src |dst | ++-----------+-----------+ +| 0| 0| ++-----------+-----------+ +== + +# Union All between an empty relation and a non-empty. +SELECT r.src, r.dst FROM r +UNION ALL +SELECT t.src, t.dst FROM t; +-- ++-----------+-----------+ +|src |dst | ++-----------+-----------+ +| 0| 0| +| 0| 0| ++-----------+-----------+ +== + +# Union on two InnerJoins, one of which involves an empty relation. +SELECT r.src, r.dst FROM r, s WHERE r.src=s.src AND r.dst=s.dst +UNION +SELECT s.src, s.dst FROM s, t WHERE s.src=t.src AND s.dst=t.dst; +-- ++-----------+-----------+ +|src |dst | ++-----------+-----------+ +| 0| 0| ++-----------+-----------+ +== + +# Union All on two InnerJoins, one of which involves an empty relation. +SELECT r.src, r.dst FROM r, s WHERE r.src=s.src AND r.dst=s.dst +UNION ALL +SELECT s.src, s.dst FROM s, t WHERE s.src=t.src AND s.dst=t.dst; +-- ++-----------+-----------+ +|src |dst | ++-----------+-----------+ +| 0| 0| +| 0| 0| ++-----------+-----------+ +== + +DROP TABLE r; +DROP TABLE s; +DROP TABLE t; +-- +== http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/command_executor/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/cli/tests/command_executor/CMakeLists.txt b/cli/tests/command_executor/CMakeLists.txt index e62d954..3850de7 100644 --- a/cli/tests/command_executor/CMakeLists.txt +++ b/cli/tests/command_executor/CMakeLists.txt @@ -15,6 +15,11 @@ # specific language governing permissions and limitations # under the License. +add_test(quickstep_cli_tests_commandexecutor_analyze + "../quickstep_cli_tests_CommandExecutorTest" + "${CMAKE_CURRENT_SOURCE_DIR}/Analyze.test" + "${CMAKE_CURRENT_BINARY_DIR}/Analyze.test" + "${CMAKE_CURRENT_BINARY_DIR}/Analyze/") add_test(quickstep_cli_tests_commandexecutor_d "../quickstep_cli_tests_CommandExecutorTest" "${CMAKE_CURRENT_SOURCE_DIR}/D.test" @@ -41,6 +46,7 @@ endif(ENABLE_DISTRIBUTED) # Create the folders where the unit tests will store their data blocks for the # duration of their test. +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Analyze) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/D) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Dt) http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/command_executor/D.test ---------------------------------------------------------------------- diff --git a/cli/tests/command_executor/D.test b/cli/tests/command_executor/D.test index c3564a6..2d96b47 100644 --- a/cli/tests/command_executor/D.test +++ b/cli/tests/command_executor/D.test @@ -44,7 +44,6 @@ PARTITION BY HASH(col1) PARTITIONS 4; INSERT INTO foo_hash_part SELECT i, i * 3.0 FROM generate_series(1, 100) AS gs(i); CREATE TABLE averylongtablenamethatseemstoneverend (col1 INT); -DROP TABLE TEST; INSERT INTO averylongtablenamethatseemstoneverend VALUES (1); INSERT INTO averylongtablenamethatseemstoneverend VALUES (2); INSERT INTO averylongtablenamethatseemstoneverend VALUES (3); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/cli/tests/command_executor/Dt.test ---------------------------------------------------------------------- diff --git a/cli/tests/command_executor/Dt.test b/cli/tests/command_executor/Dt.test index 022cae6..f735fd3 100644 --- a/cli/tests/command_executor/Dt.test +++ b/cli/tests/command_executor/Dt.test @@ -35,7 +35,6 @@ CREATE TABLE foo4 (col1 INT, col3 DOUBLE, col4 FLOAT, col5 CHAR(5)); -DROP TABLE TEST; CREATE TABLE averylongtablenamethatseemstoneverend (col1 INT); INSERT INTO averylongtablenamethatseemstoneverend VALUES (1); INSERT INTO averylongtablenamethatseemstoneverend VALUES (2); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt index 9128c63..ab4f6ec 100644 --- a/query_optimizer/CMakeLists.txt +++ b/query_optimizer/CMakeLists.txt @@ -226,6 +226,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator quickstep_queryoptimizer_physical_Physical quickstep_queryoptimizer_rules_AttachLIPFilters quickstep_queryoptimizer_rules_CollapseSelection + quickstep_queryoptimizer_rules_EliminateEmptyNode quickstep_queryoptimizer_rules_ExtractCommonSubexpression quickstep_queryoptimizer_rules_FuseAggregateJoin quickstep_queryoptimizer_rules_FuseHashSelect http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/ExecutionGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp index 2cdcffe..8dd93d0 100644 --- a/query_optimizer/ExecutionGenerator.cpp +++ b/query_optimizer/ExecutionGenerator.cpp @@ -25,6 +25,7 @@ #include <functional> #include <memory> #include <numeric> +#include <sstream> #include <string> #include <type_traits> #include <unordered_map> @@ -347,6 +348,14 @@ void ExecutionGenerator::generatePlan(const P::PhysicalPtr &physical_plan) { if (it != physical_to_output_relation_map_.end()) { result_relation = it->second.relation; } + +#ifdef QUICKSTEP_DEBUG + std::unordered_set<relation_id> temp_relations; + for (const CatalogRelationInfo &temporary_relation_info : temporary_relation_info_vec_) { + temp_relations.insert(temporary_relation_info.relation->getID()); + } + DCHECK_EQ(temporary_relation_info_vec_.size(), temp_relations.size()); +#endif } catch (...) { // Drop all temporary relations. dropAllTemporaryRelations(); @@ -784,6 +793,9 @@ void ExecutionGenerator::convertSelection( execution_plan_->addDirectDependency(select_index, input_relation_info->producer_operator_index, false /* is_pipeline_breaker */); + } else if (input_relation.isTemporary()) { + // NOTE(zuyu): drop the temp relation created by EliminateEmptyNode rule. + temporary_relation_info_vec_.emplace_back(select_index, &input_relation); } physical_to_output_relation_map_.emplace( std::piecewise_construct, @@ -1285,6 +1297,9 @@ void ExecutionGenerator::convertCopyTo(const P::CopyToPtr &physical_plan) { execution_plan_->addDirectDependency(table_export_operator_index, input_relation_info->producer_operator_index, false /* is_pipeline_breaker */); + } else if (input_relation->isTemporary()) { + // NOTE(zuyu): drop the temp relation created by EliminateEmptyNode rule. + temporary_relation_info_vec_.emplace_back(table_export_operator_index, input_relation); } } @@ -1639,6 +1654,9 @@ void ExecutionGenerator::convertInsertSelection( execution_plan_->addDirectDependency(insert_selection_index, selection_relation_info->producer_operator_index, false /* is_pipeline_breaker */); + } else if (selection_relation.isTemporary()) { + // NOTE(zuyu): drop the temp relation created by EliminateEmptyNode rule. + temporary_relation_info_vec_.emplace_back(insert_selection_index, &selection_relation); } execution_plan_->addDirectDependency(save_blocks_index, insert_selection_index, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/Optimizer.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/Optimizer.cpp b/query_optimizer/Optimizer.cpp index d002ca1..9e6472f 100644 --- a/query_optimizer/Optimizer.cpp +++ b/query_optimizer/Optimizer.cpp @@ -37,7 +37,8 @@ void Optimizer::generateQueryHandle(const ParseStatement &parse_statement, execution_generator.generatePlan( physical_generator.generatePlan( - logical_generator.generatePlan(*catalog_database, parse_statement))); + logical_generator.generatePlan(*catalog_database, parse_statement), + catalog_database)); } void Optimizer::findReferencedBaseRelations(const ParseStatement &parse_statement, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/PhysicalGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp index 0d15a66..c53707c 100644 --- a/query_optimizer/PhysicalGenerator.cpp +++ b/query_optimizer/PhysicalGenerator.cpp @@ -28,6 +28,7 @@ #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/rules/AttachLIPFilters.hpp" #include "query_optimizer/rules/CollapseSelection.hpp" +#include "query_optimizer/rules/EliminateEmptyNode.hpp" #include "query_optimizer/rules/ExtractCommonSubexpression.hpp" #include "query_optimizer/rules/FuseAggregateJoin.hpp" #include "query_optimizer/rules/FuseHashSelect.hpp" @@ -64,6 +65,9 @@ DEFINE_bool(reorder_hash_joins, true, "cardinality and selective tables to be joined first, which is suitable " "for queries on star-schema tables."); +DEFINE_bool(use_eliminate_empty_node, true, + "If true, apply an optimization to eliminate joins if at least " + "one side is empty."); DEFINE_bool(use_partition_rule, true, "If true, apply an optimization to support partitioned inputs. The " "optimization may add additional Selection for repartitioning."); @@ -105,9 +109,9 @@ void PhysicalGenerator::createStrategies() { } P::PhysicalPtr PhysicalGenerator::generatePlan( - const L::LogicalPtr &logical_plan) { + const L::LogicalPtr &logical_plan, CatalogDatabase *catalog_database) { physical_plan_ = generateInitialPlan(logical_plan); - return optimizePlan(); + return optimizePlan(catalog_database); } P::PhysicalPtr PhysicalGenerator::generateInitialPlan( @@ -130,10 +134,15 @@ P::PhysicalPtr PhysicalGenerator::generateInitialPlan( return physical_plan; } -P::PhysicalPtr PhysicalGenerator::optimizePlan() { +P::PhysicalPtr PhysicalGenerator::optimizePlan( + CatalogDatabase *catalog_database) { std::vector<std::unique_ptr<Rule<P::Physical>>> rules; rules.emplace_back(new PruneColumns()); + if (FLAGS_use_eliminate_empty_node) { + rules.emplace_back(new EliminateEmptyNode(catalog_database)); + } + rules.emplace_back(new PushDownLowCostDisjunctivePredicate()); rules.emplace_back(new ReduceGroupByAttributes(optimizer_context_)); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/PhysicalGenerator.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/PhysicalGenerator.hpp b/query_optimizer/PhysicalGenerator.hpp index 42fea86..1b6a6a6 100644 --- a/query_optimizer/PhysicalGenerator.hpp +++ b/query_optimizer/PhysicalGenerator.hpp @@ -31,6 +31,9 @@ #include "utility/Macros.hpp" namespace quickstep { + +class CatalogDatabase; + namespace optimizer { class OptimizerContext; @@ -68,9 +71,11 @@ class PhysicalGenerator : public LogicalToPhysicalMapper { * @brief Generates and optimizes a physical plan for \p logical_plan. * * @param logical_plan The logical plan to be converted to a physical plan. + * @param catalog_database The catalog database where this query is executed. * @return The physical plan. */ - physical::PhysicalPtr generatePlan(const logical::LogicalPtr &logical_plan); + physical::PhysicalPtr generatePlan(const logical::LogicalPtr &logical_plan, + CatalogDatabase *catalog_database); /** * @brief Sets the best physical plan for \p logical_plan is \p physical_plan. @@ -106,9 +111,10 @@ class PhysicalGenerator : public LogicalToPhysicalMapper { /** * @brief Applies physical rules to the initial physical plan. * + * @param catalog_database The catalog database where this query is executed. * @return The optimized physical plan. */ - physical::PhysicalPtr optimizePlan(); + physical::PhysicalPtr optimizePlan(CatalogDatabase *catalog_database); std::vector<std::unique_ptr<strategy::Strategy>> strategies_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Aggregate.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/Aggregate.cpp b/query_optimizer/physical/Aggregate.cpp index 94be616..5ae9fc7 100644 --- a/query_optimizer/physical/Aggregate.cpp +++ b/query_optimizer/physical/Aggregate.cpp @@ -23,14 +23,18 @@ #include <vector> #include "query_optimizer/OptimizerTree.hpp" +#include "query_optimizer/expressions/Alias.hpp" #include "query_optimizer/expressions/AttributeReference.hpp" #include "query_optimizer/expressions/ExpressionUtil.hpp" #include "query_optimizer/expressions/NamedExpression.hpp" +#include "query_optimizer/expressions/PatternMatcher.hpp" #include "query_optimizer/expressions/Predicate.hpp" #include "query_optimizer/physical/PartitionSchemeHeader.hpp" #include "query_optimizer/physical/Physical.hpp" #include "utility/Cast.hpp" +#include "glog/logging.h" + namespace quickstep { namespace optimizer { namespace physical { @@ -89,6 +93,22 @@ std::vector<E::AttributeReferencePtr> Aggregate::getReferencedAttributes() return referenced_attributes; } +PhysicalPtr Aggregate::copyWithNewProjectExpressions( + const std::vector<E::NamedExpressionPtr> &output_expressions) const { + DCHECK_EQ(aggregate_expressions_.size(), output_expressions.size()); + + std::vector<E::AliasPtr> new_aggregate_expressions; + new_aggregate_expressions.reserve(aggregate_expressions_.size()); + for (const auto &output_expression : output_expressions) { + DCHECK(E::SomeAlias::Matches(output_expression)); + + new_aggregate_expressions.push_back( + std::static_pointer_cast<const E::Alias>(output_expression)); + } + + return Create(input_, grouping_expressions_, new_aggregate_expressions, filter_predicate_); +} + void Aggregate::getFieldStringItems( std::vector<std::string> *inline_field_names, std::vector<std::string> *inline_field_values, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Aggregate.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/Aggregate.hpp b/query_optimizer/physical/Aggregate.hpp index 68f8811..736082c 100644 --- a/query_optimizer/physical/Aggregate.hpp +++ b/query_optimizer/physical/Aggregate.hpp @@ -103,6 +103,9 @@ class Aggregate : public Physical { has_repartition, partition_scheme_header); } + PhysicalPtr copyWithNewProjectExpressions( + const std::vector<expressions::NamedExpressionPtr> &output_expressions) const override; + bool maybeCopyWithPrunedExpressions( const expressions::UnorderedNamedExpressionSet &referenced_expressions, PhysicalPtr *output) const override { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt index a1a72f7..dff8f99 100644 --- a/query_optimizer/physical/CMakeLists.txt +++ b/query_optimizer/physical/CMakeLists.txt @@ -58,6 +58,7 @@ target_link_libraries(quickstep_queryoptimizer_physical_Aggregate quickstep_queryoptimizer_expressions_AttributeReference quickstep_queryoptimizer_expressions_ExpressionUtil quickstep_queryoptimizer_expressions_NamedExpression + quickstep_queryoptimizer_expressions_PatternMatcher quickstep_queryoptimizer_expressions_Predicate quickstep_queryoptimizer_physical_PartitionSchemeHeader quickstep_queryoptimizer_physical_Physical @@ -220,6 +221,7 @@ target_link_libraries(quickstep_queryoptimizer_physical_Physical quickstep_queryoptimizer_OptimizerTree quickstep_queryoptimizer_expressions_AttributeReference quickstep_queryoptimizer_expressions_ExpressionUtil + quickstep_queryoptimizer_expressions_NamedExpression quickstep_queryoptimizer_physical_PartitionSchemeHeader quickstep_queryoptimizer_physical_PhysicalType quickstep_utility_Macros) http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/HashJoin.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/HashJoin.cpp b/query_optimizer/physical/HashJoin.cpp index ca6bf9f..7ce80e8 100644 --- a/query_optimizer/physical/HashJoin.cpp +++ b/query_optimizer/physical/HashJoin.cpp @@ -34,6 +34,8 @@ namespace quickstep { namespace optimizer { namespace physical { +namespace E = ::quickstep::optimizer::expressions; + std::vector<expressions::AttributeReferencePtr> HashJoin::getReferencedAttributes() const { std::vector<expressions::AttributeReferencePtr> referenced_attributes; for (const expressions::NamedExpressionPtr &project_expression : @@ -67,6 +69,14 @@ std::vector<expressions::AttributeReferencePtr> HashJoin::getReferencedAttribute return referenced_attributes; } +PhysicalPtr HashJoin::copyWithNewProjectExpressions( + const std::vector<E::NamedExpressionPtr> &output_expressions) const { + DCHECK_EQ(project_expressions().size(), output_expressions.size()); + + return Create(left(), right(), left_join_attributes_, right_join_attributes_, + residual_predicate_, build_predicate_, output_expressions, join_type_); +} + bool HashJoin::maybeCopyWithPrunedExpressions( const expressions::UnorderedNamedExpressionSet &referenced_expressions, PhysicalPtr *output) const { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/HashJoin.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp index 1c341df..1f0df0c 100644 --- a/query_optimizer/physical/HashJoin.hpp +++ b/query_optimizer/physical/HashJoin.hpp @@ -141,6 +141,9 @@ class HashJoin : public BinaryJoin { has_repartition, partition_scheme_header); } + PhysicalPtr copyWithNewProjectExpressions( + const std::vector<expressions::NamedExpressionPtr> &output_expressions) const override; + bool maybeCopyWithPrunedExpressions( const expressions::UnorderedNamedExpressionSet &referenced_expressions, PhysicalPtr *output) const override; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/NestedLoopsJoin.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/NestedLoopsJoin.cpp b/query_optimizer/physical/NestedLoopsJoin.cpp index ba3d223..59e3b00 100644 --- a/query_optimizer/physical/NestedLoopsJoin.cpp +++ b/query_optimizer/physical/NestedLoopsJoin.cpp @@ -31,6 +31,8 @@ namespace quickstep { namespace optimizer { namespace physical { +namespace E = ::quickstep::optimizer::expressions; + std::vector<expressions::AttributeReferencePtr> NestedLoopsJoin::getReferencedAttributes() const { std::vector<expressions::AttributeReferencePtr> referenced_attributes; for (const expressions::NamedExpressionPtr &project_expression : @@ -49,6 +51,13 @@ std::vector<expressions::AttributeReferencePtr> NestedLoopsJoin::getReferencedAt return referenced_attributes; } +PhysicalPtr NestedLoopsJoin::copyWithNewProjectExpressions( + const std::vector<E::NamedExpressionPtr> &output_expressions) const { + DCHECK_EQ(project_expressions().size(), output_expressions.size()); + + return Create(left(), right(), join_predicate_, output_expressions); +} + bool NestedLoopsJoin::maybeCopyWithPrunedExpressions( const expressions::UnorderedNamedExpressionSet &referenced_expressions, PhysicalPtr *output) const { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/NestedLoopsJoin.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/NestedLoopsJoin.hpp b/query_optimizer/physical/NestedLoopsJoin.hpp index 0e0260c..87db5c3 100644 --- a/query_optimizer/physical/NestedLoopsJoin.hpp +++ b/query_optimizer/physical/NestedLoopsJoin.hpp @@ -81,6 +81,9 @@ class NestedLoopsJoin : public BinaryJoin { std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override; + PhysicalPtr copyWithNewProjectExpressions( + const std::vector<expressions::NamedExpressionPtr> &output_expressions) const override; + bool maybeCopyWithPrunedExpressions( const expressions::UnorderedNamedExpressionSet &referenced_expressions, PhysicalPtr *output) const override; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/PatternMatcher.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/PatternMatcher.hpp b/query_optimizer/physical/PatternMatcher.hpp index 0204504..8ae7da0 100644 --- a/query_optimizer/physical/PatternMatcher.hpp +++ b/query_optimizer/physical/PatternMatcher.hpp @@ -46,6 +46,7 @@ class SharedSubplanReference; class Sort; class TableReference; class TopLevelPlan; +class UnionAll; class UpdateTable; /** \addtogroup OptimizerPhysical @@ -127,6 +128,7 @@ using SomeSharedSubplanReference = SomePhysicalNode<SharedSubplanReference, Phys using SomeSort = SomePhysicalNode<Sort, PhysicalType::kSort>; using SomeTableReference = SomePhysicalNode<TableReference, PhysicalType::kTableReference>; using SomeTopLevelPlan = SomePhysicalNode<TopLevelPlan, PhysicalType::kTopLevelPlan>; +using SomeUnionAll = SomePhysicalNode<UnionAll, PhysicalType::kUnionAll>; using SomeUpdateTable = SomePhysicalNode<UpdateTable, PhysicalType::kUpdateTable>; /** @} */ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Physical.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/Physical.hpp b/query_optimizer/physical/Physical.hpp index 4491520..be5d282 100644 --- a/query_optimizer/physical/Physical.hpp +++ b/query_optimizer/physical/Physical.hpp @@ -26,6 +26,7 @@ #include "query_optimizer/OptimizerTree.hpp" #include "query_optimizer/expressions/AttributeReference.hpp" #include "query_optimizer/expressions/ExpressionUtil.hpp" +#include "query_optimizer/expressions/NamedExpression.hpp" #include "query_optimizer/physical/PartitionSchemeHeader.hpp" #include "query_optimizer/physical/PhysicalType.hpp" #include "utility/Macros.hpp" @@ -107,6 +108,11 @@ class Physical : public OptimizerTree<Physical> { LOG(FATAL) << "copyWithNewOutputPartitionSchemeHeader is not implemented for " << getName(); } + virtual PhysicalPtr copyWithNewProjectExpressions( + const std::vector<expressions::NamedExpressionPtr> &output_expressions) const { + LOG(FATAL) << "copyWithNewProjectExpressions is not implemented for " << getName(); + } + /** * @brief Get the partition scheme of the physical plan node. * http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Selection.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/Selection.cpp b/query_optimizer/physical/Selection.cpp index b2b3f7e..bd155ea 100644 --- a/query_optimizer/physical/Selection.cpp +++ b/query_optimizer/physical/Selection.cpp @@ -72,6 +72,13 @@ std::vector<E::AttributeReferencePtr> Selection::getReferencedAttributes() const return referenced_attributes; } +PhysicalPtr Selection::copyWithNewProjectExpressions( + const std::vector<E::NamedExpressionPtr> &output_expressions) const { + DCHECK_EQ(project_expressions_.size(), output_expressions.size()); + + return Create(input(), output_expressions, filter_predicate_); +} + bool Selection::maybeCopyWithPrunedExpressions( const E::UnorderedNamedExpressionSet &referenced_attributes, PhysicalPtr *output) const { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/Selection.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/Selection.hpp b/query_optimizer/physical/Selection.hpp index 2b50061..fb1fa0b 100644 --- a/query_optimizer/physical/Selection.hpp +++ b/query_optimizer/physical/Selection.hpp @@ -91,6 +91,9 @@ class Selection : public Physical { has_repartition, partition_scheme_header); } + PhysicalPtr copyWithNewProjectExpressions( + const std::vector<expressions::NamedExpressionPtr> &output_expressions) const override; + bool maybeCopyWithPrunedExpressions( const expressions::UnorderedNamedExpressionSet &referenced_attributes, PhysicalPtr *output) const override; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/physical/UnionAll.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/UnionAll.hpp b/query_optimizer/physical/UnionAll.hpp index 939249f..943c3c3 100644 --- a/query_optimizer/physical/UnionAll.hpp +++ b/query_optimizer/physical/UnionAll.hpp @@ -132,6 +132,10 @@ class UnionAll : public Physical { return true; } + const std::vector<expressions::AttributeReferencePtr>& project_attributes() const { + return project_attributes_; + } + /** * @brief Creates the physical node of UnionAll. * http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/rules/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt index f9cd51c..17ef695 100644 --- a/query_optimizer/rules/CMakeLists.txt +++ b/query_optimizer/rules/CMakeLists.txt @@ -22,6 +22,7 @@ add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp) add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp) add_library(quickstep_queryoptimizer_rules_CollapseSelection CollapseSelection.cpp CollapseSelection.hpp) +add_library(quickstep_queryoptimizer_rules_EliminateEmptyNode EliminateEmptyNode.cpp EliminateEmptyNode.hpp) add_library(quickstep_queryoptimizer_rules_ExtractCommonSubexpression ExtractCommonSubexpression.cpp ExtractCommonSubexpression.hpp) @@ -101,6 +102,28 @@ target_link_libraries(quickstep_queryoptimizer_rules_CollapseSelection quickstep_queryoptimizer_rules_BottomUpRule quickstep_queryoptimizer_rules_RuleHelper quickstep_utility_Macros) +target_link_libraries(quickstep_queryoptimizer_rules_EliminateEmptyNode + quickstep_catalog_CatalogAttribute + quickstep_catalog_CatalogDatabase + quickstep_catalog_CatalogRelation + quickstep_catalog_CatalogRelationStatistics + quickstep_queryoptimizer_OptimizerContext + quickstep_queryoptimizer_expressions_Alias + quickstep_queryoptimizer_expressions_AttributeReference + quickstep_queryoptimizer_expressions_ExpressionUtil + quickstep_queryoptimizer_expressions_NamedExpression + quickstep_queryoptimizer_expressions_PatternMatcher + quickstep_queryoptimizer_physical_Aggregate + quickstep_queryoptimizer_physical_HashJoin + quickstep_queryoptimizer_physical_PatternMatcher + quickstep_queryoptimizer_physical_Physical + quickstep_queryoptimizer_physical_PhysicalType + quickstep_queryoptimizer_physical_Selection + quickstep_queryoptimizer_physical_TableReference + quickstep_queryoptimizer_physical_TopLevelPlan + quickstep_queryoptimizer_physical_UnionAll + quickstep_queryoptimizer_rules_Rule + quickstep_utility_Macros) target_link_libraries(quickstep_queryoptimizer_rules_ExtractCommonSubexpression glog quickstep_queryoptimizer_OptimizerContext @@ -434,6 +457,7 @@ target_link_libraries(quickstep_queryoptimizer_rules quickstep_queryoptimizer_rules_BottomUpRule quickstep_queryoptimizer_rules_CollapseProject quickstep_queryoptimizer_rules_CollapseSelection + quickstep_queryoptimizer_rules_EliminateEmptyNode quickstep_queryoptimizer_rules_ExtractCommonSubexpression quickstep_queryoptimizer_rules_FuseAggregateJoin quickstep_queryoptimizer_rules_FuseHashSelect http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/rules/EliminateEmptyNode.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/EliminateEmptyNode.cpp b/query_optimizer/rules/EliminateEmptyNode.cpp new file mode 100644 index 0000000..716a167 --- /dev/null +++ b/query_optimizer/rules/EliminateEmptyNode.cpp @@ -0,0 +1,358 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#include "query_optimizer/rules/EliminateEmptyNode.hpp" + +#include <cstddef> +#include <memory> +#include <sstream> +#include <string> +#include <unordered_set> +#include <vector> + +#include "catalog/CatalogAttribute.hpp" +#include "catalog/CatalogDatabase.hpp" +#include "catalog/CatalogRelation.hpp" +#include "catalog/CatalogRelationStatistics.hpp" +#include "query_optimizer/OptimizerContext.hpp" +#include "query_optimizer/expressions/Alias.hpp" +#include "query_optimizer/expressions/AttributeReference.hpp" +#include "query_optimizer/expressions/ExpressionUtil.hpp" +#include "query_optimizer/expressions/NamedExpression.hpp" +#include "query_optimizer/expressions/PatternMatcher.hpp" +#include "query_optimizer/physical/Aggregate.hpp" +#include "query_optimizer/physical/HashJoin.hpp" +#include "query_optimizer/physical/PatternMatcher.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/physical/PhysicalType.hpp" +#include "query_optimizer/physical/Selection.hpp" +#include "query_optimizer/physical/TableReference.hpp" +#include "query_optimizer/physical/TopLevelPlan.hpp" +#include "query_optimizer/physical/UnionAll.hpp" + +#include "glog/logging.h" + +namespace quickstep { +namespace optimizer { + +namespace E = expressions; +namespace P = physical; + +namespace { + +std::string GetNewRelationName(const std::size_t id) { + std::ostringstream out; + out << OptimizerContext::kInternalTemporaryRelationNamePrefix << id; + return out.str(); +} + +bool IsTableReferenceOnEmptyRelation(const P::PhysicalPtr &node) { + P::TableReferencePtr table_reference; + if (!P::SomeTableReference::MatchesWithConditionalCast(node, &table_reference)) { + return false; + } + + const CatalogRelationStatistics &stat = table_reference->relation()->getStatistics(); + return stat.isExact() && (stat.getNumTuples() == 0u); +} + +P::PhysicalPtr ApplyToHashJoin(const P::HashJoinPtr &hash_join) { + const P::PhysicalPtr &left = hash_join->left(); + if (IsTableReferenceOnEmptyRelation(left)) { + return nullptr; + } + + const P::PhysicalPtr &right = hash_join->right(); + switch (hash_join->join_type()) { + case P::HashJoin::JoinType::kInnerJoin: + case P::HashJoin::JoinType::kLeftSemiJoin: + if (IsTableReferenceOnEmptyRelation(right)) { + return nullptr; + } + break; + case P::HashJoin::JoinType::kLeftAntiJoin: { + const auto &project_expressions = hash_join->project_expressions(); +#ifdef QUICKSTEP_DEBUG + const auto left_output_attributes = left->getOutputAttributes(); + if (!E::SubsetOfExpressions(project_expressions, left_output_attributes)) { + std::vector<E::NamedExpressionPtr> non_alias_expressions, alias_expressions; + for (const auto &project_expression : project_expressions) { + if (E::SomeAlias::Matches(project_expression)) { + for (const auto referenced_attribute : project_expression->getReferencedAttributes()) { + alias_expressions.push_back(referenced_attribute); + } + } else { + non_alias_expressions.push_back(project_expression); + } + } + + CHECK(E::SubsetOfExpressions(non_alias_expressions, left_output_attributes)); + CHECK(E::SubsetOfExpressions(alias_expressions, left_output_attributes)); + } +#endif + + if (IsTableReferenceOnEmptyRelation(right)) { + if (hash_join->residual_predicate()) { + return nullptr; + } + return P::Selection::Create(left, project_expressions, nullptr); + } + break; + } + case P::HashJoin::JoinType::kLeftOuterJoin: + if (IsTableReferenceOnEmptyRelation(right)) { + if (hash_join->residual_predicate()) { + return nullptr; + } + + const auto &project_expressions = hash_join->project_expressions(); + if (E::SubsetOfExpressions(project_expressions, left->getOutputAttributes())) { + return P::Selection::Create(left, project_expressions, nullptr); + } + } + break; + } + + return hash_join; +} + +P::PhysicalPtr ApplyToNode(const P::PhysicalPtr &node) { + switch (node->getPhysicalType()) { + case P::PhysicalType::kHashJoin: + return ApplyToHashJoin(std::static_pointer_cast<const P::HashJoin>(node)); + case P::PhysicalType::kTableReference: + if (IsTableReferenceOnEmptyRelation(node)) { + return nullptr; + } + break; + default: + break; + } + + return node; +} + + +P::PhysicalPtr CopyWithNewProjectExpressions(const P::UnionAllPtr &union_all, + const P::PhysicalPtr &child) { + const auto &project_attributes = union_all->project_attributes(); + + std::vector<E::NamedExpressionPtr> alias_expressions; + P::AggregatePtr aggregate; + if (P::SomeAggregate::MatchesWithConditionalCast(child, &aggregate)) { + const auto &aggregate_expressions = aggregate->aggregate_expressions(); + alias_expressions.reserve(aggregate_expressions.size()); + + int aid = 0; + for (const auto &project_attribute : project_attributes) { + const auto alias_referenced_attributes = + aggregate_expressions[aid]->getReferencedAttributes(); + DCHECK_EQ(1u, alias_referenced_attributes.size()); + + alias_expressions.emplace_back(E::Alias::Create( + project_attribute->id(), + alias_referenced_attributes.front(), + project_attribute->attribute_name(), + project_attribute->attribute_alias(), + project_attribute->relation_name())); + ++aid; + } + } else { + const auto child_output_attributes = child->getOutputAttributes(); + alias_expressions.reserve(child_output_attributes.size()); + + int aid = 0; + for (const auto &project_attribute : project_attributes) { + alias_expressions.emplace_back(E::Alias::Create( + project_attribute->id(), + child_output_attributes[aid], + project_attribute->attribute_name(), + project_attribute->attribute_alias(), + project_attribute->relation_name())); + ++aid; + } + } + + return child->copyWithNewProjectExpressions(alias_expressions); +} + +// TopDown approach. +P::PhysicalPtr Apply(const P::PhysicalPtr &node) { + DCHECK(node); + + const auto new_node = ApplyToNode(node); + if (new_node == nullptr) { + return nullptr; + } + + std::vector<P::PhysicalPtr> new_children; + bool has_changed_children = false; + for (const auto &child : new_node->children()) { + const auto new_child = Apply(child); + if (new_child == nullptr) { + if (P::SomeUnionAll::Matches(node)) { + has_changed_children = true; + continue; + } + + return nullptr; + } else if (child != new_child && !has_changed_children) { + has_changed_children = true; + } + new_children.push_back(new_child); + } + + if (has_changed_children) { + if (P::SomeUnionAll::Matches(node)) { + switch (new_children.size()) { + case 0u: + return nullptr; + case 1u: + return CopyWithNewProjectExpressions( + std::static_pointer_cast<const P::UnionAll>(node), new_children.front()); + default: + break; + } + } + + DCHECK(!new_children.empty()); + return new_node->copyWithNewChildren(new_children); + } else { + return new_node; + } +} + +} // namespace + +P::PhysicalPtr EliminateEmptyNode::apply(const P::PhysicalPtr &input) { + DCHECK(P::SomeTopLevelPlan::Matches(input)); + const P::TopLevelPlanPtr &top_level_plan = + std::static_pointer_cast<const P::TopLevelPlan>(input); + + // TODO(quickstep-team): handle subquery. + if (!top_level_plan->shared_subplans().empty()) { + return input; + } + + const auto &plan = top_level_plan->plan(); + + P::SelectionPtr selection; + if (P::SomeSelection::MatchesWithConditionalCast(plan, &selection) && + P::SomeTableReference::Matches(selection->input())) { + return input; + } + + const P::PhysicalType plan_type = plan->getPhysicalType(); + std::vector<E::AttributeReferencePtr> project_expressions; + switch (plan_type) { + case P::PhysicalType::kAggregate: + case P::PhysicalType::kCrossReferenceCoalesceAggregate: + case P::PhysicalType::kFilterJoin: + case P::PhysicalType::kHashJoin: + case P::PhysicalType::kNestedLoopsJoin: + case P::PhysicalType::kSample: + case P::PhysicalType::kSelection: + case P::PhysicalType::kSort: + case P::PhysicalType::kUnionAll: + case P::PhysicalType::kWindowAggregate: + project_expressions = plan->getOutputAttributes(); + break; + case P::PhysicalType::kCopyTo: + case P::PhysicalType::kInsertSelection: + project_expressions = plan->getReferencedAttributes(); + break; + case P::PhysicalType::kCopyFrom: + case P::PhysicalType::kCreateIndex: + case P::PhysicalType::kCreateTable: + case P::PhysicalType::kDeleteTuples: + case P::PhysicalType::kDropTable: + case P::PhysicalType::kInsertTuple: + case P::PhysicalType::kSharedSubplanReference: + case P::PhysicalType::kTableGenerator: + case P::PhysicalType::kUpdateTable: + // TODO(quickstep-team): revisit the cases above. + return input; + case P::PhysicalType::kTableReference: + case P::PhysicalType::kTopLevelPlan: + LOG(FATAL) << "Unexpected PhysicalType."; + } + DCHECK(!project_expressions.empty()); + + auto output = Apply(plan); + if (output == plan) { + return input; + } + + if (output) { + return input->copyWithNewChildren({output}); + } + + auto catalog_relation = std::make_unique<CatalogRelation>( + catalog_database_, GetNewRelationName(catalog_database_->size()), -1, true); + + attribute_id aid = 0; + std::unordered_set<std::string> relation_name_alias; + for (const auto &project_expression : project_expressions) { + // The attribute name is simply set to the attribute id to make it distinct. + auto catalog_attribute = std::make_unique<CatalogAttribute>( + catalog_relation.get(), project_expression->attribute_name(), + project_expression->getValueType(), aid, + project_expression->attribute_alias()); + catalog_relation->addAttribute(catalog_attribute.release()); + ++aid; + + relation_name_alias.insert(project_expression->relation_name()); + } + + // TODO(quickstep-team): use Nop physical node. + const auto temp_table = + P::TableReference::Create(catalog_relation.get(), + relation_name_alias.size() == 1u ? *relation_name_alias.begin() : "", + project_expressions); + catalog_database_->addRelation(catalog_relation.release()); + + switch (plan_type) { + case P::PhysicalType::kAggregate: + case P::PhysicalType::kCrossReferenceCoalesceAggregate: + case P::PhysicalType::kFilterJoin: + case P::PhysicalType::kHashJoin: + case P::PhysicalType::kNestedLoopsJoin: + case P::PhysicalType::kSample: + case P::PhysicalType::kSelection: + case P::PhysicalType::kSort: + case P::PhysicalType::kUnionAll: + case P::PhysicalType::kWindowAggregate: + output = P::Selection::Create(temp_table, E::ToNamedExpressions(project_expressions), nullptr); + break; + case P::PhysicalType::kCopyTo: + output = plan->copyWithNewChildren({ temp_table }); + break; + case P::PhysicalType::kInsertSelection: + output = plan->copyWithNewChildren({ plan->children().front(), temp_table }); + break; + default: + LOG(FATAL) << "Unexpected PhysicalType."; + } + + DCHECK(output); + return input->copyWithNewChildren({output}); +} + +} // namespace optimizer +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/rules/EliminateEmptyNode.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/EliminateEmptyNode.hpp b/query_optimizer/rules/EliminateEmptyNode.hpp new file mode 100644 index 0000000..1e8f6fd --- /dev/null +++ b/query_optimizer/rules/EliminateEmptyNode.hpp @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_ELIMINATE_EMPTY_NODE_HPP_ +#define QUICKSTEP_QUERY_OPTIMIZER_RULES_ELIMINATE_EMPTY_NODE_HPP_ + +#include <string> + +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/rules/Rule.hpp" +#include "utility/Macros.hpp" + +namespace quickstep { + +class CatalogDatabase; + +namespace optimizer { + +/** \addtogroup OptimizerRules + * @{ + */ + +/** + * @brief Rule that applies to a physical plan to eliminate a node with an empty + * TableReference to a Selection on a temp table. + */ +class EliminateEmptyNode : public Rule<physical::Physical> { + public: + /** + * @brief Constructor. + */ + explicit EliminateEmptyNode(CatalogDatabase *catalog_database) + : catalog_database_(catalog_database) {} + + ~EliminateEmptyNode() override {} + + std::string getName() const override { + return "EliminateEmptyNode"; + } + + physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override; + + private: + CatalogDatabase *catalog_database_; + + DISALLOW_COPY_AND_ASSIGN(EliminateEmptyNode); +}; + +/** @} */ + +} // namespace optimizer +} // namespace quickstep + +#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_ELIMINATE_EMPTY_NODE_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/tests/OptimizerTextTestRunner.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/tests/OptimizerTextTestRunner.cpp b/query_optimizer/tests/OptimizerTextTestRunner.cpp index cb8f153..cea529d 100644 --- a/query_optimizer/tests/OptimizerTextTestRunner.cpp +++ b/query_optimizer/tests/OptimizerTextTestRunner.cpp @@ -129,7 +129,8 @@ physical::PhysicalPtr OptimizerTextTestRunner::generatePhysicalPlan( const logical::LogicalPtr &logical_plan, OptimizerContext *optimizer_context) { PhysicalGenerator physical_generator(optimizer_context); - return physical_generator.generatePlan(logical_plan); + return physical_generator.generatePlan(logical_plan, + test_database_loader_.catalog_database()); } } // namespace optimizer http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/cc9ab901/query_optimizer/tests/TestDatabaseLoader.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/tests/TestDatabaseLoader.cpp b/query_optimizer/tests/TestDatabaseLoader.cpp index f40a3e0..45bb6d3 100644 --- a/query_optimizer/tests/TestDatabaseLoader.cpp +++ b/query_optimizer/tests/TestDatabaseLoader.cpp @@ -51,7 +51,7 @@ CatalogRelation *TestDatabaseLoader::createTestRelation(bool allow_vchar) { catalog_relation.reset(new CatalogRelation(&catalog_database_, "Test" /* name */, 0 /* id */, - true /* temporary */)); + false /* temporary */)); int attr_id = -1; catalog_relation->addAttribute(new CatalogAttribute(catalog_relation.get(), "int_col" /* name */,