Repository: incubator-quickstep Updated Branches: refs/heads/common-subexpression b0acc9ce9 -> c27f5beb8
Updates Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/c27f5beb Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/c27f5beb Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/c27f5beb Branch: refs/heads/common-subexpression Commit: c27f5beb8d473895dfa637c699590f4d4a2408cc Parents: b0acc9c Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Wed Apr 19 13:45:01 2017 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Wed Apr 19 13:45:01 2017 -0500 ---------------------------------------------------------------------- query_optimizer/CMakeLists.txt | 4 +- query_optimizer/PhysicalGenerator.cpp | 10 +- query_optimizer/rules/CMakeLists.txt | 49 +++- query_optimizer/rules/CollapseSelection.cpp | 57 ++++ query_optimizer/rules/CollapseSelection.hpp | 59 +++++ .../rules/CommonSubexpressionExtraction.cpp | 264 ------------------- .../rules/CommonSubexpressionExtraction.hpp | 135 ---------- .../rules/ExtractCommonSubexpression.cpp | 264 +++++++++++++++++++ .../rules/ExtractCommonSubexpression.hpp | 135 ++++++++++ .../rules/ReuseAggregateExpressions.cpp | 235 +++++++++++++++++ .../rules/ReuseAggregateExpressions.hpp | 95 +++++++ 11 files changed, 900 insertions(+), 407 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt index 9e5a2c8..c969f16 100644 --- a/query_optimizer/CMakeLists.txt +++ b/query_optimizer/CMakeLists.txt @@ -214,13 +214,15 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator quickstep_queryoptimizer_logical_Logical quickstep_queryoptimizer_physical_Physical quickstep_queryoptimizer_rules_AttachLIPFilters - quickstep_queryoptimizer_rules_CommonSubexpressionExtraction + quickstep_queryoptimizer_rules_CollapseSelection + quickstep_queryoptimizer_rules_ExtractCommonSubexpression quickstep_queryoptimizer_rules_FuseAggregateJoin quickstep_queryoptimizer_rules_InjectJoinFilters quickstep_queryoptimizer_rules_PruneColumns quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate quickstep_queryoptimizer_rules_ReduceGroupByAttributes quickstep_queryoptimizer_rules_ReorderColumns + quickstep_queryoptimizer_rules_ReuseAggregateExpressions quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization quickstep_queryoptimizer_rules_SwapProbeBuild quickstep_queryoptimizer_strategy_Aggregate http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/PhysicalGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp index 34deb76..427c365 100644 --- a/query_optimizer/PhysicalGenerator.cpp +++ b/query_optimizer/PhysicalGenerator.cpp @@ -27,13 +27,15 @@ #include "query_optimizer/logical/Logical.hpp" #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/rules/AttachLIPFilters.hpp" -#include "query_optimizer/rules/CommonSubexpressionExtraction.hpp" +#include "query_optimizer/rules/CollapseSelection.hpp" +#include "query_optimizer/rules/ExtractCommonSubexpression.hpp" #include "query_optimizer/rules/FuseAggregateJoin.hpp" #include "query_optimizer/rules/InjectJoinFilters.hpp" #include "query_optimizer/rules/PruneColumns.hpp" #include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp" #include "query_optimizer/rules/ReduceGroupByAttributes.hpp" #include "query_optimizer/rules/ReorderColumns.hpp" +#include "query_optimizer/rules/ReuseAggregateExpressions.hpp" #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp" #include "query_optimizer/rules/SwapProbeBuild.hpp" #include "query_optimizer/strategy/Aggregate.hpp" @@ -147,9 +149,13 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() { rules.emplace_back(new ReorderColumns()); } + rules.emplace_back(new ReuseAggregateExpressions()); + rules.emplace_back(new FuseAggregateJoin()); - rules.emplace_back(new CommonSubexpressionExtraction(optimizer_context_)); + rules.emplace_back(new CollapseSelection()); + + rules.emplace_back(new ExtractCommonSubexpression(optimizer_context_)); // NOTE(jianqiao): Adding rules after InjectJoinFilters (or AttachLIPFilters) requires // extra handling of LIPFilterConfiguration for transformed nodes. So currently it is http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt index b2d0f90..dcc2072 100644 --- a/query_optimizer/rules/CMakeLists.txt +++ b/query_optimizer/rules/CMakeLists.txt @@ -21,9 +21,10 @@ add_subdirectory(tests) add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp AttachLIPFilters.hpp) add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp) add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp) -add_library(quickstep_queryoptimizer_rules_CommonSubexpressionExtraction - CommonSubexpressionExtraction.cpp - CommonSubexpressionExtraction.hpp) +add_library(quickstep_queryoptimizer_rules_CollapseSelection CollapseSelection.cpp CollapseSelection.hpp) +add_library(quickstep_queryoptimizer_rules_ExtractCommonSubexpression + ExtractCommonSubexpression.cpp + ExtractCommonSubexpression.hpp) add_library(quickstep_queryoptimizer_rules_FuseAggregateJoin FuseAggregateJoin.cpp FuseAggregateJoin.hpp) add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp) add_library(quickstep_queryoptimizer_rules_InjectJoinFilters InjectJoinFilters.cpp InjectJoinFilters.hpp) @@ -37,6 +38,9 @@ add_library(quickstep_queryoptimizer_rules_ReduceGroupByAttributes ReduceGroupByAttributes.cpp ReduceGroupByAttributes.hpp) add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp ReorderColumns.hpp) +add_library(quickstep_queryoptimizer_rules_ReuseAggregateExpressions + ReuseAggregateExpressions.cpp + ReuseAggregateExpressions.hpp) add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp) add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp RuleHelper.hpp) add_library(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization @@ -80,7 +84,15 @@ target_link_libraries(quickstep_queryoptimizer_rules_CollapseProject quickstep_queryoptimizer_rules_Rule quickstep_queryoptimizer_rules_RuleHelper quickstep_utility_Macros) -target_link_libraries(quickstep_queryoptimizer_rules_CommonSubexpressionExtraction +target_link_libraries(quickstep_queryoptimizer_rules_CollapseSelection + quickstep_queryoptimizer_expressions_NamedExpression + quickstep_queryoptimizer_physical_PatternMatcher + quickstep_queryoptimizer_physical_Physical + quickstep_queryoptimizer_physical_Selection + quickstep_queryoptimizer_rules_BottomUpRule + quickstep_queryoptimizer_rules_RuleHelper + quickstep_utility_Macros) +target_link_libraries(quickstep_queryoptimizer_rules_ExtractCommonSubexpression glog quickstep_queryoptimizer_OptimizerContext quickstep_queryoptimizer_expressions_Alias @@ -220,6 +232,31 @@ target_link_libraries(quickstep_queryoptimizer_rules_ReorderColumns quickstep_queryoptimizer_physical_TableReference quickstep_queryoptimizer_rules_Rule quickstep_utility_Macros) +target_link_libraries(quickstep_queryoptimizer_rules_ReuseAggregateExpressions + glog + quickstep_expressions_aggregation_AggregateFunction + quickstep_expressions_aggregation_AggregationID + quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel + quickstep_queryoptimizer_expressions_AggregateFunction + quickstep_queryoptimizer_expressions_Alias + quickstep_queryoptimizer_expressions_AttributeReference + quickstep_queryoptimizer_expressions_BinaryExpression + quickstep_queryoptimizer_expressions_ExpressionType + quickstep_queryoptimizer_expressions_ExpressionUtil + quickstep_queryoptimizer_expressions_NamedExpression + quickstep_queryoptimizer_expressions_Scalar + quickstep_queryoptimizer_physical_Aggregate + quickstep_queryoptimizer_physical_PatternMatcher + quickstep_queryoptimizer_physical_Physical + quickstep_queryoptimizer_physical_PhysicalType + quickstep_queryoptimizer_physical_Selection + quickstep_queryoptimizer_physical_TopLevelPlan + quickstep_queryoptimizer_rules_BottomUpRule + quickstep_types_operations_binaryoperations_BinaryOperation + quickstep_types_operations_binaryoperations_BinaryOperationFactory + quickstep_types_operations_binaryoperations_BinaryOperationID + quickstep_utility_EqualsAnyConstant + quickstep_utility_Macros) target_link_libraries(quickstep_queryoptimizer_rules_Rule glog quickstep_utility_Macros) @@ -331,7 +368,8 @@ target_link_libraries(quickstep_queryoptimizer_rules quickstep_queryoptimizer_rules_AttachLIPFilters quickstep_queryoptimizer_rules_BottomUpRule quickstep_queryoptimizer_rules_CollapseProject - quickstep_queryoptimizer_rules_CommonSubexpressionExtraction + quickstep_queryoptimizer_rules_CollapseSelection + quickstep_queryoptimizer_rules_ExtractCommonSubexpression quickstep_queryoptimizer_rules_FuseAggregateJoin quickstep_queryoptimizer_rules_GenerateJoins quickstep_queryoptimizer_rules_InjectJoinFilters @@ -341,6 +379,7 @@ target_link_libraries(quickstep_queryoptimizer_rules quickstep_queryoptimizer_rules_PushDownSemiAntiJoin quickstep_queryoptimizer_rules_ReduceGroupByAttributes quickstep_queryoptimizer_rules_ReorderColumns + quickstep_queryoptimizer_rules_ReuseAggregateExpressions quickstep_queryoptimizer_rules_Rule quickstep_queryoptimizer_rules_RuleHelper quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CollapseSelection.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CollapseSelection.cpp b/query_optimizer/rules/CollapseSelection.cpp new file mode 100644 index 0000000..a03faa5 --- /dev/null +++ b/query_optimizer/rules/CollapseSelection.cpp @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#include "query_optimizer/rules/CollapseSelection.hpp" + +#include <vector> + +#include "query_optimizer/expressions/NamedExpression.hpp" +#include "query_optimizer/physical/PatternMatcher.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/physical/Selection.hpp" +#include "query_optimizer/rules/RuleHelper.hpp" + +namespace quickstep { +namespace optimizer { + +namespace E = ::quickstep::optimizer::expressions; +namespace P = ::quickstep::optimizer::physical; + +P::PhysicalPtr CollapseSelection::applyToNode(const P::PhysicalPtr &input) { + P::SelectionPtr selection; + P::SelectionPtr child_selection; + + if (P::SomeSelection::MatchesWithConditionalCast(input, &selection) && + P::SomeSelection::MatchesWithConditionalCast(selection->input(), &child_selection) && + child_selection->filter_predicate() == nullptr) { + std::vector<E::NamedExpressionPtr> project_expressions = + selection->project_expressions(); + PullUpProjectExpressions(child_selection->project_expressions(), + {} /* non_project_expression_lists */, + {&project_expressions} /* project_expression_lists */); + return P::Selection::Create(child_selection->input(), + project_expressions, + selection->filter_predicate()); + } + + return input; +} + +} // namespace optimizer +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CollapseSelection.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CollapseSelection.hpp b/query_optimizer/rules/CollapseSelection.hpp new file mode 100644 index 0000000..2417574 --- /dev/null +++ b/query_optimizer/rules/CollapseSelection.hpp @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_COLLAPSE_SELECTION_HPP_ +#define QUICKSTEP_QUERY_OPTIMIZER_RULES_COLLAPSE_SELECTION_HPP_ + +#include <string> + +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/rules/BottomUpRule.hpp" +#include "utility/Macros.hpp" + +namespace quickstep { +namespace optimizer { + +/** \addtogroup OptimizerRules + * @{ + */ + +/** + * @brief Merges nested Selections into one single Selection. + */ +class CollapseSelection : public BottomUpRule<physical::Physical> { + public: + CollapseSelection() {} + + std::string getName() const override { + return "CollapseSelection"; + } + + protected: + physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &input) override; + + private: + DISALLOW_COPY_AND_ASSIGN(CollapseSelection); +}; + +/** @} */ + +} // namespace optimizer +} // namespace quickstep + +#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_COLLAPSE_SELECTION_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CommonSubexpressionExtraction.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CommonSubexpressionExtraction.cpp b/query_optimizer/rules/CommonSubexpressionExtraction.cpp deleted file mode 100644 index b06d9fc..0000000 --- a/query_optimizer/rules/CommonSubexpressionExtraction.cpp +++ /dev/null @@ -1,264 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#include "query_optimizer/rules/CommonSubexpressionExtraction.hpp" - -#include <memory> -#include <unordered_set> -#include <vector> - -#include "query_optimizer/OptimizerContext.hpp" -#include "query_optimizer/expressions/Alias.hpp" -#include "query_optimizer/expressions/CommonSubexpression.hpp" -#include "query_optimizer/expressions/ExpressionType.hpp" -#include "query_optimizer/expressions/NamedExpression.hpp" -#include "query_optimizer/expressions/PatternMatcher.hpp" -#include "query_optimizer/expressions/Scalar.hpp" -#include "query_optimizer/physical/Aggregate.hpp" -#include "query_optimizer/physical/Physical.hpp" -#include "query_optimizer/physical/PhysicalType.hpp" -#include "query_optimizer/physical/Selection.hpp" -#include "utility/HashError.hpp" - -#include "glog/logging.h" - -namespace quickstep { -namespace optimizer { - -namespace E = ::quickstep::optimizer::expressions; -namespace P = ::quickstep::optimizer::physical; - -CommonSubexpressionExtraction::CommonSubexpressionExtraction( - OptimizerContext *optimizer_context) - : optimizer_context_(optimizer_context) { - const std::vector<E::ExpressionType> whitelist = { - E::ExpressionType::kAlias, - E::ExpressionType::kAttributeReference, - E::ExpressionType::kAggregateFunction, - E::ExpressionType::kBinaryExpression, - E::ExpressionType::kCast, - E::ExpressionType::kCommonSubexpression, - E::ExpressionType::kScalarLiteral, - E::ExpressionType::kUnaryExpression - }; - - for (const auto &expr_type : whitelist) { - homogeneous_whitelist_.emplace(static_cast<int>(expr_type)); - } -} - -P::PhysicalPtr CommonSubexpressionExtraction::apply(const P::PhysicalPtr &input) { - DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan); - - return applyInternal(input); -} - -P::PhysicalPtr CommonSubexpressionExtraction::applyInternal( - const P::PhysicalPtr &input) { - std::vector<P::PhysicalPtr> new_children; - for (const auto &child : input->children()) { - new_children.emplace_back(applyInternal(child)); - } - - const P::PhysicalPtr node = - new_children == input->children() - ? input - : input->copyWithNewChildren(new_children); - - switch (node->getPhysicalType()) { - case P::PhysicalType::kSelection: { - const P::SelectionPtr selection = - std::static_pointer_cast<const P::Selection>(node); - - const std::vector<E::NamedExpressionPtr> new_expressions = - DownCast<E::NamedExpression>( - transformExpressions(UpCast(selection->project_expressions()))); - - if (new_expressions != selection->project_expressions()) { - return P::Selection::Create(selection->input(), - new_expressions, - selection->filter_predicate()); - } - break; - } - case P::PhysicalType::kAggregate: { - const P::AggregatePtr aggregate = - std::static_pointer_cast<const P::Aggregate>(node); - std::vector<E::ExpressionPtr> expressions = - UpCast(aggregate->aggregate_expressions()); - for (const auto &expr : aggregate->grouping_expressions()) { - expressions.emplace_back(expr); - } - - const std::vector<E::ExpressionPtr> new_expressions = - transformExpressions(expressions); - - if (new_expressions != expressions) { - std::vector<E::AliasPtr> new_aggregate_expressions; - std::vector<E::NamedExpressionPtr> new_grouping_expressions; - const std::size_t num_aggrs = aggregate->aggregate_expressions().size(); - - for (std::size_t i = 0; i < num_aggrs; ++i) { - DCHECK(E::SomeAlias::Matches(new_expressions[i])); - new_aggregate_expressions.emplace_back( - std::static_pointer_cast<const E::Alias>(new_expressions[i])); - } - for (std::size_t i = num_aggrs; i < new_expressions.size(); ++i) { - DCHECK(E::SomeNamedExpression::Matches(new_expressions[i])); - new_grouping_expressions.emplace_back( - std::static_pointer_cast<const E::NamedExpression>(new_expressions[i])); - } - return P::Aggregate::Create(aggregate->input(), - new_grouping_expressions, - new_aggregate_expressions, - aggregate->filter_predicate()); - } - break; - } - default: - break; - } - - return node; -} - -std::vector<E::ExpressionPtr> CommonSubexpressionExtraction::transformExpressions( - const std::vector<E::ExpressionPtr> &expressions) { - ScalarCounter counter; - ScalarHashable hashable; - for (const auto &expr : expressions) { - visitAndCount(expr, &counter, &hashable); - } - - ScalarMap substitution_map; - std::vector<E::ExpressionPtr> new_expressions; - for (const auto &expr : expressions) { - new_expressions.emplace_back( - visitAndTransform(expr, 1, counter, hashable, &substitution_map)); - } - return new_expressions; -} - -E::ExpressionPtr CommonSubexpressionExtraction::transformExpression( - const E::ExpressionPtr &expression) { - return transformExpressions({expression}).front(); -} - -bool CommonSubexpressionExtraction::visitAndCount( - const E::ExpressionPtr &expression, - ScalarCounter *counter, - ScalarHashable *hashable) { - bool children_hashable = true; - - const auto homogeneous_whitelist_it = - homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType())); - if (homogeneous_whitelist_it != homogeneous_whitelist_.end()) { - for (const auto &child : expression->children()) { - children_hashable &= visitAndCount(child, counter, hashable); - } - } - - E::ScalarPtr scalar; - if (children_hashable && - E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)) { - try { - ++(*counter)[scalar]; - } catch (const HashNotSupported &e) { - return false; - } - hashable->emplace(scalar); - return true; - } - return false; -} - -E::ExpressionPtr CommonSubexpressionExtraction::visitAndTransform( - const E::ExpressionPtr &expression, - const std::size_t max_reference_count, - const ScalarCounter &counter, - const ScalarHashable &hashable, - ScalarMap *substitution_map) { - // TODO: figure out whether it is beneficial. - if (expression->getExpressionType() == E::ExpressionType::kScalarLiteral || - expression->getExpressionType() == E::ExpressionType::kAttributeReference) { - return expression; - } - - E::ScalarPtr scalar; - const bool is_hashable = - E::SomeScalar::MatchesWithConditionalCast(expression, &scalar) - && hashable.find(scalar) != hashable.end(); - - std::size_t new_max_reference_count; - if (is_hashable) { - // CommonSubexpression node already generated. - const auto substitution_map_it = substitution_map->find(scalar); - if (substitution_map_it != substitution_map->end()) { - return substitution_map_it->second; - } - - const auto counter_it = counter.find(scalar); - DCHECK(counter_it != counter.end()); - DCHECK_LE(max_reference_count, counter_it->second); - new_max_reference_count = counter_it->second; - } else { - new_max_reference_count = max_reference_count; - } - - std::vector<E::ExpressionPtr> new_children; - const auto homogeneous_whitelist_it = - homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType())); - if (homogeneous_whitelist_it == homogeneous_whitelist_.end()) { - for (const auto &child : expression->children()) { - new_children.emplace_back(transformExpression(child)); - } - } else { - for (const auto &child : expression->children()) { - new_children.emplace_back( - visitAndTransform(child, - new_max_reference_count, - counter, - hashable, - substitution_map)); - } - } - - E::ExpressionPtr output; - if (new_children == expression->children()) { - output = expression; - } else { - output = std::static_pointer_cast<const E::Scalar>( - expression->copyWithNewChildren(new_children)); - } - - if (is_hashable && new_max_reference_count > max_reference_count) { - DCHECK(E::SomeScalar::Matches(output)); - const E::CommonSubexpressionPtr common_subexpression = - E::CommonSubexpression::Create( - optimizer_context_->nextExprId(), - std::static_pointer_cast<const E::Scalar>(output)); - substitution_map->emplace(scalar, common_subexpression); - output = common_subexpression; - } - - return output; -} - -} // namespace optimizer -} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CommonSubexpressionExtraction.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CommonSubexpressionExtraction.hpp b/query_optimizer/rules/CommonSubexpressionExtraction.hpp deleted file mode 100644 index 121552c..0000000 --- a/query_optimizer/rules/CommonSubexpressionExtraction.hpp +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - **/ - -#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_ -#define QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_ - -#include <memory> -#include <string> -#include <unordered_map> -#include <unordered_set> - -#include "query_optimizer/expressions/CommonSubexpression.hpp" -#include "query_optimizer/expressions/Expression.hpp" -#include "query_optimizer/expressions/Scalar.hpp" -#include "query_optimizer/physical/Physical.hpp" -#include "query_optimizer/rules/Rule.hpp" -#include "utility/Macros.hpp" - -namespace quickstep { -namespace optimizer { - -class OptimizerContext; - -/** \addtogroup OptimizerRules - * @{ - */ - -class CommonSubexpressionExtraction : public Rule<physical::Physical> { - public: - /** - * @brief Constructor. - */ - CommonSubexpressionExtraction(OptimizerContext *optimizer_context); - - ~CommonSubexpressionExtraction() override {} - - std::string getName() const override { - return "CommonSubexpressionExtraction"; - } - - physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override; - - private: - physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input); - - struct ScalarHash { - inline std::size_t operator()(const expressions::ScalarPtr &scalar) const { - return scalar->hash(); - } - }; - - struct ScalarEqual { - inline bool operator()(const expressions::ScalarPtr &lhs, - const expressions::ScalarPtr &rhs) const { - return lhs->equals(rhs); - } - }; - - using ScalarCounter = - std::unordered_map<expressions::ScalarPtr, std::size_t, ScalarHash, ScalarEqual>; - - using ScalarMap = - std::unordered_map<expressions::ScalarPtr, - expressions::CommonSubexpressionPtr, - ScalarHash, - ScalarEqual>; - - using ScalarHashable = std::unordered_set<expressions::ScalarPtr>; - - std::vector<expressions::ExpressionPtr> transformExpressions( - const std::vector<expressions::ExpressionPtr> &expressions); - - expressions::ExpressionPtr transformExpression( - const expressions::ExpressionPtr &expression); - - bool visitAndCount( - const expressions::ExpressionPtr &expression, - ScalarCounter *counter, - ScalarHashable *hashable); - - expressions::ExpressionPtr visitAndTransform( - const expressions::ExpressionPtr &expression, - const std::size_t max_reference_count, - const ScalarCounter &counter, - const ScalarHashable &hashable, - ScalarMap *substitution_map); - - template <typename ScalarSubclassT> - static std::vector<expressions::ExpressionPtr> UpCast( - const std::vector<std::shared_ptr<const ScalarSubclassT>> &expressions) { - std::vector<expressions::ExpressionPtr> output; - for (const auto &expr : expressions) { - output.emplace_back(expr); - } - return output; - } - - template <typename ScalarSubclassT> - static std::vector<std::shared_ptr<const ScalarSubclassT>> DownCast( - const std::vector<expressions::ExpressionPtr> &expressions) { - std::vector<std::shared_ptr<const ScalarSubclassT>> output; - for (const auto &expr : expressions) { - output.emplace_back(std::static_pointer_cast<const ScalarSubclassT>(expr)); - } - return output; - } - - OptimizerContext *optimizer_context_; - std::unordered_set<int> homogeneous_whitelist_; - - DISALLOW_COPY_AND_ASSIGN(CommonSubexpressionExtraction); -}; - -/** @} */ - -} // namespace optimizer -} // namespace quickstep - -#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/ExtractCommonSubexpression.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/ExtractCommonSubexpression.cpp b/query_optimizer/rules/ExtractCommonSubexpression.cpp new file mode 100644 index 0000000..ad2aec1 --- /dev/null +++ b/query_optimizer/rules/ExtractCommonSubexpression.cpp @@ -0,0 +1,264 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#include "query_optimizer/rules/ExtractCommonSubexpression.hpp" + +#include <memory> +#include <unordered_set> +#include <vector> + +#include "query_optimizer/OptimizerContext.hpp" +#include "query_optimizer/expressions/Alias.hpp" +#include "query_optimizer/expressions/CommonSubexpression.hpp" +#include "query_optimizer/expressions/ExpressionType.hpp" +#include "query_optimizer/expressions/NamedExpression.hpp" +#include "query_optimizer/expressions/PatternMatcher.hpp" +#include "query_optimizer/expressions/Scalar.hpp" +#include "query_optimizer/physical/Aggregate.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/physical/PhysicalType.hpp" +#include "query_optimizer/physical/Selection.hpp" +#include "utility/HashError.hpp" + +#include "glog/logging.h" + +namespace quickstep { +namespace optimizer { + +namespace E = ::quickstep::optimizer::expressions; +namespace P = ::quickstep::optimizer::physical; + +ExtractCommonSubexpression::ExtractCommonSubexpression( + OptimizerContext *optimizer_context) + : optimizer_context_(optimizer_context) { + const std::vector<E::ExpressionType> whitelist = { + E::ExpressionType::kAlias, + E::ExpressionType::kAttributeReference, + E::ExpressionType::kAggregateFunction, + E::ExpressionType::kBinaryExpression, + E::ExpressionType::kCast, + E::ExpressionType::kCommonSubexpression, + E::ExpressionType::kScalarLiteral, + E::ExpressionType::kUnaryExpression + }; + + for (const auto &expr_type : whitelist) { + homogeneous_whitelist_.emplace(static_cast<int>(expr_type)); + } +} + +P::PhysicalPtr ExtractCommonSubexpression::apply(const P::PhysicalPtr &input) { + DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan); + + return applyInternal(input); +} + +P::PhysicalPtr ExtractCommonSubexpression::applyInternal( + const P::PhysicalPtr &input) { + std::vector<P::PhysicalPtr> new_children; + for (const auto &child : input->children()) { + new_children.emplace_back(applyInternal(child)); + } + + const P::PhysicalPtr node = + new_children == input->children() + ? input + : input->copyWithNewChildren(new_children); + + switch (node->getPhysicalType()) { + case P::PhysicalType::kSelection: { + const P::SelectionPtr selection = + std::static_pointer_cast<const P::Selection>(node); + + const std::vector<E::NamedExpressionPtr> new_expressions = + DownCast<E::NamedExpression>( + transformExpressions(UpCast(selection->project_expressions()))); + + if (new_expressions != selection->project_expressions()) { + return P::Selection::Create(selection->input(), + new_expressions, + selection->filter_predicate()); + } + break; + } + case P::PhysicalType::kAggregate: { + const P::AggregatePtr aggregate = + std::static_pointer_cast<const P::Aggregate>(node); + std::vector<E::ExpressionPtr> expressions = + UpCast(aggregate->aggregate_expressions()); + for (const auto &expr : aggregate->grouping_expressions()) { + expressions.emplace_back(expr); + } + + const std::vector<E::ExpressionPtr> new_expressions = + transformExpressions(expressions); + + if (new_expressions != expressions) { + std::vector<E::AliasPtr> new_aggregate_expressions; + std::vector<E::NamedExpressionPtr> new_grouping_expressions; + const std::size_t num_aggrs = aggregate->aggregate_expressions().size(); + + for (std::size_t i = 0; i < num_aggrs; ++i) { + DCHECK(E::SomeAlias::Matches(new_expressions[i])); + new_aggregate_expressions.emplace_back( + std::static_pointer_cast<const E::Alias>(new_expressions[i])); + } + for (std::size_t i = num_aggrs; i < new_expressions.size(); ++i) { + DCHECK(E::SomeNamedExpression::Matches(new_expressions[i])); + new_grouping_expressions.emplace_back( + std::static_pointer_cast<const E::NamedExpression>(new_expressions[i])); + } + return P::Aggregate::Create(aggregate->input(), + new_grouping_expressions, + new_aggregate_expressions, + aggregate->filter_predicate()); + } + break; + } + default: + break; + } + + return node; +} + +std::vector<E::ExpressionPtr> ExtractCommonSubexpression::transformExpressions( + const std::vector<E::ExpressionPtr> &expressions) { + ScalarCounter counter; + ScalarHashable hashable; + for (const auto &expr : expressions) { + visitAndCount(expr, &counter, &hashable); + } + + ScalarMap substitution_map; + std::vector<E::ExpressionPtr> new_expressions; + for (const auto &expr : expressions) { + new_expressions.emplace_back( + visitAndTransform(expr, 1, counter, hashable, &substitution_map)); + } + return new_expressions; +} + +E::ExpressionPtr ExtractCommonSubexpression::transformExpression( + const E::ExpressionPtr &expression) { + return transformExpressions({expression}).front(); +} + +bool ExtractCommonSubexpression::visitAndCount( + const E::ExpressionPtr &expression, + ScalarCounter *counter, + ScalarHashable *hashable) { + bool children_hashable = true; + + const auto homogeneous_whitelist_it = + homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType())); + if (homogeneous_whitelist_it != homogeneous_whitelist_.end()) { + for (const auto &child : expression->children()) { + children_hashable &= visitAndCount(child, counter, hashable); + } + } + + E::ScalarPtr scalar; + if (children_hashable && + E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)) { + try { + ++(*counter)[scalar]; + } catch (const HashNotSupported &e) { + return false; + } + hashable->emplace(scalar); + return true; + } + return false; +} + +E::ExpressionPtr ExtractCommonSubexpression::visitAndTransform( + const E::ExpressionPtr &expression, + const std::size_t max_reference_count, + const ScalarCounter &counter, + const ScalarHashable &hashable, + ScalarMap *substitution_map) { + // TODO: figure out whether it is beneficial. + if (expression->getExpressionType() == E::ExpressionType::kScalarLiteral || + expression->getExpressionType() == E::ExpressionType::kAttributeReference) { + return expression; + } + + E::ScalarPtr scalar; + const bool is_hashable = + E::SomeScalar::MatchesWithConditionalCast(expression, &scalar) + && hashable.find(scalar) != hashable.end(); + + std::size_t new_max_reference_count; + if (is_hashable) { + // CommonSubexpression node already generated. + const auto substitution_map_it = substitution_map->find(scalar); + if (substitution_map_it != substitution_map->end()) { + return substitution_map_it->second; + } + + const auto counter_it = counter.find(scalar); + DCHECK(counter_it != counter.end()); + DCHECK_LE(max_reference_count, counter_it->second); + new_max_reference_count = counter_it->second; + } else { + new_max_reference_count = max_reference_count; + } + + std::vector<E::ExpressionPtr> new_children; + const auto homogeneous_whitelist_it = + homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType())); + if (homogeneous_whitelist_it == homogeneous_whitelist_.end()) { + for (const auto &child : expression->children()) { + new_children.emplace_back(transformExpression(child)); + } + } else { + for (const auto &child : expression->children()) { + new_children.emplace_back( + visitAndTransform(child, + new_max_reference_count, + counter, + hashable, + substitution_map)); + } + } + + E::ExpressionPtr output; + if (new_children == expression->children()) { + output = expression; + } else { + output = std::static_pointer_cast<const E::Scalar>( + expression->copyWithNewChildren(new_children)); + } + + if (is_hashable && new_max_reference_count > max_reference_count) { + DCHECK(E::SomeScalar::Matches(output)); + const E::CommonSubexpressionPtr common_subexpression = + E::CommonSubexpression::Create( + optimizer_context_->nextExprId(), + std::static_pointer_cast<const E::Scalar>(output)); + substitution_map->emplace(scalar, common_subexpression); + output = common_subexpression; + } + + return output; +} + +} // namespace optimizer +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/ExtractCommonSubexpression.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/ExtractCommonSubexpression.hpp b/query_optimizer/rules/ExtractCommonSubexpression.hpp new file mode 100644 index 0000000..0b63b7e --- /dev/null +++ b/query_optimizer/rules/ExtractCommonSubexpression.hpp @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_EXTRACT_COMMON_SUBEXPRESSION_HPP_ +#define QUICKSTEP_QUERY_OPTIMIZER_RULES_EXTRACT_COMMON_SUBEXPRESSION_HPP_ + +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> + +#include "query_optimizer/expressions/CommonSubexpression.hpp" +#include "query_optimizer/expressions/Expression.hpp" +#include "query_optimizer/expressions/Scalar.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/rules/Rule.hpp" +#include "utility/Macros.hpp" + +namespace quickstep { +namespace optimizer { + +class OptimizerContext; + +/** \addtogroup OptimizerRules + * @{ + */ + +class ExtractCommonSubexpression : public Rule<physical::Physical> { + public: + /** + * @brief Constructor. + */ + ExtractCommonSubexpression(OptimizerContext *optimizer_context); + + ~ExtractCommonSubexpression() override {} + + std::string getName() const override { + return "ExtractCommonSubexpression"; + } + + physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override; + + private: + physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input); + + struct ScalarHash { + inline std::size_t operator()(const expressions::ScalarPtr &scalar) const { + return scalar->hash(); + } + }; + + struct ScalarEqual { + inline bool operator()(const expressions::ScalarPtr &lhs, + const expressions::ScalarPtr &rhs) const { + return lhs->equals(rhs); + } + }; + + using ScalarCounter = + std::unordered_map<expressions::ScalarPtr, std::size_t, ScalarHash, ScalarEqual>; + + using ScalarMap = + std::unordered_map<expressions::ScalarPtr, + expressions::CommonSubexpressionPtr, + ScalarHash, + ScalarEqual>; + + using ScalarHashable = std::unordered_set<expressions::ScalarPtr>; + + std::vector<expressions::ExpressionPtr> transformExpressions( + const std::vector<expressions::ExpressionPtr> &expressions); + + expressions::ExpressionPtr transformExpression( + const expressions::ExpressionPtr &expression); + + bool visitAndCount( + const expressions::ExpressionPtr &expression, + ScalarCounter *counter, + ScalarHashable *hashable); + + expressions::ExpressionPtr visitAndTransform( + const expressions::ExpressionPtr &expression, + const std::size_t max_reference_count, + const ScalarCounter &counter, + const ScalarHashable &hashable, + ScalarMap *substitution_map); + + template <typename ScalarSubclassT> + static std::vector<expressions::ExpressionPtr> UpCast( + const std::vector<std::shared_ptr<const ScalarSubclassT>> &expressions) { + std::vector<expressions::ExpressionPtr> output; + for (const auto &expr : expressions) { + output.emplace_back(expr); + } + return output; + } + + template <typename ScalarSubclassT> + static std::vector<std::shared_ptr<const ScalarSubclassT>> DownCast( + const std::vector<expressions::ExpressionPtr> &expressions) { + std::vector<std::shared_ptr<const ScalarSubclassT>> output; + for (const auto &expr : expressions) { + output.emplace_back(std::static_pointer_cast<const ScalarSubclassT>(expr)); + } + return output; + } + + OptimizerContext *optimizer_context_; + std::unordered_set<int> homogeneous_whitelist_; + + DISALLOW_COPY_AND_ASSIGN(ExtractCommonSubexpression); +}; + +/** @} */ + +} // namespace optimizer +} // namespace quickstep + +#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_EXTRACT_COMMON_SUBEXPRESSION_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/ReuseAggregateExpressions.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/ReuseAggregateExpressions.cpp b/query_optimizer/rules/ReuseAggregateExpressions.cpp new file mode 100644 index 0000000..77dfd1e --- /dev/null +++ b/query_optimizer/rules/ReuseAggregateExpressions.cpp @@ -0,0 +1,235 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#include "query_optimizer/rules/ReuseAggregateExpressions.hpp" + +#include <cstddef> +#include <list> +#include <map> +#include <unordered_map> +#include <vector> + +#include "expressions/aggregation/AggregateFunction.hpp" +#include "expressions/aggregation/AggregationID.hpp" +#include "query_optimizer/expressions/AggregateFunction.hpp" +#include "query_optimizer/expressions/Alias.hpp" +#include "query_optimizer/expressions/AttributeReference.hpp" +#include "query_optimizer/expressions/BinaryExpression.hpp" +#include "query_optimizer/expressions/ExpressionType.hpp" +#include "query_optimizer/expressions/ExpressionUtil.hpp" +#include "query_optimizer/expressions/NamedExpression.hpp" +#include "query_optimizer/expressions/Scalar.hpp" +#include "query_optimizer/physical/Aggregate.hpp" +#include "query_optimizer/physical/PatternMatcher.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/physical/PhysicalType.hpp" +#include "query_optimizer/physical/Selection.hpp" +#include "query_optimizer/physical/TopLevelPlan.hpp" +#include "types/operations/binary_operations/BinaryOperation.hpp" +#include "types/operations/binary_operations/BinaryOperationFactory.hpp" +#include "types/operations/binary_operations/BinaryOperationID.hpp" +#include "utility/EqualsAnyConstant.hpp" + +#include "glog/logging.h" + +namespace quickstep { +namespace optimizer { + +namespace E = ::quickstep::optimizer::expressions; +namespace P = ::quickstep::optimizer::physical; + +void ReuseAggregateExpressions::init(const P::PhysicalPtr &input) { + DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan); + + const P::TopLevelPlanPtr top_level_plan = + std::static_pointer_cast<const P::TopLevelPlan>(input); + cost_model_.reset( + new cost::StarSchemaSimpleCostModel(top_level_plan->shared_subplans())); +} + +P::PhysicalPtr ReuseAggregateExpressions::applyToNode( + const P::PhysicalPtr &input) { + P::AggregatePtr aggregate; + if (!P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate)) { + return input; + } + + const std::vector<E::AliasPtr> &agg_exprs = aggregate->aggregate_expressions(); + + std::unordered_map<E::ScalarPtr, + std::map<AggregationID, std::vector<std::size_t>>, + ScalarHash, ScalarEqual> agg_expr_info; + + std::list<std::size_t> count_star_info; + + for (std::size_t i = 0; i < agg_exprs.size(); ++i) { + DCHECK(agg_exprs[i]->expression()->getExpressionType() + == E::ExpressionType::kAggregateFunction); + const E::AggregateFunctionPtr agg_func = + std::static_pointer_cast<const E::AggregateFunction>( + agg_exprs[i]->expression()); + const AggregationID agg_id = agg_func->getAggregate().getAggregationID(); + + // Currently we only consider aggregate functions with 0 or 1 argument. + const std::vector<E::ScalarPtr> &arguments = agg_func->getArguments(); + if (agg_id == AggregationID::kCount) { + if (arguments.empty()) { + count_star_info.emplace_front(i); + continue; + } else if (!arguments.front()->getValueType().isNullable()) { + count_star_info.emplace_back(i); + continue; + } + } + if (arguments.size() == 1) { + agg_expr_info[arguments.front()][agg_id].emplace_back(i); + } + } + + std::vector<std::unique_ptr<AggregateReference>> agg_refs(agg_exprs.size()); + + constexpr std::size_t kInvalidRef = static_cast<std::size_t>(-1); + std::size_t count_star_ref; + + if (count_star_info.empty()) { + count_star_ref = kInvalidRef; + } else { + auto it = count_star_info.begin(); + count_star_ref = *it; + + for (++it; it != count_star_info.end(); ++it) { + agg_refs[*it].reset(new AggregateReference(count_star_ref)); + } + } + + for (const auto &it : agg_expr_info) { + const auto &ref_map = it.second; + + // First, check whether AVG can be reduced to SUM/COUNT. + bool is_avg_processed = false; + + const auto sum_it = ref_map.find(AggregationID::kSum); + const auto avg_it = ref_map.find(AggregationID::kAvg); + + if (avg_it != ref_map.end() && sum_it != ref_map.end()) { + std::size_t count_ref = kInvalidRef; + if (it.first->getValueType().isNullable()) { + const auto count_it = ref_map.find(AggregationID::kCount); + if (count_it != ref_map.end()) { + DCHECK(!count_it->second.empty()); + count_ref = count_it->second.front(); + } + } else { + count_ref = count_star_ref; + } + + if (count_ref != kInvalidRef) { + DCHECK(!sum_it->second.empty()); + const std::size_t sum_ref = sum_it->second.front(); + for (const std::size_t idx : avg_it->second) { + agg_refs[idx].reset(new AggregateReference(sum_ref, count_ref)); + } + is_avg_processed = true; + } + } + + // Then, eliminate duplicate aggregate expressions. + for (const auto &ref_it : ref_map) { + if (ref_it.first == AggregationID::kAvg && is_avg_processed) { + continue; + } + + const auto &indices = ref_it.second; + DCHECK(!indices.empty()); + const std::size_t ref = indices.front(); + for (std::size_t i = 1; i < indices.size(); ++i) { + agg_refs[indices[i]].reset(new AggregateReference(ref)); + } + } + } + + bool need_transform = false; + for (const auto &agg_ref : agg_refs) { + if (agg_ref != nullptr) { + need_transform = true; + break; + } + } + + if (!need_transform) { + return input; + } + + const std::vector<E::AttributeReferencePtr> agg_attrs = E::ToRefVector(agg_exprs); + + std::vector<E::AliasPtr> new_agg_exprs; + std::vector<E::NamedExpressionPtr> new_select_exprs; + + for (std::size_t i = 0; i < agg_refs.size(); ++i) { + const auto &agg_ref = agg_refs[i]; + const E::AliasPtr &agg_expr = agg_exprs[i]; + + if (agg_ref == nullptr) { + new_agg_exprs.emplace_back(agg_expr); + new_select_exprs.emplace_back( + E::AttributeReference::Create(agg_expr->id(), + agg_expr->attribute_name(), + agg_expr->attribute_alias(), + agg_expr->relation_name(), + agg_expr->getValueType(), + E::AttributeReferenceScope::kLocal)); + } else { + switch (agg_ref->kind) { + case AggregateReference::kDirect: { + new_select_exprs.emplace_back( + E::Alias::Create(agg_expr->id(), + agg_attrs[agg_ref->first_ref], + agg_expr->attribute_name(), + agg_expr->attribute_alias())); + break; + } + case AggregateReference::kAvg: { + const BinaryOperation ÷_op = + BinaryOperationFactory::GetBinaryOperation(BinaryOperationID::kDivide); + const E::BinaryExpressionPtr avg_expr = + E::BinaryExpression::Create(divide_op, + agg_attrs[agg_ref->first_ref], + agg_attrs[agg_ref->second_ref]); + new_select_exprs.emplace_back( + E::Alias::Create(agg_expr->id(), + avg_expr, + agg_expr->attribute_name(), + agg_expr->attribute_alias())); + } + } + } + } + + const P::AggregatePtr new_aggregate = + P::Aggregate::Create(aggregate->input(), + aggregate->grouping_expressions(), + new_agg_exprs, + aggregate->filter_predicate()); + + return P::Selection::Create(new_aggregate, new_select_exprs, nullptr); +} + + +} // namespace optimizer +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/ReuseAggregateExpressions.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/ReuseAggregateExpressions.hpp b/query_optimizer/rules/ReuseAggregateExpressions.hpp new file mode 100644 index 0000000..0b18023 --- /dev/null +++ b/query_optimizer/rules/ReuseAggregateExpressions.hpp @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_REUSE_AGGREGATE_EXPRESSIONS_HPP_ +#define QUICKSTEP_QUERY_OPTIMIZER_RULES_REUSE_AGGREGATE_EXPRESSIONS_HPP_ + +#include <cstddef> +#include <memory> +#include <string> +#include <vector> + +#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" +#include "query_optimizer/expressions/Scalar.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/rules/BottomUpRule.hpp" +#include "utility/Macros.hpp" + +namespace quickstep { +namespace optimizer { + +class OptimizerContext; + +/** \addtogroup OptimizerRules + * @{ + */ + +class ReuseAggregateExpressions : public BottomUpRule<physical::Physical> { + public: + ReuseAggregateExpressions() {} + + std::string getName() const override { + return "ReuseAggregateExpressions"; + } + + protected: + void init(const physical::PhysicalPtr &input) override; + + physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &input) override; + + private: + struct ScalarHash { + inline std::size_t operator()(const expressions::ScalarPtr &scalar) const { + return scalar->hash(); + } + }; + + struct ScalarEqual { + inline bool operator()(const expressions::ScalarPtr &lhs, + const expressions::ScalarPtr &rhs) const { + return lhs->equals(rhs); + } + }; + + struct AggregateReference { + enum Kind { + kDirect = 0, + kAvg + }; + + AggregateReference(const std::size_t ref) + : kind(kDirect), first_ref(ref), second_ref(0) {} + + AggregateReference(const std::size_t sum_ref, const std::size_t count_ref) + : kind(kAvg), first_ref(sum_ref), second_ref(count_ref) {} + + const Kind kind; + const std::size_t first_ref; + const std::size_t second_ref; + }; + + std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_; + + DISALLOW_COPY_AND_ASSIGN(ReuseAggregateExpressions); +}; + +} // namespace optimizer +} // namespace quickstep + +#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_REUSE_AGGREGATE_EXPRESSIONS_HPP_