Repository: incubator-quickstep
Updated Branches:
  refs/heads/common-subexpression b0acc9ce9 -> c27f5beb8


Updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/c27f5beb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/c27f5beb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/c27f5beb

Branch: refs/heads/common-subexpression
Commit: c27f5beb8d473895dfa637c699590f4d4a2408cc
Parents: b0acc9c
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Wed Apr 19 13:45:01 2017 -0500
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Wed Apr 19 13:45:01 2017 -0500

----------------------------------------------------------------------
 query_optimizer/CMakeLists.txt                  |   4 +-
 query_optimizer/PhysicalGenerator.cpp           |  10 +-
 query_optimizer/rules/CMakeLists.txt            |  49 +++-
 query_optimizer/rules/CollapseSelection.cpp     |  57 ++++
 query_optimizer/rules/CollapseSelection.hpp     |  59 +++++
 .../rules/CommonSubexpressionExtraction.cpp     | 264 -------------------
 .../rules/CommonSubexpressionExtraction.hpp     | 135 ----------
 .../rules/ExtractCommonSubexpression.cpp        | 264 +++++++++++++++++++
 .../rules/ExtractCommonSubexpression.hpp        | 135 ++++++++++
 .../rules/ReuseAggregateExpressions.cpp         | 235 +++++++++++++++++
 .../rules/ReuseAggregateExpressions.hpp         |  95 +++++++
 11 files changed, 900 insertions(+), 407 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 9e5a2c8..c969f16 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -214,13 +214,15 @@ 
target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator
                       quickstep_queryoptimizer_logical_Logical
                       quickstep_queryoptimizer_physical_Physical
                       quickstep_queryoptimizer_rules_AttachLIPFilters
-                      
quickstep_queryoptimizer_rules_CommonSubexpressionExtraction
+                      quickstep_queryoptimizer_rules_CollapseSelection
+                      quickstep_queryoptimizer_rules_ExtractCommonSubexpression
                       quickstep_queryoptimizer_rules_FuseAggregateJoin
                       quickstep_queryoptimizer_rules_InjectJoinFilters
                       quickstep_queryoptimizer_rules_PruneColumns
                       
quickstep_queryoptimizer_rules_PushDownLowCostDisjunctivePredicate
                       quickstep_queryoptimizer_rules_ReduceGroupByAttributes
                       quickstep_queryoptimizer_rules_ReorderColumns
+                      quickstep_queryoptimizer_rules_ReuseAggregateExpressions
                       
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
                       quickstep_queryoptimizer_rules_SwapProbeBuild
                       quickstep_queryoptimizer_strategy_Aggregate

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp 
b/query_optimizer/PhysicalGenerator.cpp
index 34deb76..427c365 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -27,13 +27,15 @@
 #include "query_optimizer/logical/Logical.hpp"
 #include "query_optimizer/physical/Physical.hpp"
 #include "query_optimizer/rules/AttachLIPFilters.hpp"
-#include "query_optimizer/rules/CommonSubexpressionExtraction.hpp"
+#include "query_optimizer/rules/CollapseSelection.hpp"
+#include "query_optimizer/rules/ExtractCommonSubexpression.hpp"
 #include "query_optimizer/rules/FuseAggregateJoin.hpp"
 #include "query_optimizer/rules/InjectJoinFilters.hpp"
 #include "query_optimizer/rules/PruneColumns.hpp"
 #include "query_optimizer/rules/PushDownLowCostDisjunctivePredicate.hpp"
 #include "query_optimizer/rules/ReduceGroupByAttributes.hpp"
 #include "query_optimizer/rules/ReorderColumns.hpp"
+#include "query_optimizer/rules/ReuseAggregateExpressions.hpp"
 #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp"
 #include "query_optimizer/rules/SwapProbeBuild.hpp"
 #include "query_optimizer/strategy/Aggregate.hpp"
@@ -147,9 +149,13 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
     rules.emplace_back(new ReorderColumns());
   }
 
+  rules.emplace_back(new ReuseAggregateExpressions());
+
   rules.emplace_back(new FuseAggregateJoin());
 
-  rules.emplace_back(new CommonSubexpressionExtraction(optimizer_context_));
+  rules.emplace_back(new CollapseSelection());
+
+  rules.emplace_back(new ExtractCommonSubexpression(optimizer_context_));
 
   // NOTE(jianqiao): Adding rules after InjectJoinFilters (or 
AttachLIPFilters) requires
   // extra handling of LIPFilterConfiguration for transformed nodes. So 
currently it is

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CMakeLists.txt 
b/query_optimizer/rules/CMakeLists.txt
index b2d0f90..dcc2072 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -21,9 +21,10 @@ add_subdirectory(tests)
 add_library(quickstep_queryoptimizer_rules_AttachLIPFilters 
AttachLIPFilters.cpp AttachLIPFilters.hpp)
 add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp 
BottomUpRule.hpp)
 add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp 
CollapseProject.hpp)
-add_library(quickstep_queryoptimizer_rules_CommonSubexpressionExtraction
-            CommonSubexpressionExtraction.cpp
-            CommonSubexpressionExtraction.hpp)
+add_library(quickstep_queryoptimizer_rules_CollapseSelection 
CollapseSelection.cpp CollapseSelection.hpp)
+add_library(quickstep_queryoptimizer_rules_ExtractCommonSubexpression
+            ExtractCommonSubexpression.cpp
+            ExtractCommonSubexpression.hpp)
 add_library(quickstep_queryoptimizer_rules_FuseAggregateJoin 
FuseAggregateJoin.cpp FuseAggregateJoin.hpp)
 add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp 
GenerateJoins.hpp)
 add_library(quickstep_queryoptimizer_rules_InjectJoinFilters 
InjectJoinFilters.cpp InjectJoinFilters.hpp)
@@ -37,6 +38,9 @@ 
add_library(quickstep_queryoptimizer_rules_ReduceGroupByAttributes
             ReduceGroupByAttributes.cpp
             ReduceGroupByAttributes.hpp)
 add_library(quickstep_queryoptimizer_rules_ReorderColumns ReorderColumns.cpp 
ReorderColumns.hpp)
+add_library(quickstep_queryoptimizer_rules_ReuseAggregateExpressions
+            ReuseAggregateExpressions.cpp
+            ReuseAggregateExpressions.hpp)
 add_library(quickstep_queryoptimizer_rules_Rule ../../empty_src.cpp Rule.hpp)
 add_library(quickstep_queryoptimizer_rules_RuleHelper RuleHelper.cpp 
RuleHelper.hpp)
 add_library(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization
@@ -80,7 +84,15 @@ 
target_link_libraries(quickstep_queryoptimizer_rules_CollapseProject
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_queryoptimizer_rules_RuleHelper
                       quickstep_utility_Macros)
-target_link_libraries(quickstep_queryoptimizer_rules_CommonSubexpressionExtraction
+target_link_libraries(quickstep_queryoptimizer_rules_CollapseSelection
+                      quickstep_queryoptimizer_expressions_NamedExpression
+                      quickstep_queryoptimizer_physical_PatternMatcher
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_Selection
+                      quickstep_queryoptimizer_rules_BottomUpRule
+                      quickstep_queryoptimizer_rules_RuleHelper
+                      quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_ExtractCommonSubexpression
                       glog
                       quickstep_queryoptimizer_OptimizerContext
                       quickstep_queryoptimizer_expressions_Alias
@@ -220,6 +232,31 @@ 
target_link_libraries(quickstep_queryoptimizer_rules_ReorderColumns
                       quickstep_queryoptimizer_physical_TableReference
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_ReuseAggregateExpressions
+                      glog
+                      quickstep_expressions_aggregation_AggregateFunction
+                      quickstep_expressions_aggregation_AggregationID
+                      
quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel
+                      quickstep_queryoptimizer_expressions_AggregateFunction
+                      quickstep_queryoptimizer_expressions_Alias
+                      quickstep_queryoptimizer_expressions_AttributeReference
+                      quickstep_queryoptimizer_expressions_BinaryExpression
+                      quickstep_queryoptimizer_expressions_ExpressionType
+                      quickstep_queryoptimizer_expressions_ExpressionUtil
+                      quickstep_queryoptimizer_expressions_NamedExpression
+                      quickstep_queryoptimizer_expressions_Scalar
+                      quickstep_queryoptimizer_physical_Aggregate
+                      quickstep_queryoptimizer_physical_PatternMatcher
+                      quickstep_queryoptimizer_physical_Physical
+                      quickstep_queryoptimizer_physical_PhysicalType
+                      quickstep_queryoptimizer_physical_Selection
+                      quickstep_queryoptimizer_physical_TopLevelPlan
+                      quickstep_queryoptimizer_rules_BottomUpRule
+                      
quickstep_types_operations_binaryoperations_BinaryOperation
+                      
quickstep_types_operations_binaryoperations_BinaryOperationFactory
+                      
quickstep_types_operations_binaryoperations_BinaryOperationID
+                      quickstep_utility_EqualsAnyConstant
+                      quickstep_utility_Macros)
 target_link_libraries(quickstep_queryoptimizer_rules_Rule
                       glog
                       quickstep_utility_Macros)
@@ -331,7 +368,8 @@ target_link_libraries(quickstep_queryoptimizer_rules
                       quickstep_queryoptimizer_rules_AttachLIPFilters
                       quickstep_queryoptimizer_rules_BottomUpRule
                       quickstep_queryoptimizer_rules_CollapseProject
-                      
quickstep_queryoptimizer_rules_CommonSubexpressionExtraction
+                      quickstep_queryoptimizer_rules_CollapseSelection
+                      quickstep_queryoptimizer_rules_ExtractCommonSubexpression
                       quickstep_queryoptimizer_rules_FuseAggregateJoin
                       quickstep_queryoptimizer_rules_GenerateJoins
                       quickstep_queryoptimizer_rules_InjectJoinFilters
@@ -341,6 +379,7 @@ target_link_libraries(quickstep_queryoptimizer_rules
                       quickstep_queryoptimizer_rules_PushDownSemiAntiJoin
                       quickstep_queryoptimizer_rules_ReduceGroupByAttributes
                       quickstep_queryoptimizer_rules_ReorderColumns
+                      quickstep_queryoptimizer_rules_ReuseAggregateExpressions
                       quickstep_queryoptimizer_rules_Rule
                       quickstep_queryoptimizer_rules_RuleHelper
                       
quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CollapseSelection.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CollapseSelection.cpp 
b/query_optimizer/rules/CollapseSelection.cpp
new file mode 100644
index 0000000..a03faa5
--- /dev/null
+++ b/query_optimizer/rules/CollapseSelection.cpp
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/CollapseSelection.hpp"
+
+#include <vector>
+
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/rules/RuleHelper.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr CollapseSelection::applyToNode(const P::PhysicalPtr &input) {
+  P::SelectionPtr selection;
+  P::SelectionPtr child_selection;
+
+  if (P::SomeSelection::MatchesWithConditionalCast(input, &selection) &&
+      P::SomeSelection::MatchesWithConditionalCast(selection->input(), 
&child_selection) &&
+      child_selection->filter_predicate() == nullptr) {
+    std::vector<E::NamedExpressionPtr> project_expressions =
+        selection->project_expressions();
+    PullUpProjectExpressions(child_selection->project_expressions(),
+                             {} /* non_project_expression_lists */,
+                             {&project_expressions} /* 
project_expression_lists */);
+    return P::Selection::Create(child_selection->input(),
+                                project_expressions,
+                                selection->filter_predicate());
+  }
+
+  return input;
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CollapseSelection.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CollapseSelection.hpp 
b/query_optimizer/rules/CollapseSelection.hpp
new file mode 100644
index 0000000..2417574
--- /dev/null
+++ b/query_optimizer/rules/CollapseSelection.hpp
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_COLLAPSE_SELECTION_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_COLLAPSE_SELECTION_HPP_
+
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/BottomUpRule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ *  @{
+ */
+
+/**
+ * @brief Merges nested Selections into one single Selection.
+ */
+class CollapseSelection : public BottomUpRule<physical::Physical> {
+ public:
+  CollapseSelection() {}
+
+  std::string getName() const override {
+    return "CollapseSelection";
+  }
+
+ protected:
+  physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &input) 
override;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(CollapseSelection);
+};
+
+/** @} */
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_RULES_COLLAPSE_SELECTION_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CommonSubexpressionExtraction.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CommonSubexpressionExtraction.cpp 
b/query_optimizer/rules/CommonSubexpressionExtraction.cpp
deleted file mode 100644
index b06d9fc..0000000
--- a/query_optimizer/rules/CommonSubexpressionExtraction.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#include "query_optimizer/rules/CommonSubexpressionExtraction.hpp"
-
-#include <memory>
-#include <unordered_set>
-#include <vector>
-
-#include "query_optimizer/OptimizerContext.hpp"
-#include "query_optimizer/expressions/Alias.hpp"
-#include "query_optimizer/expressions/CommonSubexpression.hpp"
-#include "query_optimizer/expressions/ExpressionType.hpp"
-#include "query_optimizer/expressions/NamedExpression.hpp"
-#include "query_optimizer/expressions/PatternMatcher.hpp"
-#include "query_optimizer/expressions/Scalar.hpp"
-#include "query_optimizer/physical/Aggregate.hpp"
-#include "query_optimizer/physical/Physical.hpp"
-#include "query_optimizer/physical/PhysicalType.hpp"
-#include "query_optimizer/physical/Selection.hpp"
-#include "utility/HashError.hpp"
-
-#include "glog/logging.h"
-
-namespace quickstep {
-namespace optimizer {
-
-namespace E = ::quickstep::optimizer::expressions;
-namespace P = ::quickstep::optimizer::physical;
-
-CommonSubexpressionExtraction::CommonSubexpressionExtraction(
-    OptimizerContext *optimizer_context)
-    : optimizer_context_(optimizer_context) {
-  const std::vector<E::ExpressionType> whitelist = {
-      E::ExpressionType::kAlias,
-      E::ExpressionType::kAttributeReference,
-      E::ExpressionType::kAggregateFunction,
-      E::ExpressionType::kBinaryExpression,
-      E::ExpressionType::kCast,
-      E::ExpressionType::kCommonSubexpression,
-      E::ExpressionType::kScalarLiteral,
-      E::ExpressionType::kUnaryExpression
-  };
-
-  for (const auto &expr_type : whitelist) {
-    homogeneous_whitelist_.emplace(static_cast<int>(expr_type));
-  }
-}
-
-P::PhysicalPtr CommonSubexpressionExtraction::apply(const P::PhysicalPtr 
&input) {
-  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
-
-  return applyInternal(input);
-}
-
-P::PhysicalPtr CommonSubexpressionExtraction::applyInternal(
-    const P::PhysicalPtr &input) {
-  std::vector<P::PhysicalPtr> new_children;
-  for (const auto &child : input->children()) {
-    new_children.emplace_back(applyInternal(child));
-  }
-
-  const P::PhysicalPtr node =
-      new_children == input->children()
-          ? input
-          : input->copyWithNewChildren(new_children);
-
-  switch (node->getPhysicalType()) {
-    case P::PhysicalType::kSelection: {
-      const P::SelectionPtr selection =
-          std::static_pointer_cast<const P::Selection>(node);
-
-      const std::vector<E::NamedExpressionPtr> new_expressions =
-          DownCast<E::NamedExpression>(
-              transformExpressions(UpCast(selection->project_expressions())));
-
-      if (new_expressions != selection->project_expressions()) {
-        return P::Selection::Create(selection->input(),
-                                    new_expressions,
-                                    selection->filter_predicate());
-      }
-      break;
-    }
-    case P::PhysicalType::kAggregate: {
-      const P::AggregatePtr aggregate =
-          std::static_pointer_cast<const P::Aggregate>(node);
-      std::vector<E::ExpressionPtr> expressions =
-          UpCast(aggregate->aggregate_expressions());
-      for (const auto &expr : aggregate->grouping_expressions()) {
-        expressions.emplace_back(expr);
-      }
-
-      const std::vector<E::ExpressionPtr> new_expressions =
-          transformExpressions(expressions);
-
-      if (new_expressions != expressions) {
-        std::vector<E::AliasPtr> new_aggregate_expressions;
-        std::vector<E::NamedExpressionPtr> new_grouping_expressions;
-        const std::size_t num_aggrs = 
aggregate->aggregate_expressions().size();
-
-        for (std::size_t i = 0; i < num_aggrs; ++i) {
-          DCHECK(E::SomeAlias::Matches(new_expressions[i]));
-          new_aggregate_expressions.emplace_back(
-              std::static_pointer_cast<const E::Alias>(new_expressions[i]));
-        }
-        for (std::size_t i = num_aggrs; i < new_expressions.size(); ++i) {
-          DCHECK(E::SomeNamedExpression::Matches(new_expressions[i]));
-          new_grouping_expressions.emplace_back(
-              std::static_pointer_cast<const 
E::NamedExpression>(new_expressions[i]));
-        }
-        return P::Aggregate::Create(aggregate->input(),
-                                    new_grouping_expressions,
-                                    new_aggregate_expressions,
-                                    aggregate->filter_predicate());
-      }
-      break;
-    }
-    default:
-      break;
-  }
-
-  return node;
-}
-
-std::vector<E::ExpressionPtr> 
CommonSubexpressionExtraction::transformExpressions(
-    const std::vector<E::ExpressionPtr> &expressions) {
-  ScalarCounter counter;
-  ScalarHashable hashable;
-  for (const auto &expr : expressions) {
-    visitAndCount(expr, &counter, &hashable);
-  }
-
-  ScalarMap substitution_map;
-  std::vector<E::ExpressionPtr> new_expressions;
-  for (const auto &expr : expressions) {
-    new_expressions.emplace_back(
-        visitAndTransform(expr, 1, counter, hashable, &substitution_map));
-  }
-  return new_expressions;
-}
-
-E::ExpressionPtr CommonSubexpressionExtraction::transformExpression(
-    const E::ExpressionPtr &expression) {
-  return transformExpressions({expression}).front();
-}
-
-bool CommonSubexpressionExtraction::visitAndCount(
-    const E::ExpressionPtr &expression,
-    ScalarCounter *counter,
-    ScalarHashable *hashable) {
-  bool children_hashable = true;
-
-  const auto homogeneous_whitelist_it =
-      
homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType()));
-  if (homogeneous_whitelist_it != homogeneous_whitelist_.end()) {
-    for (const auto &child : expression->children()) {
-      children_hashable &= visitAndCount(child, counter, hashable);
-    }
-  }
-
-  E::ScalarPtr scalar;
-  if (children_hashable &&
-      E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)) {
-    try {
-      ++(*counter)[scalar];
-    } catch (const HashNotSupported &e) {
-      return false;
-    }
-    hashable->emplace(scalar);
-    return true;
-  }
-  return false;
-}
-
-E::ExpressionPtr CommonSubexpressionExtraction::visitAndTransform(
-    const E::ExpressionPtr &expression,
-    const std::size_t max_reference_count,
-    const ScalarCounter &counter,
-    const ScalarHashable &hashable,
-    ScalarMap *substitution_map) {
-  // TODO: figure out whether it is beneficial.
-  if (expression->getExpressionType() == E::ExpressionType::kScalarLiteral ||
-      expression->getExpressionType() == 
E::ExpressionType::kAttributeReference) {
-    return expression;
-  }
-
-  E::ScalarPtr scalar;
-  const bool is_hashable =
-      E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)
-          && hashable.find(scalar) != hashable.end();
-
-  std::size_t new_max_reference_count;
-  if (is_hashable) {
-    // CommonSubexpression node already generated.
-    const auto substitution_map_it = substitution_map->find(scalar);
-    if (substitution_map_it != substitution_map->end()) {
-      return substitution_map_it->second;
-    }
-
-    const auto counter_it = counter.find(scalar);
-    DCHECK(counter_it != counter.end());
-    DCHECK_LE(max_reference_count, counter_it->second);
-    new_max_reference_count = counter_it->second;
-  } else {
-    new_max_reference_count = max_reference_count;
-  }
-
-  std::vector<E::ExpressionPtr> new_children;
-  const auto homogeneous_whitelist_it =
-      
homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType()));
-  if (homogeneous_whitelist_it == homogeneous_whitelist_.end()) {
-    for (const auto &child : expression->children()) {
-      new_children.emplace_back(transformExpression(child));
-    }
-  } else {
-    for (const auto &child : expression->children()) {
-      new_children.emplace_back(
-          visitAndTransform(child,
-                            new_max_reference_count,
-                            counter,
-                            hashable,
-                            substitution_map));
-    }
-  }
-
-  E::ExpressionPtr output;
-  if (new_children == expression->children()) {
-    output = expression;
-  } else {
-    output = std::static_pointer_cast<const E::Scalar>(
-        expression->copyWithNewChildren(new_children));
-  }
-
-  if (is_hashable && new_max_reference_count > max_reference_count) {
-    DCHECK(E::SomeScalar::Matches(output));
-    const E::CommonSubexpressionPtr common_subexpression =
-        E::CommonSubexpression::Create(
-            optimizer_context_->nextExprId(),
-            std::static_pointer_cast<const E::Scalar>(output));
-    substitution_map->emplace(scalar, common_subexpression);
-    output = common_subexpression;
-  }
-
-  return output;
-}
-
-}  // namespace optimizer
-}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/CommonSubexpressionExtraction.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/CommonSubexpressionExtraction.hpp 
b/query_optimizer/rules/CommonSubexpressionExtraction.hpp
deleted file mode 100644
index 121552c..0000000
--- a/query_optimizer/rules/CommonSubexpressionExtraction.hpp
+++ /dev/null
@@ -1,135 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- **/
-
-#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_
-#define QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_
-
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <unordered_set>
-
-#include "query_optimizer/expressions/CommonSubexpression.hpp"
-#include "query_optimizer/expressions/Expression.hpp"
-#include "query_optimizer/expressions/Scalar.hpp"
-#include "query_optimizer/physical/Physical.hpp"
-#include "query_optimizer/rules/Rule.hpp"
-#include "utility/Macros.hpp"
-
-namespace quickstep {
-namespace optimizer {
-
-class OptimizerContext;
-
-/** \addtogroup OptimizerRules
- *  @{
- */
-
-class CommonSubexpressionExtraction : public Rule<physical::Physical> {
- public:
-  /**
-   * @brief Constructor.
-   */
-  CommonSubexpressionExtraction(OptimizerContext *optimizer_context);
-
-  ~CommonSubexpressionExtraction() override {}
-
-  std::string getName() const override {
-    return "CommonSubexpressionExtraction";
-  }
-
-  physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
-
- private:
-  physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input);
-
-  struct ScalarHash {
-    inline std::size_t operator()(const expressions::ScalarPtr &scalar) const {
-      return scalar->hash();
-    }
-  };
-
-  struct ScalarEqual {
-    inline bool operator()(const expressions::ScalarPtr &lhs,
-                           const expressions::ScalarPtr &rhs) const {
-      return lhs->equals(rhs);
-    }
-  };
-
-  using ScalarCounter =
-      std::unordered_map<expressions::ScalarPtr, std::size_t, ScalarHash, 
ScalarEqual>;
-
-  using ScalarMap =
-      std::unordered_map<expressions::ScalarPtr,
-                         expressions::CommonSubexpressionPtr,
-                         ScalarHash,
-                         ScalarEqual>;
-
-  using ScalarHashable = std::unordered_set<expressions::ScalarPtr>;
-
-  std::vector<expressions::ExpressionPtr> transformExpressions(
-      const std::vector<expressions::ExpressionPtr> &expressions);
-
-  expressions::ExpressionPtr transformExpression(
-      const expressions::ExpressionPtr &expression);
-
-  bool visitAndCount(
-      const expressions::ExpressionPtr &expression,
-      ScalarCounter *counter,
-      ScalarHashable *hashable);
-
-  expressions::ExpressionPtr visitAndTransform(
-      const expressions::ExpressionPtr &expression,
-      const std::size_t max_reference_count,
-      const ScalarCounter &counter,
-      const ScalarHashable &hashable,
-      ScalarMap *substitution_map);
-
-  template <typename ScalarSubclassT>
-  static std::vector<expressions::ExpressionPtr> UpCast(
-      const std::vector<std::shared_ptr<const ScalarSubclassT>> &expressions) {
-    std::vector<expressions::ExpressionPtr> output;
-    for (const auto &expr : expressions) {
-      output.emplace_back(expr);
-    }
-    return output;
-  }
-
-  template <typename ScalarSubclassT>
-  static std::vector<std::shared_ptr<const ScalarSubclassT>> DownCast(
-      const std::vector<expressions::ExpressionPtr> &expressions) {
-    std::vector<std::shared_ptr<const ScalarSubclassT>> output;
-    for (const auto &expr : expressions) {
-      output.emplace_back(std::static_pointer_cast<const 
ScalarSubclassT>(expr));
-    }
-    return output;
-  }
-
-  OptimizerContext *optimizer_context_;
-  std::unordered_set<int> homogeneous_whitelist_;
-
-  DISALLOW_COPY_AND_ASSIGN(CommonSubexpressionExtraction);
-};
-
-/** @} */
-
-}  // namespace optimizer
-}  // namespace quickstep
-
-#endif  // QUICKSTEP_QUERY_OPTIMIZER_RULES_COMMON_SUBEXPRESSION_EXTRACTION_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/ExtractCommonSubexpression.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ExtractCommonSubexpression.cpp 
b/query_optimizer/rules/ExtractCommonSubexpression.cpp
new file mode 100644
index 0000000..ad2aec1
--- /dev/null
+++ b/query_optimizer/rules/ExtractCommonSubexpression.cpp
@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/ExtractCommonSubexpression.hpp"
+
+#include <memory>
+#include <unordered_set>
+#include <vector>
+
+#include "query_optimizer/OptimizerContext.hpp"
+#include "query_optimizer/expressions/Alias.hpp"
+#include "query_optimizer/expressions/CommonSubexpression.hpp"
+#include "query_optimizer/expressions/ExpressionType.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/PatternMatcher.hpp"
+#include "query_optimizer/expressions/Scalar.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "utility/HashError.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+ExtractCommonSubexpression::ExtractCommonSubexpression(
+    OptimizerContext *optimizer_context)
+    : optimizer_context_(optimizer_context) {
+  const std::vector<E::ExpressionType> whitelist = {
+      E::ExpressionType::kAlias,
+      E::ExpressionType::kAttributeReference,
+      E::ExpressionType::kAggregateFunction,
+      E::ExpressionType::kBinaryExpression,
+      E::ExpressionType::kCast,
+      E::ExpressionType::kCommonSubexpression,
+      E::ExpressionType::kScalarLiteral,
+      E::ExpressionType::kUnaryExpression
+  };
+
+  for (const auto &expr_type : whitelist) {
+    homogeneous_whitelist_.emplace(static_cast<int>(expr_type));
+  }
+}
+
+P::PhysicalPtr ExtractCommonSubexpression::apply(const P::PhysicalPtr &input) {
+  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+  return applyInternal(input);
+}
+
+P::PhysicalPtr ExtractCommonSubexpression::applyInternal(
+    const P::PhysicalPtr &input) {
+  std::vector<P::PhysicalPtr> new_children;
+  for (const auto &child : input->children()) {
+    new_children.emplace_back(applyInternal(child));
+  }
+
+  const P::PhysicalPtr node =
+      new_children == input->children()
+          ? input
+          : input->copyWithNewChildren(new_children);
+
+  switch (node->getPhysicalType()) {
+    case P::PhysicalType::kSelection: {
+      const P::SelectionPtr selection =
+          std::static_pointer_cast<const P::Selection>(node);
+
+      const std::vector<E::NamedExpressionPtr> new_expressions =
+          DownCast<E::NamedExpression>(
+              transformExpressions(UpCast(selection->project_expressions())));
+
+      if (new_expressions != selection->project_expressions()) {
+        return P::Selection::Create(selection->input(),
+                                    new_expressions,
+                                    selection->filter_predicate());
+      }
+      break;
+    }
+    case P::PhysicalType::kAggregate: {
+      const P::AggregatePtr aggregate =
+          std::static_pointer_cast<const P::Aggregate>(node);
+      std::vector<E::ExpressionPtr> expressions =
+          UpCast(aggregate->aggregate_expressions());
+      for (const auto &expr : aggregate->grouping_expressions()) {
+        expressions.emplace_back(expr);
+      }
+
+      const std::vector<E::ExpressionPtr> new_expressions =
+          transformExpressions(expressions);
+
+      if (new_expressions != expressions) {
+        std::vector<E::AliasPtr> new_aggregate_expressions;
+        std::vector<E::NamedExpressionPtr> new_grouping_expressions;
+        const std::size_t num_aggrs = 
aggregate->aggregate_expressions().size();
+
+        for (std::size_t i = 0; i < num_aggrs; ++i) {
+          DCHECK(E::SomeAlias::Matches(new_expressions[i]));
+          new_aggregate_expressions.emplace_back(
+              std::static_pointer_cast<const E::Alias>(new_expressions[i]));
+        }
+        for (std::size_t i = num_aggrs; i < new_expressions.size(); ++i) {
+          DCHECK(E::SomeNamedExpression::Matches(new_expressions[i]));
+          new_grouping_expressions.emplace_back(
+              std::static_pointer_cast<const 
E::NamedExpression>(new_expressions[i]));
+        }
+        return P::Aggregate::Create(aggregate->input(),
+                                    new_grouping_expressions,
+                                    new_aggregate_expressions,
+                                    aggregate->filter_predicate());
+      }
+      break;
+    }
+    default:
+      break;
+  }
+
+  return node;
+}
+
+std::vector<E::ExpressionPtr> ExtractCommonSubexpression::transformExpressions(
+    const std::vector<E::ExpressionPtr> &expressions) {
+  ScalarCounter counter;
+  ScalarHashable hashable;
+  for (const auto &expr : expressions) {
+    visitAndCount(expr, &counter, &hashable);
+  }
+
+  ScalarMap substitution_map;
+  std::vector<E::ExpressionPtr> new_expressions;
+  for (const auto &expr : expressions) {
+    new_expressions.emplace_back(
+        visitAndTransform(expr, 1, counter, hashable, &substitution_map));
+  }
+  return new_expressions;
+}
+
+E::ExpressionPtr ExtractCommonSubexpression::transformExpression(
+    const E::ExpressionPtr &expression) {
+  return transformExpressions({expression}).front();
+}
+
+bool ExtractCommonSubexpression::visitAndCount(
+    const E::ExpressionPtr &expression,
+    ScalarCounter *counter,
+    ScalarHashable *hashable) {
+  bool children_hashable = true;
+
+  const auto homogeneous_whitelist_it =
+      
homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType()));
+  if (homogeneous_whitelist_it != homogeneous_whitelist_.end()) {
+    for (const auto &child : expression->children()) {
+      children_hashable &= visitAndCount(child, counter, hashable);
+    }
+  }
+
+  E::ScalarPtr scalar;
+  if (children_hashable &&
+      E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)) {
+    try {
+      ++(*counter)[scalar];
+    } catch (const HashNotSupported &e) {
+      return false;
+    }
+    hashable->emplace(scalar);
+    return true;
+  }
+  return false;
+}
+
+E::ExpressionPtr ExtractCommonSubexpression::visitAndTransform(
+    const E::ExpressionPtr &expression,
+    const std::size_t max_reference_count,
+    const ScalarCounter &counter,
+    const ScalarHashable &hashable,
+    ScalarMap *substitution_map) {
+  // TODO: figure out whether it is beneficial.
+  if (expression->getExpressionType() == E::ExpressionType::kScalarLiteral ||
+      expression->getExpressionType() == 
E::ExpressionType::kAttributeReference) {
+    return expression;
+  }
+
+  E::ScalarPtr scalar;
+  const bool is_hashable =
+      E::SomeScalar::MatchesWithConditionalCast(expression, &scalar)
+          && hashable.find(scalar) != hashable.end();
+
+  std::size_t new_max_reference_count;
+  if (is_hashable) {
+    // CommonSubexpression node already generated.
+    const auto substitution_map_it = substitution_map->find(scalar);
+    if (substitution_map_it != substitution_map->end()) {
+      return substitution_map_it->second;
+    }
+
+    const auto counter_it = counter.find(scalar);
+    DCHECK(counter_it != counter.end());
+    DCHECK_LE(max_reference_count, counter_it->second);
+    new_max_reference_count = counter_it->second;
+  } else {
+    new_max_reference_count = max_reference_count;
+  }
+
+  std::vector<E::ExpressionPtr> new_children;
+  const auto homogeneous_whitelist_it =
+      
homogeneous_whitelist_.find(static_cast<int>(expression->getExpressionType()));
+  if (homogeneous_whitelist_it == homogeneous_whitelist_.end()) {
+    for (const auto &child : expression->children()) {
+      new_children.emplace_back(transformExpression(child));
+    }
+  } else {
+    for (const auto &child : expression->children()) {
+      new_children.emplace_back(
+          visitAndTransform(child,
+                            new_max_reference_count,
+                            counter,
+                            hashable,
+                            substitution_map));
+    }
+  }
+
+  E::ExpressionPtr output;
+  if (new_children == expression->children()) {
+    output = expression;
+  } else {
+    output = std::static_pointer_cast<const E::Scalar>(
+        expression->copyWithNewChildren(new_children));
+  }
+
+  if (is_hashable && new_max_reference_count > max_reference_count) {
+    DCHECK(E::SomeScalar::Matches(output));
+    const E::CommonSubexpressionPtr common_subexpression =
+        E::CommonSubexpression::Create(
+            optimizer_context_->nextExprId(),
+            std::static_pointer_cast<const E::Scalar>(output));
+    substitution_map->emplace(scalar, common_subexpression);
+    output = common_subexpression;
+  }
+
+  return output;
+}
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/ExtractCommonSubexpression.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ExtractCommonSubexpression.hpp 
b/query_optimizer/rules/ExtractCommonSubexpression.hpp
new file mode 100644
index 0000000..0b63b7e
--- /dev/null
+++ b/query_optimizer/rules/ExtractCommonSubexpression.hpp
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_EXTRACT_COMMON_SUBEXPRESSION_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_EXTRACT_COMMON_SUBEXPRESSION_HPP_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "query_optimizer/expressions/CommonSubexpression.hpp"
+#include "query_optimizer/expressions/Expression.hpp"
+#include "query_optimizer/expressions/Scalar.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/Rule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+class OptimizerContext;
+
+/** \addtogroup OptimizerRules
+ *  @{
+ */
+
+class ExtractCommonSubexpression : public Rule<physical::Physical> {
+ public:
+  /**
+   * @brief Constructor.
+   */
+  ExtractCommonSubexpression(OptimizerContext *optimizer_context);
+
+  ~ExtractCommonSubexpression() override {}
+
+  std::string getName() const override {
+    return "ExtractCommonSubexpression";
+  }
+
+  physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
+
+ private:
+  physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input);
+
+  struct ScalarHash {
+    inline std::size_t operator()(const expressions::ScalarPtr &scalar) const {
+      return scalar->hash();
+    }
+  };
+
+  struct ScalarEqual {
+    inline bool operator()(const expressions::ScalarPtr &lhs,
+                           const expressions::ScalarPtr &rhs) const {
+      return lhs->equals(rhs);
+    }
+  };
+
+  using ScalarCounter =
+      std::unordered_map<expressions::ScalarPtr, std::size_t, ScalarHash, 
ScalarEqual>;
+
+  using ScalarMap =
+      std::unordered_map<expressions::ScalarPtr,
+                         expressions::CommonSubexpressionPtr,
+                         ScalarHash,
+                         ScalarEqual>;
+
+  using ScalarHashable = std::unordered_set<expressions::ScalarPtr>;
+
+  std::vector<expressions::ExpressionPtr> transformExpressions(
+      const std::vector<expressions::ExpressionPtr> &expressions);
+
+  expressions::ExpressionPtr transformExpression(
+      const expressions::ExpressionPtr &expression);
+
+  bool visitAndCount(
+      const expressions::ExpressionPtr &expression,
+      ScalarCounter *counter,
+      ScalarHashable *hashable);
+
+  expressions::ExpressionPtr visitAndTransform(
+      const expressions::ExpressionPtr &expression,
+      const std::size_t max_reference_count,
+      const ScalarCounter &counter,
+      const ScalarHashable &hashable,
+      ScalarMap *substitution_map);
+
+  template <typename ScalarSubclassT>
+  static std::vector<expressions::ExpressionPtr> UpCast(
+      const std::vector<std::shared_ptr<const ScalarSubclassT>> &expressions) {
+    std::vector<expressions::ExpressionPtr> output;
+    for (const auto &expr : expressions) {
+      output.emplace_back(expr);
+    }
+    return output;
+  }
+
+  template <typename ScalarSubclassT>
+  static std::vector<std::shared_ptr<const ScalarSubclassT>> DownCast(
+      const std::vector<expressions::ExpressionPtr> &expressions) {
+    std::vector<std::shared_ptr<const ScalarSubclassT>> output;
+    for (const auto &expr : expressions) {
+      output.emplace_back(std::static_pointer_cast<const 
ScalarSubclassT>(expr));
+    }
+    return output;
+  }
+
+  OptimizerContext *optimizer_context_;
+  std::unordered_set<int> homogeneous_whitelist_;
+
+  DISALLOW_COPY_AND_ASSIGN(ExtractCommonSubexpression);
+};
+
+/** @} */
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_RULES_EXTRACT_COMMON_SUBEXPRESSION_HPP_

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/ReuseAggregateExpressions.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReuseAggregateExpressions.cpp 
b/query_optimizer/rules/ReuseAggregateExpressions.cpp
new file mode 100644
index 0000000..77dfd1e
--- /dev/null
+++ b/query_optimizer/rules/ReuseAggregateExpressions.cpp
@@ -0,0 +1,235 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/ReuseAggregateExpressions.hpp"
+
+#include <cstddef>
+#include <list>
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+#include "expressions/aggregation/AggregateFunction.hpp"
+#include "expressions/aggregation/AggregationID.hpp"
+#include "query_optimizer/expressions/AggregateFunction.hpp"
+#include "query_optimizer/expressions/Alias.hpp"
+#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/BinaryExpression.hpp"
+#include "query_optimizer/expressions/ExpressionType.hpp"
+#include "query_optimizer/expressions/ExpressionUtil.hpp"
+#include "query_optimizer/expressions/NamedExpression.hpp"
+#include "query_optimizer/expressions/Scalar.hpp"
+#include "query_optimizer/physical/Aggregate.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+#include "query_optimizer/physical/TopLevelPlan.hpp"
+#include "types/operations/binary_operations/BinaryOperation.hpp"
+#include "types/operations/binary_operations/BinaryOperationFactory.hpp"
+#include "types/operations/binary_operations/BinaryOperationID.hpp"
+#include "utility/EqualsAnyConstant.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+void ReuseAggregateExpressions::init(const P::PhysicalPtr &input) {
+  DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan);
+
+  const P::TopLevelPlanPtr top_level_plan =
+     std::static_pointer_cast<const P::TopLevelPlan>(input);
+  cost_model_.reset(
+      new cost::StarSchemaSimpleCostModel(top_level_plan->shared_subplans()));
+}
+
+P::PhysicalPtr ReuseAggregateExpressions::applyToNode(
+    const P::PhysicalPtr &input) {
+  P::AggregatePtr aggregate;
+  if (!P::SomeAggregate::MatchesWithConditionalCast(input, &aggregate)) {
+    return input;
+  }
+
+  const std::vector<E::AliasPtr> &agg_exprs = 
aggregate->aggregate_expressions();
+
+  std::unordered_map<E::ScalarPtr,
+                     std::map<AggregationID, std::vector<std::size_t>>,
+                     ScalarHash, ScalarEqual> agg_expr_info;
+
+  std::list<std::size_t> count_star_info;
+
+  for (std::size_t i = 0; i < agg_exprs.size(); ++i) {
+    DCHECK(agg_exprs[i]->expression()->getExpressionType()
+               == E::ExpressionType::kAggregateFunction);
+    const E::AggregateFunctionPtr agg_func =
+        std::static_pointer_cast<const E::AggregateFunction>(
+            agg_exprs[i]->expression());
+    const AggregationID agg_id = agg_func->getAggregate().getAggregationID();
+
+    // Currently we only consider aggregate functions with 0 or 1 argument.
+    const std::vector<E::ScalarPtr> &arguments = agg_func->getArguments();
+    if (agg_id == AggregationID::kCount) {
+      if (arguments.empty()) {
+        count_star_info.emplace_front(i);
+        continue;
+      } else if (!arguments.front()->getValueType().isNullable()) {
+        count_star_info.emplace_back(i);
+        continue;
+      }
+    }
+    if (arguments.size() == 1) {
+      agg_expr_info[arguments.front()][agg_id].emplace_back(i);
+    }
+  }
+
+  std::vector<std::unique_ptr<AggregateReference>> agg_refs(agg_exprs.size());
+
+  constexpr std::size_t kInvalidRef = static_cast<std::size_t>(-1);
+  std::size_t count_star_ref;
+
+  if (count_star_info.empty()) {
+    count_star_ref = kInvalidRef;
+  } else {
+    auto it = count_star_info.begin();
+    count_star_ref = *it;
+
+    for (++it; it != count_star_info.end(); ++it) {
+      agg_refs[*it].reset(new AggregateReference(count_star_ref));
+    }
+  }
+
+  for (const auto &it : agg_expr_info) {
+    const auto &ref_map = it.second;
+
+    // First, check whether AVG can be reduced to SUM/COUNT.
+    bool is_avg_processed = false;
+
+    const auto sum_it = ref_map.find(AggregationID::kSum);
+    const auto avg_it = ref_map.find(AggregationID::kAvg);
+
+    if (avg_it != ref_map.end() && sum_it != ref_map.end()) {
+      std::size_t count_ref = kInvalidRef;
+      if (it.first->getValueType().isNullable()) {
+        const auto count_it = ref_map.find(AggregationID::kCount);
+        if (count_it != ref_map.end()) {
+          DCHECK(!count_it->second.empty());
+          count_ref = count_it->second.front();
+        }
+      } else {
+        count_ref = count_star_ref;
+      }
+
+      if (count_ref != kInvalidRef) {
+        DCHECK(!sum_it->second.empty());
+        const std::size_t sum_ref = sum_it->second.front();
+        for (const std::size_t idx : avg_it->second) {
+          agg_refs[idx].reset(new AggregateReference(sum_ref, count_ref));
+        }
+        is_avg_processed = true;
+      }
+    }
+
+    // Then, eliminate duplicate aggregate expressions.
+    for (const auto &ref_it : ref_map) {
+      if (ref_it.first == AggregationID::kAvg && is_avg_processed) {
+        continue;
+      }
+
+      const auto &indices = ref_it.second;
+      DCHECK(!indices.empty());
+      const std::size_t ref = indices.front();
+      for (std::size_t i = 1; i < indices.size(); ++i) {
+        agg_refs[indices[i]].reset(new AggregateReference(ref));
+      }
+    }
+  }
+
+  bool need_transform = false;
+  for (const auto &agg_ref : agg_refs) {
+    if (agg_ref != nullptr) {
+      need_transform = true;
+      break;
+    }
+  }
+
+  if (!need_transform) {
+    return input;
+  }
+
+  const std::vector<E::AttributeReferencePtr> agg_attrs = 
E::ToRefVector(agg_exprs);
+
+  std::vector<E::AliasPtr> new_agg_exprs;
+  std::vector<E::NamedExpressionPtr> new_select_exprs;
+
+  for (std::size_t i = 0; i < agg_refs.size(); ++i) {
+    const auto &agg_ref = agg_refs[i];
+    const E::AliasPtr &agg_expr = agg_exprs[i];
+
+    if (agg_ref == nullptr) {
+      new_agg_exprs.emplace_back(agg_expr);
+      new_select_exprs.emplace_back(
+          E::AttributeReference::Create(agg_expr->id(),
+                                        agg_expr->attribute_name(),
+                                        agg_expr->attribute_alias(),
+                                        agg_expr->relation_name(),
+                                        agg_expr->getValueType(),
+                                        E::AttributeReferenceScope::kLocal));
+    } else {
+      switch (agg_ref->kind) {
+        case AggregateReference::kDirect: {
+          new_select_exprs.emplace_back(
+              E::Alias::Create(agg_expr->id(),
+                               agg_attrs[agg_ref->first_ref],
+                               agg_expr->attribute_name(),
+                               agg_expr->attribute_alias()));
+          break;
+        }
+        case AggregateReference::kAvg: {
+          const BinaryOperation &divide_op =
+              
BinaryOperationFactory::GetBinaryOperation(BinaryOperationID::kDivide);
+          const E::BinaryExpressionPtr avg_expr =
+              E::BinaryExpression::Create(divide_op,
+                                          agg_attrs[agg_ref->first_ref],
+                                          agg_attrs[agg_ref->second_ref]);
+          new_select_exprs.emplace_back(
+              E::Alias::Create(agg_expr->id(),
+                               avg_expr,
+                               agg_expr->attribute_name(),
+                               agg_expr->attribute_alias()));
+        }
+      }
+    }
+  }
+
+  const P::AggregatePtr new_aggregate =
+      P::Aggregate::Create(aggregate->input(),
+                           aggregate->grouping_expressions(),
+                           new_agg_exprs,
+                           aggregate->filter_predicate());
+
+  return P::Selection::Create(new_aggregate, new_select_exprs, nullptr);
+}
+
+
+}  // namespace optimizer
+}  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c27f5beb/query_optimizer/rules/ReuseAggregateExpressions.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/ReuseAggregateExpressions.hpp 
b/query_optimizer/rules/ReuseAggregateExpressions.hpp
new file mode 100644
index 0000000..0b18023
--- /dev/null
+++ b/query_optimizer/rules/ReuseAggregateExpressions.hpp
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_REUSE_AGGREGATE_EXPRESSIONS_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_REUSE_AGGREGATE_EXPRESSIONS_HPP_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
+#include "query_optimizer/expressions/Scalar.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/BottomUpRule.hpp"
+#include "utility/Macros.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+class OptimizerContext;
+
+/** \addtogroup OptimizerRules
+ *  @{
+ */
+
+class ReuseAggregateExpressions : public BottomUpRule<physical::Physical> {
+ public:
+  ReuseAggregateExpressions() {}
+
+  std::string getName() const override {
+    return "ReuseAggregateExpressions";
+  }
+
+ protected:
+  void init(const physical::PhysicalPtr &input) override;
+
+  physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &input) 
override;
+
+ private:
+  struct ScalarHash {
+    inline std::size_t operator()(const expressions::ScalarPtr &scalar) const {
+      return scalar->hash();
+    }
+  };
+
+  struct ScalarEqual {
+    inline bool operator()(const expressions::ScalarPtr &lhs,
+                           const expressions::ScalarPtr &rhs) const {
+      return lhs->equals(rhs);
+    }
+  };
+
+  struct AggregateReference {
+    enum Kind {
+      kDirect = 0,
+      kAvg
+    };
+
+    AggregateReference(const std::size_t ref)
+        : kind(kDirect), first_ref(ref), second_ref(0) {}
+
+    AggregateReference(const std::size_t sum_ref, const std::size_t count_ref)
+        : kind(kAvg), first_ref(sum_ref), second_ref(count_ref) {}
+
+    const Kind kind;
+    const std::size_t first_ref;
+    const std::size_t second_ref;
+  };
+
+  std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReuseAggregateExpressions);
+};
+
+}  // namespace optimizer
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_QUERY_OPTIMIZER_RULES_REUSE_AGGREGATE_EXPRESSIONS_HPP_

Reply via email to