Repository: incubator-quickstep Updated Branches: refs/heads/exact-filter d52b91265 -> 8b70d662e
Updates Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/8b70d662 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/8b70d662 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/8b70d662 Branch: refs/heads/exact-filter Commit: 8b70d662e46893a222372a0a8f2c4643fb3ccc48 Parents: d52b912 Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Thu Jan 26 22:34:11 2017 -0600 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Thu Jan 26 23:32:20 2017 -0600 ---------------------------------------------------------------------- query_optimizer/ExecutionGenerator.cpp | 7 ++ query_optimizer/LIPFilterGenerator.cpp | 2 - query_optimizer/LIPFilterGenerator.hpp | 2 + query_optimizer/PhysicalGenerator.cpp | 12 ++- .../cost_model/StarSchemaSimpleCostModel.cpp | 9 +- .../cost_model/StarSchemaSimpleCostModel.hpp | 103 +++++++++---------- query_optimizer/physical/FilterJoin.hpp | 37 ++++++- query_optimizer/rules/CMakeLists.txt | 2 +- query_optimizer/rules/InjectJoinFilters.cpp | 103 +++++++++++++------ query_optimizer/rules/InjectJoinFilters.hpp | 24 ++++- query_optimizer/tests/OptimizerTextTest.cpp | 2 + relational_operators/BuildLIPFilterOperator.cpp | 53 +++++++--- relational_operators/BuildLIPFilterOperator.hpp | 38 ++++++- relational_operators/CMakeLists.txt | 1 + relational_operators/WorkOrder.proto | 49 +++++---- relational_operators/WorkOrderFactory.cpp | 45 ++++++++ utility/lip_filter/BitVectorExactFilter.hpp | 12 ++- utility/lip_filter/LIPFilterDeployment.cpp | 2 - 18 files changed, 365 insertions(+), 138 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/ExecutionGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp index 8249beb..ce1452e 100644 --- a/query_optimizer/ExecutionGenerator.cpp +++ b/query_optimizer/ExecutionGenerator.cpp @@ -617,6 +617,10 @@ void ExecutionGenerator::convertFilterJoin(const P::FilterJoinPtr &physical_plan P::PhysicalPtr probe_physical = physical_plan->left(); P::PhysicalPtr build_physical = physical_plan->right(); + // Let B denote the build side child. If B is also a FilterJoin, then the + // actual "concrete" input relation is B's probe side child, and B's build + // side becomes a LIPFilter that is attached to the BuildLIPFilterOperator + // created below. P::FilterJoinPtr filter_join; if (P::SomeFilterJoin::MatchesWithConditionalCast(build_physical, &filter_join)) { build_physical = filter_join->left(); @@ -638,6 +642,9 @@ void ExecutionGenerator::convertFilterJoin(const P::FilterJoinPtr &physical_plan const CatalogRelationInfo *build_relation_info = findRelationInfoOutputByPhysical(build_physical); + // Create a BuildLIPFilterOperator for the FilterJoin. This operator builds + // LIP filters that are applied properly in downstream operators to achieve + // the filter-join semantics. const QueryPlan::DAGNodeIndex build_filter_operator_index = execution_plan_->addRelationalOperator( new BuildLIPFilterOperator( http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/LIPFilterGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/LIPFilterGenerator.cpp b/query_optimizer/LIPFilterGenerator.cpp index a80c261..ef984d4 100644 --- a/query_optimizer/LIPFilterGenerator.cpp +++ b/query_optimizer/LIPFilterGenerator.cpp @@ -30,8 +30,6 @@ #include "utility/lip_filter/LIPFilter.hpp" #include "utility/lip_filter/LIPFilter.pb.h" -#include "google/protobuf/text_format.h" - #include "glog/logging.h" namespace quickstep { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/LIPFilterGenerator.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/LIPFilterGenerator.hpp b/query_optimizer/LIPFilterGenerator.hpp index 43973cb..f6d931e 100644 --- a/query_optimizer/LIPFilterGenerator.hpp +++ b/query_optimizer/LIPFilterGenerator.hpp @@ -205,6 +205,8 @@ class LIPFilterGenerator { std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>, std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> lip_filter_builder_map_; + // Maps each relational operator's index to the attached LIPFilterDeployment's + // index and proto. std::map<QueryPlan::DAGNodeIndex, std::pair<int, serialization::LIPFilterDeployment *>> lip_filter_deployment_protos_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/PhysicalGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp index 9df9b9a..64545e0 100644 --- a/query_optimizer/PhysicalGenerator.cpp +++ b/query_optimizer/PhysicalGenerator.cpp @@ -27,10 +27,10 @@ #include "query_optimizer/logical/Logical.hpp" #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/rules/AttachLIPFilters.hpp" +#include "query_optimizer/rules/InjectJoinFilters.hpp" #include "query_optimizer/rules/PruneColumns.hpp" #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp" #include "query_optimizer/rules/SwapProbeBuild.hpp" -#include "query_optimizer/rules/InjectJoinFilters.hpp" #include "query_optimizer/strategy/Aggregate.hpp" #include "query_optimizer/strategy/Join.hpp" #include "query_optimizer/strategy/OneToOne.hpp" @@ -51,7 +51,13 @@ DEFINE_bool(reorder_hash_joins, true, "cardinality and selective tables to be joined first, which is suitable " "for queries on star-schema tables."); -DEFINE_bool(use_filter_join, true, "Transform HashJoin to FilterJoin."); +DEFINE_bool(use_filter_joins, true, + "If true, apply an optimization that strength-reduces HashJoins to " + "FilterJoins (implemented as LIPFilters attached to some anchoring " + "operators. Briefly speaking, in the case that the join attribute has " + "consecutive integer values bounded in a reasonably small range, we " + "build a BitVector on the build-side attribute and use the BitVector " + "to filter the probe side table."); DEFINE_bool(use_lip_filters, true, "If true, use LIP (Lookahead Information Passing) filters to accelerate " @@ -112,7 +118,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() { } else { rules.emplace_back(new SwapProbeBuild()); } - if (FLAGS_use_filter_join) { + if (FLAGS_use_filter_joins) { rules.emplace_back(new InjectJoinFilters()); } if (FLAGS_use_lip_filters) { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp index 8b91ee6..df0ead4 100644 --- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp +++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp @@ -21,7 +21,6 @@ #include <algorithm> #include <memory> -#include <unordered_map> #include <vector> #include "catalog/CatalogRelation.hpp" @@ -489,7 +488,8 @@ bool StarSchemaSimpleCostModel::impliesUniqueAttributes( TypedValue StarSchemaSimpleCostModel::findCatalogRelationStat( const P::PhysicalPtr &physical_plan, const E::ExprId attr_id, - const StatType stat_type) { + const StatType stat_type, + bool *is_exact_stat) { P::TableReferencePtr table_reference; if (P::SomeTableReference::MatchesWithConditionalCast(physical_plan, &table_reference)) { const attribute_id rel_attr_id = @@ -497,6 +497,11 @@ TypedValue StarSchemaSimpleCostModel::findCatalogRelationStat( if (rel_attr_id != kInvalidAttributeID) { const CatalogRelationStatistics &stat = table_reference->relation()->getStatistics(); + + if (is_exact_stat != nullptr) { + *is_exact_stat = stat.isExact(); + } + switch (stat_type) { case StatType::kMin: { if (stat.hasMinValue(rel_attr_id)) { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp index e65e353..48e8e22 100644 --- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp +++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp @@ -25,6 +25,7 @@ #include "catalog/CatalogTypedefs.hpp" #include "query_optimizer/cost_model/CostModel.hpp" +#include "query_optimizer/expressions/AttributeReference.hpp" #include "query_optimizer/expressions/ExprId.hpp" #include "query_optimizer/expressions/Predicate.hpp" #include "query_optimizer/physical/Aggregate.hpp" @@ -108,31 +109,63 @@ class StarSchemaSimpleCostModel : public CostModel { double estimateSelectivityForFilterPredicate( const physical::PhysicalPtr &physical_plan); + /** + * @brief Check whether a set of attributes are unique (i.e. have distinct + * values) for a relation. + * + * @param physical_plan The physical plan that corresponds to a relation. + * @param attributes The set of attributes to be checked. Note that each + * attribute in this set must be an output attribute of the physical + * plan. + * @return True if it is guaranteed that the attributes are unique; false + * otherwise. + */ bool impliesUniqueAttributes( const physical::PhysicalPtr &physical_plan, const std::vector<expressions::AttributeReferencePtr> &attributes); + /** + * @brief For a physical plan attribute, find its correponding catalog attribute's + * MIN statistic. Returns Null value if there is no corresponding catalog + * attribute for the physical plan attribute. + * + * @param physical_plan The physical plan. + * @param attribute The attribute. Must be an output attribute of the given + * physical plan. + * @param is_exact_stat If this pointer is not null, its pointed content will + * be modified by this method to indicate whether the returned statistic + * is EXACT for the stored relation (i.e. not outdated or estimated). + * @return The MIN statistic for the attribute. + */ TypedValue findMinValueStat( const physical::PhysicalPtr &physical_plan, - const expressions::AttributeReferencePtr &attribute) { + const expressions::AttributeReferencePtr &attribute, + bool *is_exact_stat = nullptr) { return findCatalogRelationStat( - physical_plan, attribute->id(), StatType::kMin); + physical_plan, attribute->id(), StatType::kMin, is_exact_stat); } + /** + * @brief For a physical plan attribute, find its correponding catalog attribute's + * MAX statistic. Returns Null value if there is no corresponding catalog + * attribute for the physical plan attribute. + * + * @param physical_plan The physical plan. + * @param attribute The attribute. Must be an output attribute of the given + * physical plan. + * @param is_exact_stat If this pointer is not null, its pointed content will + * be modified by this method to indicate whether the returned statistic + * is EXACT for the stored relation (i.e. not not outdated or estimated). + * @return The MAX statistic for the attribute. + */ TypedValue findMaxValueStat( const physical::PhysicalPtr &physical_plan, - const expressions::AttributeReferencePtr &attribute) { + const expressions::AttributeReferencePtr &attribute, + bool *is_exact_stat = nullptr) { return findCatalogRelationStat( - physical_plan, attribute->id(), StatType::kMax); + physical_plan, attribute->id(), StatType::kMax, is_exact_stat); } - template <typename CppType> - bool findMinMaxStatsCppValue( - const physical::PhysicalPtr &physical_plan, - const expressions::AttributeReferencePtr &attribute, - CppType *min_cpp_value, - CppType *max_cpp_value); - private: std::size_t estimateCardinalityForAggregate( const physical::AggregatePtr &physical_plan); @@ -180,11 +213,17 @@ class StarSchemaSimpleCostModel : public CostModel { kMin }; + // For a physical plan attribute, find its correponding catalog attribute's + // min/max statistics. Returns Null value if there is no corresponding catalog + // attribute for the physical plan attribute (e.g. the attribute is the result + // of an expression). TypedValue findCatalogRelationStat( const physical::PhysicalPtr &physical_plan, const expressions::ExprId expr_id, - const StatType stat_type); + const StatType stat_type, + bool *is_exact_stat = nullptr); + // For a table reference attribute, find its correponding catalog attribute. attribute_id findCatalogRelationAttributeId( const physical::TableReferencePtr &table_reference, const expressions::ExprId expr_id); @@ -192,46 +231,6 @@ class StarSchemaSimpleCostModel : public CostModel { DISALLOW_COPY_AND_ASSIGN(StarSchemaSimpleCostModel); }; -template <typename CppType> -bool StarSchemaSimpleCostModel::findMinMaxStatsCppValue( - const physical::PhysicalPtr &physical_plan, - const expressions::AttributeReferencePtr &attribute, - CppType *min_cpp_value, - CppType *max_cpp_value) { - const TypedValue min_value = - findMinValueStat(physical_plan, attribute); - const TypedValue max_value = - findMaxValueStat(physical_plan, attribute); - if (min_value.isNull() || max_value.isNull()) { - return false; - } - - switch (attribute->getValueType().getTypeID()) { - case TypeID::kInt: { - *min_cpp_value = min_value.getLiteral<int>(); - *max_cpp_value = max_value.getLiteral<int>(); - return true; - } - case TypeID::kLong: { - *min_cpp_value = min_value.getLiteral<std::int64_t>(); - *max_cpp_value = max_value.getLiteral<std::int64_t>(); - return true; - } - case TypeID::kFloat: { - *min_cpp_value = min_value.getLiteral<float>(); - *max_cpp_value = max_value.getLiteral<float>(); - return true; - } - case TypeID::kDouble: { - *min_cpp_value = min_value.getLiteral<double>(); - *max_cpp_value = max_value.getLiteral<double>(); - return true; - } - default: - return false; - } -} - /** @} */ } // namespace cost http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/physical/FilterJoin.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/FilterJoin.hpp b/query_optimizer/physical/FilterJoin.hpp index 3d3fc39..ad4e18b 100644 --- a/query_optimizer/physical/FilterJoin.hpp +++ b/query_optimizer/physical/FilterJoin.hpp @@ -20,7 +20,6 @@ #ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_JOIN_HPP_ #define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_JOIN_HPP_ -#include <cstddef> #include <memory> #include <string> #include <vector> @@ -28,6 +27,7 @@ #include "query_optimizer/OptimizerTree.hpp" #include "query_optimizer/expressions/AttributeReference.hpp" #include "query_optimizer/expressions/ExpressionUtil.hpp" +#include "query_optimizer/expressions/NamedExpression.hpp" #include "query_optimizer/expressions/Predicate.hpp" #include "query_optimizer/physical/BinaryJoin.hpp" #include "query_optimizer/physical/Physical.hpp" @@ -48,11 +48,18 @@ class FilterJoin; typedef std::shared_ptr<const FilterJoin> FilterJoinPtr; /** - * @brief Physical filter join node. + * @brief Physical filter join node. Semantically, FilterJoin is similar to + * HashJoin where the difference is that FilterJoin builds a bit vector + * instead of a hash table. + * + * @note FilterJoin's backend execution relies on LIPFilter injection (attach + * the bit vectors as filters into downstream relational operators). */ class FilterJoin : public BinaryJoin { public: - PhysicalType getPhysicalType() const override { return PhysicalType::kFilterJoin; } + PhysicalType getPhysicalType() const override { + return PhysicalType::kFilterJoin; + } std::string getName() const override { if (is_anti_join_) { @@ -62,18 +69,30 @@ class FilterJoin : public BinaryJoin { } } + /** + * @return The probe side attributes. + */ const std::vector<expressions::AttributeReferencePtr>& probe_attributes() const { return probe_attributes_; } + /** + * @return The build side attributes. + */ const std::vector<expressions::AttributeReferencePtr>& build_attributes() const { return build_attributes_; } + /** + * @return The build side filter predicate. + */ const expressions::PredicatePtr& build_side_filter_predicate() const { return build_side_filter_predicate_; } + /** + * @return Whether this is an anti-join. + */ const bool is_anti_join() const { return is_anti_join_; } @@ -96,6 +115,18 @@ class FilterJoin : public BinaryJoin { const expressions::UnorderedNamedExpressionSet &referenced_expressions, PhysicalPtr *output) const override; + /** + * @brief Creates a physical FilterJoin. + * @param probe_child The probe side child plan. + * @param build_child The build side child plan. + * @param probe_attributes The probe side attributes. + * @param build_attributes The build side attributes. + * @param project_expressions The project expressions. + * @param build_side_filter_predicate Optional filtering predicate to be + * applied to the build side child BEFORE join. + * @param is_anti_join Whether this is an anti-join. + * @return An immutable physical FilterJoin. + */ static FilterJoinPtr Create( const PhysicalPtr &probe_child, const PhysicalPtr &build_child, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/rules/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt index b4d0f20..9846859 100644 --- a/query_optimizer/rules/CMakeLists.txt +++ b/query_optimizer/rules/CMakeLists.txt @@ -164,8 +164,8 @@ target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule target_link_libraries(quickstep_queryoptimizer_rules_InjectJoinFilters quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel quickstep_queryoptimizer_expressions_AttributeReference - quickstep_queryoptimizer_expressions_ExprId quickstep_queryoptimizer_expressions_ExpressionUtil + quickstep_queryoptimizer_expressions_Predicate quickstep_queryoptimizer_physical_Aggregate quickstep_queryoptimizer_physical_FilterJoin quickstep_queryoptimizer_physical_HashJoin http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/rules/InjectJoinFilters.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/InjectJoinFilters.cpp b/query_optimizer/rules/InjectJoinFilters.cpp index 3d35382..f77d532 100644 --- a/query_optimizer/rules/InjectJoinFilters.cpp +++ b/query_optimizer/rules/InjectJoinFilters.cpp @@ -26,6 +26,7 @@ #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" #include "query_optimizer/expressions/AttributeReference.hpp" #include "query_optimizer/expressions/ExpressionUtil.hpp" +#include "query_optimizer/expressions/Predicate.hpp" #include "query_optimizer/physical/LIPFilterConfiguration.hpp" #include "query_optimizer/physical/Aggregate.hpp" #include "query_optimizer/physical/FilterJoin.hpp" @@ -58,11 +59,21 @@ P::PhysicalPtr InjectJoinFilters::apply(const P::PhysicalPtr &input) { top_level_plan->shared_subplans())); lip_filter_configuration_.reset(new P::LIPFilterConfiguration()); + // Step 1. Transform applicable HashJoin nodes to FilterJoin nodes. P::PhysicalPtr output = transformHashJoinToFilters(input); + + // Step 2. Push down FilterJoin nodes to be evaluated early. output = pushDownFilters(output); + + // Step 3. Add Selection nodes for attaching the LIPFilters, if necessary. output = addFilterAnchors(output, false); + + // Step 4. Because of the pushdown of FilterJoin nodes, there are optimization + // opportunities for projecting columns early. output = PruneColumns().apply(output); + // Step 5. For each FilterJoin node, attach its corresponding LIPFilter to + // proper nodes. concretizeAsLIPFilters(output, nullptr); if (!lip_filter_configuration_->getBuildInfoMap().empty() || @@ -77,18 +88,24 @@ P::PhysicalPtr InjectJoinFilters::apply(const P::PhysicalPtr &input) { bool InjectJoinFilters::isTransformable( const physical::HashJoinPtr &hash_join) const { + // Conditions for replacing a HashJoin with a FilterJoin: + + // No residual predicate. if (hash_join->residual_predicate() != nullptr) { return false; } + // Single attribute equi-join. if (hash_join->right_join_attributes().size() > 1) { return false; } + // All the output attributes must be from the probe side. if (!E::SubsetOfExpressions(hash_join->getOutputAttributes(), hash_join->left()->getOutputAttributes())) { return false; } switch (hash_join->join_type()) { case P::HashJoin::JoinType::kInnerJoin: { + // In the case of inner join, the build side join attributes must be unique. if (!cost_model_->impliesUniqueAttributes(hash_join->right(), hash_join->right_join_attributes())) { return false; @@ -102,14 +119,16 @@ bool InjectJoinFilters::isTransformable( return false; } + // The build side join attribute has integer type and its values are exactly + // within a reasonable range. std::int64_t min_cpp_value; std::int64_t max_cpp_value; - const bool has_min_max_stats = - findMinMaxValuesForAttributeHelper(hash_join->right(), - hash_join->right_join_attributes().front(), - &min_cpp_value, - &max_cpp_value); - if (!has_min_max_stats) { + const bool has_exact_min_max_stats = + findExactMinMaxValuesForAttributeHelper(hash_join->right(), + hash_join->right_join_attributes().front(), + &min_cpp_value, + &max_cpp_value); + if (!has_exact_min_max_stats) { return false; } @@ -255,15 +274,26 @@ physical::PhysicalPtr InjectJoinFilters::addFilterAnchors( addFilterAnchors(selection->input(), true)); break; } -// case P::PhysicalType::kHashJoin: { -// const P::HashJoinPtr &hash_join = -// std::static_pointer_cast<const P::HashJoin>(input); -// new_children.emplace_back( -// addFilterAnchors(hash_join->left(), true)); -// new_children.emplace_back( -// addFilterAnchors(hash_join->right(), false)); -// break; -// } + // NOTE(jianqiao): Some of the SSB/TPCH queries slow down significantly if + // we attach converted filters to parent HashJoin nodes. E.g. one HashJoin + + // one attached LIPFilter is slower than the original two HashJoins. This is + // due to some implementation issues with the current HashJoinOperator. So + // currently we disable the anchoring of filters to HashJoin nodes. That is, + // in the case that a FilterJoin's parent node (or ancestor node, if there + // is a chain of FilterJoins) is a HashJoin, we create an extra Selection + // before the parent HashJoin as anchoring node to attach the filters. This + // guarantees non-degrading performance. + /* + case P::PhysicalType::kHashJoin: { + const P::HashJoinPtr &hash_join = + std::static_pointer_cast<const P::HashJoin>(input); + new_children.emplace_back( + addFilterAnchors(hash_join->left(), true)); + new_children.emplace_back( + addFilterAnchors(hash_join->right(), false)); + break; + } + */ case P::PhysicalType::kFilterJoin: { const P::FilterJoinPtr &filter_join = std::static_pointer_cast<const P::FilterJoin>(input); @@ -314,13 +344,17 @@ void InjectJoinFilters::concretizeAsLIPFilters( concretizeAsLIPFilters(selection->input(), selection); break; } -// case P::PhysicalType::kHashJoin: { -// const P::HashJoinPtr &hash_join = -// std::static_pointer_cast<const P::HashJoin>(input); -// concretizeAsLIPFilters(hash_join->left(), hash_join); -// concretizeAsLIPFilters(hash_join->right(), nullptr); -// break; -// } + // Currently we disable the attachment of filters to HashJoin nodes. See the + // comments in InjectJoinFilters::addFilterAnchors(). + /* + case P::PhysicalType::kHashJoin: { + const P::HashJoinPtr &hash_join = + std::static_pointer_cast<const P::HashJoin>(input); + concretizeAsLIPFilters(hash_join->left(), hash_join); + concretizeAsLIPFilters(hash_join->right(), nullptr); + break; + } + */ case P::PhysicalType::kFilterJoin: { const P::FilterJoinPtr &filter_join = std::static_pointer_cast<const P::FilterJoin>(input); @@ -330,12 +364,12 @@ void InjectJoinFilters::concretizeAsLIPFilters( std::int64_t min_cpp_value; std::int64_t max_cpp_value; - const bool has_min_max_stats = - findMinMaxValuesForAttributeHelper(filter_join, - build_attr, - &min_cpp_value, - &max_cpp_value); - DCHECK(has_min_max_stats); + const bool has_exact_min_max_stats = + findExactMinMaxValuesForAttributeHelper(filter_join, + build_attr, + &min_cpp_value, + &max_cpp_value); + DCHECK(has_exact_min_max_stats); DCHECK_GE(min_cpp_value, 0); DCHECK_GE(max_cpp_value, 0); DCHECK_LE(max_cpp_value, kMaxFilterSize); @@ -365,16 +399,20 @@ void InjectJoinFilters::concretizeAsLIPFilters( } } -bool InjectJoinFilters::findMinMaxValuesForAttributeHelper( +bool InjectJoinFilters::findExactMinMaxValuesForAttributeHelper( const physical::PhysicalPtr &physical_plan, const expressions::AttributeReferencePtr &attribute, std::int64_t *min_cpp_value, std::int64_t *max_cpp_value) const { + bool min_value_is_exact; + bool max_value_is_exact; + const TypedValue min_value = - cost_model_->findMinValueStat(physical_plan, attribute); + cost_model_->findMinValueStat(physical_plan, attribute, &min_value_is_exact); const TypedValue max_value = - cost_model_->findMaxValueStat(physical_plan, attribute); - if (min_value.isNull() || max_value.isNull()) { + cost_model_->findMaxValueStat(physical_plan, attribute, &max_value_is_exact); + if (min_value.isNull() || max_value.isNull() || + (!min_value_is_exact) || (!max_value_is_exact)) { return false; } @@ -394,6 +432,5 @@ bool InjectJoinFilters::findMinMaxValuesForAttributeHelper( } } - } // namespace optimizer } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/rules/InjectJoinFilters.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/InjectJoinFilters.hpp b/query_optimizer/rules/InjectJoinFilters.hpp index 0eaebdc..21e3779 100644 --- a/query_optimizer/rules/InjectJoinFilters.hpp +++ b/query_optimizer/rules/InjectJoinFilters.hpp @@ -23,11 +23,9 @@ #include <cstdint> #include <memory> #include <string> -#include <vector> #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" #include "query_optimizer/expressions/AttributeReference.hpp" -#include "query_optimizer/expressions/ExprId.hpp" #include "query_optimizer/physical/LIPFilterConfiguration.hpp" #include "query_optimizer/physical/FilterJoin.hpp" #include "query_optimizer/physical/HashJoin.hpp" @@ -42,6 +40,18 @@ namespace optimizer { * @{ */ +/** + * @brief Rule that applies to a physical plan to transform HashJoin nodes into + * FilterJoin nodes. + * + * This is an optimization that strength-reduces HashJoins to FilterJoins + * (implemented as LIPFilters attached to some anchoring operators where the + * filters get applied). Briefly speaking, the idea is that in the case that + * (1) the join attribute has consecutive integer values bounded in a reasonably + * small range AND (2) the output attributes are all from the probe-side table, + * we can eliminate the HashJoin by building a BitVector on the build-side + * attribute and using the BitVector to filter the probe-side table. + */ class InjectJoinFilters : public Rule<physical::Physical> { public: /** @@ -58,16 +68,22 @@ class InjectJoinFilters : public Rule<physical::Physical> { physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override; private: + // Check whether a HashJoin can be transformed into a FilterJoin. bool isTransformable(const physical::HashJoinPtr &hash_join) const; + // Transform applicable HashJoin nodes into FilterJoin nodes. physical::PhysicalPtr transformHashJoinToFilters( const physical::PhysicalPtr &input) const; + // Push down FilterJoin nodes to be evaluated early. physical::PhysicalPtr pushDownFilters(const physical::PhysicalPtr &input) const; + // Add Selection node, if necessary, for anchoring the LIP filters built by + // FilterJoin nodes. physical::PhysicalPtr addFilterAnchors(const physical::PhysicalPtr &input, const bool ancestor_can_anchor_filter) const; + // Setup lip_filter_configuration_ with the transformed plan tree. void concretizeAsLIPFilters(const physical::PhysicalPtr &input, const physical::PhysicalPtr &anchor_node) const; @@ -76,7 +92,7 @@ class InjectJoinFilters : public Rule<physical::Physical> { const physical::PhysicalPtr &build_child, const physical::FilterJoinPtr &filter_join) const; - bool findMinMaxValuesForAttributeHelper( + bool findExactMinMaxValuesForAttributeHelper( const physical::PhysicalPtr &physical_plan, const expressions::AttributeReferencePtr &attribute, std::int64_t *min_cpp_value, @@ -86,7 +102,7 @@ class InjectJoinFilters : public Rule<physical::Physical> { std::unique_ptr<physical::LIPFilterConfiguration> lip_filter_configuration_; // 1G bits = 128MB - static constexpr std::int64_t kMaxFilterSize = 1000000000; + static constexpr std::int64_t kMaxFilterSize = 1000000000L; DISALLOW_COPY_AND_ASSIGN(InjectJoinFilters); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/query_optimizer/tests/OptimizerTextTest.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/tests/OptimizerTextTest.cpp b/query_optimizer/tests/OptimizerTextTest.cpp index 759c173..c633705 100644 --- a/query_optimizer/tests/OptimizerTextTest.cpp +++ b/query_optimizer/tests/OptimizerTextTest.cpp @@ -33,6 +33,7 @@ namespace optimizer { DECLARE_bool(reorder_hash_joins); DECLARE_bool(use_lip_filters); +DECLARE_bool(use_filter_joins); } } @@ -62,6 +63,7 @@ int main(int argc, char** argv) { // it is up to change and affects a large number of test cases. quickstep::optimizer::FLAGS_reorder_hash_joins = false; quickstep::optimizer::FLAGS_use_lip_filters = false; + quickstep::optimizer::FLAGS_use_filter_joins = false; ::testing::InitGoogleTest(&argc, argv); int success = RUN_ALL_TESTS(); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/relational_operators/BuildLIPFilterOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/BuildLIPFilterOperator.cpp b/relational_operators/BuildLIPFilterOperator.cpp index 34df385..f7c09cd 100644 --- a/relational_operators/BuildLIPFilterOperator.cpp +++ b/relational_operators/BuildLIPFilterOperator.cpp @@ -58,18 +58,19 @@ bool BuildLIPFilterOperator::getAllWorkOrders( if (!started_) { for (const block_id input_block_id : input_relation_block_ids_) { container->addNormalWorkOrder( - new BuildLIPFilterWorkOrder(query_id_, - input_relation_, - input_block_id, - build_side_predicate, - storage_manager, - CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context), - CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)), + new BuildLIPFilterWorkOrder( + query_id_, + input_relation_, + input_block_id, + build_side_predicate, + storage_manager, + CreateLIPFilterAdaptiveProberHelper(lip_deployment_index_, query_context), + CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)), op_index_); } started_ = true; } - return started_; + return true; } else { while (num_workorders_generated_ < input_relation_block_ids_.size()) { container->addNormalWorkOrder( @@ -89,19 +90,44 @@ bool BuildLIPFilterOperator::getAllWorkOrders( } bool BuildLIPFilterOperator::getAllWorkOrderProtos(WorkOrderProtosContainer *container) { - // TODO - return true; + if (input_relation_is_stored_) { + if (!started_) { + for (const block_id block : input_relation_block_ids_) { + container->addWorkOrderProto(createWorkOrderProto(block), op_index_); + } + started_ = true; + } + return true; + } else { + while (num_workorders_generated_ < input_relation_block_ids_.size()) { + container->addWorkOrderProto( + createWorkOrderProto(input_relation_block_ids_[num_workorders_generated_]), + op_index_); + ++num_workorders_generated_; + } + return done_feeding_input_relation_; + } } serialization::WorkOrder* BuildLIPFilterOperator::createWorkOrderProto(const block_id block) { - // TODO - return nullptr; + serialization::WorkOrder *proto = new serialization::WorkOrder; + proto->set_work_order_type(serialization::BUILD_LIP_FILTER); + proto->set_query_id(query_id_); + + proto->SetExtension(serialization::BuildLIPFilterWorkOrder::relation_id, input_relation_.getID()); + proto->SetExtension(serialization::BuildLIPFilterWorkOrder::build_block_id, block); + proto->SetExtension(serialization::BuildLIPFilterWorkOrder::build_side_predicate_index, + build_side_predicate_index_); + proto->SetExtension(serialization::BuildLIPFilterWorkOrder::lip_deployment_index, lip_deployment_index_); + + return proto; } void BuildLIPFilterWorkOrder::execute() { BlockReference block( storage_manager_->getBlock(build_block_id_, input_relation_)); + // Apply the predicate first. std::unique_ptr<TupleIdSequence> predicate_matches; if (build_side_predicate_ != nullptr) { predicate_matches.reset(block->getMatchesForPredicate(build_side_predicate_)); @@ -111,6 +137,9 @@ void BuildLIPFilterWorkOrder::execute() { block->getTupleStorageSubBlock().createValueAccessor(predicate_matches.get())); if (lip_filter_adaptive_prober_ != nullptr) { + // Probe the LIP filters if there are any. Note that the LIP filters to be + // probed are for filtering the input relation. They are distinct from the + // target LIP filters we are building. std::unique_ptr<TupleIdSequence> matches( lip_filter_adaptive_prober_->filterValueAccessor(accessor.get())); std::unique_ptr<ValueAccessor> filtered_accessor( http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/relational_operators/BuildLIPFilterOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/BuildLIPFilterOperator.hpp b/relational_operators/BuildLIPFilterOperator.hpp index fe8a0fb..02c2d0e 100644 --- a/relational_operators/BuildLIPFilterOperator.hpp +++ b/relational_operators/BuildLIPFilterOperator.hpp @@ -20,9 +20,9 @@ #ifndef QUICKSTEP_RELATIONAL_OPERATORS_BUILD_LIP_FILTER_OPERATOR_HPP_ #define QUICKSTEP_RELATIONAL_OPERATORS_BUILD_LIP_FILTER_OPERATOR_HPP_ +#include <cstddef> #include <memory> #include <string> -#include <utility> #include <vector> #include "catalog/CatalogRelation.hpp" @@ -56,10 +56,26 @@ namespace serialization { class WorkOrder; } */ /** - * @brief An operator which builds a LIPFilter on one relation. + * @brief An operator which builds LIPFilters on one relation. **/ class BuildLIPFilterOperator : public RelationalOperator { public: + /** + * @brief Constructor. + * + * @note The LIPFilters' information are not passed explicitly as parameters + * to this constructor, but attached later via RelationalOperator::deployLIPFilters(). + * + * @param query_id The ID of the query to which this operator belongs. + * @param input_relation The relation to build LIP filters on. + * @param build_side_predicate_index The index of the predicate in QueryContext + * where the predicate is to be applied to the input relation before + * building the LIP filters (or kInvalidPredicateId if no predicate is + * to be applied). + * @param input_relation_is_stored If input_relation is a stored relation and + * is fully available to the operator before it can start generating + * workorders. + **/ BuildLIPFilterOperator(const std::size_t query_id, const CatalogRelation &input_relation, const QueryContext::predicate_id build_side_predicate_index, @@ -75,6 +91,9 @@ class BuildLIPFilterOperator : public RelationalOperator { ~BuildLIPFilterOperator() override {} + /** + * @return The input relation to build LIP filters on. + */ const CatalogRelation& input_relation() const { return input_relation_; } @@ -118,10 +137,23 @@ class BuildLIPFilterOperator : public RelationalOperator { }; /** - * @brief A WorkOrder produced by BuildLIPFilterOperator + * @brief A WorkOrder produced by BuildLIPFilterOperator. **/ class BuildLIPFilterWorkOrder : public WorkOrder { public: + /** + * @brief Constructor. + * + * @param query_id The ID of the query to which this WorkOrder belongs. + * @param input_relation The relation to build LIP filters on. + * @param build_block_id The block id. + * @param build_side_predicate The predicate to be applied to filter the input + * relation before building the LIP filters (or nullptr if no predicate + * is to be applied). + * @param storage_manager The StorageManager to use. + * @param lip_filter_adaptive_prober The attached LIP filter prober. + * @param lip_filter_builder The attached LIP filter builder. + **/ BuildLIPFilterWorkOrder(const std::size_t query_id, const CatalogRelationSchema &input_relation, const block_id build_block_id, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/relational_operators/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt index 85887b1..bbded80 100644 --- a/relational_operators/CMakeLists.txt +++ b/relational_operators/CMakeLists.txt @@ -507,6 +507,7 @@ target_link_libraries(quickstep_relationaloperators_WorkOrderFactory quickstep_queryexecution_QueryContext quickstep_relationaloperators_AggregationOperator quickstep_relationaloperators_BuildHashOperator + quickstep_relationaloperators_BuildLIPFilterOperator quickstep_relationaloperators_DeleteOperator quickstep_relationaloperators_DestroyAggregationStateOperator quickstep_relationaloperators_DestroyHashOperator http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/relational_operators/WorkOrder.proto ---------------------------------------------------------------------- diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto index f8d9246..76753d2 100644 --- a/relational_operators/WorkOrder.proto +++ b/relational_operators/WorkOrder.proto @@ -24,25 +24,26 @@ import "relational_operators/SortMergeRunOperator.proto"; enum WorkOrderType { AGGREGATION = 1; BUILD_HASH = 2; - CREATE_INDEX = 3; // Placeholder. - CREATE_TABLE = 4; // Placeholder. - DELETE = 5; - DESTROY_HASH = 6; - DROP_TABLE = 7; - FINALIZE_AGGREGATION = 8; - HASH_JOIN = 9; - INSERT = 10; - NESTED_LOOP_JOIN = 11; - SAMPLE = 12; - SAVE_BLOCKS = 13; - SELECT = 14; - SORT_MERGE_RUN = 15; - SORT_RUN_GENERATION = 16; - TABLE_GENERATOR = 17; - TEXT_SCAN = 18; - UPDATE = 19; - WINDOW_AGGREGATION = 20; - DESTROY_AGGREGATION_STATE = 21; + BUILD_LIP_FILTER = 3; + CREATE_INDEX = 4; // Placeholder. + CREATE_TABLE = 5; // Placeholder. + DELETE = 6; + DESTROY_HASH = 7; + DROP_TABLE = 8; + FINALIZE_AGGREGATION = 9; + HASH_JOIN = 10; + INSERT = 11; + NESTED_LOOP_JOIN = 12; + SAMPLE = 13; + SAVE_BLOCKS = 14; + SELECT = 15; + SORT_MERGE_RUN = 16; + SORT_RUN_GENERATION = 17; + TABLE_GENERATOR = 18; + TEXT_SCAN = 19; + UPDATE = 20; + WINDOW_AGGREGATION = 21; + DESTROY_AGGREGATION_STATE = 22; } message WorkOrder { @@ -77,6 +78,16 @@ message BuildHashWorkOrder { } } +message BuildLIPFilterWorkOrder { + extend WorkOrder { + // All required. + optional int32 relation_id = 48; + optional fixed64 build_block_id = 49; + optional int32 build_side_predicate_index = 50; + optional int32 lip_deployment_index = 51; + } +} + message DeleteWorkOrder { extend WorkOrder { // All required. http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/relational_operators/WorkOrderFactory.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp index a6cba02..5e8d03d 100644 --- a/relational_operators/WorkOrderFactory.cpp +++ b/relational_operators/WorkOrderFactory.cpp @@ -30,6 +30,7 @@ #include "query_execution/QueryContext.hpp" #include "relational_operators/AggregationOperator.hpp" #include "relational_operators/BuildHashOperator.hpp" +#include "relational_operators/BuildLIPFilterOperator.hpp" #include "relational_operators/DeleteOperator.hpp" #include "relational_operators/DestroyAggregationStateOperator.hpp" #include "relational_operators/DestroyHashOperator.hpp" @@ -90,6 +91,23 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder CreateLIPFilterAdaptiveProberHelper( proto.GetExtension(serialization::AggregationWorkOrder::lip_deployment_index), query_context)); } + case serialization::BUILD_LIP_FILTER: { + LOG(INFO) << "Creating BuildLIPFilterWorkOrder in Shiftboss " << shiftboss_index; + + const QueryContext::lip_deployment_id lip_deployment_index = + proto.GetExtension(serialization::BuildLIPFilterWorkOrder::lip_deployment_index); + + return new BuildLIPFilterWorkOrder( + proto.query_id(), + catalog_database->getRelationSchemaById( + proto.GetExtension(serialization::BuildLIPFilterWorkOrder::relation_id)), + proto.GetExtension(serialization::BuildLIPFilterWorkOrder::build_block_id), + query_context->getPredicate( + proto.GetExtension(serialization::BuildLIPFilterWorkOrder::build_side_predicate_index)), + storage_manager, + CreateLIPFilterAdaptiveProberHelper(lip_deployment_index, query_context), + CreateLIPFilterBuilderHelper(lip_deployment_index, query_context)); + } case serialization::BUILD_HASH: { LOG(INFO) << "Creating BuildHashWorkOrder in Shiftboss " << shiftboss_index; vector<attribute_id> join_key_attributes; @@ -541,6 +559,33 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto, proto.GetExtension(serialization::BuildHashWorkOrder::join_hash_table_index), proto.GetExtension(serialization::BuildHashWorkOrder::partition_id)); } + case serialization::BUILD_LIP_FILTER: { + if (!proto.HasExtension(serialization::BuildLIPFilterWorkOrder::relation_id)) { + return false; + } + + const relation_id rel_id = + proto.GetExtension(serialization::BuildLIPFilterWorkOrder::relation_id); + if (!catalog_database.hasRelationWithId(rel_id)) { + return false; + } + + if (!proto.HasExtension(serialization::BuildLIPFilterWorkOrder::lip_deployment_index)) { + return false; + } else { + const QueryContext::lip_deployment_id lip_deployment_index = + proto.GetExtension(serialization::BuildLIPFilterWorkOrder::lip_deployment_index); + if (lip_deployment_index != QueryContext::kInvalidLIPDeploymentId && + !query_context.isValidLIPDeploymentId(lip_deployment_index)) { + return false; + } + } + + return proto.HasExtension(serialization::BuildLIPFilterWorkOrder::build_block_id) && + proto.HasExtension(serialization::BuildLIPFilterWorkOrder::build_side_predicate_index) && + query_context.isValidPredicate( + proto.GetExtension(serialization::BuildLIPFilterWorkOrder::build_side_predicate_index)); + } case serialization::DELETE: { return proto.HasExtension(serialization::DeleteWorkOrder::relation_id) && catalog_database.hasRelationWithId( http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/utility/lip_filter/BitVectorExactFilter.hpp ---------------------------------------------------------------------- diff --git a/utility/lip_filter/BitVectorExactFilter.hpp b/utility/lip_filter/BitVectorExactFilter.hpp index 15c8f0b..0964a8e 100644 --- a/utility/lip_filter/BitVectorExactFilter.hpp +++ b/utility/lip_filter/BitVectorExactFilter.hpp @@ -43,13 +43,21 @@ namespace quickstep { * @{ */ +/** + * @brief A LIP filter that tests the EXACT memberships of elements, i.e. there + * will be neither false positives nor false negatives. The implementation + * is to simply reinterpret_cast a value's byte stream into the specified + * CppType as the underlying bit vector's index. Therefore, to use this + * filter, the corresponding LIP attribute's values must be bounded in a + * reasonably small integer range. + */ template <typename CppType, bool is_anti_filter> class BitVectorExactFilter : public LIPFilter { public: /** * @brief Constructor. * - * @param filter_cardinality The cardinality of this hash filter. + * @param filter_cardinality The cardinality of this bit vector. */ explicit BitVectorExactFilter(const std::size_t filter_cardinality) : LIPFilter(LIPFilterType::kBitVectorExactFilter), @@ -99,7 +107,7 @@ class BitVectorExactFilter : public LIPFilter { * @brief Round up bit_size to multiples of 8. */ inline static std::size_t GetByteSize(const std::size_t bit_size) { - return (bit_size + 7) / 8; + return (bit_size + 7u) / 8u; } /** http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/8b70d662/utility/lip_filter/LIPFilterDeployment.cpp ---------------------------------------------------------------------- diff --git a/utility/lip_filter/LIPFilterDeployment.cpp b/utility/lip_filter/LIPFilterDeployment.cpp index bbb6dd6..5721496 100644 --- a/utility/lip_filter/LIPFilterDeployment.cpp +++ b/utility/lip_filter/LIPFilterDeployment.cpp @@ -28,8 +28,6 @@ #include "utility/lip_filter/LIPFilterBuilder.hpp" #include "utility/lip_filter/LIPFilterAdaptiveProber.hpp" -#include "glog/logging.h" - namespace quickstep { LIPFilterDeployment::LIPFilterDeployment(