Add BitVectorExactFilter as a LIP filter and supports Join-to-Semijoin transformation.
Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/70624122 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/70624122 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/70624122 Branch: refs/heads/exact-filter Commit: 70624122fdb291dbe4d5adadd1224172a7d628d4 Parents: 670e2c0 Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Thu Oct 27 14:16:32 2016 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Sat Dec 17 12:58:01 2016 -0600 ---------------------------------------------------------------------- query_optimizer/CMakeLists.txt | 4 + query_optimizer/ExecutionGenerator.cpp | 55 +++ query_optimizer/ExecutionGenerator.hpp | 8 + query_optimizer/LIPFilterGenerator.cpp | 86 ++-- query_optimizer/LIPFilterGenerator.hpp | 47 ++- query_optimizer/PhysicalGenerator.cpp | 6 + query_optimizer/cost_model/CMakeLists.txt | 5 + query_optimizer/cost_model/SimpleCostModel.cpp | 9 + query_optimizer/cost_model/SimpleCostModel.hpp | 5 + .../cost_model/StarSchemaSimpleCostModel.cpp | 156 +++++++- .../cost_model/StarSchemaSimpleCostModel.hpp | 84 ++++ query_optimizer/expressions/ExpressionUtil.hpp | 8 +- query_optimizer/physical/CMakeLists.txt | 14 + query_optimizer/physical/FilterInjection.cpp | 115 ++++++ query_optimizer/physical/FilterInjection.hpp | 157 ++++++++ .../physical/LIPFilterConfiguration.hpp | 65 ++- query_optimizer/physical/PatternMatcher.hpp | 2 + query_optimizer/physical/PhysicalType.hpp | 1 + query_optimizer/physical/TopLevelPlan.hpp | 3 +- query_optimizer/rules/AttachLIPFilters.cpp | 14 +- query_optimizer/rules/CMakeLists.txt | 22 + query_optimizer/rules/InjectJoinFilters.cpp | 399 +++++++++++++++++++ query_optimizer/rules/InjectJoinFilters.hpp | 99 +++++ relational_operators/BuildLIPFilterOperator.cpp | 125 ++++++ relational_operators/BuildLIPFilterOperator.hpp | 171 ++++++++ relational_operators/CMakeLists.txt | 23 ++ utility/CMakeLists.txt | 1 + utility/PlanVisualizer.cpp | 20 +- utility/lip_filter/BitVectorExactFilter.hpp | 180 +++++++++ utility/lip_filter/CMakeLists.txt | 11 + utility/lip_filter/LIPFilter.hpp | 2 +- utility/lip_filter/LIPFilter.proto | 20 +- utility/lip_filter/LIPFilterDeployment.cpp | 70 ++-- utility/lip_filter/LIPFilterDeployment.hpp | 28 +- utility/lip_filter/LIPFilterFactory.cpp | 46 +++ 35 files changed, 1941 insertions(+), 120 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt index 10c52a1..64c3a01 100644 --- a/query_optimizer/CMakeLists.txt +++ b/query_optimizer/CMakeLists.txt @@ -98,6 +98,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator quickstep_queryoptimizer_physical_CreateTable quickstep_queryoptimizer_physical_DeleteTuples quickstep_queryoptimizer_physical_DropTable + quickstep_queryoptimizer_physical_FilterInjection quickstep_queryoptimizer_physical_HashJoin quickstep_queryoptimizer_physical_InsertSelection quickstep_queryoptimizer_physical_InsertTuple @@ -117,6 +118,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator quickstep_queryoptimizer_physical_WindowAggregate quickstep_relationaloperators_AggregationOperator quickstep_relationaloperators_BuildHashOperator + quickstep_relationaloperators_BuildLIPFilterOperator quickstep_relationaloperators_CreateIndexOperator quickstep_relationaloperators_CreateTableOperator quickstep_relationaloperators_DeleteOperator @@ -163,6 +165,7 @@ target_link_libraries(quickstep_queryoptimizer_LIPFilterGenerator quickstep_queryoptimizer_QueryPlan quickstep_queryoptimizer_expressions_ExprId quickstep_queryoptimizer_physical_Aggregate + quickstep_queryoptimizer_physical_FilterInjection quickstep_queryoptimizer_physical_HashJoin quickstep_queryoptimizer_physical_LIPFilterConfiguration quickstep_queryoptimizer_physical_Physical @@ -208,6 +211,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator quickstep_queryoptimizer_logical_Logical quickstep_queryoptimizer_physical_Physical quickstep_queryoptimizer_rules_AttachLIPFilters + quickstep_queryoptimizer_rules_InjectJoinFilters quickstep_queryoptimizer_rules_PruneColumns quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization quickstep_queryoptimizer_rules_SwapProbeBuild http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/ExecutionGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp index d24f498..3c45cb7 100644 --- a/query_optimizer/ExecutionGenerator.cpp +++ b/query_optimizer/ExecutionGenerator.cpp @@ -74,6 +74,7 @@ #include "query_optimizer/physical/CreateTable.hpp" #include "query_optimizer/physical/DeleteTuples.hpp" #include "query_optimizer/physical/DropTable.hpp" +#include "query_optimizer/physical/FilterInjection.hpp" #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/InsertSelection.hpp" #include "query_optimizer/physical/InsertTuple.hpp" @@ -93,6 +94,7 @@ #include "query_optimizer/physical/WindowAggregate.hpp" #include "relational_operators/AggregationOperator.hpp" #include "relational_operators/BuildHashOperator.hpp" +#include "relational_operators/BuildLIPFilterOperator.hpp" #include "relational_operators/CreateIndexOperator.hpp" #include "relational_operators/CreateTableOperator.hpp" #include "relational_operators/DeleteOperator.hpp" @@ -269,6 +271,9 @@ void ExecutionGenerator::generatePlanInternal( case P::PhysicalType::kDropTable: return convertDropTable( std::static_pointer_cast<const P::DropTable>(physical_plan)); + case P::PhysicalType::kFilterInjection: + return convertFilterInjection( + std::static_pointer_cast<const P::FilterInjection>(physical_plan)); case P::PhysicalType::kHashJoin: return convertHashJoin( std::static_pointer_cast<const P::HashJoin>(physical_plan)); @@ -606,6 +611,56 @@ void ExecutionGenerator::convertSharedSubplanReference(const physical::SharedSub } } +void ExecutionGenerator::convertFilterInjection(const P::FilterInjectionPtr &physical_plan) { + P::PhysicalPtr probe_physical = physical_plan->left(); + P::PhysicalPtr build_physical = physical_plan->right(); + + P::FilterInjectionPtr filter_injection; + if (P::SomeFilterInjection::MatchesWithConditionalCast(build_physical, &filter_injection)) { + build_physical = filter_injection->left(); + DCHECK(build_physical->getPhysicalType() != P::PhysicalType::kFilterInjection); + } + + // Convert the predicate proto. + QueryContext::predicate_id build_side_predicate_index = QueryContext::kInvalidPredicateId; + if (physical_plan->build_side_filter_predicate()) { + build_side_predicate_index = query_context_proto_->predicates_size(); + + std::unique_ptr<const Predicate> build_side_predicate( + convertPredicate(physical_plan->build_side_filter_predicate())); + query_context_proto_->add_predicates()->CopyFrom(build_side_predicate->getProto()); + } + + const CatalogRelationInfo *probe_relation_info = + findRelationInfoOutputByPhysical(probe_physical); + const CatalogRelationInfo *build_relation_info = + findRelationInfoOutputByPhysical(build_physical); + + const QueryPlan::DAGNodeIndex build_filter_operator_index = + execution_plan_->addRelationalOperator( + new BuildLIPFilterOperator( + query_handle_->query_id(), + *build_relation_info->relation, + build_side_predicate_index, + build_relation_info->isStoredRelation())); + + if (!build_relation_info->isStoredRelation()) { + execution_plan_->addDirectDependency(build_filter_operator_index, + build_relation_info->producer_operator_index, + false /* is_pipeline_breaker */); + } + + physical_to_output_relation_map_.emplace( + std::piecewise_construct, + std::forward_as_tuple(physical_plan), + std::forward_as_tuple(probe_relation_info->producer_operator_index, + probe_relation_info->relation)); + + DCHECK(lip_filter_generator_ != nullptr); + lip_filter_generator_->addFilterInjectionInfo(physical_plan, + build_filter_operator_index); +} + void ExecutionGenerator::convertHashJoin(const P::HashJoinPtr &physical_plan) { // HashJoin is converted to three operators: // BuildHash, HashJoin, DestroyHash. The second is the primary operator. http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/ExecutionGenerator.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp index 55197c9..a874dbb 100644 --- a/query_optimizer/ExecutionGenerator.hpp +++ b/query_optimizer/ExecutionGenerator.hpp @@ -46,6 +46,7 @@ #include "query_optimizer/physical/CreateTable.hpp" #include "query_optimizer/physical/DeleteTuples.hpp" #include "query_optimizer/physical/DropTable.hpp" +#include "query_optimizer/physical/FilterInjection.hpp" #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/InsertSelection.hpp" #include "query_optimizer/physical/InsertTuple.hpp" @@ -248,6 +249,13 @@ class ExecutionGenerator { void convertSharedSubplanReference(const physical::SharedSubplanReferencePtr &physical_plan); /** + * @brief Converts a FilterInjection to a BuildLIPFilter operator. + * + * @param physical_plan The FilterInjection to be converted. + */ + void convertFilterInjection(const physical::FilterInjectionPtr &physical_plan); + + /** * @brief Converts a HashJoin to BuildHash, HashJoin and * DestroyHash operators. * http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/LIPFilterGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/LIPFilterGenerator.cpp b/query_optimizer/LIPFilterGenerator.cpp index 404037e..37559ed 100644 --- a/query_optimizer/LIPFilterGenerator.cpp +++ b/query_optimizer/LIPFilterGenerator.cpp @@ -30,6 +30,8 @@ #include "utility/lip_filter/LIPFilter.hpp" #include "utility/lip_filter/LIPFilter.pb.h" +#include "google/protobuf/text_format.h" + #include "glog/logging.h" namespace quickstep { @@ -64,8 +66,9 @@ void LIPFilterGenerator::registerAttributeMap( } void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan, - serialization::QueryContext *query_context_proto) const { - LIPFilterBuilderMap lip_filter_builder_map; + serialization::QueryContext *query_context_proto) { + lip_filter_builder_map_.clear(); + lip_filter_deployment_protos_.clear(); // Deploy builders const auto &build_info_map = lip_filter_configuration_->getBuildInfoMap(); @@ -76,8 +79,7 @@ void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan, query_context_proto, info.builder_node, info.builder_operator_index, - build_it->second, - &lip_filter_builder_map); + build_it->second); } } @@ -90,10 +92,22 @@ void LIPFilterGenerator::deployLIPFilters(QueryPlan *execution_plan, query_context_proto, info.prober_node, info.prober_operator_index, - probe_it->second, - lip_filter_builder_map); + probe_it->second); } } + + // Attach LIPFilterDeployment information to the RelationalOperators. + for (const auto &entry : lip_filter_deployment_protos_) { + RelationalOperator *relop = + execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(entry.first); + relop->deployLIPFilters(entry.second.first); + +// std::cerr << "op_index: " << entry.first << " " +// << "deployment_index: " << entry.second.first << "\n"; +// std::string str; +// google::protobuf::TextFormat::PrintToString(*entry.second.second, &str); +// std::cerr << str << "\n"; + } } void LIPFilterGenerator::deployBuilderInternal( @@ -101,12 +115,9 @@ void LIPFilterGenerator::deployBuilderInternal( serialization::QueryContext *query_context_proto, const physical::PhysicalPtr &builder_node, const QueryPlan::DAGNodeIndex builder_operator_index, - const std::vector<physical::LIPFilterBuildInfo> &build_info_vec, - LIPFilterBuilderMap *lip_filter_builder_map) const { - const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size(); + const std::vector<physical::LIPFilterBuildInfo> &build_info_vec) { auto *lip_filter_deployment_info_proto = - query_context_proto->add_lip_filter_deployments(); - lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::BUILD); + getLIPFilterDeploymentProto(builder_operator_index, query_context_proto); const auto &builder_attribute_map = attribute_map_.at(builder_node); for (const auto &info : build_info_vec) { @@ -119,6 +130,7 @@ void LIPFilterGenerator::deployBuilderInternal( switch (info.filter_type) { case LIPFilterType::kSingleIdentityHashFilter: { DCHECK(!attr_type.isVariableLength()); + DCHECK(!info.is_anti_filter); lip_filter_proto->set_lip_filter_type( serialization::LIPFilterType::SINGLE_IDENTITY_HASH_FILTER); lip_filter_proto->SetExtension( @@ -127,27 +139,34 @@ void LIPFilterGenerator::deployBuilderInternal( serialization::SingleIdentityHashFilter::attribute_size, attr_type.minimumByteLength()); break; } + case LIPFilterType::kBitVectorExactFilter: { + DCHECK(!attr_type.isVariableLength()); + lip_filter_proto->set_lip_filter_type( + serialization::LIPFilterType::BIT_VECTOR_EXACT_FILTER); + lip_filter_proto->SetExtension( + serialization::BitVectorExactFilter::filter_cardinality, info.filter_cardinality); + lip_filter_proto->SetExtension( + serialization::BitVectorExactFilter::attribute_size, attr_type.minimumByteLength()); + lip_filter_proto->SetExtension( + serialization::BitVectorExactFilter::is_anti_filter, info.is_anti_filter); + break; + } default: LOG(FATAL) << "Unsupported LIPFilter type"; break; } // Register the builder information which is needed later by the probers. - lip_filter_builder_map->emplace( + lip_filter_builder_map_.emplace( std::make_pair(info.build_attribute->id(), builder_node), std::make_pair(lip_filter_id, builder_operator_index)); // Add the builder deployment information into query context. - auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries(); + auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_build_entries(); lip_filter_entry_proto->set_lip_filter_id(lip_filter_id); lip_filter_entry_proto->set_attribute_id(target_attr->getID()); lip_filter_entry_proto->mutable_attribute_type()->CopyFrom(attr_type.getProto()); } - - // Attach the LIPFilterDeployment information to the RelationalOperator. - RelationalOperator *relop = - execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(builder_operator_index); - relop->deployLIPFilters(lip_deployment_index); } void LIPFilterGenerator::deployProberInteral( @@ -155,23 +174,20 @@ void LIPFilterGenerator::deployProberInteral( serialization::QueryContext *query_context_proto, const physical::PhysicalPtr &prober_node, const QueryPlan::DAGNodeIndex prober_operator_index, - const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec, - const LIPFilterBuilderMap &lip_filter_builder_map) const { - const auto lip_deployment_index = query_context_proto->lip_filter_deployments_size(); + const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec) { auto *lip_filter_deployment_info_proto = - query_context_proto->add_lip_filter_deployments(); - lip_filter_deployment_info_proto->set_action_type(serialization::LIPFilterActionType::PROBE); + getLIPFilterDeploymentProto(prober_operator_index, query_context_proto); const auto &prober_attribute_map = attribute_map_.at(prober_node); for (const auto &info : probe_info_vec) { // Find the corresponding builder for the to-be-probed LIPFilter. const auto &builder_info = - lip_filter_builder_map.at( + lip_filter_builder_map_.at( std::make_pair(info.build_attribute->id(), info.builder)); const CatalogAttribute *target_attr = prober_attribute_map.at(info.probe_attribute->id()); // Add the prober deployment information into query context. - auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_entries(); + auto *lip_filter_entry_proto = lip_filter_deployment_info_proto->add_probe_entries(); lip_filter_entry_proto->set_lip_filter_id(builder_info.first); lip_filter_entry_proto->set_attribute_id(target_attr->getID()); lip_filter_entry_proto->mutable_attribute_type()->CopyFrom( @@ -183,11 +199,23 @@ void LIPFilterGenerator::deployProberInteral( builder_info.second, true /* is_pipeline_breaker */); } +} - // Attach the LIPFilterDeployment information to the RelationalOperator. - RelationalOperator *relop = - execution_plan->getQueryPlanDAGMutable()->getNodePayloadMutable(prober_operator_index); - relop->deployLIPFilters(lip_deployment_index); +serialization::LIPFilterDeployment* LIPFilterGenerator::getLIPFilterDeploymentProto( + const QueryPlan::DAGNodeIndex op_index, + serialization::QueryContext *query_context_proto) { + const auto proto_it = lip_filter_deployment_protos_.find(op_index); + if (proto_it == lip_filter_deployment_protos_.end()) { + const int lip_deployment_index = + query_context_proto->lip_filter_deployments_size(); + auto *lip_filter_deployment_proto = + query_context_proto->add_lip_filter_deployments(); + lip_filter_deployment_protos_.emplace( + op_index, std::make_pair(lip_deployment_index, lip_filter_deployment_proto)); + return lip_filter_deployment_proto; + } else { + return proto_it->second.second; + } } } // namespace optimizer http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/LIPFilterGenerator.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/LIPFilterGenerator.hpp b/query_optimizer/LIPFilterGenerator.hpp index 9d191a1..74daf73 100644 --- a/query_optimizer/LIPFilterGenerator.hpp +++ b/query_optimizer/LIPFilterGenerator.hpp @@ -30,6 +30,7 @@ #include "query_optimizer/expressions/ExprId.hpp" #include "query_optimizer/physical/LIPFilterConfiguration.hpp" #include "query_optimizer/physical/Aggregate.hpp" +#include "query_optimizer/physical/FilterInjection.hpp" #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/physical/Selection.hpp" @@ -39,7 +40,12 @@ namespace quickstep { -namespace serialization { class QueryContext; } +namespace serialization { + +class QueryContext; +class LIPFilterDeployment; + +} class CatalogAttribute; @@ -93,6 +99,20 @@ class LIPFilterGenerator { /** * @brief Add physical-to-execution mapping information for deploying LIPFilters + * to a filter-injection node. + * + * @param filter_injection A physical FilterInjection node. + * @param build_filter_operator_index The index of the BuildLIPFilterOperator + * that corresponds to \p filter_injection in the execution plan. + */ + void addFilterInjectionInfo(const physical::FilterInjectionPtr &filter_injection, + const QueryPlan::DAGNodeIndex build_filter_operator_index) { + builder_infos_.emplace_back(filter_injection, build_filter_operator_index); + prober_infos_.emplace_back(filter_injection, build_filter_operator_index); + } + + /** + * @brief Add physical-to-execution mapping information for deploying LIPFilters * to a hash-join. * * @param hash_join A physical HashJoin node. @@ -128,7 +148,7 @@ class LIPFilterGenerator { * @param query_context_proto QueryContext protobuf for the execution plan. */ void deployLIPFilters(QueryPlan *execution_plan, - serialization::QueryContext *query_context_proto) const; + serialization::QueryContext *query_context_proto); private: /** @@ -157,24 +177,21 @@ class LIPFilterGenerator { const QueryPlan::DAGNodeIndex prober_operator_index; }; - // Maps each LIPFilter's building attribute to the LIPFilter's id in QueryContext - // as well as the LIPFilter's building relational operator's index. - typedef std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>, - std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> LIPFilterBuilderMap; - void deployBuilderInternal(QueryPlan *execution_plan, serialization::QueryContext *query_context_proto, const physical::PhysicalPtr &builder_node, const QueryPlan::DAGNodeIndex builder_operator_index, - const std::vector<physical::LIPFilterBuildInfo> &build_info_vec, - LIPFilterBuilderMap *lip_filter_builder_map) const; + const std::vector<physical::LIPFilterBuildInfo> &build_info_vec); void deployProberInteral(QueryPlan *execution_plan, serialization::QueryContext *query_context_proto, const physical::PhysicalPtr &prober_node, const QueryPlan::DAGNodeIndex prober_operator_index, - const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec, - const LIPFilterBuilderMap &lip_filter_builder_map) const; + const std::vector<physical::LIPFilterProbeInfo> &probe_info_vec); + + serialization::LIPFilterDeployment* getLIPFilterDeploymentProto( + const QueryPlan::DAGNodeIndex op_index, + serialization::QueryContext *query_context_proto); const physical::LIPFilterConfigurationPtr lip_filter_configuration_; @@ -183,6 +200,14 @@ class LIPFilterGenerator { std::map<physical::PhysicalPtr, std::map<expressions::ExprId, const CatalogAttribute *>> attribute_map_; + // Maps each LIPFilter's building attribute to the LIPFilter's id in QueryContext + // as well as the LIPFilter's building relational operator's index. + std::map<std::pair<expressions::ExprId, physical::PhysicalPtr>, + std::pair<QueryContext::lip_filter_id, QueryPlan::DAGNodeIndex>> lip_filter_builder_map_; + + std::map<QueryPlan::DAGNodeIndex, + std::pair<int, serialization::LIPFilterDeployment *>> lip_filter_deployment_protos_; + DISALLOW_COPY_AND_ASSIGN(LIPFilterGenerator); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/PhysicalGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp index 7cb97dc..0e1b61e 100644 --- a/query_optimizer/PhysicalGenerator.cpp +++ b/query_optimizer/PhysicalGenerator.cpp @@ -30,6 +30,7 @@ #include "query_optimizer/rules/PruneColumns.hpp" #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp" #include "query_optimizer/rules/SwapProbeBuild.hpp" +#include "query_optimizer/rules/InjectJoinFilters.hpp" #include "query_optimizer/strategy/Aggregate.hpp" #include "query_optimizer/strategy/Join.hpp" #include "query_optimizer/strategy/OneToOne.hpp" @@ -50,6 +51,8 @@ DEFINE_bool(reorder_hash_joins, true, "cardinality and selective tables to be joined first, which is suitable " "for queries on star-schema tables."); +DEFINE_bool(inject_join_filters, true, "Transform HashJoin to FilterInjection."); + DEFINE_bool(use_lip_filters, true, "If true, use LIP (Lookahead Information Passing) filters to accelerate " "query processing. LIP filters are effective for queries on star schema " @@ -109,6 +112,9 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() { } else { rules.emplace_back(new SwapProbeBuild()); } + if (FLAGS_inject_join_filters) { + rules.emplace_back(new InjectJoinFilters()); + } if (FLAGS_use_lip_filters) { rules.emplace_back(new AttachLIPFilters()); } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/cost_model/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt index 90133e7..148f586 100644 --- a/query_optimizer/cost_model/CMakeLists.txt +++ b/query_optimizer/cost_model/CMakeLists.txt @@ -33,6 +33,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel quickstep_catalog_CatalogRelationStatistics quickstep_queryoptimizer_costmodel_CostModel quickstep_queryoptimizer_physical_Aggregate + quickstep_queryoptimizer_physical_FilterInjection quickstep_queryoptimizer_physical_HashJoin quickstep_queryoptimizer_physical_NestedLoopsJoin quickstep_queryoptimizer_physical_Physical @@ -49,6 +50,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod glog quickstep_catalog_CatalogRelation quickstep_catalog_CatalogRelationStatistics + quickstep_catalog_CatalogTypedefs quickstep_queryoptimizer_costmodel_CostModel quickstep_queryoptimizer_expressions_AttributeReference quickstep_queryoptimizer_expressions_ComparisonExpression @@ -60,6 +62,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod quickstep_queryoptimizer_expressions_PatternMatcher quickstep_queryoptimizer_expressions_Predicate quickstep_queryoptimizer_physical_Aggregate + quickstep_queryoptimizer_physical_FilterInjection quickstep_queryoptimizer_physical_HashJoin quickstep_queryoptimizer_physical_NestedLoopsJoin quickstep_queryoptimizer_physical_PatternMatcher @@ -72,6 +75,8 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostMod quickstep_queryoptimizer_physical_TableReference quickstep_queryoptimizer_physical_TopLevelPlan quickstep_queryoptimizer_physical_WindowAggregate + quickstep_types_NullType + quickstep_types_TypedValue quickstep_utility_Macros) # Module all-in-one library: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/cost_model/SimpleCostModel.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/SimpleCostModel.cpp b/query_optimizer/cost_model/SimpleCostModel.cpp index 7808898..4a1da0a 100644 --- a/query_optimizer/cost_model/SimpleCostModel.cpp +++ b/query_optimizer/cost_model/SimpleCostModel.cpp @@ -27,6 +27,7 @@ #include "query_optimizer/cost_model/CostModel.hpp" #include "query_optimizer/physical/Aggregate.hpp" #include "query_optimizer/physical/NestedLoopsJoin.hpp" +#include "query_optimizer/physical/FilterInjection.hpp" #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/physical/PhysicalType.hpp" @@ -61,6 +62,9 @@ std::size_t SimpleCostModel::estimateCardinality( case P::PhysicalType::kTableGenerator: return estimateCardinalityForTableGenerator( std::static_pointer_cast<const P::TableGenerator>(physical_plan)); + case P::PhysicalType::kFilterInjection: + return estimateCardinalityForFilterInjection( + std::static_pointer_cast<const P::FilterInjection>(physical_plan)); case P::PhysicalType::kHashJoin: return estimateCardinalityForHashJoin( std::static_pointer_cast<const P::HashJoin>(physical_plan)); @@ -119,6 +123,11 @@ std::size_t SimpleCostModel::estimateCardinalityForTableGenerator( return physical_plan->generator_function_handle()->getEstimatedCardinality(); } +std::size_t SimpleCostModel::estimateCardinalityForFilterInjection( + const P::FilterInjectionPtr &physical_plan) { + return estimateCardinality(physical_plan->left()); +} + std::size_t SimpleCostModel::estimateCardinalityForHashJoin( const P::HashJoinPtr &physical_plan) { return std::max(estimateCardinality(physical_plan->left()), http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/cost_model/SimpleCostModel.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/SimpleCostModel.hpp b/query_optimizer/cost_model/SimpleCostModel.hpp index 16366cd..1c60e24 100644 --- a/query_optimizer/cost_model/SimpleCostModel.hpp +++ b/query_optimizer/cost_model/SimpleCostModel.hpp @@ -26,6 +26,7 @@ #include "query_optimizer/cost_model/CostModel.hpp" #include "query_optimizer/physical/Aggregate.hpp" #include "query_optimizer/physical/NestedLoopsJoin.hpp" +#include "query_optimizer/physical/FilterInjection.hpp" #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/physical/Selection.hpp" @@ -80,6 +81,10 @@ class SimpleCostModel : public CostModel { std::size_t estimateCardinalityForSort( const physical::SortPtr &physical_plan); + // Returns the left child's cardinality + std::size_t estimateCardinalityForFilterInjection( + const physical::FilterInjectionPtr &physical_plan); + // Returns the larger value of the estimated cardinalities of two // input plans. std::size_t estimateCardinalityForHashJoin( http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp index 75b1b2b..9c73d89 100644 --- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp +++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp @@ -26,6 +26,7 @@ #include "catalog/CatalogRelation.hpp" #include "catalog/CatalogRelationStatistics.hpp" +#include "catalog/CatalogTypedefs.hpp" #include "query_optimizer/cost_model/CostModel.hpp" #include "query_optimizer/expressions/AttributeReference.hpp" #include "query_optimizer/expressions/ComparisonExpression.hpp" @@ -38,6 +39,7 @@ #include "query_optimizer/expressions/PatternMatcher.hpp" #include "query_optimizer/physical/Aggregate.hpp" #include "query_optimizer/physical/NestedLoopsJoin.hpp" +#include "query_optimizer/physical/FilterInjection.hpp" #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/PatternMatcher.hpp" #include "query_optimizer/physical/Physical.hpp" @@ -48,6 +50,8 @@ #include "query_optimizer/physical/TableGenerator.hpp" #include "query_optimizer/physical/TableReference.hpp" #include "query_optimizer/physical/TopLevelPlan.hpp" +#include "types/TypedValue.hpp" +#include "types/NullType.hpp" #include "glog/logging.h" @@ -73,6 +77,9 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinality( case P::PhysicalType::kTableGenerator: return estimateCardinalityForTableGenerator( std::static_pointer_cast<const P::TableGenerator>(physical_plan)); + case P::PhysicalType::kFilterInjection: + return estimateCardinalityForFilterInjection( + std::static_pointer_cast<const P::FilterInjection>(physical_plan)); case P::PhysicalType::kHashJoin: return estimateCardinalityForHashJoin( std::static_pointer_cast<const P::HashJoin>(physical_plan)); @@ -134,6 +141,17 @@ std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableGenerator( return physical_plan->generator_function_handle()->getEstimatedCardinality(); } +std::size_t StarSchemaSimpleCostModel::estimateCardinalityForFilterInjection( + const P::FilterInjectionPtr &physical_plan) { + double build_side_filter_selectivity = + estimateSelectivityForPredicate(physical_plan->build_side_filter_predicate(), + physical_plan->right()); + std::size_t left_cardinality = estimateCardinality(physical_plan->left()); + double right_selectivity = estimateSelectivity(physical_plan->right()); + return static_cast<std::size_t>( + left_cardinality * build_side_filter_selectivity * right_selectivity + 0.5); +} + std::size_t StarSchemaSimpleCostModel::estimateCardinalityForHashJoin( const P::HashJoinPtr &physical_plan) { std::size_t left_cardinality = estimateCardinality(physical_plan->left()); @@ -216,6 +234,18 @@ std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues( } break; } + case P::PhysicalType::kFilterInjection: { + const P::FilterInjectionPtr &filter_injection = + std::static_pointer_cast<const P::FilterInjection>(physical_plan); + if (E::ContainsExprId(filter_injection->left()->getOutputAttributes(), attribute_id)) { + std::size_t left_child_num_distinct_values = + estimateNumDistinctValues(attribute_id, filter_injection->left()); + double right_child_selectivity = + estimateSelectivity(filter_injection->right()); + return static_cast<std::size_t>( + left_child_num_distinct_values * right_child_selectivity + 0.5); + } + } case P::PhysicalType::kHashJoin: { const P::HashJoinPtr &hash_join = std::static_pointer_cast<const P::HashJoin>(physical_plan); @@ -254,6 +284,16 @@ double StarSchemaSimpleCostModel::estimateSelectivity( double child_selectivity = estimateSelectivity(selection->input()); return filter_selectivity * child_selectivity; } + case P::PhysicalType::kFilterInjection: { + const P::FilterInjectionPtr &filter_injection = + std::static_pointer_cast<const P::FilterInjection>(physical_plan); + double left_selectivity = estimateSelectivity(filter_injection->left()); + double right_selectivity = estimateSelectivity(filter_injection->right()); + double build_side_filter_selectivity = + estimateSelectivityForPredicate(filter_injection->build_side_filter_predicate(), + filter_injection->right()); + return left_selectivity * right_selectivity * build_side_filter_selectivity; + } case P::PhysicalType::kHashJoin: { const P::HashJoinPtr &hash_join = std::static_pointer_cast<const P::HashJoin>(physical_plan); @@ -383,18 +423,118 @@ double StarSchemaSimpleCostModel::estimateSelectivityForPredicate( std::size_t StarSchemaSimpleCostModel::getNumDistinctValues( const E::ExprId attribute_id, const P::TableReferencePtr &table_reference) { - const CatalogRelation &relation = *table_reference->relation(); - const std::vector<E::AttributeReferencePtr> &attributes = table_reference->attribute_list(); - for (std::size_t i = 0; i < attributes.size(); ++i) { - if (attributes[i]->id() == attribute_id) { - const CatalogRelationStatistics &stat = relation.getStatistics(); - if (stat.hasNumDistinctValues(i)) { - return stat.getNumDistinctValues(i); + const auto rel_attr_id = + findCatalogRelationAttributeId(table_reference, attribute_id); + if (rel_attr_id != kInvalidAttributeID) { + const CatalogRelationStatistics &stat = + table_reference->relation()->getStatistics(); + if (stat.hasNumDistinctValues(rel_attr_id)) { + return stat.getNumDistinctValues(rel_attr_id); + } + } + return estimateCardinalityForTableReference(table_reference); +} + +bool StarSchemaSimpleCostModel::impliesUniqueAttributes( + const P::PhysicalPtr &physical_plan, + const std::vector<E::AttributeReferencePtr> &attributes) { + switch (physical_plan->getPhysicalType()) { + case P::PhysicalType::kAggregate: { + const P::AggregatePtr &aggregate = + std::static_pointer_cast<const P::Aggregate>(physical_plan); + return E::SubsetOfExpressions(aggregate->grouping_expressions(), attributes); + } + case P::PhysicalType::kHashJoin: { + const P::HashJoinPtr &hash_join = + std::static_pointer_cast<const P::HashJoin>(physical_plan); + bool unique_from_left = + impliesUniqueAttributes(hash_join->right(), hash_join->right_join_attributes()) + && impliesUniqueAttributes(hash_join->left(), attributes); + bool unique_from_right = + impliesUniqueAttributes(hash_join->left(), hash_join->left_join_attributes()) + && impliesUniqueAttributes(hash_join->right(), attributes); + return unique_from_left || unique_from_right; + } + case P::PhysicalType::kTableReference: { + const P::TableReferencePtr &table_reference = + std::static_pointer_cast<const P::TableReference>(physical_plan); + const CatalogRelationStatistics &stat = + table_reference->relation()->getStatistics(); + if (stat.hasNumTuples()) { + const std::size_t num_tuples = stat.getNumTuples(); + for (const auto &attr : attributes) { + const attribute_id rel_attr_id = + findCatalogRelationAttributeId(table_reference, attr->id()); + if (rel_attr_id != kInvalidAttributeID && + stat.hasNumDistinctValues(rel_attr_id) && + stat.getNumDistinctValues(rel_attr_id) == num_tuples) { + return true; + } + } } + return false; + } + case P::PhysicalType::kSample: // Fall through + case P::PhysicalType::kSelection: + case P::PhysicalType::kSort: { + DCHECK_EQ(physical_plan->getNumChildren(), 1u); + return impliesUniqueAttributes(physical_plan->children()[0], attributes); + } + default: break; + } + return false; +} + +TypedValue StarSchemaSimpleCostModel::findCatalogRelationStat( + const P::PhysicalPtr &physical_plan, + const E::ExprId attr_id, + const StatType stat_type) { + P::TableReferencePtr table_reference; + if (P::SomeTableReference::MatchesWithConditionalCast(physical_plan, &table_reference)) { + const attribute_id rel_attr_id = + findCatalogRelationAttributeId(table_reference, attr_id); + if (rel_attr_id != kInvalidAttributeID) { + const CatalogRelationStatistics &stat = + table_reference->relation()->getStatistics(); + switch (stat_type) { + case StatType::kMin: { + if (stat.hasMinValue(rel_attr_id)) { + return stat.getMinValue(rel_attr_id); + } + break; + } + case StatType::kMax: { + if (stat.hasMaxValue(rel_attr_id)) { + return stat.getMaxValue(rel_attr_id); + } + break; + } + default: + break; + } + return NullType::InstanceNullable().makeNullValue(); } } - return estimateCardinalityForTableReference(table_reference); + + for (const auto &child : physical_plan->children()) { + if (E::ContainsExprId(child->getOutputAttributes(), attr_id)) { + return findCatalogRelationStat(child, attr_id, stat_type); + } + } + return NullType::InstanceNullable().makeNullValue(); +} + +attribute_id StarSchemaSimpleCostModel::findCatalogRelationAttributeId( + const physical::TableReferencePtr &table_reference, + const expressions::ExprId expr_id) { + const auto &attribute_list = table_reference->attribute_list(); + for (std::size_t i = 0; i < attribute_list.size(); ++i) { + if (attribute_list[i]->id() == expr_id) { + return i; + } + } + return kInvalidAttributeID; } } // namespace cost http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp index 6f6aa29..b5def39 100644 --- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp +++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp @@ -23,11 +23,13 @@ #include <cstddef> #include <vector> +#include "catalog/CatalogTypedefs.hpp" #include "query_optimizer/cost_model/CostModel.hpp" #include "query_optimizer/expressions/ExprId.hpp" #include "query_optimizer/expressions/Predicate.hpp" #include "query_optimizer/physical/Aggregate.hpp" #include "query_optimizer/physical/NestedLoopsJoin.hpp" +#include "query_optimizer/physical/FilterInjection.hpp" #include "query_optimizer/physical/HashJoin.hpp" #include "query_optimizer/physical/Physical.hpp" #include "query_optimizer/physical/Selection.hpp" @@ -36,6 +38,7 @@ #include "query_optimizer/physical/TableReference.hpp" #include "query_optimizer/physical/TopLevelPlan.hpp" #include "query_optimizer/physical/WindowAggregate.hpp" +#include "types/TypedValue.hpp" #include "utility/Macros.hpp" namespace quickstep { @@ -105,10 +108,38 @@ class StarSchemaSimpleCostModel : public CostModel { double estimateSelectivityForFilterPredicate( const physical::PhysicalPtr &physical_plan); + bool impliesUniqueAttributes( + const physical::PhysicalPtr &physical_plan, + const std::vector<expressions::AttributeReferencePtr> &attributes); + + TypedValue findMinValueStat( + const physical::PhysicalPtr &physical_plan, + const expressions::AttributeReferencePtr &attribute) { + return findCatalogRelationStat( + physical_plan, attribute->id(), StatType::kMin); + } + + TypedValue findMaxValueStat( + const physical::PhysicalPtr &physical_plan, + const expressions::AttributeReferencePtr &attribute) { + return findCatalogRelationStat( + physical_plan, attribute->id(), StatType::kMax); + } + + template <typename CppType> + bool findMinMaxStatsCppValue( + const physical::PhysicalPtr &physical_plan, + const expressions::AttributeReferencePtr &attribute, + CppType *min_cpp_value, + CppType *max_cpp_value); + private: std::size_t estimateCardinalityForAggregate( const physical::AggregatePtr &physical_plan); + std::size_t estimateCardinalityForFilterInjection( + const physical::FilterInjectionPtr &physical_plan); + std::size_t estimateCardinalityForHashJoin( const physical::HashJoinPtr &physical_plan); @@ -144,10 +175,63 @@ class StarSchemaSimpleCostModel : public CostModel { std::size_t getNumDistinctValues(const expressions::ExprId attribute_id, const physical::TableReferencePtr &table_reference); + enum class StatType { + kMax = 0, + kMin + }; + + TypedValue findCatalogRelationStat( + const physical::PhysicalPtr &physical_plan, + const expressions::ExprId expr_id, + const StatType stat_type); + + attribute_id findCatalogRelationAttributeId( + const physical::TableReferencePtr &table_reference, + const expressions::ExprId expr_id); DISALLOW_COPY_AND_ASSIGN(StarSchemaSimpleCostModel); }; +template <typename CppType> +bool StarSchemaSimpleCostModel::findMinMaxStatsCppValue( + const physical::PhysicalPtr &physical_plan, + const expressions::AttributeReferencePtr &attribute, + CppType *min_cpp_value, + CppType *max_cpp_value) { + const TypedValue min_value = + findMinValueStat(physical_plan, attribute); + const TypedValue max_value = + findMaxValueStat(physical_plan, attribute); + if (min_value.isNull() || max_value.isNull()) { + return false; + } + + switch (attribute->getValueType().getTypeID()) { + case TypeID::kInt: { + *min_cpp_value = min_value.getLiteral<int>(); + *max_cpp_value = max_value.getLiteral<int>(); + return true; + } + case TypeID::kLong: { + *min_cpp_value = min_value.getLiteral<std::int64_t>(); + *max_cpp_value = max_value.getLiteral<std::int64_t>(); + return true; + } + case TypeID::kFloat: { + *min_cpp_value = min_value.getLiteral<float>(); + *max_cpp_value = max_value.getLiteral<float>(); + return true; + } + case TypeID::kDouble: { + *min_cpp_value = min_value.getLiteral<double>(); + *max_cpp_value = max_value.getLiteral<double>(); + return true; + } + default: + return false; + } +} + /** @} */ } // namespace cost http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/expressions/ExpressionUtil.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/expressions/ExpressionUtil.hpp b/query_optimizer/expressions/ExpressionUtil.hpp index 422d5ab..6b8666e 100644 --- a/query_optimizer/expressions/ExpressionUtil.hpp +++ b/query_optimizer/expressions/ExpressionUtil.hpp @@ -122,12 +122,12 @@ bool ContainsExprId( * contain the other operand). * @return True if \p left is a subset of \p right. */ -template <class NamedExpressionType> +template <class LeftNamedExpressionType, class RightNamedExpressionType> bool SubsetOfExpressions( - const std::vector<std::shared_ptr<const NamedExpressionType>> &left, - const std::vector<std::shared_ptr<const NamedExpressionType>> &right) { + const std::vector<std::shared_ptr<const LeftNamedExpressionType>> &left, + const std::vector<std::shared_ptr<const RightNamedExpressionType>> &right) { UnorderedNamedExpressionSet supset(right.begin(), right.end()); - for (const std::shared_ptr<const NamedExpressionType> &expr : left) { + for (const std::shared_ptr<const LeftNamedExpressionType> &expr : left) { if (supset.find(expr) == supset.end()) { return false; } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/physical/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/CMakeLists.txt b/query_optimizer/physical/CMakeLists.txt index 5c2cd0b..2ef60ce 100644 --- a/query_optimizer/physical/CMakeLists.txt +++ b/query_optimizer/physical/CMakeLists.txt @@ -23,6 +23,7 @@ add_library(quickstep_queryoptimizer_physical_CreateIndex CreateIndex.cpp Create add_library(quickstep_queryoptimizer_physical_CreateTable CreateTable.cpp CreateTable.hpp) add_library(quickstep_queryoptimizer_physical_DeleteTuples DeleteTuples.cpp DeleteTuples.hpp) add_library(quickstep_queryoptimizer_physical_DropTable DropTable.cpp DropTable.hpp) +add_library(quickstep_queryoptimizer_physical_FilterInjection FilterInjection.cpp FilterInjection.hpp) add_library(quickstep_queryoptimizer_physical_HashJoin HashJoin.cpp HashJoin.hpp) add_library(quickstep_queryoptimizer_physical_InsertSelection InsertSelection.cpp InsertSelection.hpp) add_library(quickstep_queryoptimizer_physical_InsertTuple InsertTuple.cpp InsertTuple.hpp) @@ -114,6 +115,18 @@ target_link_libraries(quickstep_queryoptimizer_physical_DropTable quickstep_queryoptimizer_physical_Physical quickstep_queryoptimizer_physical_PhysicalType quickstep_utility_Macros) +target_link_libraries(quickstep_queryoptimizer_physical_FilterInjection + glog + quickstep_queryoptimizer_OptimizerTree + quickstep_queryoptimizer_expressions_AttributeReference + quickstep_queryoptimizer_expressions_ExpressionUtil + quickstep_queryoptimizer_expressions_NamedExpression + quickstep_queryoptimizer_expressions_Predicate + quickstep_queryoptimizer_physical_BinaryJoin + quickstep_queryoptimizer_physical_Physical + quickstep_queryoptimizer_physical_PhysicalType + quickstep_utility_Cast + quickstep_utility_Macros) target_link_libraries(quickstep_queryoptimizer_physical_HashJoin glog quickstep_queryoptimizer_OptimizerTree @@ -281,6 +294,7 @@ target_link_libraries(quickstep_queryoptimizer_physical quickstep_queryoptimizer_physical_CreateTable quickstep_queryoptimizer_physical_DeleteTuples quickstep_queryoptimizer_physical_DropTable + quickstep_queryoptimizer_physical_FilterInjection quickstep_queryoptimizer_physical_HashJoin quickstep_queryoptimizer_physical_InsertSelection quickstep_queryoptimizer_physical_InsertTuple http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/physical/FilterInjection.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/FilterInjection.cpp b/query_optimizer/physical/FilterInjection.cpp new file mode 100644 index 0000000..3a933e2 --- /dev/null +++ b/query_optimizer/physical/FilterInjection.cpp @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#include "query_optimizer/physical/FilterInjection.hpp" + +#include <string> +#include <vector> + +#include "query_optimizer/OptimizerTree.hpp" +#include "query_optimizer/expressions/AttributeReference.hpp" +#include "query_optimizer/expressions/ExpressionUtil.hpp" +#include "query_optimizer/expressions/NamedExpression.hpp" +#include "utility/Cast.hpp" + +namespace quickstep { +namespace optimizer { +namespace physical { + +namespace E = ::quickstep::optimizer::expressions; + +std::vector<E::AttributeReferencePtr> FilterInjection::getReferencedAttributes() const { + std::vector<E::AttributeReferencePtr> referenced_attributes; + for (const auto &project_expression : project_expressions()) { + const auto referenced_attributes_in_expression = + project_expression->getReferencedAttributes(); + referenced_attributes.insert(referenced_attributes.end(), + referenced_attributes_in_expression.begin(), + referenced_attributes_in_expression.end()); + } + referenced_attributes.insert(referenced_attributes.end(), + probe_attributes_.begin(), + probe_attributes_.end()); + referenced_attributes.insert(referenced_attributes.end(), + build_attributes_.begin(), + build_attributes_.end()); + if (build_side_filter_predicate_ != nullptr) { + const auto referenced_attributes_in_predicate = + build_side_filter_predicate_->getReferencedAttributes(); + referenced_attributes.insert(referenced_attributes.end(), + referenced_attributes_in_predicate.begin(), + referenced_attributes_in_predicate.end()); + } + return referenced_attributes; +} + +bool FilterInjection::maybeCopyWithPrunedExpressions( + const expressions::UnorderedNamedExpressionSet &referenced_expressions, + PhysicalPtr *output) const { + std::vector<E::NamedExpressionPtr> new_project_expressions; + const auto ¤t_project_expressions = project_expressions(); + for (const auto &project_expression : current_project_expressions) { + if (referenced_expressions.find(project_expression) != referenced_expressions.end()) { + new_project_expressions.emplace_back(project_expression); + } + } + if (new_project_expressions.size() != current_project_expressions.size()) { + *output = Create(left(), + right(), + probe_attributes_, + build_attributes_, + new_project_expressions, + build_side_filter_predicate_, + is_anti_filter_); + return true; + } + return false; +} + +void FilterInjection::getFieldStringItems( + std::vector<std::string> *inline_field_names, + std::vector<std::string> *inline_field_values, + std::vector<std::string> *non_container_child_field_names, + std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields, + std::vector<std::string> *container_child_field_names, + std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const { + BinaryJoin::getFieldStringItems(inline_field_names, + inline_field_values, + non_container_child_field_names, + non_container_child_fields, + container_child_field_names, + container_child_fields); + + inline_field_names->push_back("is_anti_filter"); + inline_field_values->push_back(std::to_string(is_anti_filter_)); + + if (build_side_filter_predicate_ != nullptr) { + non_container_child_field_names->emplace_back("build_side_filter_predicate"); + non_container_child_fields->emplace_back(build_side_filter_predicate_); + } + + container_child_field_names->push_back("probe_attributes"); + container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(probe_attributes_)); + container_child_field_names->push_back("build_attributes"); + container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(build_attributes_)); +} + +} // namespace physical +} // namespace optimizer +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/physical/FilterInjection.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/FilterInjection.hpp b/query_optimizer/physical/FilterInjection.hpp new file mode 100644 index 0000000..041e332 --- /dev/null +++ b/query_optimizer/physical/FilterInjection.hpp @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#ifndef QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_INJECTION_HPP_ +#define QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_INJECTION_HPP_ + +#include <cstddef> +#include <memory> +#include <string> +#include <type_traits> +#include <vector> + +#include "query_optimizer/OptimizerTree.hpp" +#include "query_optimizer/expressions/AttributeReference.hpp" +#include "query_optimizer/expressions/ExpressionUtil.hpp" +#include "query_optimizer/expressions/Predicate.hpp" +#include "query_optimizer/physical/BinaryJoin.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/physical/PhysicalType.hpp" +#include "utility/Macros.hpp" + +#include "glog/logging.h" + +namespace quickstep { +namespace optimizer { +namespace physical { + +/** \addtogroup OptimizerPhysical + * @{ + */ + +class FilterInjection; +typedef std::shared_ptr<const FilterInjection> FilterInjectionPtr; + +/** + * @brief Physical filter injection node. + */ +class FilterInjection : public BinaryJoin { + public: + PhysicalType getPhysicalType() const override { return PhysicalType::kFilterInjection; } + + std::string getName() const override { + if (is_anti_filter_) { + return "FilterInjection(Anti)"; + } else { + return "FilterInjection"; + } + } + + const std::vector<expressions::AttributeReferencePtr>& probe_attributes() const { + return probe_attributes_; + } + + const std::vector<expressions::AttributeReferencePtr>& build_attributes() const { + return build_attributes_; + } + + const expressions::PredicatePtr& build_side_filter_predicate() const { + return build_side_filter_predicate_; + } + + const bool is_anti_filter() const { + return is_anti_filter_; + } + + PhysicalPtr copyWithNewChildren( + const std::vector<PhysicalPtr> &new_children) const override { + DCHECK_EQ(children().size(), new_children.size()); + return Create(new_children[0], + new_children[1], + probe_attributes_, + build_attributes_, + project_expressions(), + build_side_filter_predicate_, + is_anti_filter_); + } + + std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() const override; + + bool maybeCopyWithPrunedExpressions( + const expressions::UnorderedNamedExpressionSet &referenced_expressions, + PhysicalPtr *output) const override; + + static FilterInjectionPtr Create( + const PhysicalPtr &probe_child, + const PhysicalPtr &build_child, + const std::vector<expressions::AttributeReferencePtr> &probe_attributes, + const std::vector<expressions::AttributeReferencePtr> &build_attributes, + const std::vector<expressions::NamedExpressionPtr> &project_expressions, + const expressions::PredicatePtr &build_side_filter_predicate, + const bool is_anti_filter) { + return FilterInjectionPtr( + new FilterInjection(probe_child, + build_child, + probe_attributes, + build_attributes, + project_expressions, + build_side_filter_predicate, + is_anti_filter)); + } + + protected: + void getFieldStringItems( + std::vector<std::string> *inline_field_names, + std::vector<std::string> *inline_field_values, + std::vector<std::string> *non_container_child_field_names, + std::vector<OptimizerTreeBaseNodePtr> *non_container_child_fields, + std::vector<std::string> *container_child_field_names, + std::vector<std::vector<OptimizerTreeBaseNodePtr>> *container_child_fields) const override; + + private: + FilterInjection( + const PhysicalPtr &probe_child, + const PhysicalPtr &build_child, + const std::vector<expressions::AttributeReferencePtr> &probe_attributes, + const std::vector<expressions::AttributeReferencePtr> &build_attributes, + const std::vector<expressions::NamedExpressionPtr> &project_expressions, + const expressions::PredicatePtr &build_side_filter_predicate, + const bool is_anti_filter) + : BinaryJoin(probe_child, build_child, project_expressions), + probe_attributes_(probe_attributes), + build_attributes_(build_attributes), + build_side_filter_predicate_(build_side_filter_predicate), + is_anti_filter_(is_anti_filter) { + } + + std::vector<expressions::AttributeReferencePtr> probe_attributes_; + std::vector<expressions::AttributeReferencePtr> build_attributes_; + expressions::PredicatePtr build_side_filter_predicate_; + bool is_anti_filter_; + + DISALLOW_COPY_AND_ASSIGN(FilterInjection); +}; + +/** @} */ + +} // namespace physical +} // namespace optimizer +} // namespace quickstep + +#endif // QUICKSTEP_QUERY_OPTIMIZER_PHYSICAL_FILTER_INJECTION_HPP_ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/physical/LIPFilterConfiguration.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/LIPFilterConfiguration.hpp b/query_optimizer/physical/LIPFilterConfiguration.hpp index 62a6149..b4ae548 100644 --- a/query_optimizer/physical/LIPFilterConfiguration.hpp +++ b/query_optimizer/physical/LIPFilterConfiguration.hpp @@ -50,17 +50,32 @@ struct LIPFilterBuildInfo { * @param build_attribute_in The attribute to build the LIP filter with. * @param filter_cardinality_in The LIP filter's cardinality. * @param filter_type_in The LIP filter's type. + * @param is_anti_filter_in Whether this LIPFilter is an anti-filter. */ LIPFilterBuildInfo(const expressions::AttributeReferencePtr &build_attribute_in, const std::size_t filter_cardinality_in, - const LIPFilterType &filter_type_in) + const LIPFilterType &filter_type_in, + const bool is_anti_filter_in) : build_attribute(build_attribute_in), filter_cardinality(filter_cardinality_in), - filter_type(filter_type_in) { - } + filter_type(filter_type_in), + is_anti_filter(is_anti_filter_in) {} + + /** + * @brief Copy constructor. + * + * @param info The LIPFilter build info to copy. + */ + LIPFilterBuildInfo(const LIPFilterBuildInfo &info) + : build_attribute(info.build_attribute), + filter_cardinality(info.filter_cardinality), + filter_type(info.filter_type), + is_anti_filter(info.is_anti_filter) {} + const expressions::AttributeReferencePtr build_attribute; const std::size_t filter_cardinality; const LIPFilterType filter_type; + const bool is_anti_filter; }; /** @@ -79,8 +94,18 @@ struct LIPFilterProbeInfo { const PhysicalPtr &builder_in) : probe_attribute(probe_attribute_in), build_attribute(build_attribute_in), - builder(builder_in) { - } + builder(builder_in) {} + + /** + * @brief Copy constructor. + * + * @param info The LIPFilter probe info to copy. + */ + LIPFilterProbeInfo(const LIPFilterProbeInfo &info) + : probe_attribute(info.probe_attribute), + build_attribute(info.build_attribute), + builder(info.builder) {} + const expressions::AttributeReferencePtr probe_attribute; const expressions::AttributeReferencePtr build_attribute; const PhysicalPtr builder; @@ -112,9 +137,10 @@ class LIPFilterConfiguration { void addBuildInfo(const expressions::AttributeReferencePtr &build_attribute, const PhysicalPtr &builder, const std::size_t filter_size, - const LIPFilterType &filter_type) { + const LIPFilterType &filter_type, + const bool is_anti_filter) { build_info_map_[builder].emplace_back( - build_attribute, filter_size, filter_type); + build_attribute, filter_size, filter_type, is_anti_filter); } /** @@ -155,6 +181,31 @@ class LIPFilterConfiguration { return probe_info_map_; } + /** + * @brief Clone a copy of this configuration. + * + * @return A copy of this confiugration. Caller should take ownership of the + * returned object. + */ + LIPFilterConfiguration* clone() const { + LIPFilterConfiguration *new_conf = new LIPFilterConfiguration(); + for (const auto &build_pair : build_info_map_) { + auto &new_build_vec = new_conf->build_info_map_[build_pair.first]; + const auto &build_vec = build_pair.second; + for (const auto &info : build_vec) { + new_build_vec.emplace_back(info); + } + } + for (const auto &probe_pair : probe_info_map_) { + auto &new_probe_vec = new_conf->probe_info_map_[probe_pair.first]; + const auto &probe_vec = probe_pair.second; + for (const auto &info : probe_vec) { + new_probe_vec.emplace_back(info); + } + } + return new_conf; + } + private: std::map<PhysicalPtr, std::vector<LIPFilterBuildInfo>> build_info_map_; std::map<PhysicalPtr, std::vector<LIPFilterProbeInfo>> probe_info_map_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/physical/PatternMatcher.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/PatternMatcher.hpp b/query_optimizer/physical/PatternMatcher.hpp index 5cd6fd3..c675cdb 100644 --- a/query_optimizer/physical/PatternMatcher.hpp +++ b/query_optimizer/physical/PatternMatcher.hpp @@ -35,6 +35,7 @@ class CopyFrom; class CreateTable; class DeleteTuples; class DropTable; +class FilterInjection; class HashJoin; class InsertTuple; class Join; @@ -113,6 +114,7 @@ using SomeCopyFrom = SomePhysicalNode<CopyFrom, PhysicalType::kCopyFrom>; using SomeCreateTable = SomePhysicalNode<CreateTable, PhysicalType::kCreateTable>; using SomeDeleteTuples = SomePhysicalNode<DeleteTuples, PhysicalType::kDeleteTuples>; using SomeDropTable = SomePhysicalNode<DropTable, PhysicalType::kDropTable>; +using SomeFilterInjection = SomePhysicalNode<FilterInjection, PhysicalType::kFilterInjection>; using SomeHashJoin = SomePhysicalNode<HashJoin, PhysicalType::kHashJoin>; using SomeInsertTuple = SomePhysicalNode<InsertTuple, PhysicalType::kInsertTuple>; using SomeJoin = SomePhysicalNode<Join, PhysicalType::kHashJoin, PhysicalType::kNestedLoopsJoin>; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/physical/PhysicalType.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/PhysicalType.hpp b/query_optimizer/physical/PhysicalType.hpp index f5f35a1..bcf7d22 100644 --- a/query_optimizer/physical/PhysicalType.hpp +++ b/query_optimizer/physical/PhysicalType.hpp @@ -38,6 +38,7 @@ enum class PhysicalType { kCreateTable, kDeleteTuples, kDropTable, + kFilterInjection, kHashJoin, kInsertSelection, kInsertTuple, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/physical/TopLevelPlan.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/physical/TopLevelPlan.hpp b/query_optimizer/physical/TopLevelPlan.hpp index 7dfc2b6..9e567e1 100644 --- a/query_optimizer/physical/TopLevelPlan.hpp +++ b/query_optimizer/physical/TopLevelPlan.hpp @@ -126,7 +126,8 @@ class TopLevelPlan : public Physical { } return TopLevelPlan::Create(new_children[0], new_shared_subplans, - uncorrelated_subquery_map_); + uncorrelated_subquery_map_, + lip_filter_configuration_); } std::vector<expressions::AttributeReferencePtr> getOutputAttributes() const override { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/rules/AttachLIPFilters.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/AttachLIPFilters.cpp b/query_optimizer/rules/AttachLIPFilters.cpp index b3c57ab..c4551bc 100644 --- a/query_optimizer/rules/AttachLIPFilters.cpp +++ b/query_optimizer/rules/AttachLIPFilters.cpp @@ -55,7 +55,14 @@ P::PhysicalPtr AttachLIPFilters::apply(const P::PhysicalPtr &input) { cost_model_.reset( new cost::StarSchemaSimpleCostModel( top_level_plan->shared_subplans())); - lip_filter_configuration_.reset(new P::LIPFilterConfiguration()); + + const P::LIPFilterConfigurationPtr &existing_configuration = + top_level_plan->lip_filter_configuration(); + if (existing_configuration != nullptr) { + lip_filter_configuration_.reset(existing_configuration->clone()); + } else { + lip_filter_configuration_.reset(new P::LIPFilterConfiguration()); + } std::set<E::ExprId> already_filtered_attributes; attachLIPFilters(NodeList(input), &already_filtered_attributes); @@ -101,7 +108,7 @@ void AttachLIPFilters::attachLIPFilters( } if (probe_child != nullptr && - cost_model_->estimateCardinality(probe_child) > 10000000) { + cost_model_->estimateCardinality(probe_child) >= 100000) { const auto &candidate_lip_filters = getProbeSideInfo(path.cons(probe_child)); if (!candidate_lip_filters.empty()) { std::map<E::AttributeReferencePtr, LIPFilterInfoPtr> selected_filters; @@ -122,7 +129,8 @@ void AttachLIPFilters::attachLIPFilters( pair.second->source_attribute, pair.second->source, pair.second->estimated_cardinality * 8, - LIPFilterType::kSingleIdentityHashFilter); + LIPFilterType::kSingleIdentityHashFilter, + false); lip_filter_configuration_->addProbeInfo( pair.first, node, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/70624122/query_optimizer/rules/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt index 7fffadc..1eeb3a5 100644 --- a/query_optimizer/rules/CMakeLists.txt +++ b/query_optimizer/rules/CMakeLists.txt @@ -22,6 +22,7 @@ add_library(quickstep_queryoptimizer_rules_AttachLIPFilters AttachLIPFilters.cpp add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp) add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp) add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp) +add_library(quickstep_queryoptimizer_rules_InjectJoinFilters InjectJoinFilters.cpp InjectJoinFilters.hpp) add_library(quickstep_queryoptimizer_rules_PruneColumns PruneColumns.cpp PruneColumns.hpp) add_library(quickstep_queryoptimizer_rules_PushDownFilter PushDownFilter.cpp PushDownFilter.hpp) add_library(quickstep_queryoptimizer_rules_PushDownSemiAntiJoin PushDownSemiAntiJoin.cpp PushDownSemiAntiJoin.hpp) @@ -160,6 +161,26 @@ target_link_libraries(quickstep_queryoptimizer_rules_SwapProbeBuild target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule quickstep_queryoptimizer_rules_Rule quickstep_utility_Macros) +target_link_libraries(quickstep_queryoptimizer_rules_InjectJoinFilters + quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel + quickstep_queryoptimizer_expressions_AttributeReference + quickstep_queryoptimizer_expressions_ExprId + quickstep_queryoptimizer_expressions_ExpressionUtil + quickstep_queryoptimizer_physical_Aggregate + quickstep_queryoptimizer_physical_FilterInjection + quickstep_queryoptimizer_physical_HashJoin + quickstep_queryoptimizer_physical_LIPFilterConfiguration + quickstep_queryoptimizer_physical_PatternMatcher + quickstep_queryoptimizer_physical_Physical + quickstep_queryoptimizer_physical_PhysicalType + quickstep_queryoptimizer_physical_Selection + quickstep_queryoptimizer_physical_TopLevelPlan + quickstep_queryoptimizer_rules_Rule + quickstep_queryoptimizer_rules_PruneColumns + quickstep_types_TypeID + quickstep_types_TypedValue + quickstep_utility_Macros + quickstep_utility_lipfilter_LIPFilter) target_link_libraries(quickstep_queryoptimizer_rules_UnnestSubqueries quickstep_queryoptimizer_OptimizerContext quickstep_queryoptimizer_expressions_AttributeReference @@ -210,6 +231,7 @@ target_link_libraries(quickstep_queryoptimizer_rules quickstep_queryoptimizer_rules_BottomUpRule quickstep_queryoptimizer_rules_CollapseProject quickstep_queryoptimizer_rules_GenerateJoins + quickstep_queryoptimizer_rules_InjectJoinFilters quickstep_queryoptimizer_rules_PruneColumns quickstep_queryoptimizer_rules_PushDownFilter quickstep_queryoptimizer_rules_PushDownSemiAntiJoin