Repository: incubator-quickstep Updated Branches: refs/heads/adaptive-bloom-filters 32608da27 -> 9cbd3e847
updates Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/9cbd3e84 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/9cbd3e84 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/9cbd3e84 Branch: refs/heads/adaptive-bloom-filters Commit: 9cbd3e84786c6dc4280ddb048d8d3f8feef646a6 Parents: 32608da Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Thu Jul 7 17:07:18 2016 -0500 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Thu Jul 7 17:07:18 2016 -0500 ---------------------------------------------------------------------- query_optimizer/CMakeLists.txt | 1 + query_optimizer/PhysicalGenerator.cpp | 3 + query_optimizer/rules/AttachBloomFilters.cpp | 118 ++++++++++++++++++++++ query_optimizer/rules/AttachBloomFilters.hpp | 100 ++++++++++++++++++ query_optimizer/rules/CMakeLists.txt | 17 ++++ storage/HashTable.hpp | 28 ++--- utility/CMakeLists.txt | 1 + 7 files changed, 249 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt index 8912414..ff48a54 100644 --- a/query_optimizer/CMakeLists.txt +++ b/query_optimizer/CMakeLists.txt @@ -193,6 +193,7 @@ target_link_libraries(quickstep_queryoptimizer_PhysicalGenerator quickstep_queryoptimizer_LogicalToPhysicalMapper quickstep_queryoptimizer_logical_Logical quickstep_queryoptimizer_physical_Physical + quickstep_queryoptimizer_rules_AttachBloomFilters quickstep_queryoptimizer_rules_PruneColumns quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOptimization quickstep_queryoptimizer_strategy_Aggregate http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/PhysicalGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp index 5f97d6f..50e1d86 100644 --- a/query_optimizer/PhysicalGenerator.cpp +++ b/query_optimizer/PhysicalGenerator.cpp @@ -26,6 +26,7 @@ #include "query_optimizer/Validator.hpp" #include "query_optimizer/logical/Logical.hpp" #include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/rules/AttachBloomFilters.hpp" #include "query_optimizer/rules/PruneColumns.hpp" #include "query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp" #include "query_optimizer/strategy/Aggregate.hpp" @@ -99,6 +100,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() { rules.emplace_back(new StarSchemaHashJoinOrderOptimization()); } rules.emplace_back(new PruneColumns()); + rules.emplace_back(new AttachBloomFilters()); for (std::unique_ptr<Rule<P::Physical>> &rule : rules) { physical_plan_ = rule->apply(physical_plan_); @@ -112,6 +114,7 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() { quickstep::PlanVisualizer plan_visualizer; std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n"; } + exit(0); #ifdef QUICKSTEP_DEBUG Validate(physical_plan_); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/rules/AttachBloomFilters.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/AttachBloomFilters.cpp b/query_optimizer/rules/AttachBloomFilters.cpp new file mode 100644 index 0000000..34822e5 --- /dev/null +++ b/query_optimizer/rules/AttachBloomFilters.cpp @@ -0,0 +1,118 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of WisconsinâMadison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#include "query_optimizer/rules/AttachBloomFilters.hpp" + +#include <memory> +#include <set> +#include <unordered_set> +#include <unordered_map> +#include <vector> + +#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" +#include "query_optimizer/expressions/AttributeReference.hpp" +#include "query_optimizer/expressions/NamedExpression.hpp" +#include "query_optimizer/expressions/PatternMatcher.hpp" +#include "query_optimizer/physical/HashJoin.hpp" +#include "query_optimizer/physical/PatternMatcher.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/physical/PhysicalType.hpp" +#include "query_optimizer/physical/TopLevelPlan.hpp" + +#include "glog/logging.h" + +namespace quickstep { +namespace optimizer { + +namespace E = ::quickstep::optimizer::expressions; +namespace P = ::quickstep::optimizer::physical; + +P::PhysicalPtr AttachBloomFilters::apply(const P::PhysicalPtr &input) { + DCHECK(input->getPhysicalType() == P::PhysicalType::kTopLevelPlan); + cost_model_.reset( + new cost::StarSchemaSimpleCostModel( + std::static_pointer_cast<const P::TopLevelPlan>(input)->shared_subplans())); + + visitProducer(input); + visitConsumer(input); + + for (const auto &info_vec_pair : producers_) { + std::cerr << "--------\n" + << "Node " << info_vec_pair.first->getName() + << info_vec_pair.first << "\n"; + for (const auto &info : info_vec_pair.second) { + std::cerr << info.attribute->attribute_alias() << "@" + << info.source + << ": " << info.selectivity << "\n"; + } + std::cerr << "********\n"; + } + + return input; +} + +void AttachBloomFilters::visitProducer(const P::PhysicalPtr &node) { + std::vector<const BloomFilterInfo> bloom_filters; + + // First check inherited bloom filters + std::vector<const BloomFilterInfo*> candidates; + switch (node->getPhysicalType()) { + case P::PhysicalType::kAggregate: + case P::PhysicalType::kSelection: + case P::PhysicalType::kHashJoin: { + for (const P::PhysicalPtr &child : node->children()) { + const auto bloom_filters_it = producers_.find(child); + if (bloom_filters_it != producers_.end()) { + for (const BloomFilterInfo info : bloom_filters_it->second) { + candidates.emplace_back(&info); + } + } + } + } + default: + break; + } + + const std::vector<E::AttributeReferencePtr> output_attributes( + node->getOutputAttributes()); + + std::unordered_set<E::ExprId> output_attribute_ids; + for (const auto &attr : output_attributes) { + output_attribute_ids.emplace(attr->id()); + } + for (const BloomFilterInfo *info : candidates) { + if (output_attribute_ids.find(info->attribute->id()) != output_attribute_ids.end()) { + bloom_filters.emplace_back(*info); + } + } + + // Self-produced bloom filters + double selectivity = cost_model_->estimateSelectivity(node); + if (selectivity < 1.0) { + for (const auto &attr : output_attributes) { + bloom_filters.emplace_back(node, attr, selectivity, false); + } + } + + producers_.emplace(node, std::move(bloom_filters)); +} + +void AttachBloomFilters::visitConsumer(const P::PhysicalPtr &node) { +} + +} // namespace optimizer +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/rules/AttachBloomFilters.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/AttachBloomFilters.hpp b/query_optimizer/rules/AttachBloomFilters.hpp new file mode 100644 index 0000000..dfcc7f5 --- /dev/null +++ b/query_optimizer/rules/AttachBloomFilters.hpp @@ -0,0 +1,100 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of WisconsinâMadison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_ +#define QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_ + +#include <algorithm> +#include <cstddef> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" +#include "query_optimizer/expressions/ExprId.hpp" +#include "query_optimizer/expressions/NamedExpression.hpp" +#include "query_optimizer/expressions/Predicate.hpp" +#include "query_optimizer/physical/Physical.hpp" +#include "query_optimizer/rules/Rule.hpp" +#include "utility/Macros.hpp" + +namespace quickstep { +namespace optimizer { + +/** \addtogroup OptimizerRules + * @{ + */ + +/** + * @brief TODO + */ +class AttachBloomFilters : public Rule<physical::Physical> { + public: + AttachBloomFilters() {} + + ~AttachBloomFilters() override {} + + std::string getName() const override { + return "AttachBloomFilters"; + } + + physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override; + + private: + struct BloomFilterInfo { + BloomFilterInfo(const physical::PhysicalPtr &source_in, + const expressions::AttributeReferencePtr &attribute_in, + const double selectivity_in, + const bool from_sibling_in) + : source(source_in), + attribute(attribute_in), + selectivity(selectivity_in), + from_sibling(from_sibling_in) { + } + BloomFilterInfo(const BloomFilterInfo &info) + : source(info.source), + attribute(info.attribute), + selectivity(info.selectivity), + from_sibling(info.from_sibling) { + } + physical::PhysicalPtr source; + expressions::AttributeReferencePtr attribute; + double selectivity; + bool from_sibling; + }; + + void visitProducer(const physical::PhysicalPtr &node); + + void visitConsumer(const physical::PhysicalPtr &node); + + std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_; + + std::map<physical::PhysicalPtr, std::vector<const BloomFilterInfo>> producers_; + std::map<physical::PhysicalPtr, std::vector<const BloomFilterInfo>> consumers_; + + DISALLOW_COPY_AND_ASSIGN(AttachBloomFilters); +}; + +/** @} */ + +} // namespace optimizer +} // namespace quickstep + +#endif /* QUICKSTEP_QUERY_OPTIMIZER_RULES_ATTACH_BLOOM_FILTERS_HPP_ */ http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/query_optimizer/rules/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt index 1990174..a943ef7 100644 --- a/query_optimizer/rules/CMakeLists.txt +++ b/query_optimizer/rules/CMakeLists.txt @@ -18,6 +18,7 @@ add_subdirectory(tests) # Declare micro-libs: +add_library(quickstep_queryoptimizer_rules_AttachBloomFilters AttachBloomFilters.cpp AttachBloomFilters.hpp) add_library(quickstep_queryoptimizer_rules_BottomUpRule ../../empty_src.cpp BottomUpRule.hpp) add_library(quickstep_queryoptimizer_rules_CollapseProject CollapseProject.cpp CollapseProject.hpp) add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp) @@ -35,6 +36,20 @@ add_library(quickstep_queryoptimizer_rules_UnnestSubqueries UnnestSubqueries.cpp # Link dependencies: +target_link_libraries(quickstep_queryoptimizer_rules_AttachBloomFilters + quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel + quickstep_queryoptimizer_expressions_AttributeReference + quickstep_queryoptimizer_expressions_ExprId + quickstep_queryoptimizer_expressions_NamedExpression + quickstep_queryoptimizer_expressions_PatternMatcher + quickstep_queryoptimizer_expressions_Predicate + quickstep_queryoptimizer_physical_HashJoin + quickstep_queryoptimizer_physical_PatternMatcher + quickstep_queryoptimizer_physical_Physical + quickstep_queryoptimizer_physical_PhysicalType + quickstep_queryoptimizer_physical_TopLevelPlan + quickstep_queryoptimizer_rules_Rule + quickstep_utility_Macros) target_link_libraries(quickstep_queryoptimizer_rules_BottomUpRule glog quickstep_queryoptimizer_rules_Rule @@ -126,6 +141,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_StarSchemaHashJoinOrderOpti quickstep_queryoptimizer_physical_PhysicalType quickstep_queryoptimizer_physical_TopLevelPlan quickstep_queryoptimizer_rules_Rule + quickstep_utility_DisjointTreeForest quickstep_utility_Macros) target_link_libraries(quickstep_queryoptimizer_rules_TopDownRule quickstep_queryoptimizer_rules_Rule @@ -176,6 +192,7 @@ target_link_libraries(quickstep_queryoptimizer_rules_UpdateExpression # Module all-in-one library: add_library(quickstep_queryoptimizer_rules ../../empty_src.cpp OptimizerRulesModule.hpp) target_link_libraries(quickstep_queryoptimizer_rules + quickstep_queryoptimizer_rules_AttachBloomFilters quickstep_queryoptimizer_rules_BottomUpRule quickstep_queryoptimizer_rules_CollapseProject quickstep_queryoptimizer_rules_GenerateJoins http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/storage/HashTable.hpp ---------------------------------------------------------------------- diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp index be31fd9..740e678 100644 --- a/storage/HashTable.hpp +++ b/storage/HashTable.hpp @@ -23,6 +23,7 @@ #include <atomic> #include <cstddef> #include <cstdlib> +#include <memory> #include <type_traits> #include <vector> @@ -39,6 +40,7 @@ #include "types/Type.hpp" #include "types/TypedValue.hpp" #include "utility/BloomFilter.hpp" +#include "utility/BloomFilterAdapter.hpp" #include "utility/HashPair.hpp" #include "utility/Macros.hpp" @@ -2246,26 +2248,14 @@ void HashTable<ValueT, resizable, serializable, force_key_copy, allow_duplicate_ InvokeOnAnyValueAccessor( accessor, [&](auto *accessor) -> void { // NOLINT(build/c++11) + std::unique_ptr<BloomFilterAdapter> bloom_filter_adapter; + if (has_probe_side_bloom_filter_) { + bloom_filter_adapter.reset( + new BloomFilterAdapter(probe_bloom_filters_, probe_attribute_ids_)); + } while (accessor->next()) { - // Probe any bloom filters, if enabled. - if (has_probe_side_bloom_filter_) { - DCHECK_EQ(probe_bloom_filters_.size(), probe_attribute_ids_.size()); - // Check if the key is contained in the BloomFilters or not. - bool bloom_miss = false; - for (std::size_t i = 0; i < probe_bloom_filters_.size() && !bloom_miss; ++i) { - const BloomFilter *bloom_filter = probe_bloom_filters_[i]; - for (const attribute_id &attr_id : probe_attribute_ids_[i]) { - TypedValue bloom_key = accessor->getTypedValue(attr_id); - if (!bloom_filter->contains(static_cast<const std::uint8_t*>(bloom_key.getDataPtr()), - bloom_key.getDataSize())) { - bloom_miss = true; - break; - } - } - } - if (bloom_miss) { - continue; // On a bloom filter miss, probing the hash table can be skipped. - } + if (has_probe_side_bloom_filter_ && bloom_filter_adapter->miss(accessor)) { + continue; // On a bloom filter miss, probing the hash table can be skipped. } TypedValue key = accessor->getTypedValue(key_attr_id); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cbd3e84/utility/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt index 8582981..5883470 100644 --- a/utility/CMakeLists.txt +++ b/utility/CMakeLists.txt @@ -320,6 +320,7 @@ target_link_libraries(quickstep_utility quickstep_utility_CheckSnprintf quickstep_utility_DAG quickstep_utility_DAGVisualizer + quickstep_utility_DisjointTreeForest quickstep_utility_EventProfiler quickstep_utility_EqualsAnyConstant quickstep_utility_Glob