Repository: incubator-quickstep Updated Branches: refs/heads/min-max-stats f8feba19b -> c8ef1991e
Add min/max statistics and support the exactness flag for CatalogRelationStatistics Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/c8ef1991 Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/c8ef1991 Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/c8ef1991 Branch: refs/heads/min-max-stats Commit: c8ef1991e62ecffdd54b1347bb5acdda784fc44d Parents: f8feba1 Author: Jianqiao Zhu <jianq...@cs.wisc.edu> Authored: Fri Dec 16 14:12:58 2016 -0600 Committer: Jianqiao Zhu <jianq...@cs.wisc.edu> Committed: Fri Dec 16 14:12:58 2016 -0600 ---------------------------------------------------------------------- catalog/CMakeLists.txt | 3 ++ catalog/Catalog.proto | 5 ++-- catalog/CatalogRelationStatistics.cpp | 11 ++++++++ catalog/CatalogRelationStatistics.hpp | 25 +++++++++++++++- cli/CMakeLists.txt | 3 ++ cli/CommandExecutor.cpp | 21 ++++++++------ query_optimizer/ExecutionGenerator.cpp | 36 ++++++++++++++++-------- query_optimizer/cost_model/CMakeLists.txt | 1 + relational_operators/CMakeLists.txt | 1 + relational_operators/SaveBlocksOperator.cpp | 5 ++++ relational_operators/SaveBlocksOperator.hpp | 7 +++++ 11 files changed, 95 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/catalog/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/catalog/CMakeLists.txt b/catalog/CMakeLists.txt index dd4ef99..143d12f 100644 --- a/catalog/CMakeLists.txt +++ b/catalog/CMakeLists.txt @@ -120,6 +120,9 @@ target_link_libraries(quickstep_catalog_CatalogRelation target_link_libraries(quickstep_catalog_CatalogRelationStatistics quickstep_catalog_CatalogTypedefs quickstep_catalog_Catalog_proto + quickstep_types_LongType + quickstep_types_NullType + quickstep_types_TypedValue quickstep_utility_Macros) target_link_libraries(quickstep_catalog_IndexScheme glog http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/catalog/Catalog.proto ---------------------------------------------------------------------- diff --git a/catalog/Catalog.proto b/catalog/Catalog.proto index 58de363..a54862f 100644 --- a/catalog/Catalog.proto +++ b/catalog/Catalog.proto @@ -81,7 +81,8 @@ message IndexScheme { } message CatalogRelationStatistics { - optional TypedValue num_tuples = 1; + optional bool is_exact = 1; + optional TypedValue num_tuples = 2; message ColumnStats { required int32 attr_id = 1; @@ -89,7 +90,7 @@ message CatalogRelationStatistics { optional TypedValue min_value = 3; optional TypedValue max_value = 4; } - repeated ColumnStats column_stats = 2; + repeated ColumnStats column_stats = 3; } message CatalogRelationSchema { http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/catalog/CatalogRelationStatistics.cpp ---------------------------------------------------------------------- diff --git a/catalog/CatalogRelationStatistics.cpp b/catalog/CatalogRelationStatistics.cpp index 0f67234..f3f224d 100644 --- a/catalog/CatalogRelationStatistics.cpp +++ b/catalog/CatalogRelationStatistics.cpp @@ -28,6 +28,12 @@ const TypedValue CatalogRelationStatistics::kNullValue = CatalogRelationStatistics::CatalogRelationStatistics( const serialization::CatalogRelationStatistics &proto) { + if (proto.has_is_exact()) { + is_exact_ = proto.is_exact(); + } else { + is_exact_ = false; + } + if (proto.has_num_tuples()) { num_tuples_ = TypedValue::ReconstructFromProto(proto.num_tuples()); } else { @@ -54,9 +60,13 @@ CatalogRelationStatistics::CatalogRelationStatistics( serialization::CatalogRelationStatistics CatalogRelationStatistics::getProto() const { serialization::CatalogRelationStatistics proto; + + proto.set_is_exact(is_exact_); + if (!num_tuples_.isNull()) { proto.mutable_num_tuples()->CopyFrom(num_tuples_.getProto()); } + for (const auto &pair : column_stats_) { auto entry = proto.add_column_stats(); entry->set_attr_id(pair.first); @@ -74,6 +84,7 @@ serialization::CatalogRelationStatistics CatalogRelationStatistics::getProto() c stat.max_value.getProto()); } } + return proto; } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/catalog/CatalogRelationStatistics.hpp ---------------------------------------------------------------------- diff --git a/catalog/CatalogRelationStatistics.hpp b/catalog/CatalogRelationStatistics.hpp index ecb3110..df95231 100644 --- a/catalog/CatalogRelationStatistics.hpp +++ b/catalog/CatalogRelationStatistics.hpp @@ -47,7 +47,8 @@ class CatalogRelationStatistics { * @brief Constructor. **/ CatalogRelationStatistics() - : num_tuples_(kNullValue) {} + : is_exact_(false), + num_tuples_(kNullValue) {} /** * @brief Reconstruct a CatalogRelationStatistics object from its serialized @@ -67,6 +68,25 @@ class CatalogRelationStatistics { serialization::CatalogRelationStatistics getProto() const; /** + * @brief Check whether the statistics are exact for the relation. + * + * return True if the statistics are exact for the relation, false otherwise. + */ + bool isExact() const { + return is_exact_; + } + + /** + * @brief Set the boolean flag that indicates whether the statistics are exact + * for the relation. + * + * @param Exactness of the statistics. + */ + void setExactness(const bool is_exact) { + is_exact_ = is_exact; + } + + /** * @brief Check whether the number of tuples statistic is available. * * @return True if the number of tuples statistic is available, false otherwise. @@ -215,6 +235,9 @@ class CatalogRelationStatistics { static const TypedValue kNullValue; + // Whether the statistics are exact (i.e. up-to-date) for the relation. + bool is_exact_; + // Total number of tuples in the relation. TypedValue num_tuples_; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/cli/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/cli/CMakeLists.txt b/cli/CMakeLists.txt index be13c82..8c7fe2d 100644 --- a/cli/CMakeLists.txt +++ b/cli/CMakeLists.txt @@ -89,8 +89,11 @@ target_link_libraries(quickstep_cli_CommandExecutor quickstep_catalog_CatalogDatabase quickstep_catalog_CatalogRelation quickstep_catalog_CatalogRelationSchema + quickstep_catalog_CatalogRelationStatistics quickstep_cli_DropRelation quickstep_cli_PrintToScreen + quickstep_expressions_aggregation_AggregateFunctionMax + quickstep_expressions_aggregation_AggregateFunctionMin quickstep_parser_ParseStatement quickstep_parser_SqlParserWrapper quickstep_queryoptimizer_QueryHandle http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/cli/CommandExecutor.cpp ---------------------------------------------------------------------- diff --git a/cli/CommandExecutor.cpp b/cli/CommandExecutor.cpp index 3e1312a..377ca73 100644 --- a/cli/CommandExecutor.cpp +++ b/cli/CommandExecutor.cpp @@ -31,6 +31,7 @@ #include "catalog/CatalogDatabase.hpp" #include "catalog/CatalogRelation.hpp" #include "catalog/CatalogRelationSchema.hpp" +#include "catalog/CatalogRelationStatistics.hpp" #include "cli/DropRelation.hpp" #include "cli/PrintToScreen.hpp" #include "expressions/aggregation/AggregateFunctionMax.hpp" @@ -323,6 +324,8 @@ void executeAnalyze(const PtrVector<ParseString> *arguments, CatalogRelation *mutable_relation = query_processor->getDefaultDatabase()->getRelationByIdMutable(relation.getID()); + CatalogRelationStatistics *mutable_stat = + mutable_relation->getStatisticsMutable(); // Get the number of distinct values for each column. for (const CatalogAttribute &attribute : relation) { @@ -358,20 +361,19 @@ void executeAnalyze(const PtrVector<ParseString> *arguments, query_processor, parser_wrapper.get()); - auto *stat = mutable_relation->getStatisticsMutable(); - const attribute_id attr_id = attribute.getID(); - auto results_it = results.begin(); DCHECK(results_it->getTypeID() == TypeID::kLong); - stat->setNumDistinctValues(attr_id, - results_it->getLiteral<std::int64_t>()); + + const attribute_id attr_id = attribute.getID(); + mutable_stat->setNumDistinctValues(attr_id, + results_it->getLiteral<std::int64_t>()); if (is_min_applicable) { ++results_it; - stat->setMinValue(attr_id, *results_it); + mutable_stat->setMinValue(attr_id, *results_it); } if (is_max_applicable) { ++results_it; - stat->setMaxValue(attr_id, *results_it); + mutable_stat->setMaxValue(attr_id, *results_it); } } @@ -390,8 +392,9 @@ void executeAnalyze(const PtrVector<ParseString> *arguments, parser_wrapper.get()); DCHECK(num_tuples.getTypeID() == TypeID::kLong); - mutable_relation->getStatisticsMutable()->setNumTuples( - num_tuples.getLiteral<std::int64_t>()); + mutable_stat->setNumTuples(num_tuples.getLiteral<std::int64_t>()); + + mutable_stat->setExactness(true); fprintf(out, "done\n"); fflush(out); http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/query_optimizer/ExecutionGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp index 5a2c450..d24f498 100644 --- a/query_optimizer/ExecutionGenerator.cpp +++ b/query_optimizer/ExecutionGenerator.cpp @@ -924,9 +924,11 @@ void ExecutionGenerator::convertCopyFrom( insert_destination_index)); insert_destination_proto->set_relational_op_index(scan_operator_index); + CatalogRelation *mutable_output_relation = + catalog_database_->getRelationByIdMutable(output_relation->getID()); const QueryPlan::DAGNodeIndex save_blocks_operator_index = execution_plan_->addRelationalOperator( - new SaveBlocksOperator(query_handle_->query_id())); + new SaveBlocksOperator(query_handle_->query_id(), mutable_output_relation)); execution_plan_->addDirectDependency(save_blocks_operator_index, scan_operator_index, false /* is_pipeline_breaker */); @@ -1038,13 +1040,16 @@ void ExecutionGenerator::convertDeleteTuples( const CatalogRelationInfo *input_relation_info = findRelationInfoOutputByPhysical(physical_plan->input()); DCHECK(input_relation_info != nullptr); + + const CatalogRelation *input_relation = input_relation_info->relation; + if (execution_predicate == nullptr || (execution_predicate->hasStaticResult() && execution_predicate->getStaticResult())) { const QueryPlan::DAGNodeIndex drop_table_index = execution_plan_->addRelationalOperator( new DropTableOperator(query_handle_->query_id(), - *input_relation_info->relation, + *input_relation, catalog_database_, true /* only_drop_blocks */)); if (!input_relation_info->isStoredRelation()) { @@ -1059,7 +1064,7 @@ void ExecutionGenerator::convertDeleteTuples( const QueryPlan::DAGNodeIndex delete_tuples_index = execution_plan_->addRelationalOperator( new DeleteOperator(query_handle_->query_id(), - *input_relation_info->relation, + *input_relation, execution_predicate_index, input_relation_info->isStoredRelation())); @@ -1069,9 +1074,11 @@ void ExecutionGenerator::convertDeleteTuples( false /* is_pipeline_breaker */); } + CatalogRelation *mutable_relation = + catalog_database_->getRelationByIdMutable(input_relation->getID()); const QueryPlan::DAGNodeIndex save_blocks_index = execution_plan_->addRelationalOperator( - new SaveBlocksOperator(query_handle_->query_id())); + new SaveBlocksOperator(query_handle_->query_id(), mutable_relation)); execution_plan_->addDirectDependency(save_blocks_index, delete_tuples_index, false /* is_pipeline_breaker */); @@ -1150,9 +1157,11 @@ void ExecutionGenerator::convertInsertTuple( tuple_index)); insert_destination_proto->set_relational_op_index(insert_operator_index); + CatalogRelation *mutable_relation = + catalog_database_->getRelationByIdMutable(input_relation.getID()); const QueryPlan::DAGNodeIndex save_blocks_index = execution_plan_->addRelationalOperator( - new SaveBlocksOperator(query_handle_->query_id())); + new SaveBlocksOperator(query_handle_->query_id(), mutable_relation)); if (!input_relation_info->isStoredRelation()) { execution_plan_->addDirectDependency(insert_operator_index, input_relation_info->producer_operator_index, @@ -1201,6 +1210,7 @@ void ExecutionGenerator::convertInsertSelection( const CatalogRelationInfo *selection_relation_info = findRelationInfoOutputByPhysical(physical_plan->selection()); + const CatalogRelation *selection_relation = selection_relation_info->relation; // Prepare the attributes, which are output columns of the selection relation. std::vector<attribute_id> attributes; @@ -1221,7 +1231,7 @@ void ExecutionGenerator::convertInsertSelection( // physical plan by modifying class Physical. SelectOperator *insert_selection_op = new SelectOperator(query_handle_->query_id(), - *selection_relation_info->relation, + *selection_relation, destination_relation, insert_destination_index, QueryContext::kInvalidPredicateId, @@ -1232,8 +1242,11 @@ void ExecutionGenerator::convertInsertSelection( execution_plan_->addRelationalOperator(insert_selection_op); insert_destination_proto->set_relational_op_index(insert_selection_index); + CatalogRelation *mutable_relation = + catalog_database_->getRelationByIdMutable(selection_relation->getID()); const QueryPlan::DAGNodeIndex save_blocks_index = - execution_plan_->addRelationalOperator(new SaveBlocksOperator(query_handle_->query_id())); + execution_plan_->addRelationalOperator( + new SaveBlocksOperator(query_handle_->query_id(), mutable_relation)); if (!selection_relation_info->isStoredRelation()) { execution_plan_->addDirectDependency(insert_selection_index, @@ -1253,7 +1266,8 @@ void ExecutionGenerator::convertUpdateTable( findRelationInfoOutputByPhysical(physical_plan->input()); DCHECK(input_relation_info != nullptr); - const relation_id input_rel_id = input_relation_info->relation->getID(); + const CatalogRelation *input_relation = input_relation_info->relation; + const relation_id input_rel_id = input_relation->getID(); // Create InsertDestination proto. const QueryContext::insert_destination_id relocation_destination_index = @@ -1303,8 +1317,7 @@ void ExecutionGenerator::convertUpdateTable( const QueryPlan::DAGNodeIndex update_operator_index = execution_plan_->addRelationalOperator(new UpdateOperator( query_handle_->query_id(), - *catalog_database_->getRelationById( - input_rel_id), + *input_relation, relocation_destination_index, execution_predicate_index, update_group_index)); @@ -1312,7 +1325,8 @@ void ExecutionGenerator::convertUpdateTable( const QueryPlan::DAGNodeIndex save_blocks_index = execution_plan_->addRelationalOperator( - new SaveBlocksOperator(query_handle_->query_id())); + new SaveBlocksOperator(query_handle_->query_id(), + catalog_database_->getRelationByIdMutable(input_rel_id))); if (!input_relation_info->isStoredRelation()) { execution_plan_->addDirectDependency(update_operator_index, input_relation_info->producer_operator_index, http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/query_optimizer/cost_model/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/cost_model/CMakeLists.txt b/query_optimizer/cost_model/CMakeLists.txt index 032e34c..90133e7 100644 --- a/query_optimizer/cost_model/CMakeLists.txt +++ b/query_optimizer/cost_model/CMakeLists.txt @@ -48,6 +48,7 @@ target_link_libraries(quickstep_queryoptimizer_costmodel_SimpleCostModel target_link_libraries(quickstep_queryoptimizer_costmodel_StarSchemaSimpleCostModel glog quickstep_catalog_CatalogRelation + quickstep_catalog_CatalogRelationStatistics quickstep_queryoptimizer_costmodel_CostModel quickstep_queryoptimizer_expressions_AttributeReference quickstep_queryoptimizer_expressions_ComparisonExpression http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/relational_operators/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt index 0735bce..9e4b1b6 100644 --- a/relational_operators/CMakeLists.txt +++ b/relational_operators/CMakeLists.txt @@ -303,6 +303,7 @@ target_link_libraries(quickstep_relationaloperators_SampleOperator tmb) target_link_libraries(quickstep_relationaloperators_SaveBlocksOperator glog + quickstep_catalog_CatalogRelation quickstep_catalog_CatalogTypedefs quickstep_queryexecution_WorkOrderProtosContainer quickstep_queryexecution_WorkOrdersContainer http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/relational_operators/SaveBlocksOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/SaveBlocksOperator.cpp b/relational_operators/SaveBlocksOperator.cpp index 04d4211..ba27faa 100644 --- a/relational_operators/SaveBlocksOperator.cpp +++ b/relational_operators/SaveBlocksOperator.cpp @@ -21,6 +21,7 @@ #include <vector> +#include "catalog/CatalogRelation.hpp" #include "query_execution/WorkOrderProtosContainer.hpp" #include "query_execution/WorkOrdersContainer.hpp" #include "relational_operators/WorkOrder.pb.h" @@ -70,6 +71,10 @@ void SaveBlocksOperator::feedInputBlock(const block_id input_block_id, const rel destination_block_ids_.push_back(input_block_id); } +void SaveBlocksOperator::updateCatalogOnCompletion() { + relation_->getStatisticsMutable()->setExactness(false); +} + void SaveBlocksWorkOrder::execute() { // It may happen that the block gets saved to disk as a result of an eviction, // before this invocation. In either case, we don't care about the return http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/c8ef1991/relational_operators/SaveBlocksOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/SaveBlocksOperator.hpp b/relational_operators/SaveBlocksOperator.hpp index e84b641..a8d5327 100644 --- a/relational_operators/SaveBlocksOperator.hpp +++ b/relational_operators/SaveBlocksOperator.hpp @@ -38,6 +38,7 @@ namespace tmb { class MessageBus; } namespace quickstep { +class CatalogRelation; class QueryContext; class StorageManager; class WorkOrderProtosContainer; @@ -56,13 +57,16 @@ class SaveBlocksOperator : public RelationalOperator { * @brief Constructor for saving only modified blocks in a relation. * * @param query_id The ID of the query to which this operator belongs. + * @param relation The relation whose blocks will be saved. * @param force If true, force writing of all blocks to disk, otherwise only * write dirty blocks. **/ explicit SaveBlocksOperator(const std::size_t query_id, + CatalogRelation *relation, const bool force = false) : RelationalOperator(query_id), force_(force), + relation_(relation), num_workorders_generated_(0) {} ~SaveBlocksOperator() override {} @@ -89,9 +93,12 @@ class SaveBlocksOperator : public RelationalOperator { } } + void updateCatalogOnCompletion() override; + private: const bool force_; + CatalogRelation *relation_; std::vector<block_id> destination_block_ids_; std::vector<block_id>::size_type num_workorders_generated_;