http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/expressions/aggregation/AggregationHandleMax.hpp ---------------------------------------------------------------------- diff --git a/expressions/aggregation/AggregationHandleMax.hpp b/expressions/aggregation/AggregationHandleMax.hpp index d851a0c..cfaa663 100644 --- a/expressions/aggregation/AggregationHandleMax.hpp +++ b/expressions/aggregation/AggregationHandleMax.hpp @@ -21,15 +21,14 @@ #define QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_MAX_HPP_ #include <cstddef> +#include <cstdint> #include <memory> #include <utility> #include <vector> -#include "catalog/CatalogTypedefs.hpp" #include "expressions/aggregation/AggregationConcreteHandle.hpp" #include "expressions/aggregation/AggregationHandle.hpp" -#include "storage/FastHashTable.hpp" -#include "storage/HashTableBase.hpp" +#include "storage/ValueAccessorMultiplexer.hpp" #include "threading/SpinMutex.hpp" #include "types/Type.hpp" #include "types/TypedValue.hpp" @@ -40,9 +39,8 @@ namespace quickstep { +class AggregationStateHashTableBase; class ColumnVector; -class StorageManager; -class ValueAccessor; /** \addtogroup Expressions * @{ @@ -86,42 +84,41 @@ class AggregationHandleMax : public AggregationConcreteHandle { public: ~AggregationHandleMax() override {} + std::vector<const Type *> getArgumentTypes() const override { + return {&type_}; + } + + const Type* getResultType() const override { + return &type_; + } + AggregationState* createInitialState() const override { return new AggregationStateMax(type_); } - AggregationStateHashTableBase* createGroupByHashTable( - const HashTableImplType hash_table_impl, - const std::vector<const Type *> &group_by_types, - const std::size_t estimated_num_groups, - StorageManager *storage_manager) const override; - - /** - * @brief Iterate with max aggregation state. 
- */ inline void iterateUnaryInl(AggregationStateMax *state, const TypedValue &value) const { DCHECK(value.isPlausibleInstanceOf(type_.getSignature())); compareAndUpdate(static_cast<AggregationStateMax *>(state), value); } - inline void iterateUnaryInlFast(const TypedValue &value, - std::uint8_t *byte_ptr) const { + inline void iterateUnaryInl(const TypedValue &value, + std::uint8_t *byte_ptr) const { DCHECK(value.isPlausibleInstanceOf(type_.getSignature())); TypedValue *max_ptr = reinterpret_cast<TypedValue *>(byte_ptr); - compareAndUpdateFast(max_ptr, value); + compareAndUpdate(max_ptr, value); } - inline void updateStateUnary(const TypedValue &argument, - std::uint8_t *byte_ptr) const override { - if (!block_update_) { - iterateUnaryInlFast(argument, byte_ptr); - } - } + AggregationState* accumulateValueAccessor( + const std::vector<MultiSourceAttributeId> &argument_ids, + const ValueAccessorMultiplexer &accessor_mux) const override; - void blockUpdate() override { block_update_ = true; } + void mergeStates(const AggregationState &source, + AggregationState *destination) const override; - void allowUpdate() override { block_update_ = false; } + std::size_t getPayloadSize() const override { + return sizeof(TypedValue); + } void initPayload(std::uint8_t *byte_ptr) const override { TypedValue *max_ptr = reinterpret_cast<TypedValue *>(byte_ptr); @@ -136,38 +133,21 @@ class AggregationHandleMax : public AggregationConcreteHandle { } } - AggregationState* accumulateColumnVectors( - const std::vector<std::unique_ptr<ColumnVector>> &column_vectors) - const override; - -#ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION - AggregationState* accumulateValueAccessor( - ValueAccessor *accessor, - const std::vector<attribute_id> &accessor_ids) const override; -#endif - - void aggregateValueAccessorIntoHashTable( - ValueAccessor *accessor, - const std::vector<attribute_id> &argument_ids, - const std::vector<attribute_id> &group_by_key_ids, - AggregationStateHashTableBase 
*hash_table) const override; - - void mergeStates(const AggregationState &source, - AggregationState *destination) const override; - - void mergeStatesFast(const std::uint8_t *source, - std::uint8_t *destination) const override; + inline void updateStateUnary(const TypedValue &argument, + std::uint8_t *byte_ptr) const override { + DCHECK(argument.isPlausibleInstanceOf(type_.getSignature())); + TypedValue *max_ptr = reinterpret_cast<TypedValue *>(byte_ptr); + compareAndUpdate(max_ptr, argument); + } TypedValue finalize(const AggregationState &state) const override { return TypedValue(static_cast<const AggregationStateMax &>(state).max_); } - inline TypedValue finalizeHashTableEntry( - const AggregationState &state) const { - return TypedValue(static_cast<const AggregationStateMax &>(state).max_); - } + void mergeStates(const std::uint8_t *source, + std::uint8_t *destination) const override; - inline TypedValue finalizeHashTableEntryFast( + inline TypedValue finalizeHashTableEntry( const std::uint8_t *byte_ptr) const { const TypedValue *max_ptr = reinterpret_cast<const TypedValue *>(byte_ptr); return TypedValue(*max_ptr); @@ -175,29 +155,16 @@ class AggregationHandleMax : public AggregationConcreteHandle { ColumnVector* finalizeHashTable( const AggregationStateHashTableBase &hash_table, - std::vector<std::vector<TypedValue>> *group_by_keys, - int index) const override; + const std::size_t index, + std::vector<std::vector<TypedValue>> *group_by_keys) const override; - /** - * @brief Implementation of - * AggregationHandle::aggregateOnDistinctifyHashTableForSingle() - * for MAX aggregation. - */ AggregationState* aggregateOnDistinctifyHashTableForSingle( - const AggregationStateHashTableBase &distinctify_hash_table) - const override; + const AggregationStateHashTableBase &distinctify_hash_table) const override; - /** - * @brief Implementation of - * AggregationHandle::aggregateOnDistinctifyHashTableForGroupBy() - * for MAX aggregation. 
- */ void aggregateOnDistinctifyHashTableForGroupBy( const AggregationStateHashTableBase &distinctify_hash_table, - AggregationStateHashTableBase *aggregation_hash_table, - std::size_t index) const override; - - std::size_t getPayloadSize() const override { return sizeof(TypedValue); } + const std::size_t index, + AggregationStateHashTableBase *aggregation_hash_table) const override; private: friend class AggregateFunctionMax; @@ -227,8 +194,8 @@ class AggregationHandleMax : public AggregationConcreteHandle { } } - inline void compareAndUpdateFast(TypedValue *max_ptr, - const TypedValue &value) const { + inline void compareAndUpdate(TypedValue *max_ptr, + const TypedValue &value) const { if (value.isNull()) return; if (max_ptr->isNull() || fast_comparator_->compareTypedValues(value, *max_ptr)) { @@ -239,8 +206,6 @@ class AggregationHandleMax : public AggregationConcreteHandle { const Type &type_; std::unique_ptr<UncheckedComparator> fast_comparator_; - bool block_update_; - DISALLOW_COPY_AND_ASSIGN(AggregationHandleMax); };
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/expressions/aggregation/AggregationHandleMin.cpp ---------------------------------------------------------------------- diff --git a/expressions/aggregation/AggregationHandleMin.cpp b/expressions/aggregation/AggregationHandleMin.cpp index a07f299..08fb141 100644 --- a/expressions/aggregation/AggregationHandleMin.cpp +++ b/expressions/aggregation/AggregationHandleMin.cpp @@ -19,15 +19,16 @@ #include "expressions/aggregation/AggregationHandleMin.hpp" +#include <cstddef> +#include <cstdint> #include <memory> #include <vector> #include "catalog/CatalogTypedefs.hpp" -#include "storage/HashTable.hpp" -#include "storage/HashTableFactory.hpp" +#include "expressions/aggregation/AggregationID.hpp" +#include "storage/ValueAccessorMultiplexer.hpp" #include "types/Type.hpp" #include "types/TypedValue.hpp" -#include "types/containers/ColumnVector.hpp" #include "types/operations/comparisons/Comparison.hpp" #include "types/operations/comparisons/ComparisonFactory.hpp" #include "types/operations/comparisons/ComparisonID.hpp" @@ -36,54 +37,32 @@ namespace quickstep { -class StorageManager; +class ColumnVector; AggregationHandleMin::AggregationHandleMin(const Type &type) - : type_(type), block_update_(false) { + : AggregationConcreteHandle(AggregationID::kMin), + type_(type) { fast_comparator_.reset( ComparisonFactory::GetComparison(ComparisonID::kLess) .makeUncheckedComparatorForTypes(type, type.getNonNullableVersion())); } -AggregationStateHashTableBase* AggregationHandleMin::createGroupByHashTable( - const HashTableImplType hash_table_impl, - const std::vector<const Type *> &group_by_types, - const std::size_t estimated_num_groups, - StorageManager *storage_manager) const { - return AggregationStateHashTableFactory<AggregationStateMin>::CreateResizable( - hash_table_impl, group_by_types, estimated_num_groups, storage_manager); -} +AggregationState* AggregationHandleMin::accumulateValueAccessor( + const 
std::vector<MultiSourceAttributeId> &argument_ids, + const ValueAccessorMultiplexer &accessor_mux) const { + DCHECK_EQ(1u, argument_ids.size()) + << "Got wrong number of attributes for MIN: " << argument_ids.size(); -AggregationState* AggregationHandleMin::accumulateColumnVectors( - const std::vector<std::unique_ptr<ColumnVector>> &column_vectors) const { - DCHECK_EQ(1u, column_vectors.size()) - << "Got wrong number of ColumnVectors for MIN: " << column_vectors.size(); + const ValueAccessorSource argument_source = argument_ids.front().source; + const attribute_id argument_id = argument_ids.front().attr_id; - return new AggregationStateMin(fast_comparator_->accumulateColumnVector( - type_.getNullableVersion().makeNullValue(), *column_vectors.front())); -} - -#ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION -AggregationState* AggregationHandleMin::accumulateValueAccessor( - ValueAccessor *accessor, - const std::vector<attribute_id> &accessor_ids) const { - DCHECK_EQ(1u, accessor_ids.size()) - << "Got wrong number of attributes for MIN: " << accessor_ids.size(); + DCHECK(argument_source != ValueAccessorSource::kInvalid); + DCHECK_NE(argument_id, kInvalidAttributeID); return new AggregationStateMin(fast_comparator_->accumulateValueAccessor( type_.getNullableVersion().makeNullValue(), - accessor, - accessor_ids.front())); -} -#endif // QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION - -void AggregationHandleMin::aggregateValueAccessorIntoHashTable( - ValueAccessor *accessor, - const std::vector<attribute_id> &argument_ids, - const std::vector<attribute_id> &group_by_key_ids, - AggregationStateHashTableBase *hash_table) const { - DCHECK_EQ(1u, argument_ids.size()) - << "Got wrong number of arguments for MIN: " << argument_ids.size(); + accessor_mux.getValueAccessorBySource(argument_source), + argument_id)); } void AggregationHandleMin::mergeStates(const AggregationState &source, @@ -98,41 +77,37 @@ void AggregationHandleMin::mergeStates(const AggregationState 
&source, } } -void AggregationHandleMin::mergeStatesFast(const std::uint8_t *source, - std::uint8_t *destination) const { +void AggregationHandleMin::mergeStates(const std::uint8_t *source, + std::uint8_t *destination) const { const TypedValue *src_min_ptr = reinterpret_cast<const TypedValue *>(source); TypedValue *dst_min_ptr = reinterpret_cast<TypedValue *>(destination); if (!(src_min_ptr->isNull())) { - compareAndUpdateFast(dst_min_ptr, *src_min_ptr); + compareAndUpdate(dst_min_ptr, *src_min_ptr); } } ColumnVector* AggregationHandleMin::finalizeHashTable( const AggregationStateHashTableBase &hash_table, - std::vector<std::vector<TypedValue>> *group_by_keys, - int index) const { - return finalizeHashTableHelperFast<AggregationHandleMin, - AggregationStateFastHashTable>( - type_.getNonNullableVersion(), hash_table, group_by_keys, index); + const std::size_t index, + std::vector<std::vector<TypedValue>> *group_by_keys) const { + return finalizeHashTableHelper<AggregationHandleMin>( + type_, hash_table, index, group_by_keys); } -AggregationState* -AggregationHandleMin::aggregateOnDistinctifyHashTableForSingle( +AggregationState* AggregationHandleMin::aggregateOnDistinctifyHashTableForSingle( const AggregationStateHashTableBase &distinctify_hash_table) const { - return aggregateOnDistinctifyHashTableForSingleUnaryHelperFast< - AggregationHandleMin, - AggregationStateMin>(distinctify_hash_table); + return aggregateOnDistinctifyHashTableForSingleUnaryHelper< + AggregationHandleMin, AggregationStateMin>( + distinctify_hash_table); } void AggregationHandleMin::aggregateOnDistinctifyHashTableForGroupBy( const AggregationStateHashTableBase &distinctify_hash_table, - AggregationStateHashTableBase *aggregation_hash_table, - std::size_t index) const { - aggregateOnDistinctifyHashTableForGroupByUnaryHelperFast< - AggregationHandleMin, - AggregationStateFastHashTable>( - distinctify_hash_table, aggregation_hash_table, index); + const std::size_t index, + 
AggregationStateHashTableBase *aggregation_hash_table) const { + aggregateOnDistinctifyHashTableForGroupByUnaryHelper<AggregationHandleMin>( + distinctify_hash_table, index, aggregation_hash_table); } } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/expressions/aggregation/AggregationHandleMin.hpp ---------------------------------------------------------------------- diff --git a/expressions/aggregation/AggregationHandleMin.hpp b/expressions/aggregation/AggregationHandleMin.hpp index e3472ec..9c0012e 100644 --- a/expressions/aggregation/AggregationHandleMin.hpp +++ b/expressions/aggregation/AggregationHandleMin.hpp @@ -21,15 +21,14 @@ #define QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_MIN_HPP_ #include <cstddef> +#include <cstdint> #include <memory> #include <utility> #include <vector> -#include "catalog/CatalogTypedefs.hpp" #include "expressions/aggregation/AggregationConcreteHandle.hpp" #include "expressions/aggregation/AggregationHandle.hpp" -#include "storage/FastHashTable.hpp" -#include "storage/HashTableBase.hpp" +#include "storage/ValueAccessorMultiplexer.hpp" #include "threading/SpinMutex.hpp" #include "types/Type.hpp" #include "types/TypedValue.hpp" @@ -40,9 +39,8 @@ namespace quickstep { +class AggregationStateHashTableBase; class ColumnVector; -class StorageManager; -class ValueAccessor; /** \addtogroup Expressions * @{ @@ -88,42 +86,45 @@ class AggregationHandleMin : public AggregationConcreteHandle { public: ~AggregationHandleMin() override {} + std::vector<const Type *> getArgumentTypes() const override { + return {&type_}; + } + + const Type* getResultType() const override { + return &type_; + } + AggregationState* createInitialState() const override { return new AggregationStateMin(type_); } - AggregationStateHashTableBase* createGroupByHashTable( - const HashTableImplType hash_table_impl, - const std::vector<const Type *> &group_by_types, - const std::size_t estimated_num_groups, - 
StorageManager *storage_manager) const override; - - /** - * @brief Iterate with min aggregation state. - */ inline void iterateUnaryInl(AggregationStateMin *state, const TypedValue &value) const { DCHECK(value.isPlausibleInstanceOf(type_.getSignature())); compareAndUpdate(state, value); } - inline void iterateUnaryInlFast(const TypedValue &value, - std::uint8_t *byte_ptr) const { + inline void iterateUnaryInl(const TypedValue &value, + std::uint8_t *byte_ptr) const { DCHECK(value.isPlausibleInstanceOf(type_.getSignature())); TypedValue *min_ptr = reinterpret_cast<TypedValue *>(byte_ptr); - compareAndUpdateFast(min_ptr, value); + compareAndUpdate(min_ptr, value); } - inline void updateStateUnary(const TypedValue &argument, - std::uint8_t *byte_ptr) const override { - if (!block_update_) { - iterateUnaryInlFast(argument, byte_ptr); - } - } + AggregationState* accumulateValueAccessor( + const std::vector<MultiSourceAttributeId> &argument_ids, + const ValueAccessorMultiplexer &accessor_mux) const override; - void blockUpdate() override { block_update_ = true; } + void mergeStates(const AggregationState &source, + AggregationState *destination) const override; - void allowUpdate() override { block_update_ = false; } + TypedValue finalize(const AggregationState &state) const override { + return static_cast<const AggregationStateMin &>(state).min_; + } + + std::size_t getPayloadSize() const override { + return sizeof(TypedValue); + } void initPayload(std::uint8_t *byte_ptr) const override { TypedValue *min_ptr = reinterpret_cast<TypedValue *>(byte_ptr); @@ -138,68 +139,33 @@ class AggregationHandleMin : public AggregationConcreteHandle { } } - AggregationState* accumulateColumnVectors( - const std::vector<std::unique_ptr<ColumnVector>> &column_vectors) - const override; - -#ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION - AggregationState* accumulateValueAccessor( - ValueAccessor *accessor, - const std::vector<attribute_id> &accessor_ids) const override; -#endif - 
- void aggregateValueAccessorIntoHashTable( - ValueAccessor *accessor, - const std::vector<attribute_id> &argument_ids, - const std::vector<attribute_id> &group_by_key_ids, - AggregationStateHashTableBase *hash_table) const override; - - void mergeStates(const AggregationState &source, - AggregationState *destination) const override; - - void mergeStatesFast(const std::uint8_t *source, - std::uint8_t *destination) const override; - - TypedValue finalize(const AggregationState &state) const override { - return static_cast<const AggregationStateMin &>(state).min_; + inline void updateStateUnary(const TypedValue &argument, + std::uint8_t *byte_ptr) const override { + DCHECK(argument.isPlausibleInstanceOf(type_.getSignature())); + TypedValue *min_ptr = reinterpret_cast<TypedValue *>(byte_ptr); + compareAndUpdate(min_ptr, argument); } - inline TypedValue finalizeHashTableEntry( - const AggregationState &state) const { - return static_cast<const AggregationStateMin &>(state).min_; - } + void mergeStates(const std::uint8_t *source, + std::uint8_t *destination) const override; - inline TypedValue finalizeHashTableEntryFast( + inline TypedValue finalizeHashTableEntry( const std::uint8_t *byte_ptr) const { - const TypedValue *min_ptr = reinterpret_cast<const TypedValue *>(byte_ptr); - return TypedValue(*min_ptr); + return *reinterpret_cast<const TypedValue *>(byte_ptr); } ColumnVector* finalizeHashTable( const AggregationStateHashTableBase &hash_table, - std::vector<std::vector<TypedValue>> *group_by_keys, - int index) const override; + const std::size_t index, + std::vector<std::vector<TypedValue>> *group_by_keys) const override; - /** - * @brief Implementation of - * AggregationHandle::aggregateOnDistinctifyHashTableForSingle() - * for MIN aggregation. 
- */ AggregationState* aggregateOnDistinctifyHashTableForSingle( - const AggregationStateHashTableBase &distinctify_hash_table) - const override; + const AggregationStateHashTableBase &distinctify_hash_table) const override; - /** - * @brief Implementation of - * AggregationHandle::aggregateOnDistinctifyHashTableForGroupBy() - * for MIN aggregation. - */ void aggregateOnDistinctifyHashTableForGroupBy( const AggregationStateHashTableBase &distinctify_hash_table, - AggregationStateHashTableBase *aggregation_hash_table, - std::size_t index) const override; - - std::size_t getPayloadSize() const override { return sizeof(TypedValue); } + const std::size_t index, + AggregationStateHashTableBase *aggregation_hash_table) const override; private: friend class AggregateFunctionMin; @@ -228,8 +194,8 @@ class AggregationHandleMin : public AggregationConcreteHandle { } } - inline void compareAndUpdateFast(TypedValue *min_ptr, - const TypedValue &value) const { + inline void compareAndUpdate(TypedValue *min_ptr, + const TypedValue &value) const { if (value.isNull()) return; if (min_ptr->isNull() || fast_comparator_->compareTypedValues(value, *min_ptr)) { @@ -240,8 +206,6 @@ class AggregationHandleMin : public AggregationConcreteHandle { const Type &type_; std::unique_ptr<UncheckedComparator> fast_comparator_; - bool block_update_; - DISALLOW_COPY_AND_ASSIGN(AggregationHandleMin); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/expressions/aggregation/AggregationHandleSum.cpp ---------------------------------------------------------------------- diff --git a/expressions/aggregation/AggregationHandleSum.cpp b/expressions/aggregation/AggregationHandleSum.cpp index 642d88d..9f5f220 100644 --- a/expressions/aggregation/AggregationHandleSum.cpp +++ b/expressions/aggregation/AggregationHandleSum.cpp @@ -20,13 +20,13 @@ #include "expressions/aggregation/AggregationHandleSum.hpp" #include <cstddef> +#include <cstdint> #include <memory> -#include <utility> 
#include <vector> #include "catalog/CatalogTypedefs.hpp" -#include "storage/HashTable.hpp" -#include "storage/HashTableFactory.hpp" +#include "expressions/aggregation/AggregationID.hpp" +#include "storage/ValueAccessorMultiplexer.hpp" #include "threading/SpinMutex.hpp" #include "types/Type.hpp" #include "types/TypeFactory.hpp" @@ -40,10 +40,11 @@ namespace quickstep { -class StorageManager; +class ColumnVector; AggregationHandleSum::AggregationHandleSum(const Type &type) - : argument_type_(type), block_update_(false) { + : AggregationConcreteHandle(AggregationID::kSum), + argument_type_(type) { // We sum Int as Long and Float as Double so that we have more headroom when // adding many values. TypeID type_precision_id; @@ -79,47 +80,27 @@ AggregationHandleSum::AggregationHandleSum(const Type &type) result_type_ = &sum_type.getNullableVersion(); } -AggregationStateHashTableBase* AggregationHandleSum::createGroupByHashTable( - const HashTableImplType hash_table_impl, - const std::vector<const Type *> &group_by_types, - const std::size_t estimated_num_groups, - StorageManager *storage_manager) const { - return AggregationStateHashTableFactory<AggregationStateSum>::CreateResizable( - hash_table_impl, group_by_types, estimated_num_groups, storage_manager); -} +AggregationState* AggregationHandleSum::accumulateValueAccessor( + const std::vector<MultiSourceAttributeId> &argument_ids, + const ValueAccessorMultiplexer &accessor_mux) const { + DCHECK_EQ(1u, argument_ids.size()) + << "Got wrong number of attributes for SUM: " << argument_ids.size(); -AggregationState* AggregationHandleSum::accumulateColumnVectors( - const std::vector<std::unique_ptr<ColumnVector>> &column_vectors) const { - DCHECK_EQ(1u, column_vectors.size()) - << "Got wrong number of ColumnVectors for SUM: " << column_vectors.size(); - std::size_t num_tuples = 0; - TypedValue cv_sum = fast_operator_->accumulateColumnVector( - blank_state_.sum_, *column_vectors.front(), &num_tuples); - return new 
AggregationStateSum(std::move(cv_sum), num_tuples == 0); -} + const ValueAccessorSource argument_source = argument_ids.front().source; + const attribute_id argument_id = argument_ids.front().attr_id; -#ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION -AggregationState* AggregationHandleSum::accumulateValueAccessor( - ValueAccessor *accessor, - const std::vector<attribute_id> &accessor_ids) const { - DCHECK_EQ(1u, accessor_ids.size()) - << "Got wrong number of attributes for SUM: " << accessor_ids.size(); + DCHECK(argument_source != ValueAccessorSource::kInvalid); + DCHECK_NE(argument_id, kInvalidAttributeID); std::size_t num_tuples = 0; - TypedValue va_sum = fast_operator_->accumulateValueAccessor( - blank_state_.sum_, accessor, accessor_ids.front(), &num_tuples); + TypedValue va_sum = + fast_operator_->accumulateValueAccessor( + blank_state_.sum_, + accessor_mux.getValueAccessorBySource(argument_source), + argument_id, + &num_tuples); return new AggregationStateSum(std::move(va_sum), num_tuples == 0); } -#endif - -void AggregationHandleSum::aggregateValueAccessorIntoHashTable( - ValueAccessor *accessor, - const std::vector<attribute_id> &argument_ids, - const std::vector<attribute_id> &group_by_key_ids, - AggregationStateHashTableBase *hash_table) const { - DCHECK_EQ(1u, argument_ids.size()) - << "Got wrong number of arguments for SUM: " << argument_ids.size(); -} void AggregationHandleSum::mergeStates(const AggregationState &source, AggregationState *destination) const { @@ -134,8 +115,8 @@ void AggregationHandleSum::mergeStates(const AggregationState &source, sum_destination->null_ = sum_destination->null_ && sum_source.null_; } -void AggregationHandleSum::mergeStatesFast(const std::uint8_t *source, - std::uint8_t *destination) const { +void AggregationHandleSum::mergeStates(const std::uint8_t *source, + std::uint8_t *destination) const { const TypedValue *src_sum_ptr = reinterpret_cast<const TypedValue *>(source + blank_state_.sum_offset_); const bool 
*src_null_ptr = @@ -162,29 +143,25 @@ TypedValue AggregationHandleSum::finalize(const AggregationState &state) const { ColumnVector* AggregationHandleSum::finalizeHashTable( const AggregationStateHashTableBase &hash_table, - std::vector<std::vector<TypedValue>> *group_by_keys, - int index) const { - return finalizeHashTableHelperFast<AggregationHandleSum, - AggregationStateFastHashTable>( - *result_type_, hash_table, group_by_keys, index); + const std::size_t index, + std::vector<std::vector<TypedValue>> *group_by_keys) const { + return finalizeHashTableHelper<AggregationHandleSum>( + *result_type_, hash_table, index, group_by_keys); } -AggregationState* -AggregationHandleSum::aggregateOnDistinctifyHashTableForSingle( +AggregationState* AggregationHandleSum::aggregateOnDistinctifyHashTableForSingle( const AggregationStateHashTableBase &distinctify_hash_table) const { - return aggregateOnDistinctifyHashTableForSingleUnaryHelperFast< - AggregationHandleSum, - AggregationStateSum>(distinctify_hash_table); + return aggregateOnDistinctifyHashTableForSingleUnaryHelper< + AggregationHandleSum, AggregationStateSum>( + distinctify_hash_table); } void AggregationHandleSum::aggregateOnDistinctifyHashTableForGroupBy( const AggregationStateHashTableBase &distinctify_hash_table, - AggregationStateHashTableBase *aggregation_hash_table, - std::size_t index) const { - aggregateOnDistinctifyHashTableForGroupByUnaryHelperFast< - AggregationHandleSum, - AggregationStateFastHashTable>( - distinctify_hash_table, aggregation_hash_table, index); + const std::size_t index, + AggregationStateHashTableBase *aggregation_hash_table) const { + aggregateOnDistinctifyHashTableForGroupByUnaryHelper<AggregationHandleSum>( + distinctify_hash_table, index, aggregation_hash_table); } } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/expressions/aggregation/AggregationHandleSum.hpp ---------------------------------------------------------------------- 
diff --git a/expressions/aggregation/AggregationHandleSum.hpp b/expressions/aggregation/AggregationHandleSum.hpp index f0d23e1..edab7bb 100644 --- a/expressions/aggregation/AggregationHandleSum.hpp +++ b/expressions/aggregation/AggregationHandleSum.hpp @@ -21,15 +21,14 @@ #define QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_SUM_HPP_ #include <cstddef> +#include <cstdint> #include <memory> #include <utility> #include <vector> -#include "catalog/CatalogTypedefs.hpp" #include "expressions/aggregation/AggregationConcreteHandle.hpp" #include "expressions/aggregation/AggregationHandle.hpp" -#include "storage/FastHashTable.hpp" -#include "storage/HashTableBase.hpp" +#include "storage/ValueAccessorMultiplexer.hpp" #include "threading/SpinMutex.hpp" #include "types/Type.hpp" #include "types/TypedValue.hpp" @@ -40,9 +39,8 @@ namespace quickstep { +class AggregationStateHashTableBase; class ColumnVector; -class StorageManager; -class ValueAccessor; /** \addtogroup Expressions * @{ @@ -101,16 +99,18 @@ class AggregationHandleSum : public AggregationConcreteHandle { public: ~AggregationHandleSum() override {} + std::vector<const Type *> getArgumentTypes() const override { + return {&argument_type_}; + } + + const Type* getResultType() const override { + return result_type_; + } + AggregationState* createInitialState() const override { return new AggregationStateSum(blank_state_); } - AggregationStateHashTableBase* createGroupByHashTable( - const HashTableImplType hash_table_impl, - const std::vector<const Type *> &group_by_types, - const std::size_t estimated_num_groups, - StorageManager *storage_manager) const override; - inline void iterateUnaryInl(AggregationStateSum *state, const TypedValue &value) const { DCHECK(value.isPlausibleInstanceOf(argument_type_.getSignature())); @@ -121,8 +121,8 @@ class AggregationHandleSum : public AggregationConcreteHandle { state->null_ = false; } - inline void iterateUnaryInlFast(const TypedValue &value, - std::uint8_t *byte_ptr) 
const { + inline void iterateUnaryInl(const TypedValue &value, + std::uint8_t *byte_ptr) const { DCHECK(value.isPlausibleInstanceOf(argument_type_.getSignature())); if (value.isNull()) return; TypedValue *sum_ptr = @@ -133,16 +133,18 @@ class AggregationHandleSum : public AggregationConcreteHandle { *null_ptr = false; } - inline void updateStateUnary(const TypedValue &argument, - std::uint8_t *byte_ptr) const override { - if (!block_update_) { - iterateUnaryInlFast(argument, byte_ptr); - } - } + AggregationState* accumulateValueAccessor( + const std::vector<MultiSourceAttributeId> &argument_ids, + const ValueAccessorMultiplexer &accessor_mux) const override; - void blockUpdate() override { block_update_ = true; } + void mergeStates(const AggregationState &source, + AggregationState *destination) const override; - void allowUpdate() override { block_update_ = false; } + TypedValue finalize(const AggregationState &state) const override; + + std::size_t getPayloadSize() const override { + return blank_state_.getPayloadSize(); + } void initPayload(std::uint8_t *byte_ptr) const override { TypedValue *sum_ptr = @@ -161,70 +163,37 @@ class AggregationHandleSum : public AggregationConcreteHandle { } } - AggregationState* accumulateColumnVectors( - const std::vector<std::unique_ptr<ColumnVector>> &column_vectors) - const override; - -#ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION - AggregationState* accumulateValueAccessor( - ValueAccessor *accessor, - const std::vector<attribute_id> &accessor_id) const override; -#endif - - void aggregateValueAccessorIntoHashTable( - ValueAccessor *accessor, - const std::vector<attribute_id> &argument_ids, - const std::vector<attribute_id> &group_by_key_ids, - AggregationStateHashTableBase *hash_table) const override; - - void mergeStates(const AggregationState &source, - AggregationState *destination) const override; - - void mergeStatesFast(const std::uint8_t *source, - std::uint8_t *destination) const override; - - TypedValue 
finalize(const AggregationState &state) const override; - - inline TypedValue finalizeHashTableEntry( - const AggregationState &state) const { - return static_cast<const AggregationStateSum &>(state).sum_; + inline void updateStateUnary(const TypedValue &argument, + std::uint8_t *byte_ptr) const override { + DCHECK(argument.isPlausibleInstanceOf(argument_type_.getSignature())); + if (argument.isNull()) return; + TypedValue *sum_ptr = + reinterpret_cast<TypedValue *>(byte_ptr + blank_state_.sum_offset_); + bool *null_ptr = + reinterpret_cast<bool *>(byte_ptr + blank_state_.null_offset_); + *sum_ptr = fast_operator_->applyToTypedValues(*sum_ptr, argument); + *null_ptr = false; } - inline TypedValue finalizeHashTableEntryFast( - const std::uint8_t *byte_ptr) const { - std::uint8_t *value_ptr = const_cast<std::uint8_t *>(byte_ptr); - TypedValue *sum_ptr = - reinterpret_cast<TypedValue *>(value_ptr + blank_state_.sum_offset_); - return *sum_ptr; + void mergeStates(const std::uint8_t *source, + std::uint8_t *destination) const override; + + inline TypedValue finalizeHashTableEntry(const std::uint8_t *byte_ptr) const { + return *reinterpret_cast<const TypedValue *>(byte_ptr + blank_state_.sum_offset_); } ColumnVector* finalizeHashTable( const AggregationStateHashTableBase &hash_table, - std::vector<std::vector<TypedValue>> *group_by_keys, - int index) const override; + const std::size_t index, + std::vector<std::vector<TypedValue>> *group_by_keys) const override; - /** - * @brief Implementation of - * AggregationHandle::aggregateOnDistinctifyHashTableForSingle() - * for SUM aggregation. - */ AggregationState* aggregateOnDistinctifyHashTableForSingle( - const AggregationStateHashTableBase &distinctify_hash_table) - const override; + const AggregationStateHashTableBase &distinctify_hash_table) const override; - /** - * @brief Implementation of - * AggregationHandle::aggregateOnDistinctifyHashTableForGroupBy() - * for SUM aggregation. 
- */ void aggregateOnDistinctifyHashTableForGroupBy( const AggregationStateHashTableBase &distinctify_hash_table, - AggregationStateHashTableBase *aggregation_hash_table, - std::size_t index) const override; - - std::size_t getPayloadSize() const override { - return blank_state_.getPayloadSize(); - } + const std::size_t index, + AggregationStateHashTableBase *aggregation_hash_table) const override; private: friend class AggregateFunctionSum; @@ -242,8 +211,6 @@ class AggregationHandleSum : public AggregationConcreteHandle { std::unique_ptr<UncheckedBinaryOperator> fast_operator_; std::unique_ptr<UncheckedBinaryOperator> merge_operator_; - bool block_update_; - DISALLOW_COPY_AND_ASSIGN(AggregationHandleSum); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/expressions/aggregation/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/expressions/aggregation/CMakeLists.txt b/expressions/aggregation/CMakeLists.txt index e9503f7..7203c8c 100644 --- a/expressions/aggregation/CMakeLists.txt +++ b/expressions/aggregation/CMakeLists.txt @@ -55,9 +55,6 @@ add_library(quickstep_expressions_aggregation_AggregationHandleAvg add_library(quickstep_expressions_aggregation_AggregationHandleCount AggregationHandleCount.cpp AggregationHandleCount.hpp) -add_library(quickstep_expressions_aggregation_AggregationHandleDistinct - AggregationHandleDistinct.cpp - AggregationHandleDistinct.hpp) add_library(quickstep_expressions_aggregation_AggregationHandleMax AggregationHandleMax.cpp AggregationHandleMax.hpp) @@ -146,10 +143,11 @@ target_link_libraries(quickstep_expressions_aggregation_AggregationConcreteHandl glog quickstep_catalog_CatalogTypedefs quickstep_expressions_aggregation_AggregationHandle - quickstep_storage_FastHashTable - quickstep_storage_HashTable + quickstep_expressions_aggregation_AggregationID quickstep_storage_HashTableBase quickstep_storage_HashTableFactory + quickstep_storage_PackedPayloadHashTable 
+ quickstep_storage_ValueAccessorMultiplexer quickstep_threading_SpinMutex quickstep_types_TypedValue quickstep_types_containers_ColumnVector @@ -157,7 +155,9 @@ target_link_libraries(quickstep_expressions_aggregation_AggregationConcreteHandl target_link_libraries(quickstep_expressions_aggregation_AggregationHandle glog quickstep_catalog_CatalogTypedefs + quickstep_expressions_aggregation_AggregationID quickstep_storage_HashTableBase + quickstep_storage_ValueAccessorMultiplexer quickstep_types_TypedValue quickstep_utility_Macros) target_link_libraries(quickstep_expressions_aggregation_AggregationHandleAvg @@ -165,10 +165,9 @@ target_link_libraries(quickstep_expressions_aggregation_AggregationHandleAvg quickstep_catalog_CatalogTypedefs quickstep_expressions_aggregation_AggregationConcreteHandle quickstep_expressions_aggregation_AggregationHandle - quickstep_storage_FastHashTable - quickstep_storage_HashTable + quickstep_expressions_aggregation_AggregationID quickstep_storage_HashTableBase - quickstep_storage_HashTableFactory + quickstep_storage_ValueAccessorMultiplexer quickstep_threading_SpinMutex quickstep_types_Type quickstep_types_TypeFactory @@ -183,35 +182,26 @@ target_link_libraries(quickstep_expressions_aggregation_AggregationHandleCount quickstep_catalog_CatalogTypedefs quickstep_expressions_aggregation_AggregationConcreteHandle quickstep_expressions_aggregation_AggregationHandle - quickstep_storage_FastHashTable - quickstep_storage_HashTable + quickstep_expressions_aggregation_AggregationID quickstep_storage_HashTableBase - quickstep_storage_HashTableFactory quickstep_storage_ValueAccessor + quickstep_storage_ValueAccessorMultiplexer quickstep_storage_ValueAccessorUtil + quickstep_types_LongType quickstep_types_TypeFactory quickstep_types_TypeID quickstep_types_TypedValue quickstep_types_containers_ColumnVector quickstep_types_containers_ColumnVectorUtil quickstep_utility_Macros) 
-target_link_libraries(quickstep_expressions_aggregation_AggregationHandleDistinct - glog - quickstep_catalog_CatalogTypedefs - quickstep_expressions_aggregation_AggregationConcreteHandle - quickstep_storage_HashTable - quickstep_storage_HashTableBase - quickstep_types_TypedValue - quickstep_utility_Macros) target_link_libraries(quickstep_expressions_aggregation_AggregationHandleMax glog quickstep_catalog_CatalogTypedefs quickstep_expressions_aggregation_AggregationConcreteHandle quickstep_expressions_aggregation_AggregationHandle - quickstep_storage_FastHashTable - quickstep_storage_HashTable + quickstep_expressions_aggregation_AggregationID quickstep_storage_HashTableBase - quickstep_storage_HashTableFactory + quickstep_storage_ValueAccessorMultiplexer quickstep_threading_SpinMutex quickstep_types_Type quickstep_types_TypedValue @@ -225,10 +215,9 @@ target_link_libraries(quickstep_expressions_aggregation_AggregationHandleMin quickstep_catalog_CatalogTypedefs quickstep_expressions_aggregation_AggregationConcreteHandle quickstep_expressions_aggregation_AggregationHandle - quickstep_storage_FastHashTable - quickstep_storage_HashTable + quickstep_expressions_aggregation_AggregationID quickstep_storage_HashTableBase - quickstep_storage_HashTableFactory + quickstep_storage_ValueAccessorMultiplexer quickstep_threading_SpinMutex quickstep_types_Type quickstep_types_TypedValue @@ -242,10 +231,9 @@ target_link_libraries(quickstep_expressions_aggregation_AggregationHandleSum quickstep_catalog_CatalogTypedefs quickstep_expressions_aggregation_AggregationConcreteHandle quickstep_expressions_aggregation_AggregationHandle - quickstep_storage_FastHashTable - quickstep_storage_HashTable + quickstep_expressions_aggregation_AggregationID quickstep_storage_HashTableBase - quickstep_storage_HashTableFactory + quickstep_storage_ValueAccessorMultiplexer quickstep_threading_SpinMutex quickstep_types_Type quickstep_types_TypeFactory @@ -271,7 +259,6 @@ 
target_link_libraries(quickstep_expressions_aggregation quickstep_expressions_aggregation_AggregationHandle quickstep_expressions_aggregation_AggregationHandleAvg quickstep_expressions_aggregation_AggregationHandleCount - quickstep_expressions_aggregation_AggregationHandleDistinct quickstep_expressions_aggregation_AggregationHandleMax quickstep_expressions_aggregation_AggregationHandleMin quickstep_expressions_aggregation_AggregationHandleSum http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/query_execution/QueryContext.hpp ---------------------------------------------------------------------- diff --git a/query_execution/QueryContext.hpp b/query_execution/QueryContext.hpp index 895c2ea..ed0f99c 100644 --- a/query_execution/QueryContext.hpp +++ b/query_execution/QueryContext.hpp @@ -200,20 +200,6 @@ class QueryContext { } /** - * @brief Destroy the payloads from the aggregation hash tables. - * - * @warning After calling these methods, the hash table will be in an invalid - * state. No other operation should be performed on them. - * - * @param id The ID of the AggregationOperationState. - **/ - inline void destroyAggregationHashTablePayload(const aggregation_state_id id) { - DCHECK_LT(id, aggregation_states_.size()); - DCHECK(aggregation_states_[id]); - aggregation_states_[id]->destroyAggregationHashTablePayload(); - } - - /** * @brief Whether the given GeneratorFunctionHandle id is valid. * * @param id The GeneratorFunctionHandle id. 
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/query_optimizer/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt index 7f90e11..a755832 100644 --- a/query_optimizer/CMakeLists.txt +++ b/query_optimizer/CMakeLists.txt @@ -64,6 +64,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator quickstep_expressions_Expressions_proto quickstep_expressions_aggregation_AggregateFunction quickstep_expressions_aggregation_AggregateFunction_proto + quickstep_expressions_aggregation_AggregationID quickstep_expressions_predicate_Predicate quickstep_expressions_scalar_Scalar quickstep_expressions_scalar_ScalarAttribute @@ -125,6 +126,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator quickstep_relationaloperators_DropTableOperator quickstep_relationaloperators_FinalizeAggregationOperator quickstep_relationaloperators_HashJoinOperator + quickstep_relationaloperators_InitializeAggregationOperator quickstep_relationaloperators_InsertOperator quickstep_relationaloperators_NestedLoopsJoinOperator quickstep_relationaloperators_RelationalOperator @@ -145,6 +147,7 @@ target_link_libraries(quickstep_queryoptimizer_ExecutionGenerator quickstep_storage_StorageBlockLayout_proto quickstep_storage_SubBlockTypeRegistry quickstep_types_Type + quickstep_types_TypeID quickstep_types_Type_proto quickstep_types_TypedValue quickstep_types_TypedValue_proto http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/query_optimizer/ExecutionGenerator.cpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp index 6918313..51e6e20 100644 --- a/query_optimizer/ExecutionGenerator.cpp +++ b/query_optimizer/ExecutionGenerator.cpp @@ -49,6 +49,7 @@ #include "expressions/Expressions.pb.h" #include 
"expressions/aggregation/AggregateFunction.hpp" #include "expressions/aggregation/AggregateFunction.pb.h" +#include "expressions/aggregation/AggregationID.hpp" #include "expressions/predicate/Predicate.hpp" #include "expressions/scalar/Scalar.hpp" #include "expressions/scalar/ScalarAttribute.hpp" @@ -105,6 +106,7 @@ #include "relational_operators/DropTableOperator.hpp" #include "relational_operators/FinalizeAggregationOperator.hpp" #include "relational_operators/HashJoinOperator.hpp" +#include "relational_operators/InitializeAggregationOperator.hpp" #include "relational_operators/InsertOperator.hpp" #include "relational_operators/NestedLoopsJoinOperator.hpp" #include "relational_operators/RelationalOperator.hpp" @@ -126,6 +128,7 @@ #include "storage/SubBlockTypeRegistry.hpp" #include "types/Type.hpp" #include "types/Type.pb.h" +#include "types/TypeID.hpp" #include "types/TypedValue.hpp" #include "types/TypedValue.pb.h" #include "types/containers/Tuple.pb.h" @@ -371,6 +374,103 @@ void ExecutionGenerator::dropAllTemporaryRelations() { } } +bool ExecutionGenerator::canUseCollisionFreeAggregation( + const P::AggregatePtr &aggregate, + const std::size_t estimated_num_groups, + std::size_t *max_num_groups) const { + // Supports only single group-by key. + if (aggregate->grouping_expressions().size() != 1) { + return false; + } + + // We need to know the exact min/max stats of the group-by key. + // So it must be a CatalogAttribute (but not an expression). 
+ E::AttributeReferencePtr group_by_key_attr; + const E::ExpressionPtr agg_expr = aggregate->grouping_expressions().front(); + if (!E::SomeAttributeReference::MatchesWithConditionalCast(agg_expr, &group_by_key_attr)) { + return false; + } + + bool min_value_stat_is_exact; + bool max_value_stat_is_exact; + const TypedValue min_value = + cost_model_for_aggregation_->findMinValueStat( + aggregate, group_by_key_attr, &min_value_stat_is_exact); + const TypedValue max_value = + cost_model_for_aggregation_->findMaxValueStat( + aggregate, group_by_key_attr, &max_value_stat_is_exact); + if (min_value.isNull() || max_value.isNull() || + (!min_value_stat_is_exact) || (!max_value_stat_is_exact)) { + return false; + } + + std::int64_t min_cpp_value; + std::int64_t max_cpp_value; + switch (group_by_key_attr->getValueType().getTypeID()) { + case TypeID::kInt: { + min_cpp_value = min_value.getLiteral<int>(); + max_cpp_value = max_value.getLiteral<int>(); + break; + } + case TypeID::kLong: { + min_cpp_value = min_value.getLiteral<std::int64_t>(); + max_cpp_value = max_value.getLiteral<std::int64_t>(); + break; + } + default: + return false; + } + + // TODO(jianqiao): + // 1. Handle the case where min_cpp_value is below 0 or far greater than 0. + // 2. Reason about the upbound (e.g. by checking memory size) instead of + // hardcoding it here. + const std::int64_t kGroupSizeUpbound = 1000000000; + if (min_cpp_value < 0 || + max_cpp_value > kGroupSizeUpbound || + max_cpp_value / static_cast<double>(estimated_num_groups) > 256.0) { + return false; + } + + for (const auto &agg_expr : aggregate->aggregate_expressions()) { + const E::AggregateFunctionPtr agg_func = + std::static_pointer_cast<const E::AggregateFunction>(agg_expr->expression()); + + if (agg_func->is_distinct()) { + return false; + } + + // TODO(jianqiao): Support AggregationID::AVG. 
+ switch (agg_func->getAggregate().getAggregationID()) { + case AggregationID::kCount: // Fall through + case AggregationID::kSum: + break; + default: + return false; + } + + const auto &arguments = agg_func->getArguments(); + if (arguments.size() > 1) { + return false; + } + + if (arguments.size() == 1) { + switch (arguments.front()->getValueType().getTypeID()) { + case TypeID::kInt: // Fall through + case TypeID::kLong: + case TypeID::kFloat: + case TypeID::kDouble: + break; + default: + return false; + } + } + } + + *max_num_groups = static_cast<std::size_t>(max_cpp_value) + 1; + return true; +} + void ExecutionGenerator::convertNamedExpressions( const std::vector<E::NamedExpressionPtr> &named_expressions, S::QueryContext::ScalarGroup *scalar_group_proto) { @@ -1475,6 +1575,8 @@ void ExecutionGenerator::convertAggregate( findRelationInfoOutputByPhysical(physical_plan->input()); aggr_state_proto->set_relation_id(input_relation_info->relation->getID()); + bool use_parallel_initialization = false; + std::vector<const Type*> group_by_types; for (const E::NamedExpressionPtr &grouping_expression : physical_plan->grouping_expressions()) { unique_ptr<const Scalar> execution_group_by_expression; @@ -1495,9 +1597,28 @@ void ExecutionGenerator::convertAggregate( } if (!group_by_types.empty()) { - // Right now, only SeparateChaining is supported. 
- aggr_state_proto->set_hash_table_impl_type( - serialization::HashTableImplType::SEPARATE_CHAINING); + const std::size_t estimated_num_groups = + cost_model_for_aggregation_->estimateNumGroupsForAggregate(physical_plan); + + std::size_t max_num_groups; + const bool can_use_collision_free_aggregation = + canUseCollisionFreeAggregation(physical_plan, + estimated_num_groups, + &max_num_groups); + + if (can_use_collision_free_aggregation) { + aggr_state_proto->set_hash_table_impl_type( + serialization::HashTableImplType::COLLISION_FREE_VECTOR); + aggr_state_proto->set_estimated_num_entries(max_num_groups); + use_parallel_initialization = true; + } else { + // Otherwise, use SeparateChaining. + aggr_state_proto->set_hash_table_impl_type( + serialization::HashTableImplType::SEPARATE_CHAINING); + aggr_state_proto->set_estimated_num_entries(std::max(16uL, estimated_num_groups)); + } + } else { + aggr_state_proto->set_estimated_num_entries(1uL); } for (const E::AliasPtr &named_aggregate_expression : physical_plan->aggregate_expressions()) { @@ -1535,10 +1656,6 @@ void ExecutionGenerator::convertAggregate( aggr_state_proto->mutable_predicate()->CopyFrom(predicate->getProto()); } - const std::size_t estimated_num_groups = - cost_model_for_aggregation_->estimateNumGroupsForAggregate(physical_plan); - aggr_state_proto->set_estimated_num_entries(std::max(16uL, estimated_num_groups)); - const QueryPlan::DAGNodeIndex aggregation_operator_index = execution_plan_->addRelationalOperator( new AggregationOperator( @@ -1553,6 +1670,18 @@ void ExecutionGenerator::convertAggregate( false /* is_pipeline_breaker */); } + if (use_parallel_initialization) { + const QueryPlan::DAGNodeIndex initialize_aggregation_operator_index = + execution_plan_->addRelationalOperator( + new InitializeAggregationOperator( + query_handle_->query_id(), + aggr_state_index)); + + execution_plan_->addDirectDependency(aggregation_operator_index, + initialize_aggregation_operator_index, + true /* 
is_pipeline_breaker */); + } + // Create InsertDestination proto. const CatalogRelation *output_relation = nullptr; const QueryContext::insert_destination_id insert_destination_index = http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/query_optimizer/ExecutionGenerator.hpp ---------------------------------------------------------------------- diff --git a/query_optimizer/ExecutionGenerator.hpp b/query_optimizer/ExecutionGenerator.hpp index eba6eee..987f11a 100644 --- a/query_optimizer/ExecutionGenerator.hpp +++ b/query_optimizer/ExecutionGenerator.hpp @@ -20,6 +20,7 @@ #ifndef QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_GENERATOR_HPP_ #define QUICKSTEP_QUERY_OPTIMIZER_EXECUTION_GENERATOR_HPP_ +#include <cstddef> #include <memory> #include <string> #include <unordered_map> @@ -37,6 +38,7 @@ #include "query_optimizer/QueryHandle.hpp" #include "query_optimizer/QueryPlan.hpp" #include "query_optimizer/cost_model/CostModel.hpp" +#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" #include "query_optimizer/expressions/ExprId.hpp" #include "query_optimizer/expressions/NamedExpression.hpp" #include "query_optimizer/expressions/Predicate.hpp" @@ -204,6 +206,22 @@ class ExecutionGenerator { std::string getNewRelationName(); /** + * @brief Checks whether an aggregate node can be efficiently evaluated with + * the collision-free aggregation fast path. + * + * @param aggregate The physical aggregate node to be checked. + * @param estimated_num_groups The estimated number of groups for the aggregate. + * @param max_num_groups If collision-free aggregation is applicable, the + * value pointed to by this pointer will be set to the maximum possible + * number of groups that the collision-free hash table needs to hold. + * @return A bool value indicating whether collision-free aggregation can be + * used to evaluate \p aggregate.
+ */ + bool canUseCollisionFreeAggregation(const physical::AggregatePtr &aggregate, + const std::size_t estimated_num_groups, + std::size_t *max_num_groups) const; + + /** * @brief Sets up the info of the CatalogRelation represented by TableReference. * TableReference is not converted to any operator. * @@ -427,7 +445,7 @@ class ExecutionGenerator { /** * @brief The cost model to use for estimating aggregation hash table size. */ - std::unique_ptr<cost::CostModel> cost_model_for_aggregation_; + std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_for_aggregation_; /** * @brief The cost model to use for estimating join hash table size. http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/relational_operators/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/relational_operators/CMakeLists.txt b/relational_operators/CMakeLists.txt index c18dc77..5e67c64 100644 --- a/relational_operators/CMakeLists.txt +++ b/relational_operators/CMakeLists.txt @@ -47,6 +47,9 @@ add_library(quickstep_relationaloperators_FinalizeAggregationOperator FinalizeAggregationOperator.cpp FinalizeAggregationOperator.hpp) add_library(quickstep_relationaloperators_HashJoinOperator HashJoinOperator.cpp HashJoinOperator.hpp) +add_library(quickstep_relationaloperators_InitializeAggregationOperator + InitializeAggregationOperator.cpp + InitializeAggregationOperator.hpp) add_library(quickstep_relationaloperators_InsertOperator InsertOperator.cpp InsertOperator.hpp) add_library(quickstep_relationaloperators_NestedLoopsJoinOperator NestedLoopsJoinOperator.cpp @@ -254,6 +257,17 @@ target_link_libraries(quickstep_relationaloperators_HashJoinOperator quickstep_utility_lipfilter_LIPFilterAdaptiveProber quickstep_utility_lipfilter_LIPFilterUtil tmb) +target_link_libraries(quickstep_relationaloperators_InitializeAggregationOperator + glog + quickstep_queryexecution_QueryContext + quickstep_queryexecution_WorkOrderProtosContainer + 
quickstep_queryexecution_WorkOrdersContainer + quickstep_relationaloperators_RelationalOperator + quickstep_relationaloperators_WorkOrder + quickstep_relationaloperators_WorkOrder_proto + quickstep_storage_AggregationOperationState + quickstep_utility_Macros + tmb) target_link_libraries(quickstep_relationaloperators_InsertOperator glog quickstep_catalog_CatalogRelation @@ -512,6 +526,7 @@ target_link_libraries(quickstep_relationaloperators_WorkOrderFactory quickstep_relationaloperators_DropTableOperator quickstep_relationaloperators_FinalizeAggregationOperator quickstep_relationaloperators_HashJoinOperator + quickstep_relationaloperators_InitializeAggregationOperator quickstep_relationaloperators_InsertOperator quickstep_relationaloperators_NestedLoopsJoinOperator quickstep_relationaloperators_SampleOperator @@ -548,6 +563,7 @@ target_link_libraries(quickstep_relationaloperators quickstep_relationaloperators_DropTableOperator quickstep_relationaloperators_FinalizeAggregationOperator quickstep_relationaloperators_HashJoinOperator + quickstep_relationaloperators_InitializeAggregationOperator quickstep_relationaloperators_InsertOperator quickstep_relationaloperators_NestedLoopsJoinOperator quickstep_relationaloperators_RebuildWorkOrder http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/relational_operators/DestroyAggregationStateOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/DestroyAggregationStateOperator.cpp b/relational_operators/DestroyAggregationStateOperator.cpp index 49be43d..62ca9e7 100644 --- a/relational_operators/DestroyAggregationStateOperator.cpp +++ b/relational_operators/DestroyAggregationStateOperator.cpp @@ -58,13 +58,6 @@ bool DestroyAggregationStateOperator::getAllWorkOrderProtos(WorkOrderProtosConta } void DestroyAggregationStateWorkOrder::execute() { - // NOTE(harshad) : The destroyAggregationHashTablePayload call is separate - // from the 
destroyAggregationState call. The reason is that the aggregation - // hash tables don't own the AggregationHandle objects. However the hash table - // class requires the handles for destroying the payload (see the - // destroyPayload methods in AggregationHandle classes). Therefore, we first - // destroy the payloads in the hash table and then destroy the hash table. - query_context_->destroyAggregationHashTablePayload(aggr_state_index_); query_context_->destroyAggregationState(aggr_state_index_); } http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/relational_operators/FinalizeAggregationOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/FinalizeAggregationOperator.cpp b/relational_operators/FinalizeAggregationOperator.cpp index 0cbf635..c80c575 100644 --- a/relational_operators/FinalizeAggregationOperator.cpp +++ b/relational_operators/FinalizeAggregationOperator.cpp @@ -44,15 +44,15 @@ bool FinalizeAggregationOperator::getAllWorkOrders( AggregationOperationState *agg_state = query_context->getAggregationState(aggr_state_index_); DCHECK(agg_state != nullptr); - for (int part_id = 0; - part_id < static_cast<int>(agg_state->getNumPartitions()); + for (std::size_t part_id = 0; + part_id < agg_state->getNumFinalizationPartitions(); ++part_id) { container->addNormalWorkOrder( new FinalizeAggregationWorkOrder( query_id_, + part_id, agg_state, - query_context->getInsertDestination(output_destination_index_), - part_id), + query_context->getInsertDestination(output_destination_index_)), op_index_); } } @@ -80,11 +80,7 @@ bool FinalizeAggregationOperator::getAllWorkOrderProtos(WorkOrderProtosContainer } void FinalizeAggregationWorkOrder::execute() { - if (state_->isAggregatePartitioned()) { - state_->finalizeAggregatePartitioned(part_id_, output_destination_); - } else { - state_->finalizeAggregate(output_destination_); - } + state_->finalizeAggregate(partition_id_, 
output_destination_); } } // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/relational_operators/FinalizeAggregationOperator.hpp ---------------------------------------------------------------------- diff --git a/relational_operators/FinalizeAggregationOperator.hpp b/relational_operators/FinalizeAggregationOperator.hpp index ae7127a..3c209b1 100644 --- a/relational_operators/FinalizeAggregationOperator.hpp +++ b/relational_operators/FinalizeAggregationOperator.hpp @@ -116,29 +116,29 @@ class FinalizeAggregationWorkOrder : public WorkOrder { * @note InsertWorkOrder takes ownership of \c state. * * @param query_id The ID of the query to which this operator belongs. + * @param partition_id The partition ID for which the Finalize aggregation + * work order is issued. * @param state The AggregationState to use. * @param output_destination The InsertDestination to insert aggregation * results. - * @param part_id The partition ID for which the Finalize aggregation work - * order is issued. Ignore if aggregation is not partitioned. 
*/ FinalizeAggregationWorkOrder(const std::size_t query_id, + const std::size_t partition_id, AggregationOperationState *state, - InsertDestination *output_destination, - const int part_id = -1) + InsertDestination *output_destination) : WorkOrder(query_id), + partition_id_(partition_id), state_(DCHECK_NOTNULL(state)), - output_destination_(DCHECK_NOTNULL(output_destination)), - part_id_(part_id) {} + output_destination_(DCHECK_NOTNULL(output_destination)) {} ~FinalizeAggregationWorkOrder() override {} void execute() override; private: + const std::size_t partition_id_; AggregationOperationState *state_; InsertDestination *output_destination_; - const int part_id_; DISALLOW_COPY_AND_ASSIGN(FinalizeAggregationWorkOrder); }; http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/relational_operators/InitializeAggregationOperator.cpp ---------------------------------------------------------------------- diff --git a/relational_operators/InitializeAggregationOperator.cpp b/relational_operators/InitializeAggregationOperator.cpp new file mode 100644 index 0000000..d5c1710 --- /dev/null +++ b/relational_operators/InitializeAggregationOperator.cpp @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + **/ + +#include "relational_operators/InitializeAggregationOperator.hpp" + +#include <cstddef> + +#include "query_execution/QueryContext.hpp" +#include "query_execution/WorkOrderProtosContainer.hpp" +#include "query_execution/WorkOrdersContainer.hpp" +#include "relational_operators/WorkOrder.pb.h" +#include "storage/AggregationOperationState.hpp" + +#include "glog/logging.h" + +#include "tmb/id_typedefs.h" + +namespace quickstep { + +bool InitializeAggregationOperator::getAllWorkOrders( + WorkOrdersContainer *container, + QueryContext *query_context, + StorageManager *storage_manager, + const tmb::client_id scheduler_client_id, + tmb::MessageBus *bus) { + if (!started_) { + AggregationOperationState *agg_state = + query_context->getAggregationState(aggr_state_index_); + DCHECK(agg_state != nullptr); + + for (std::size_t part_id = 0; + part_id < agg_state->getNumInitializationPartitions(); + ++part_id) { + container->addNormalWorkOrder( + new InitializeAggregationWorkOrder(query_id_, + part_id, + agg_state), + op_index_); + } + started_ = true; + } + return true; +} + +bool InitializeAggregationOperator::getAllWorkOrderProtos(WorkOrderProtosContainer *container) { + if (!started_) { + started_ = true; + + serialization::WorkOrder *proto = new serialization::WorkOrder; + proto->set_work_order_type(serialization::INITIALIZE_AGGREGATION); + proto->set_query_id(query_id_); + proto->SetExtension(serialization::InitializeAggregationWorkOrder::aggr_state_index, + aggr_state_index_); + + container->addWorkOrderProto(proto, op_index_); + } + return started_; +} + +void InitializeAggregationWorkOrder::execute() { + state_->initialize(partition_id_); +} + +} // namespace quickstep http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/relational_operators/InitializeAggregationOperator.hpp 
----------------------------------------------------------------------
diff --git a/relational_operators/InitializeAggregationOperator.hpp b/relational_operators/InitializeAggregationOperator.hpp
new file mode 100644
index 0000000..58d848b
--- /dev/null
+++ b/relational_operators/InitializeAggregationOperator.hpp
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_RELATIONAL_OPERATORS_INITIALIZE_AGGREGATION_OPERATOR_HPP_
+#define QUICKSTEP_RELATIONAL_OPERATORS_INITIALIZE_AGGREGATION_OPERATOR_HPP_
+
+#include <cstddef>
+#include <string>
+
+#include "query_execution/QueryContext.hpp"
+#include "relational_operators/RelationalOperator.hpp"
+#include "relational_operators/WorkOrder.hpp"
+#include "utility/Macros.hpp"
+
+#include "glog/logging.h"
+
+#include "tmb/id_typedefs.h"
+
+namespace tmb { class MessageBus; }
+
+namespace quickstep {
+
+class AggregationOperationState;
+class StorageManager;
+class WorkOrderProtosContainer;
+class WorkOrdersContainer;
+
+namespace serialization { class WorkOrder; }
+
+/** \addtogroup RelationalOperators
+ *  @{
+ */
+
+/**
+ * @brief An operator which initializes an AggregationOperationState.
+ **/
+class InitializeAggregationOperator : public RelationalOperator {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param query_id The ID of this query.
+   * @param aggr_state_index The index of the AggregationOperationState in QueryContext.
+   **/
+  InitializeAggregationOperator(const std::size_t query_id,
+                                const QueryContext::aggregation_state_id aggr_state_index)
+      : RelationalOperator(query_id),
+        aggr_state_index_(aggr_state_index),
+        started_(false) {}
+
+  ~InitializeAggregationOperator() override {}
+
+  std::string getName() const override {
+    return "InitializeAggregationOperator";
+  }
+
+  bool getAllWorkOrders(WorkOrdersContainer *container,
+                        QueryContext *query_context,
+                        StorageManager *storage_manager,
+                        const tmb::client_id scheduler_client_id,
+                        tmb::MessageBus *bus) override;
+
+  bool getAllWorkOrderProtos(WorkOrderProtosContainer *container) override;
+
+ private:
+  // Index of the AggregationOperationState in QueryContext.
+  const QueryContext::aggregation_state_id aggr_state_index_;
+  // True once the work orders (or work order protos) have been generated.
+  bool started_;
+
+  DISALLOW_COPY_AND_ASSIGN(InitializeAggregationOperator);
+};
+
+/**
+ * @brief A WorkOrder produced by InitializeAggregationOperator.
+ **/
+class InitializeAggregationWorkOrder : public WorkOrder {
+ public:
+  /**
+   * @brief Constructor.
+   *
+   * @param query_id The ID of the query to which this WorkOrder belongs.
+   * @param partition_id The partition ID for which the work order is issued.
+   * @param state The AggregationOperationState to be initialized.
+   */
+  InitializeAggregationWorkOrder(const std::size_t query_id,
+                                 const std::size_t partition_id,
+                                 AggregationOperationState *state)
+      : WorkOrder(query_id),
+        partition_id_(partition_id),
+        state_(DCHECK_NOTNULL(state)) {}
+
+  ~InitializeAggregationWorkOrder() override {}
+
+  void execute() override;
+
+ private:
+  const std::size_t partition_id_;  // Passed to state_->initialize() by execute().
+
+  AggregationOperationState *state_;  // DCHECK'ed non-null in the constructor.
+
+  DISALLOW_COPY_AND_ASSIGN(InitializeAggregationWorkOrder);
+};
+
+/** @} */
+
+}  // namespace quickstep
+
+#endif  // QUICKSTEP_RELATIONAL_OPERATORS_INITIALIZE_AGGREGATION_OPERATOR_HPP_
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/relational_operators/WorkOrder.proto
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index 76753d2..83bb121 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -44,6 +44,7 @@ enum WorkOrderType {
   UPDATE = 20;
   WINDOW_AGGREGATION = 21;
   DESTROY_AGGREGATION_STATE = 22;
+  INITIALIZE_AGGREGATION = 23;
 }
 
 message WorkOrder {
@@ -278,6 +279,16 @@ message WindowAggregationWorkOrder {
 
 message DestroyAggregationStateWorkOrder {
   extend WorkOrder {
-    optional uint32 aggr_state_index = 339;
+    optional uint32 aggr_state_index = 352;
+  }
+}
+
+message InitializeAggregationWorkOrder {
+  extend WorkOrder {
+    // Required.
+    optional uint32 aggr_state_index = 368;
+    // Partition to initialize. Reads as 0 when unset.
+    // NOTE(review): confirm extension number 369 is unused in WorkOrder.proto.
+    optional uint64 partition_id = 369;
   }
 }
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/27380a69/relational_operators/WorkOrderFactory.cpp
----------------------------------------------------------------------
diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp
index 5e8d03d..99bca7b 100644
--- a/relational_operators/WorkOrderFactory.cpp
+++ b/relational_operators/WorkOrderFactory.cpp
@@ -37,6 +37,7 @@
 #include "relational_operators/DropTableOperator.hpp"
 #include "relational_operators/FinalizeAggregationOperator.hpp"
 #include "relational_operators/HashJoinOperator.hpp"
+#include "relational_operators/InitializeAggregationOperator.hpp"
 #include "relational_operators/InsertOperator.hpp"
 #include "relational_operators/NestedLoopsJoinOperator.hpp"
 #include "relational_operators/SampleOperator.hpp"
@@ -319,6 +320,16 @@ WorkOrder* WorkOrderFactory::ReconstructFromProto(const serialization::WorkOrder
           LOG(FATAL) << "Unknown HashJoinWorkOrder Type in WorkOrderFactory::ReconstructFromProto";
       }
     }
+    case serialization::INITIALIZE_AGGREGATION: {
+      LOG(INFO) << "Creating InitializeAggregationWorkOrder in Shiftboss " << shiftboss_index;
+      // InitializeAggregationWorkOrder takes (query_id, partition_id, state).
+      // partition_id is an optional extension, so it reads as 0 when the proto omits it.
+      return new InitializeAggregationWorkOrder(
+          proto.query_id(),
+          proto.GetExtension(serialization::InitializeAggregationWorkOrder::partition_id),
+          query_context->getAggregationState(proto.GetExtension(
+              serialization::InitializeAggregationWorkOrder::aggr_state_index)));
+    }
     case serialization::INSERT: {
       LOG(INFO) << "Creating InsertWorkOrder in Shiftboss " << shiftboss_index;
       return new InsertWorkOrder(
@@ -693,6 +704,11 @@ bool WorkOrderFactory::ProtoIsValid(const serialization::WorkOrder &proto,
              proto.GetExtension(serialization::HashJoinWorkOrder::selection_index)) &&
              proto.HasExtension(serialization::HashJoinWorkOrder::block_id);
     }
+    case serialization::INITIALIZE_AGGREGATION: {
+      return proto.HasExtension(serialization::InitializeAggregationWorkOrder::aggr_state_index) &&
+             query_context.isValidAggregationStateId(
+
proto.GetExtension(serialization::InitializeAggregationWorkOrder::aggr_state_index)); + } case serialization::INSERT: { return proto.HasExtension(serialization::InsertWorkOrder::insert_destination_index) && query_context.isValidInsertDestinationId(