Github user jianqiao commented on a diff in the pull request:

    https://github.com/apache/incubator-quickstep/pull/179#discussion_r99915265
  
    --- Diff: storage/AggregationOperationState.cpp ---
    @@ -353,187 +353,286 @@ bool AggregationOperationState::ProtoIsValid(
       return true;
     }
     
    -void AggregationOperationState::aggregateBlock(const block_id input_block,
    -                                               LIPFilterAdaptiveProber *lip_filter_adaptive_prober) {
    -  if (group_by_list_.empty()) {
    -    aggregateBlockSingleState(input_block);
    -  } else {
    -    aggregateBlockHashTable(input_block, lip_filter_adaptive_prober);
    +bool AggregationOperationState::checkAggregatePartitioned(
    +    const std::size_t estimated_num_groups,
    +    const std::vector<bool> &is_distinct,
    +    const std::vector<std::unique_ptr<const Scalar>> &group_by,
    +    const std::vector<const AggregateFunction *> &aggregate_functions) const {
    +  // If there's no aggregation, return false.
    +  if (aggregate_functions.empty()) {
    +    return false;
    +  }
    +  // Check if there's a distinct operation involved in any aggregate, if so
    +  // the aggregate can't be partitioned.
    +  for (auto distinct : is_distinct) {
    +    if (distinct) {
    +      return false;
    +    }
    +  }
    +  // There's no distinct aggregation involved. Check if there's at least one
    +  // GROUP BY operation.
    +  if (group_by.empty()) {
    +    return false;
    +  }
    +
    +  // Currently we require that all the group-by keys are ScalarAttributes for
    +  // the convenience of implementing copy elision.
    +  // TODO(jianqiao): relax this requirement.
    +  for (const auto &group_by_element : group_by) {
    +    if (group_by_element->getAttributeIdForValueAccessor() == kInvalidAttributeID) {
    +      return false;
    +    }
       }
    +
    +  // There are GROUP BYs without DISTINCT. Check if the estimated number of
    +  // groups is large enough to warrant a partitioned aggregation.
    +  return estimated_num_groups >
    +         static_cast<std::size_t>(
    +             FLAGS_partition_aggregation_num_groups_threshold);
    +  return false;
     }
     
    -void AggregationOperationState::finalizeAggregate(
    -    InsertDestination *output_destination) {
    -  if (group_by_list_.empty()) {
    -    finalizeSingleState(output_destination);
    +std::size_t AggregationOperationState::getNumInitializationPartitions() const {
    +  if (is_aggregate_collision_free_) {
    +    return static_cast<CollisionFreeVectorTable *>(
    +        collision_free_hashtable_.get())->getNumInitializationPartitions();
       } else {
    -    finalizeHashTable(output_destination);
    +    return 0u;
    --- End diff --
    
    Currently we create the `InitializeAggregationOperator` only for collision-free aggregations.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to