Github user jianqiao commented on a diff in the pull request:

    https://github.com/apache/incubator-quickstep/pull/179#discussion_r99882633
  
    --- Diff: query_optimizer/ExecutionGenerator.cpp ---
    @@ -371,6 +378,109 @@ void ExecutionGenerator::dropAllTemporaryRelations() {
       }
     }
     
    +bool ExecutionGenerator::canUseCollisionFreeAggregation(
    +    const P::AggregatePtr &aggregate,
    +    const std::size_t estimated_num_groups,
    +    std::size_t *max_num_groups) const {
    +#ifdef QUICKSTEP_DISTRIBUTED
    +  // Currently we cannot do this fast path with the distributed setting. See
    +  // the TODOs at InitializeAggregationOperator::getAllWorkOrderProtos() and
    +  // FinalizeAggregationOperator::getAllWorkOrderProtos().
    +  return false;
    +#endif
    +
    +  // Supports only single group-by key.
    +  if (aggregate->grouping_expressions().size() != 1) {
    +    return false;
    +  }
    +
    +  // We need to know the exact min/max stats of the group-by key.
    +  // So it must be a CatalogAttribute (but not an expression).
    +  E::AttributeReferencePtr group_by_key_attr;
    +  const E::ExpressionPtr agg_expr = 
aggregate->grouping_expressions().front();
    +  if (!E::SomeAttributeReference::MatchesWithConditionalCast(agg_expr, 
&group_by_key_attr)) {
    +    return false;
    +  }
    +
    +  bool min_value_stat_is_exact;
    +  bool max_value_stat_is_exact;
    +  const TypedValue min_value =
    +      cost_model_for_aggregation_->findMinValueStat(
    +          aggregate, group_by_key_attr, &min_value_stat_is_exact);
    +  const TypedValue max_value =
    +      cost_model_for_aggregation_->findMaxValueStat(
    +          aggregate, group_by_key_attr, &max_value_stat_is_exact);
    +  if (min_value.isNull() || max_value.isNull() ||
    +      (!min_value_stat_is_exact) || (!max_value_stat_is_exact)) {
    +    return false;
    +  }
    +
    +  std::int64_t min_cpp_value;
    +  std::int64_t max_cpp_value;
    +  switch (group_by_key_attr->getValueType().getTypeID()) {
    +    case TypeID::kInt: {
    +      min_cpp_value = min_value.getLiteral<int>();
    +      max_cpp_value = max_value.getLiteral<int>();
    +      break;
    +    }
    +    case TypeID::kLong: {
    +      min_cpp_value = min_value.getLiteral<std::int64_t>();
    +      max_cpp_value = max_value.getLiteral<std::int64_t>();
    +      break;
    +    }
    +    default:
    +      return false;
    +  }
    +
    +  // TODO(jianqiao):
    +  // 1. Handle the case where min_cpp_value is below 0 or far greater than 0.
    +  // 2. Reason about the table size bound (e.g. by checking memory size) instead
    +  //    of hardcoding it as a gflag.
    +  if (min_cpp_value < 0 ||
    +      max_cpp_value >= FLAGS_collision_free_vector_table_max_size ||
    +      max_cpp_value / static_cast<double>(estimated_num_groups) > 256.0) {
    +    return false;
    +  }
    +
    +  for (const auto &agg_expr : aggregate->aggregate_expressions()) {
    +    const E::AggregateFunctionPtr agg_func =
    +        std::static_pointer_cast<const 
E::AggregateFunction>(agg_expr->expression());
    +
    +    if (agg_func->is_distinct()) {
    +      return false;
    +    }
    +
    +    // TODO(jianqiao): Support AggregationID::AVG.
    +    switch (agg_func->getAggregate().getAggregationID()) {
    +      case AggregationID::kCount:  // Fall through
    +      case AggregationID::kSum:
    +        break;
    +      default:
    +        return false;
    +    }
    +
    +    const auto &arguments = agg_func->getArguments();
    +    if (arguments.size() > 1) {
    +      return false;
    +    }
    +
    +    if (arguments.size() == 1) {
    +      switch (arguments.front()->getValueType().getTypeID()) {
    --- End diff --
    
    Updated.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to