icexelloss commented on code in PR #34627:
URL: https://github.com/apache/arrow/pull/34627#discussion_r1142665484
##########
cpp/src/arrow/engine/substrait/options.cc:
##########
@@ -166,6 +171,57 @@ class DefaultExtensionProvider : public
BaseExtensionProvider {
named_tap_rel.name(),
std::move(renamed_schema)));
return RelationInfo{{std::move(decl), std::move(renamed_schema)},
std::nullopt};
}
+
+ Result<RelationInfo> MakeSegmentedAggregateRel(
+ const ConversionOptions& conv_opts, const std::vector<DeclarationInfo>&
inputs,
+ const substrait_ext::SegmentedAggregateRel& seg_agg_rel,
+ const ExtensionSet& ext_set) {
+ if (inputs.size() != 1) {
+ return Status::Invalid(
+ "substrait_ext::SegmentedAggregateRel requires a single input but
got: ",
+ inputs.size());
+ }
+
+ auto input_schema = inputs[0].output_schema;
+
+ ConversionOptions conversion_options;
+
+ // store segment key fields to be used when output schema is created
+ std::vector<int> segment_key_field_ids;
+ std::vector<FieldRef> segment_keys;
+ if (seg_agg_rel.segment_groupings_size() > 0) {
+ ARROW_RETURN_NOT_OK(internal::ParseAggregateGrouping(
+ seg_agg_rel.segment_groupings(0), ext_set, conversion_options,
input_schema,
+ &segment_key_field_ids, &segment_keys));
+ }
+
+ const auto& aggregate = seg_agg_rel.aggregate();
+ ARROW_ASSIGN_OR_RAISE(
+ auto decl_info,
+ internal::ParseAggregateDeclaration(
+ aggregate, &inputs[0], ext_set, conversion_options,
+ [&](FieldVector* output_fields_ptr) {
+ FieldVector& output_fields = *output_fields_ptr;
+ // extract segment key fields to output schema
+ for (int segment_key_field_id : segment_key_field_ids) {
+
output_fields.emplace_back(input_schema->field(segment_key_field_id));
+ }
+ return Status::OK();
+ },
+ [&](compute::AggregateNodeOptions* options_ptr) {
+ compute::AggregateNodeOptions& options = *options_ptr;
+ options.segment_keys = segment_keys;
+ return Status::OK();
+ }));
+
+ const auto& output_schema = decl_info.output_schema;
+ size_t out_size = output_schema->num_fields();
+ std::vector<int> field_output_indices(out_size);
+ for (int i = 0; i < static_cast<int>(out_size); i++) {
+ field_output_indices[i] = i;
+ }
+ return RelationInfo{decl_info, std::move(field_output_indices)};
Review Comment:
Can we use a null/nullopt field_output_indices here? This is a no-op, right?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]