westonpace commented on code in PR #34651:
URL: https://github.com/apache/arrow/pull/34651#discussion_r1167300235
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -757,6 +774,95 @@ Result<DeclarationInfo> FromProto(const substrait::Rel&
rel, const ExtensionSet&
return ProcessEmit(std::move(join), std::move(join_declaration),
std::move(join_schema));
}
+ case substrait::Rel::RelTypeCase::kFetch: {
+ const auto& fetch = rel.fetch();
+ RETURN_NOT_OK(CheckRelCommon(fetch, conversion_options));
+
+ if (!fetch.has_input()) {
+ return Status::Invalid("substrait::FetchRel with no input relation");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto input,
+ FromProto(fetch.input(), ext_set,
conversion_options));
+
+ int64_t offset = fetch.offset();
+ int64_t count = fetch.count();
+
+ acero::Declaration fetch_dec{
+ "fetch", {input.declaration}, acero::FetchNodeOptions(offset,
count)};
+
+ DeclarationInfo fetch_declaration{std::move(fetch_dec),
input.output_schema};
+ return ProcessEmit(fetch, std::move(fetch_declaration),
+ fetch_declaration.output_schema);
+ }
+ case substrait::Rel::RelTypeCase::kSort: {
+ const auto& sort = rel.sort();
+ RETURN_NOT_OK(CheckRelCommon(sort, conversion_options));
+
+ if (!sort.has_input()) {
+ return Status::Invalid("substrait::SortRel with no input relation");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto input,
+ FromProto(sort.input(), ext_set,
conversion_options));
+
+ if (sort.sorts_size() == 0) {
+ return Status::Invalid("substrait::SortRel with no sorts");
+ }
+
+ std::vector<compute::SortKey> sort_keys;
+ compute::NullPlacement null_placement;
+ bool first = true;
+ for (const auto& sort : sort.sorts()) {
+ if (sort.direction() == substrait::SortField::SortDirection::
+
SortField_SortDirection_SORT_DIRECTION_UNSPECIFIED) {
+ return Status::Invalid(
+ "substrait::SortRel with sort that had unspecified direction");
+ }
+ if (sort.direction() == substrait::SortField::SortDirection::
+
SortField_SortDirection_SORT_DIRECTION_CLUSTERED) {
+ return Status::NotImplemented(
+ "substrait::SortRel with sort with clustered sort direction");
+ }
+ // Substrait allows null placement to differ for each field. Acero
expects it to
+ // be consistent across all fields. So we grab the null placement
from the first
+ // key and verify all other keys have the same null placement
+ if (first) {
+ null_placement = IsSortNullsFirst(sort.direction())
+ ? compute::NullPlacement::AtStart
+ : compute::NullPlacement::AtEnd;
+ } else {
+ if ((null_placement == compute::NullPlacement::AtStart &&
+ !IsSortNullsFirst(sort.direction())) ||
+ (null_placement == compute::NullPlacement::AtEnd &&
+ IsSortNullsFirst(sort.direction()))) {
+ return Status::NotImplemented(
+ "substrait::SortRel with ordering with mixed null placement");
+ }
+ }
+ if (sort.sort_kind_case() !=
substrait::SortField::SortKindCase::kDirection) {
+ return Status::NotImplemented("substrait::SortRel with custom sort
function");
Review Comment:
At the moment we are constructing substrait "by hand" so it's not too hard
to come up with the test but it is a maintenance and readability burden. I
don't think we've been super consistent. I skipped this one just because I
think it'll be a while until we have producers that come up with this kind of
plan. Right now I'm trying to balance "tons of hard coded JSON plans" against
"full coverage of possible plans". Once the text format is ready enough that
we can switch to it I think that will help some.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]