icexelloss commented on code in PR #34651:
URL: https://github.com/apache/arrow/pull/34651#discussion_r1162872803
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -757,6 +774,95 @@ Result<DeclarationInfo> FromProto(const substrait::Rel&
rel, const ExtensionSet&
return ProcessEmit(std::move(join), std::move(join_declaration),
std::move(join_schema));
}
+ case substrait::Rel::RelTypeCase::kFetch: {
+ const auto& fetch = rel.fetch();
+ RETURN_NOT_OK(CheckRelCommon(fetch, conversion_options));
+
+ if (!fetch.has_input()) {
+ return Status::Invalid("substrait::FetchRel with no input relation");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto input,
+ FromProto(fetch.input(), ext_set,
conversion_options));
+
+ int64_t offset = fetch.offset();
+ int64_t count = fetch.count();
+
+ acero::Declaration fetch_dec{
+ "fetch", {input.declaration}, acero::FetchNodeOptions(offset,
count)};
+
+ DeclarationInfo fetch_declaration{std::move(fetch_dec),
input.output_schema};
+ return ProcessEmit(fetch, std::move(fetch_declaration),
+ fetch_declaration.output_schema);
+ }
+ case substrait::Rel::RelTypeCase::kSort: {
+ const auto& sort = rel.sort();
+ RETURN_NOT_OK(CheckRelCommon(sort, conversion_options));
+
+ if (!sort.has_input()) {
+ return Status::Invalid("substrait::SortRel with no input relation");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto input,
+ FromProto(sort.input(), ext_set,
conversion_options));
+
+ if (sort.sorts_size() == 0) {
+ return Status::Invalid("substrait::SortRel with no sorts");
+ }
+
+ std::vector<compute::SortKey> sort_keys;
+ compute::NullPlacement null_placement;
+ bool first = true;
+ for (const auto& sort : sort.sorts()) {
+ if (sort.direction() == substrait::SortField::SortDirection::
+
SortField_SortDirection_SORT_DIRECTION_UNSPECIFIED) {
+ return Status::Invalid(
+ "substrait::SortRel with sort that had unspecified direction");
+ }
+ if (sort.direction() == substrait::SortField::SortDirection::
+
SortField_SortDirection_SORT_DIRECTION_CLUSTERED) {
+ return Status::NotImplemented(
+ "substrait::SortRel with sort with clustered sort direction");
+ }
+ // Substrait allows null placement to differ for each field. Acero
expects it to
+ // be consistent across all fields. So we grab the null placement
from the first
+ // key and verify all other keys have the same null placement
+ if (first) {
+ null_placement = IsSortNullsFirst(sort.direction())
+ ? compute::NullPlacement::AtStart
+ : compute::NullPlacement::AtEnd;
+ } else {
+ if ((null_placement == compute::NullPlacement::AtStart &&
+ !IsSortNullsFirst(sort.direction())) ||
+ (null_placement == compute::NullPlacement::AtEnd &&
+ IsSortNullsFirst(sort.direction()))) {
+ return Status::NotImplemented(
+ "substrait::SortRel with ordering with mixed null placement");
+ }
+ }
+ if (sort.sort_kind_case() !=
substrait::SortField::SortKindCase::kDirection) {
+ return Status::NotImplemented("substrait::SortRel with custom sort
function");
Review Comment:
This is more of a question: Do we usually write test cases for these
"unsupported case"? I know these can be quite annoying to test (since no easy
way to construct the substrait input) so just curious about the common
practice.
##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -757,6 +774,95 @@ Result<DeclarationInfo> FromProto(const substrait::Rel&
rel, const ExtensionSet&
return ProcessEmit(std::move(join), std::move(join_declaration),
std::move(join_schema));
}
+ case substrait::Rel::RelTypeCase::kFetch: {
+ const auto& fetch = rel.fetch();
+ RETURN_NOT_OK(CheckRelCommon(fetch, conversion_options));
+
+ if (!fetch.has_input()) {
+ return Status::Invalid("substrait::FetchRel with no input relation");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto input,
+ FromProto(fetch.input(), ext_set,
conversion_options));
+
+ int64_t offset = fetch.offset();
+ int64_t count = fetch.count();
+
+ acero::Declaration fetch_dec{
+ "fetch", {input.declaration}, acero::FetchNodeOptions(offset,
count)};
+
+ DeclarationInfo fetch_declaration{std::move(fetch_dec),
input.output_schema};
+ return ProcessEmit(fetch, std::move(fetch_declaration),
+ fetch_declaration.output_schema);
+ }
+ case substrait::Rel::RelTypeCase::kSort: {
+ const auto& sort = rel.sort();
+ RETURN_NOT_OK(CheckRelCommon(sort, conversion_options));
+
+ if (!sort.has_input()) {
+ return Status::Invalid("substrait::SortRel with no input relation");
+ }
+
+ ARROW_ASSIGN_OR_RAISE(auto input,
+ FromProto(sort.input(), ext_set,
conversion_options));
+
+ if (sort.sorts_size() == 0) {
+ return Status::Invalid("substrait::SortRel with no sorts");
+ }
+
+ std::vector<compute::SortKey> sort_keys;
+ compute::NullPlacement null_placement;
+ bool first = true;
+ for (const auto& sort : sort.sorts()) {
+ if (sort.direction() == substrait::SortField::SortDirection::
+
SortField_SortDirection_SORT_DIRECTION_UNSPECIFIED) {
+ return Status::Invalid(
+ "substrait::SortRel with sort that had unspecified direction");
+ }
+ if (sort.direction() == substrait::SortField::SortDirection::
+
SortField_SortDirection_SORT_DIRECTION_CLUSTERED) {
+ return Status::NotImplemented(
+ "substrait::SortRel with sort with clustered sort direction");
+ }
+ // Substrait allows null placement to differ for each field. Acero
expects it to
+ // be consistent across all fields. So we grab the null placement
from the first
+ // key and verify all other keys have the same null placement
+ if (first) {
+ null_placement = IsSortNullsFirst(sort.direction())
+ ? compute::NullPlacement::AtStart
+ : compute::NullPlacement::AtEnd;
+ } else {
+ if ((null_placement == compute::NullPlacement::AtStart &&
+ !IsSortNullsFirst(sort.direction())) ||
+ (null_placement == compute::NullPlacement::AtEnd &&
+ IsSortNullsFirst(sort.direction()))) {
+ return Status::NotImplemented(
+ "substrait::SortRel with ordering with mixed null placement");
+ }
+ }
+ if (sort.sort_kind_case() !=
substrait::SortField::SortKindCase::kDirection) {
+ return Status::NotImplemented("substrait::SortRel with custom sort
function");
Review Comment:
This is more of a question: Do we usually write test cases for these
"unsupported case"? I know these can be quite annoying to test (since no easy
way to construct the substrait input) so just curious about the current
practice.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]