pitrou commented on code in PR #34651:
URL: https://github.com/apache/arrow/pull/34651#discussion_r1195279530


##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -714,6 +731,96 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& 
rel, const ExtensionSet&
 
       return ProcessEmit(join, join_declaration, join_schema);
     }
+    case substrait::Rel::RelTypeCase::kFetch: {
+      const auto& fetch = rel.fetch();
+      RETURN_NOT_OK(CheckRelCommon(fetch, conversion_options));
+
+      if (!fetch.has_input()) {
+        return Status::Invalid("substrait::FetchRel with no input relation");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(fetch.input(), ext_set, 
conversion_options));
+
+      int64_t offset = fetch.offset();
+      int64_t count = fetch.count();
+
+      acero::Declaration fetch_dec{
+          "fetch", {input.declaration}, acero::FetchNodeOptions(offset, 
count)};
+
+      DeclarationInfo fetch_declaration{std::move(fetch_dec), 
input.output_schema};
+      return ProcessEmit(fetch, std::move(fetch_declaration),
+                         fetch_declaration.output_schema);
+    }
+    case substrait::Rel::RelTypeCase::kSort: {
+      const auto& sort = rel.sort();
+      RETURN_NOT_OK(CheckRelCommon(sort, conversion_options));
+
+      if (!sort.has_input()) {
+        return Status::Invalid("substrait::SortRel with no input relation");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(sort.input(), ext_set, 
conversion_options));
+
+      if (sort.sorts_size() == 0) {
+        return Status::Invalid("substrait::SortRel with no sorts");
+      }
+
+      std::vector<compute::SortKey> sort_keys;
+      compute::NullPlacement null_placement;
+      bool first = true;
+      for (const auto& sort : sort.sorts()) {
+        if (sort.direction() == substrait::SortField::SortDirection::
+                                    
SortField_SortDirection_SORT_DIRECTION_UNSPECIFIED) {
+          return Status::Invalid(
+              "substrait::SortRel with sort that had unspecified direction");
+        }
+        if (sort.direction() == substrait::SortField::SortDirection::
+                                    
SortField_SortDirection_SORT_DIRECTION_CLUSTERED) {
+          return Status::NotImplemented(
+              "substrait::SortRel with sort with clustered sort direction");
+        }
+        // Substrait allows null placement to differ for each field.  Acero 
expects it to
+        // be consistent across all fields.  So we grab the null placement 
from the first
+        // key and verify all other keys have the same null placement
+        if (first) {
+          null_placement = IsSortNullsFirst(sort.direction())
+                               ? compute::NullPlacement::AtStart
+                               : compute::NullPlacement::AtEnd;
+          first = false;
+        } else {
+          if ((null_placement == compute::NullPlacement::AtStart &&
+               !IsSortNullsFirst(sort.direction())) ||
+              (null_placement == compute::NullPlacement::AtEnd &&
+               IsSortNullsFirst(sort.direction()))) {

Review Comment:
   You can change `IsSortNullsFirst` to return a `compute::NullPlacement` 
directly and this will make these lines a bit simpler (just compare the old 
null placement with the new one).



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -757,6 +774,95 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& 
rel, const ExtensionSet&
       return ProcessEmit(std::move(join), std::move(join_declaration),
                          std::move(join_schema));
     }
+    case substrait::Rel::RelTypeCase::kFetch: {
+      const auto& fetch = rel.fetch();
+      RETURN_NOT_OK(CheckRelCommon(fetch, conversion_options));
+
+      if (!fetch.has_input()) {
+        return Status::Invalid("substrait::FetchRel with no input relation");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(fetch.input(), ext_set, 
conversion_options));
+
+      int64_t offset = fetch.offset();
+      int64_t count = fetch.count();
+
+      acero::Declaration fetch_dec{
+          "fetch", {input.declaration}, acero::FetchNodeOptions(offset, 
count)};
+
+      DeclarationInfo fetch_declaration{std::move(fetch_dec), 
input.output_schema};
+      return ProcessEmit(fetch, std::move(fetch_declaration),
+                         fetch_declaration.output_schema);
+    }
+    case substrait::Rel::RelTypeCase::kSort: {
+      const auto& sort = rel.sort();
+      RETURN_NOT_OK(CheckRelCommon(sort, conversion_options));
+
+      if (!sort.has_input()) {
+        return Status::Invalid("substrait::SortRel with no input relation");
+      }
+
+      ARROW_ASSIGN_OR_RAISE(auto input,
+                            FromProto(sort.input(), ext_set, 
conversion_options));
+
+      if (sort.sorts_size() == 0) {
+        return Status::Invalid("substrait::SortRel with no sorts");
+      }
+
+      std::vector<compute::SortKey> sort_keys;

Review Comment:
   It wouldn't hurt certainly!



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -329,6 +329,23 @@ ARROW_ENGINE_EXPORT Result<DeclarationInfo> 
MakeAggregateDeclaration(
 
 }  // namespace internal
 
+namespace {
+
+bool IsSortNullsFirst(const substrait::SortField::SortDirection& direction) {
+  return direction % 2 == 1;
+}
+
+compute::SortOrder SortOrderFromDirection(
+    const substrait::SortField::SortDirection& direction) {
+  if (direction < 3) {

Review Comment:
   Same comment here.



##########
cpp/src/arrow/engine/substrait/relation_internal.cc:
##########
@@ -329,6 +329,23 @@ ARROW_ENGINE_EXPORT Result<DeclarationInfo> 
MakeAggregateDeclaration(
 
 }  // namespace internal
 
+namespace {
+
+bool IsSortNullsFirst(const substrait::SortField::SortDirection& direction) {
+  return direction % 2 == 1;

Review Comment:
   Hmm, rather than arithmetic on enum values, I'd rather see a proper 
`switch/case` statement to make code more readable and maintainable. This is 
not so performance-sensitive that it must be optimized to the latest nanosecond.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to