pitrou commented on code in PR #46926:
URL: https://github.com/apache/arrow/pull/46926#discussion_r2759682331


##########
cpp/src/arrow/compute/kernels/vector_sort_test.cc:
##########
@@ -2065,7 +2130,9 @@ TEST_P(TestTableSortIndicesRandom, Sort) {
     auto table = Table::Make(schema, std::move(columns));
     for (auto null_placement : AllNullPlacements()) {
       ARROW_SCOPED_TRACE("null_placement = ", null_placement);
-      options.null_placement = null_placement;
+      for (auto& sort_key : sort_keys) {
+        sort_key.null_placement = null_placement;

Review Comment:
   Can we generate per-column random null placements instead, just like we did 
for sort orders above? (see `generate_order`, but can use another modulo base 
to decorrelate the two)



##########
python/pyarrow/tests/test_acero.py:
##########
@@ -267,19 +267,19 @@ def test_order_by():
     table = pa.table({'a': [1, 2, 3, 4], 'b': [1, 3, None, 2]})
     table_source = Declaration("table_source", TableSourceNodeOptions(table))
 
-    ord_opts = OrderByNodeOptions([("b", "ascending")])
+    ord_opts = OrderByNodeOptions([("b", "ascending", "at_end")])

Review Comment:
   Can the third tuple element be optional? In most cases people do not need to 
override the default.



##########
cpp/src/arrow/compute/ordering.h:
##########
@@ -111,8 +116,11 @@ class ARROW_EXPORT Ordering : public 
util::EqualityComparable<Ordering> {
       : null_placement_(NullPlacement::AtStart), is_implicit_(is_implicit) {}
   /// Column key(s) to order by and how to order by these sort keys.
   std::vector<SortKey> sort_keys_;
+
+  // DEPRECATED(set null_placement in  instead)

Review Comment:
   There seems to be a missing word here.



##########
python/pyarrow/_acero.pyx:
##########
@@ -254,18 +261,19 @@ class OrderByNodeOptions(_OrderByNodeOptions):
 
     Parameters
     ----------
-    sort_keys : sequence of (name, order) tuples
+    sort_keys : sequence of (name, order, null_placement="at_end") tuples
         Names of field/column keys to sort the input on,
         along with the order each field/column is sorted in.
-        Accepted values for `order` are "ascending", "descending".
         Each field reference can be a string column name or expression.
-    null_placement : str, default "at_end"
+        Accepted values for `order` are "ascending", "descending".
+        Accepted values for `null_placement` are "at_start", "at_end".
+    null_placement : str, optional
         Where nulls in input should be sorted, only applying to
         columns/fields mentioned in `sort_keys`.
-        Accepted values are "at_start", "at_end".
+        Accepted values are "at_start", "at_end",
     """
 
-    def __init__(self, sort_keys=(), *, null_placement="at_end"):
+    def __init__(self, sort_keys=(), *, null_placement=None):

Review Comment:
   Can we deprecate a non-None `null_placement`?



##########
cpp/src/arrow/compute/kernels/select_k_test.cc:
##########
@@ -162,9 +165,11 @@ TYPED_TEST(TestSelectKForReal, Real) {
   this->AssertSelectKJson("[null, 2, NaN, 3, 1]", 1);
   this->AssertSelectKJson("[null, 2, NaN, 3, 1]", 2);
   this->AssertSelectKJson("[null, 2, NaN, 3, 1]", 3);
-  this->AssertSelectKJson("[null, 2, NaN, 3, 1]", 4);
+  // The result will contain nan. By default, the comparison of NaN is not 
equal, so

Review Comment:
   Ok, but this test used to work, why do we suddenly need to change it? Might 
there be a bug in the SelectK implementation?



##########
cpp/src/arrow/compute/kernels/vector_select_k.cc:
##########
@@ -74,6 +74,120 @@ class SelectKComparator<SortOrder::Descending> {
   }
 };
 
+struct ExtractCounter {
+  int64_t extract_non_null_count;
+  int64_t extract_nan_count;
+  int64_t extract_null_count;
+};
+
+class HeapSorter {
+ public:
+  using HeapPusherFunction =
+      std::function<void(uint64_t*, uint64_t*, uint64_t*, uint64_t*)>;
+
+  HeapSorter(int64_t k, NullPlacement null_placement, MemoryPool* pool)
+      : k_(k), null_placement_(null_placement), pool_(pool) {}
+
+  Result<std::shared_ptr<ArrayData>> HeapSort(HeapPusherFunction heap_pusher,
+                                              NullPartitionResult p,
+                                              NullPartitionResult q) {
+    ExtractCounter counter = ComputeExtractCounter(p, q);
+    return HeapSortInternal(counter, heap_pusher, p, q);
+  }
+
+  ExtractCounter ComputeExtractCounter(NullPartitionResult p, 
NullPartitionResult q) {
+    int64_t extract_non_null_count = 0;
+    int64_t extract_nan_count = 0;
+    int64_t extract_null_count = 0;
+    int64_t non_null_count = q.non_null_count();
+    int64_t nan_count = q.null_count();
+    int64_t null_count = p.null_count();
+    // non-null nan null
+    if (null_placement_ == NullPlacement::AtEnd) {
+      extract_non_null_count = non_null_count <= k_ ? non_null_count : k_;
+      extract_nan_count = extract_non_null_count >= k_
+                              ? 0
+                              : std::min(nan_count, k_ - 
extract_non_null_count);
+      extract_null_count = extract_non_null_count + extract_nan_count >= k_
+                               ? 0
+                               : (k_ - (extract_non_null_count + 
extract_nan_count));
+    } else {  // null nan non-null
+      extract_null_count = null_count <= k_ ? null_count : k_;
+      extract_nan_count =
+          extract_null_count >= k_ ? 0 : std::min(nan_count, k_ - 
extract_null_count);
+      extract_non_null_count = extract_null_count + extract_nan_count >= k_
+                                   ? 0
+                                   : (k_ - (extract_null_count + 
extract_nan_count));
+    }
+    return {extract_non_null_count, extract_nan_count, extract_null_count};
+  }
+
+  Result<std::shared_ptr<ArrayData>> HeapSortInternal(ExtractCounter counter,
+                                                      HeapPusherFunction 
heap_pusher,
+                                                      NullPartitionResult p,
+                                                      NullPartitionResult q) {
+    int64_t out_size = counter.extract_non_null_count + 
counter.extract_nan_count +
+                       counter.extract_null_count;
+    ARROW_ASSIGN_OR_RAISE(auto take_indices, MakeMutableUInt64Array(out_size, 
pool_));
+    // [extrat_count....extract_nan_count...extract_null_count]
+    if (null_placement_ == NullPlacement::AtEnd) {
+      if (counter.extract_non_null_count) {
+        auto* out_cbegin = take_indices->template 
GetMutableValues<uint64_t>(1) +
+                           counter.extract_non_null_count - 1;

Review Comment:
   I might misunderstand this line, but does this mean that `out_cbegin` really 
is the end of the indices range to push to?
   
   We should either fix the variable name, or arrange for the heap pusher to 
actually take the beginning of the range.



##########
python/pyarrow/_compute.pyx:
##########
@@ -2256,18 +2266,19 @@ class SortOptions(_SortOptions):
 
     Parameters
     ----------
-    sort_keys : sequence of (name, order) tuples
+    sort_keys : sequence of (name, order, null_placement) tuples
         Names of field/column keys to sort the input on,
         along with the order each field/column is sorted in.
         Accepted values for `order` are "ascending", "descending".
+        Accepted values for `null_placement` are "at_start", "at_end".
         The field name can be a string column name or expression.
-    null_placement : str, default "at_end"
-        Where nulls in input should be sorted, only applying to
-        columns/fields mentioned in `sort_keys`.
+    null_placement : str | None, default None
+        Where nulls in input should be sorted, overwrites
+         `null_placement` in `sort_keys`.
         Accepted values are "at_start", "at_end".
     """
 
-    def __init__(self, sort_keys=(), *, null_placement="at_end"):
+    def __init__(self, sort_keys=(), *, null_placement=None):

Review Comment:
   Same here: can we deprecate null_placement?



##########
python/pyarrow/table.pxi:
##########
@@ -2115,11 +2118,13 @@ cdef class _Tabular(_PandasConvertible):
 
         Parameters
         ----------
-        sorting : str or list[tuple(name, order)]
+        sorting : str or list[tuple(name, order, null_placement)]

Review Comment:
   Should it be
   ```suggestion
           sorting : str or list[tuple(name, order, null_placement="at_end")]
   ```



##########
cpp/src/arrow/compute/kernels/select_k_test.cc:
##########
@@ -234,6 +239,78 @@ TYPED_TEST(TestSelectKRandom, RandomValues) {
   }
 }
 
+class TestSelectKWithArray : public ::testing::Test {
+ public:
+  void Check(const std::shared_ptr<DataType>& type, const std::string& 
array_json,
+             const SelectKOptions& options, const std::string& expected_array) 
{
+    std::shared_ptr<Array> actual;
+    ASSERT_OK(this->DoSelectK(type, array_json, options, &actual));
+    ASSERT_ARRAYS_EQUAL(*ArrayFromJSON(type, expected_array), *actual);
+  }
+
+  void CheckIndices(const std::shared_ptr<DataType>& type, const std::string& 
array_json,
+                    const SelectKOptions& options, const std::string& 
expected_json) {
+    auto array = ArrayFromJSON(type, array_json);
+    auto expected = ArrayFromJSON(uint64(), expected_json);
+    auto indices = SelectKUnstable(Datum(*array), options);
+    ASSERT_OK(indices);
+    auto actual = indices.MoveValueUnsafe();

Review Comment:
   Can use `ASSERT_OK_AND_ASSIGN(auto indices, ...)`



##########
cpp/src/arrow/compute/kernels/select_k_test.cc:
##########
@@ -283,6 +389,59 @@ TYPED_TEST(TestSelectKWithChunkedArray, 
RandomValuesWithSlices) {
   }
 }
 
+TYPED_TEST(TestSelectKWithChunkedArray, PartialSelectKNull) {
+  auto chunked_array = ChunkedArrayFromJSON(uint8(), {
+                                                         "[null, 1]",
+                                                         "[3, null, 2]",
+                                                         "[1]",
+                                                     });
+  std::vector<SortKey> sort_keys{SortKey("a", SortOrder::Ascending)};
+  auto options = SelectKOptions(3, sort_keys);
+  auto expected = ChunkedArrayFromJSON(uint8(), {"[1, 1, 2]"});
+  this->Check(chunked_array, options, expected);
+  options.sort_keys[0].null_placement = NullPlacement::AtStart;
+  expected = ChunkedArrayFromJSON(uint8(), {"[null, null, 1]"});
+  this->Check(chunked_array, options, expected);
+}
+
+TYPED_TEST(TestSelectKWithChunkedArray, FullSelectKNull) {
+  auto chunked_array = ChunkedArrayFromJSON(uint8(), {
+                                                         "[null, 1]",
+                                                         "[3, null, 2]",
+                                                         "[1]",
+                                                     });
+  std::vector<SortKey> sort_keys{SortKey("a", SortOrder::Ascending)};
+  auto options = SelectKOptions(10, sort_keys);
+  options.sort_keys[0].null_placement = NullPlacement::AtStart;
+  auto expected = ChunkedArrayFromJSON(uint8(), {"[null, null, 1, 1, 2, 3]"});
+  this->Check(chunked_array, options, expected);
+  options.sort_keys[0].null_placement = NullPlacement::AtEnd;
+  expected = ChunkedArrayFromJSON(uint8(), {"[1, 1, 2, 3, null, null]"});
+  this->Check(chunked_array, options, expected);
+}
+
+TYPED_TEST(TestSelectKWithChunkedArray, PartialSelectKNullNaN) {
+  auto chunked_array = ChunkedArrayFromJSON(
+      float64(), {"[null, 1]", "[3, null, NaN]", "[10, NaN, 2]", "[1]"});
+  std::vector<SortKey> sort_keys{SortKey("a", SortOrder::Descending)};
+  auto options = SelectKOptions(3, sort_keys);
+  options.sort_keys[0].null_placement = NullPlacement::AtStart;
+  this->CheckIndices(chunked_array, options, "[3, 0, 4]");

Review Comment:
   By using `CheckIndices` we're really testing details of the underlying 
implementation. `[0, 3, 4]`, `[0, 3, 6]` would be valid results as well, given 
the sort is unstable.



##########
cpp/src/arrow/compute/kernels/vector_select_k.cc:
##########
@@ -74,6 +74,120 @@ class SelectKComparator<SortOrder::Descending> {
   }
 };
 
+struct ExtractCounter {
+  int64_t extract_non_null_count;
+  int64_t extract_nan_count;
+  int64_t extract_null_count;
+};
+
+class HeapSorter {
+ public:
+  using HeapPusherFunction =
+      std::function<void(uint64_t*, uint64_t*, uint64_t*, uint64_t*)>;
+
+  HeapSorter(int64_t k, NullPlacement null_placement, MemoryPool* pool)
+      : k_(k), null_placement_(null_placement), pool_(pool) {}
+
+  Result<std::shared_ptr<ArrayData>> HeapSort(HeapPusherFunction heap_pusher,
+                                              NullPartitionResult p,
+                                              NullPartitionResult q) {
+    ExtractCounter counter = ComputeExtractCounter(p, q);
+    return HeapSortInternal(counter, heap_pusher, p, q);
+  }
+
+  ExtractCounter ComputeExtractCounter(NullPartitionResult p, 
NullPartitionResult q) {
+    int64_t extract_non_null_count = 0;
+    int64_t extract_nan_count = 0;
+    int64_t extract_null_count = 0;
+    int64_t non_null_count = q.non_null_count();
+    int64_t nan_count = q.null_count();
+    int64_t null_count = p.null_count();
+    // non-null nan null
+    if (null_placement_ == NullPlacement::AtEnd) {
+      extract_non_null_count = non_null_count <= k_ ? non_null_count : k_;

Review Comment:
   Can you use `std::min` for better readability?



##########
cpp/src/arrow/compute/kernels/vector_select_k.cc:
##########
@@ -74,6 +74,120 @@ class SelectKComparator<SortOrder::Descending> {
   }
 };
 
+struct ExtractCounter {

Review Comment:
   Can you add comments around these internal additions? It will make reviewing 
and maintaining them easier.



##########
cpp/src/arrow/compute/api_vector.h:
##########
@@ -119,8 +119,24 @@ class ARROW_EXPORT SortOptions : public FunctionOptions {
 
   /// Column key(s) to order by and how to order by these sort keys.
   std::vector<SortKey> sort_keys;
+
+  // DEPRECATED(will be removed after null_placement has been removed)
+  /// Get sort_keys with overwritten null_placement
+  std::vector<SortKey> GetSortKeys() const {
+    if (!null_placement.has_value()) {
+      return sort_keys;
+    }
+    auto overwritten_sort_keys = sort_keys;
+    for (auto& sort_key : overwritten_sort_keys) {
+      sort_key.null_placement = null_placement.value();
+    }
+    return overwritten_sort_keys;
+  }
+
+  // DEPRECATED(set null_placement in sort_keys instead)

Review Comment:
   Can we use the `ARROW_DEPRECATED` macro so that people get warnings at 
compilation?



##########
cpp/src/arrow/compute/ordering.h:
##########
@@ -56,12 +57,14 @@ class ARROW_EXPORT SortKey : public 
util::EqualityComparable<SortKey> {
   FieldRef target;
   /// How to order by this sort key.
   SortOrder order;
+  /// Null placement for this sort key.
+  NullPlacement null_placement;
 };
 
 class ARROW_EXPORT Ordering : public util::EqualityComparable<Ordering> {
  public:
   Ordering(std::vector<SortKey> sort_keys,
-           NullPlacement null_placement = NullPlacement::AtStart)
+           std::optional<NullPlacement> null_placement = std::nullopt)

Review Comment:
   Can we deprecate the two-argument constructor and have a separate 
one-argument constructor?



##########
cpp/src/arrow/compute/kernels/vector_select_k.cc:
##########
@@ -74,6 +74,120 @@ class SelectKComparator<SortOrder::Descending> {
   }
 };
 
+struct ExtractCounter {
+  int64_t extract_non_null_count;
+  int64_t extract_nan_count;
+  int64_t extract_null_count;
+};
+
+class HeapSorter {
+ public:
+  using HeapPusherFunction =
+      std::function<void(uint64_t*, uint64_t*, uint64_t*, uint64_t*)>;
+
+  HeapSorter(int64_t k, NullPlacement null_placement, MemoryPool* pool)
+      : k_(k), null_placement_(null_placement), pool_(pool) {}
+
+  Result<std::shared_ptr<ArrayData>> HeapSort(HeapPusherFunction heap_pusher,
+                                              NullPartitionResult p,
+                                              NullPartitionResult q) {
+    ExtractCounter counter = ComputeExtractCounter(p, q);
+    return HeapSortInternal(counter, heap_pusher, p, q);
+  }
+
+  ExtractCounter ComputeExtractCounter(NullPartitionResult p, 
NullPartitionResult q) {
+    int64_t extract_non_null_count = 0;
+    int64_t extract_nan_count = 0;
+    int64_t extract_null_count = 0;
+    int64_t non_null_count = q.non_null_count();
+    int64_t nan_count = q.null_count();
+    int64_t null_count = p.null_count();
+    // non-null nan null
+    if (null_placement_ == NullPlacement::AtEnd) {
+      extract_non_null_count = non_null_count <= k_ ? non_null_count : k_;
+      extract_nan_count = extract_non_null_count >= k_
+                              ? 0
+                              : std::min(nan_count, k_ - 
extract_non_null_count);
+      extract_null_count = extract_non_null_count + extract_nan_count >= k_
+                               ? 0
+                               : (k_ - (extract_non_null_count + 
extract_nan_count));
+    } else {  // null nan non-null
+      extract_null_count = null_count <= k_ ? null_count : k_;
+      extract_nan_count =
+          extract_null_count >= k_ ? 0 : std::min(nan_count, k_ - 
extract_null_count);
+      extract_non_null_count = extract_null_count + extract_nan_count >= k_
+                                   ? 0
+                                   : (k_ - (extract_null_count + 
extract_nan_count));
+    }
+    return {extract_non_null_count, extract_nan_count, extract_null_count};
+  }
+
+  Result<std::shared_ptr<ArrayData>> HeapSortInternal(ExtractCounter counter,
+                                                      HeapPusherFunction 
heap_pusher,
+                                                      NullPartitionResult p,
+                                                      NullPartitionResult q) {
+    int64_t out_size = counter.extract_non_null_count + 
counter.extract_nan_count +
+                       counter.extract_null_count;
+    ARROW_ASSIGN_OR_RAISE(auto take_indices, MakeMutableUInt64Array(out_size, 
pool_));
+    // [extrat_count....extract_nan_count...extract_null_count]
+    if (null_placement_ == NullPlacement::AtEnd) {
+      if (counter.extract_non_null_count) {
+        auto* out_cbegin = take_indices->template 
GetMutableValues<uint64_t>(1) +
+                           counter.extract_non_null_count - 1;
+        auto kth_begin = std::min(q.non_nulls_begin + k_, q.non_nulls_end);
+        heap_pusher(q.non_nulls_begin, kth_begin, q.non_nulls_end, out_cbegin);
+      }
+
+      if (counter.extract_nan_count) {
+        auto* out_cbegin = take_indices->template 
GetMutableValues<uint64_t>(1) +
+                           counter.extract_non_null_count + 
counter.extract_nan_count - 1;
+        auto kth_begin =
+            std::min(q.nulls_begin + k_ - counter.extract_non_null_count, 
q.nulls_end);
+        heap_pusher(q.nulls_begin, kth_begin, q.nulls_end, out_cbegin);
+      }
+
+      if (counter.extract_null_count) {
+        auto* out_cbegin =
+            take_indices->template GetMutableValues<uint64_t>(1) + out_size - 
1;
+        auto kth_begin = std::min(p.nulls_begin + k_ - 
counter.extract_non_null_count -
+                                      counter.extract_nan_count,
+                                  p.nulls_end);
+        heap_pusher(p.nulls_begin, kth_begin, p.nulls_end, out_cbegin);
+      }
+    } else {  // [extract_null_count....extract_nan_count...extrat_count]
+      if (counter.extract_null_count) {
+        auto* out_cbegin = take_indices->template 
GetMutableValues<uint64_t>(1) +
+                           counter.extract_null_count - 1;
+        auto kth_begin = std::min(p.nulls_begin + k_, p.nulls_end);
+        heap_pusher(p.nulls_begin, kth_begin, p.nulls_end, out_cbegin);
+      }
+
+      if (counter.extract_nan_count) {
+        auto* out_cbegin = take_indices->template 
GetMutableValues<uint64_t>(1) +
+                           counter.extract_null_count + 
counter.extract_nan_count - 1;
+        auto kth_begin =
+            std::min(q.nulls_begin + k_ - counter.extract_null_count, 
q.nulls_end);
+        heap_pusher(q.nulls_begin, kth_begin, q.nulls_end, out_cbegin);
+      }
+
+      if (counter.extract_non_null_count) {
+        auto* out_cbegin =
+            take_indices->template GetMutableValues<uint64_t>(1) + out_size - 
1;
+        auto kth_begin = std::min(q.non_nulls_begin + k_ - 
counter.extract_null_count -
+                                      counter.extract_nan_count,
+                                  q.non_nulls_end);
+        heap_pusher(q.non_nulls_begin, kth_begin, q.non_nulls_end, out_cbegin);
+      }
+    }
+    return take_indices;
+  }
+
+ private:
+  int64_t k_;
+  NullPlacement null_placement_;

Review Comment:
   Let's make these two `const`?



##########
cpp/src/arrow/compute/kernels/vector_select_k.cc:
##########
@@ -74,6 +74,120 @@ class SelectKComparator<SortOrder::Descending> {
   }
 };
 
+struct ExtractCounter {
+  int64_t extract_non_null_count;
+  int64_t extract_nan_count;
+  int64_t extract_null_count;
+};
+
+class HeapSorter {
+ public:
+  using HeapPusherFunction =
+      std::function<void(uint64_t*, uint64_t*, uint64_t*, uint64_t*)>;
+
+  HeapSorter(int64_t k, NullPlacement null_placement, MemoryPool* pool)
+      : k_(k), null_placement_(null_placement), pool_(pool) {}
+
+  Result<std::shared_ptr<ArrayData>> HeapSort(HeapPusherFunction heap_pusher,
+                                              NullPartitionResult p,
+                                              NullPartitionResult q) {
+    ExtractCounter counter = ComputeExtractCounter(p, q);
+    return HeapSortInternal(counter, heap_pusher, p, q);
+  }
+
+  ExtractCounter ComputeExtractCounter(NullPartitionResult p, 
NullPartitionResult q) {
+    int64_t extract_non_null_count = 0;
+    int64_t extract_nan_count = 0;
+    int64_t extract_null_count = 0;
+    int64_t non_null_count = q.non_null_count();
+    int64_t nan_count = q.null_count();
+    int64_t null_count = p.null_count();
+    // non-null nan null
+    if (null_placement_ == NullPlacement::AtEnd) {
+      extract_non_null_count = non_null_count <= k_ ? non_null_count : k_;
+      extract_nan_count = extract_non_null_count >= k_
+                              ? 0
+                              : std::min(nan_count, k_ - 
extract_non_null_count);
+      extract_null_count = extract_non_null_count + extract_nan_count >= k_
+                               ? 0
+                               : (k_ - (extract_non_null_count + 
extract_nan_count));
+    } else {  // null nan non-null
+      extract_null_count = null_count <= k_ ? null_count : k_;
+      extract_nan_count =
+          extract_null_count >= k_ ? 0 : std::min(nan_count, k_ - 
extract_null_count);
+      extract_non_null_count = extract_null_count + extract_nan_count >= k_
+                                   ? 0
+                                   : (k_ - (extract_null_count + 
extract_nan_count));
+    }
+    return {extract_non_null_count, extract_nan_count, extract_null_count};
+  }
+
+  Result<std::shared_ptr<ArrayData>> HeapSortInternal(ExtractCounter counter,
+                                                      HeapPusherFunction 
heap_pusher,
+                                                      NullPartitionResult p,
+                                                      NullPartitionResult q) {
+    int64_t out_size = counter.extract_non_null_count + 
counter.extract_nan_count +
+                       counter.extract_null_count;
+    ARROW_ASSIGN_OR_RAISE(auto take_indices, MakeMutableUInt64Array(out_size, 
pool_));
+    // [extrat_count....extract_nan_count...extract_null_count]
+    if (null_placement_ == NullPlacement::AtEnd) {
+      if (counter.extract_non_null_count) {
+        auto* out_cbegin = take_indices->template 
GetMutableValues<uint64_t>(1) +
+                           counter.extract_non_null_count - 1;
+        auto kth_begin = std::min(q.non_nulls_begin + k_, q.non_nulls_end);
+        heap_pusher(q.non_nulls_begin, kth_begin, q.non_nulls_end, out_cbegin);
+      }
+
+      if (counter.extract_nan_count) {
+        auto* out_cbegin = take_indices->template 
GetMutableValues<uint64_t>(1) +
+                           counter.extract_non_null_count + 
counter.extract_nan_count - 1;
+        auto kth_begin =
+            std::min(q.nulls_begin + k_ - counter.extract_non_null_count, 
q.nulls_end);
+        heap_pusher(q.nulls_begin, kth_begin, q.nulls_end, out_cbegin);

Review Comment:
   It seems we are merely pushing nulls (well, NaNs actually), is it worth 
going through the heap-push function or can we do something more optimized? 
Those values are all equal and they don't need to be ordered.



##########
cpp/src/arrow/compute/api_vector.h:
##########
@@ -105,7 +105,7 @@ class ARROW_EXPORT ArraySortOptions : public 
FunctionOptions {
 class ARROW_EXPORT SortOptions : public FunctionOptions {
  public:
   explicit SortOptions(std::vector<SortKey> sort_keys = {},

Review Comment:
   Can we deprecate the two-argument constructor and have a separate 
one-argument constructor?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to