Taepper commented on code in PR #46926:
URL: https://github.com/apache/arrow/pull/46926#discussion_r3403347451


##########
cpp/src/arrow/compute/kernels/vector_select_k.cc:
##########
@@ -102,51 +218,45 @@ class ArraySelector : public TypeVisitor {
 
   template <typename InType, SortOrder sort_order>
   Status SelectKthInternal() {
-    using GetView = GetViewType<InType>;
     using ArrayType = typename TypeTraits<InType>::ArrayType;
 
     ArrayType arr(array_.data());
-    std::vector<uint64_t> indices(arr.length());
 
-    uint64_t* indices_begin = indices.data();
-    uint64_t* indices_end = indices_begin + indices.size();
-    std::iota(indices_begin, indices_end, 0);
     if (k_ > arr.length()) {
       k_ = arr.length();
     }
 
-    const auto p = PartitionNulls<ArrayType, NonStablePartitioner>(
-        indices_begin, indices_end, arr, 0, NullPlacement::AtEnd);
-    const auto end_iter = p.non_nulls_end;
+    std::vector<uint64_t> indices(arr.length());
 
-    auto kth_begin = std::min(indices_begin + k_, end_iter);
+    uint64_t* indices_begin = indices.data();
+    uint64_t* indices_end = indices_begin + indices.size();
+    std::iota(indices_begin, indices_end, 0);
 
-    SelectKComparator<sort_order> comparator;
-    auto cmp = [&arr, &comparator](uint64_t left, uint64_t right) {
-      const auto lval = GetView::LogicalValue(arr.GetView(left));
-      const auto rval = GetView::LogicalValue(arr.GetView(right));
-      return comparator(lval, rval);
-    };
-    using HeapContainer =
-        std::priority_queue<uint64_t, std::vector<uint64_t>, decltype(cmp)>;
-    HeapContainer heap(indices_begin, kth_begin, cmp);
-    for (auto iter = kth_begin; iter != end_iter && !heap.empty(); ++iter) {
-      uint64_t x_index = *iter;
-      if (cmp(x_index, heap.top())) {
-        heap.pop();
-        heap.push(x_index);
-      }
-    }
-    auto out_size = static_cast<int64_t>(heap.size());
     ARROW_ASSIGN_OR_RAISE(auto take_indices,
-                          MakeMutableUInt64Array(out_size, 
ctx_->memory_pool()));
+                          MakeMutableUInt64Array(k_, ctx_->memory_pool()));
+    auto* output_begin = take_indices->template GetMutableValues<uint64_t>(1);
+
+    const auto p = PartitionNullsAndNans<ArrayType, NonStablePartitioner>(
+        indices_begin, indices_end, arr, 0, null_placement_);
+
+    // From k, calculate
+    //   l = non_null_like elements to take from PartitionResult
+    //   m = nan elements to take from PartitionResult
+    //   n = null elements to take from PartitionResult
+    // k = l + m + n because k was clipped to arr.length()
+    // And directly compute the ranges in {output, output+k} where we will 
need to place
+    // the selected elements from each group -> no longer need to track 
null_placement

Review Comment:
   Ah, I added this comment not as a reference to previous implementation. I 
wanted to signal, that all code _after_ the statement 
`CalculateOutputRangesByNullLikeness` is completely independent whether 
null_placement is `AtStart` or `AtEnd`.
   
   So with `no longer` I wanted to refer the structuring of the code within the 
current implementation, not the implementation over time



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to