kou commented on a change in pull request #8612:
URL: https://github.com/apache/arrow/pull/8612#discussion_r519538131



##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -112,11 +114,12 @@ inline void VisitRawValuesInline(const ArrayType& values,
 }
 
 template <typename ArrowType>
-class CompareSorter {
+class ArrayCompareSorter {
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
 
  public:
-  void Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& 
values) {
+  void Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& 
values,
+            const ArraySortOptions& options) {

Review comment:
       This change is for order support on array.

##########
File path: cpp/src/arrow/compute/api_vector.cc
##########
@@ -135,5 +149,9 @@ Result<std::shared_ptr<Table>> Take(const Table& table, 
const ChunkedArray& indi
   return result.table();
 }
 
+Result<std::shared_ptr<Array>> SortToIndices(const Array& values, ExecContext* 
ctx) {
+  return SortIndices(values, ctx);
+}
+

Review comment:
       This hunk is for `SortToIndices()` -> `SortIndices()` rename.

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -125,35 +128,43 @@ class CompareSorter {
           std::stable_partition(indices_begin, indices_end,
                                 [&values](uint64_t ind) { return 
!values.IsNull(ind); });
     }
-    std::stable_sort(indices_begin, nulls_begin,
-                     [&values](uint64_t left, uint64_t right) {
-                       return values.GetView(left) < values.GetView(right);
-                     });
+    if (options.order == SortOrder::ASCENDING) {
+      std::stable_sort(indices_begin, nulls_begin,
+                       [&values](uint64_t left, uint64_t right) {
+                         return values.GetView(left) < values.GetView(right);
+                       });
+    } else {
+      std::stable_sort(indices_begin, nulls_begin,
+                       [&values](uint64_t left, uint64_t right) {
+                         return values.GetView(left) > values.GetView(right);
+                       });
+    }
   }
 };
 
 template <typename ArrowType>
-class CountSorter {
+class ArrayCountSorter {
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
   using c_type = typename ArrowType::c_type;
 
  public:
-  CountSorter() = default;
+  ArrayCountSorter() = default;
 
-  explicit CountSorter(c_type min, c_type max) { SetMinMax(min, max); }
+  explicit ArrayCountSorter(c_type min, c_type max) { SetMinMax(min, max); }
 
   // Assume: max >= min && (max - min) < 4Gi
   void SetMinMax(c_type min, c_type max) {
     min_ = min;
     value_range_ = static_cast<uint32_t>(max - min) + 1;
   }
 
-  void Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& 
values) {
+  void Sort(uint64_t* indices_begin, uint64_t* indices_end, const ArrayType& 
values,
+            const ArraySortOptions& options) {
     // 32bit counter performs much better than 64bit one
     if (values.length() < (1LL << 32)) {
-      SortInternal<uint32_t>(indices_begin, indices_end, values);
+      SortInternal<uint32_t>(indices_begin, indices_end, values, options);
     } else {
-      SortInternal<uint64_t>(indices_begin, indices_end, values);
+      SortInternal<uint64_t>(indices_begin, indices_end, values, options);

Review comment:
       This change is for order support on array.
   

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -125,35 +128,43 @@ class CompareSorter {
           std::stable_partition(indices_begin, indices_end,
                                 [&values](uint64_t ind) { return 
!values.IsNull(ind); });
     }
-    std::stable_sort(indices_begin, nulls_begin,
-                     [&values](uint64_t left, uint64_t right) {
-                       return values.GetView(left) < values.GetView(right);
-                     });
+    if (options.order == SortOrder::ASCENDING) {
+      std::stable_sort(indices_begin, nulls_begin,
+                       [&values](uint64_t left, uint64_t right) {
+                         return values.GetView(left) < values.GetView(right);
+                       });
+    } else {
+      std::stable_sort(indices_begin, nulls_begin,
+                       [&values](uint64_t left, uint64_t right) {
+                         return values.GetView(left) > values.GetView(right);
+                       });
+    }

Review comment:
       This change is for order support on array.

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -163,36 +174,49 @@ class CountSorter {
 
   template <typename CounterType>
   void SortInternal(uint64_t* indices_begin, uint64_t* indices_end,
-                    const ArrayType& values) {
+                    const ArrayType& values, const ArraySortOptions& options) {
     const uint32_t value_range = value_range_;
 
     // first slot reserved for prefix sum
     std::vector<CounterType> counts(1 + value_range);
 
-    VisitRawValuesInline(
-        values, [&](c_type v) { ++counts[v - min_ + 1]; }, []() {});
-
-    for (uint32_t i = 1; i <= value_range; ++i) {
-      counts[i] += counts[i - 1];
+    if (options.order == SortOrder::ASCENDING) {
+      VisitRawValuesInline(
+          values, [&](c_type v) { ++counts[v - min_ + 1]; }, []() {});
+      for (uint32_t i = 1; i <= value_range; ++i) {
+        counts[i] += counts[i - 1];
+      }
+      uint32_t null_position = counts[value_range];
+      int64_t index = 0;
+      VisitRawValuesInline(
+          values, [&](c_type v) { indices_begin[counts[v - min_]++] = index++; 
},
+          [&]() { indices_begin[null_position++] = index++; });
+    } else {
+      VisitRawValuesInline(
+          values, [&](c_type v) { ++counts[v - min_]; }, []() {});
+      for (uint32_t i = value_range; i >= 1; --i) {
+        counts[i - 1] += counts[i];
+      }
+      uint32_t null_position = counts[0];
+      int64_t index = 0;
+      VisitRawValuesInline(
+          values, [&](c_type v) { indices_begin[counts[v - min_ + 1]++] = 
index++; },
+          [&]() { indices_begin[null_position++] = index++; });
     }
-
-    int64_t index = 0;
-    VisitRawValuesInline(
-        values, [&](c_type v) { indices_begin[counts[v - min_]++] = index++; },
-        [&]() { indices_begin[counts[value_range]++] = index++; });
   }
 };

Review comment:
       This change is for order support on array.

##########
File path: cpp/src/arrow/compute/kernels/vector_sort_test.cc
##########
@@ -209,138 +214,155 @@ TYPED_TEST(TestNthToIndicesRandom, RandomValues) {
 using arrow::internal::checked_pointer_cast;
 
 template <typename ArrowType>
-class TestSortToIndicesKernel : public TestBase {
+class TestArraySortIndicesKernel : public TestBase {

Review comment:
       This hunk is for renaming `SortToIndices()` -> `SortIndices()`. No logic 
change.

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -236,36 +260,40 @@ class CountOrCompareSorter {
 };
 
 template <typename Type, typename Enable = void>
-struct Sorter;
+struct ArraySorter;
 
 template <>
-struct Sorter<UInt8Type> {
-  CountSorter<UInt8Type> impl;
-  Sorter() : impl(0, 255) {}
+struct ArraySorter<UInt8Type> {
+  ArrayCountSorter<UInt8Type> impl;
+  ArraySorter() : impl(0, 255) {}
 };
 
 template <>
-struct Sorter<Int8Type> {
-  CountSorter<Int8Type> impl;
-  Sorter() : impl(-128, 127) {}
+struct ArraySorter<Int8Type> {
+  ArrayCountSorter<Int8Type> impl;
+  ArraySorter() : impl(-128, 127) {}
 };
 
 template <typename Type>
-struct Sorter<Type, enable_if_t<is_integer_type<Type>::value &&
-                                (sizeof(typename Type::c_type) > 1)>> {
-  CountOrCompareSorter<Type> impl;
+struct ArraySorter<Type, enable_if_t<is_integer_type<Type>::value &&
+                                     (sizeof(typename Type::c_type) > 1)>> {
+  ArrayCountOrCompareSorter<Type> impl;
 };
 
 template <typename Type>
-struct Sorter<Type, enable_if_t<is_floating_type<Type>::value ||
-                                is_base_binary_type<Type>::value>> {
-  CompareSorter<Type> impl;
+struct ArraySorter<Type, enable_if_t<is_floating_type<Type>::value ||
+                                     is_base_binary_type<Type>::value>> {
+  ArrayCompareSorter<Type> impl;
 };

Review comment:
       `Array` prefix is added to distinguish array and table related codes.

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -112,11 +114,12 @@ inline void VisitRawValuesInline(const ArrayType& values,
 }
 
 template <typename ArrowType>
-class CompareSorter {
+class ArrayCompareSorter {

Review comment:
       `Array` prefix is added to distinguish array and table related codes.

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -125,35 +128,43 @@ class CompareSorter {
           std::stable_partition(indices_begin, indices_end,
                                 [&values](uint64_t ind) { return 
!values.IsNull(ind); });
     }
-    std::stable_sort(indices_begin, nulls_begin,
-                     [&values](uint64_t left, uint64_t right) {
-                       return values.GetView(left) < values.GetView(right);
-                     });
+    if (options.order == SortOrder::ASCENDING) {
+      std::stable_sort(indices_begin, nulls_begin,
+                       [&values](uint64_t left, uint64_t right) {
+                         return values.GetView(left) < values.GetView(right);
+                       });
+    } else {
+      std::stable_sort(indices_begin, nulls_begin,
+                       [&values](uint64_t left, uint64_t right) {
+                         return values.GetView(left) > values.GetView(right);
+                       });
+    }
   }
 };
 
 template <typename ArrowType>
-class CountSorter {
+class ArrayCountSorter {
   using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
   using c_type = typename ArrowType::c_type;
 
  public:
-  CountSorter() = default;
+  ArrayCountSorter() = default;
 
-  explicit CountSorter(c_type min, c_type max) { SetMinMax(min, max); }
+  explicit ArrayCountSorter(c_type min, c_type max) { SetMinMax(min, max); }

Review comment:
       `Array` prefix is added to distinguish array and table related codes.

##########
File path: cpp/src/arrow/compute/kernels/vector_sort.cc
##########
@@ -299,12 +327,344 @@ void AddSortingKernels(VectorKernel base, 
VectorFunction* func) {
   }
 }
 
+class TableSorter : public TypeVisitor {

Review comment:
       For multi-column sorting on table.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to