zeroshade commented on code in PR #34719:
URL: https://github.com/apache/arrow/pull/34719#discussion_r1154873481


##########
go/arrow/compute/internal/kernels/vector_sort.go:
##########
@@ -0,0 +1,87 @@
+package kernels
+
+import (
+       "bytes"
+       "sort"
+
+       "github.com/apache/arrow/go/v12/arrow"
+       "github.com/apache/arrow/go/v12/arrow/array"
+       "github.com/apache/arrow/go/v12/arrow/compute/internal/exec"
+       "github.com/apache/arrow/go/v12/arrow/memory"
+)
+
+type SortIndicesOptions struct {
+       NullPlacement NullPlacement `compute:"null_placement"`
+}
+
+func (s *SortIndicesOptions) TypeName() string {
+       return "SortIndicesOptions"
+}
+
+type Order int
+
+const (
+       Ascending Order = iota
+       Descending
+)
+
+type NullPlacement int
+
+const (
+       AtStart NullPlacement = iota
+       AtEnd
+)
+
+func GetVectorSortingKernels() []exec.VectorKernel {
+       var base exec.VectorKernel
+       base.CanExecuteChunkWise = true
+       base.OutputChunked = false
+       outType := exec.NewOutputType(arrow.ListOf(arrow.PrimitiveTypes.Int64))
+       kernels := make([]exec.VectorKernel, 0)
+       for _, ty := range primitiveTypes {
+               base.Signature = &exec.KernelSignature{
+                       InputTypes: []exec.InputType{exec.NewExactInput(ty)},
+                       OutType:    outType,
+               }
+               base.ExecFn = sortExec
+               kernels = append(kernels, base)
+       }
+       return kernels
+}
+
+func sortExec(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) 
error {
+       // Get the input array from the batch
+       inExecVal := batch.Values[0]
+       inArr := inExecVal.Array
+
+       // Create a slice of indices, initialized to [0, 1, 2, ..., n-1]
+       indices := make([]int, inArr.Len)
+       for i := range indices {
+               indices[i] = i
+       }
+
+       sz := inArr.Type.(arrow.FixedWidthDataType).Bytes()
+
+       // Sort the indices slice based on the values in the input array
+       sort.Slice(indices, func(i, j int) bool {
+               // TODO: not sure what to do here?
+               // compare using scalar comparison?
+               a := inArr.Buffers[1].Buf[indices[i]*sz : (indices[i]+1)*sz]
+               b := inArr.Buffers[1].Buf[indices[j]*sz : (indices[j]+1)*sz]
+               return bytes.Compare(a, b) < 0
+       })

Review Comment:
   It might make sense to have a look at 
https://github.com/apache/arrow/blob/main/go/arrow/compute/internal/kernels/scalar_comparisons.go
 (which implements the less than/equals/greater than/etc.) operations at a 
scalar level. I'll try to take another look at this early next week but you 
might be able to get an idea from over there on how to tackle this.
   
   A tricky piece to think about though is that we'd want to be able to manage 
passing an option to allow sorting a record batch / table by multiple columns. 
In that scenario I think we can learn from 
https://arrow.apache.org/blog/2022/11/07/multi-column-sorts-in-arrow-rust-part-1/
 and the corresponding part 2 and potentially leverage similar techniques to 
implement the sorting here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to