hamilton-earthscope commented on code in PR #749:
URL: https://github.com/apache/arrow-go/pull/749#discussion_r3048350669


##########
arrow/compute/vector_sort.go:
##########
@@ -0,0 +1,391 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build go1.18
+
+package compute
+
+import (
+       "context"
+       "fmt"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/compute/exec"
+       "github.com/apache/arrow-go/v18/arrow/compute/internal/kernels"
+)
+
+var (
+       sortIndicesDoc = FunctionDoc{
+               Summary: "Return the indices that would sort the input",
+               Description: `This function computes an array of indices that define a stable sort.
+Supports arrays, chunked arrays, record batches, and tables.
+For arrays and chunked arrays, use a single SortKey (ColumnIndex is ignored).
+For record batches and tables, use []SortKey to specify columns and sort
+order; at least one key is required. Each key must reference a valid column.`,
+               ArgNames:    []string{"input"},
+               OptionsType: "SortKeys",
+       }
+
+       sortIndicesMetaFunc = NewMetaFunction("sort_indices", Unary(), sortIndicesDoc,
+               func(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) {
+                       input := args[0]
+                       switch input.Kind() {
+                       case KindArray, KindChunked, KindRecord, KindTable:
+                               return sortIndicesImpl(ctx, opts, input)
+                       }
+
+                       return nil, fmt.Errorf("%w: unsupported type for sort_indices operation: %s",
+                               arrow.ErrNotImplemented, input)
+               })
+
+       sortDoc = FunctionDoc{
+               Summary: "Return a sorted copy of the input",
+               Description: `This function sorts the input using the same ordering as sort_indices
+and returns the reordered values. It is equivalent to take(input,
+sort_indices(input, options)).
+Supports arrays, chunked arrays, record batches, and tables with the same
+SortKeys options as sort_indices.`,
+               ArgNames:    []string{"input"},
+               OptionsType: "SortKeys",
+       }
+
+       sortMetaFunc = NewMetaFunction("sort", Unary(), sortDoc,
+               func(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) {
+                       input := args[0]
+                       switch input.Kind() {
+                       case KindArray, KindChunked, KindRecord, KindTable:
+                       default:
+                               return nil, fmt.Errorf("%w: unsupported type for sort: %s", arrow.ErrNotImplemented, input)
+                       }
+
+                       indices, err := CallFunction(ctx, "sort_indices", opts, input)
+                       if err != nil {
+                               return nil, err
+                       }
+                       defer indices.Release()
+
+                       return Take(ctx, *DefaultTakeOptions(), input, indices)
+               })
+)
+
+const (
+       SortOrderAscending  = kernels.Ascending
+       SortOrderDescending = kernels.Descending
+       SortNullsAtEnd      = kernels.NullsAtEnd
+       SortNullsAtStart    = kernels.NullsAtStart
+)
+
+// SortKey defines a column to sort by with its ordering and null placement options.
+type SortKey = kernels.SortKey
+
+// SortOptions defines the desired sort order for the input.
+type SortOptions []SortKey
+
+// TypeName implements FunctionOptions.
+func (SortOptions) TypeName() string { return "SortKeys" }
+
+// DefaultSortKey returns the default sort key: ascending order with nulls last.
+func DefaultSortKey() SortKey {
+       return SortKey{
+               ColumnIndex:   0,
+               Order:         kernels.Ascending,
+               NullPlacement: kernels.NullsAtEnd,
+       }
+}
+
+// sortIndicesImpl adapts any supported Datum to kernels.SortIndices (internal/kernels), which
+// implements a stable lexicographic sort over []*arrow.Chunked (one logical column per sort key,
+// same row count).
+//
+// Only the columns referenced by sort keys are passed to the kernel; the rest of the batch/table
+// is irrelevant to index computation. Chunked wrappers we allocate with arrow.NewChunked must be
+// released in the defer below (needsRelease); table column *arrow.Chunked values are borrowed from
+// the table and must not be released here.
+func sortIndicesImpl(ctx context.Context, opts FunctionOptions, input Datum) (Datum, error) {
+       inputSortKeys := opts.(SortOptions)
+       if len(inputSortKeys) == 0 {
+               return nil, fmt.Errorf("%w: must provide at least one sort key", arrow.ErrInvalid)
+       }
+
+       var sortColumns []*arrow.Chunked
+       // For KindRecord/KindTable, sortKeys stays aligned with inputSortKeys (multi-column sort).
+       // For KindArray/KindChunked, sortKeys is replaced with a single key (see those cases).
+       sortKeys := []kernels.SortKey(inputSortKeys)
+       var needsRelease []bool
+
+       switch input.Kind() {
+       case KindArray:
+               // Single column: one Array wrapped as a one-chunk Chunked (kernel API).
+               arr := input.(*ArrayDatum).MakeArray()
+               defer arr.Release()
+               chunked := arrow.NewChunked(arr.DataType(), []arrow.Array{arr})
+               sortColumns = []*arrow.Chunked{chunked}
+               needsRelease = []bool{true}
+
+               // Only the first key is used; ColumnIndex is meaningless for a bare array—copy the key and
+               // set index 0 so the kernel sees a consistent (column, key) pair (order/null placement preserved).
+               key := inputSortKeys[0]
+               key.ColumnIndex = 0
+               sortKeys = []kernels.SortKey{key}
+
+       case KindChunked:
+               // Single column: use the Chunked as-is (caller-owned; do not Release).
+               chunked := input.(*ChunkedDatum).Value
+               sortColumns = []*arrow.Chunked{chunked}
+               needsRelease = []bool{false}
+
+               key := inputSortKeys[0]
+               key.ColumnIndex = 0
+               sortKeys = []kernels.SortKey{key}
+
+       case KindRecord:
+               batch := input.(*RecordDatum).Value
+
+               sortColumns = make([]*arrow.Chunked, len(inputSortKeys))
+               needsRelease = make([]bool, len(inputSortKeys))
+               for i, key := range inputSortKeys {

Review Comment:
   Done. Let me know if the docstring on the kernel method is insufficient.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to