zeroshade commented on code in PR #749:
URL: https://github.com/apache/arrow-go/pull/749#discussion_r3053042083


##########
arrow/compute/internal/kernels/vector_sort_physical.go:
##########
@@ -0,0 +1,873 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//go:build go1.18
+
+package kernels
+
+import (
+       "math"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/array"
+)
+
+// physicalColumnBase holds chunked data and row resolution shared by 
monomorphic sort columns.
+// Each Arrow physical type has its own comparator struct embedding this (no 
compare func pointer).
+type physicalColumnBase struct {
+       chunks        []arrow.Array
+       rowMap        logicalRowMap
+       validityNulls bool
+}
+
+func newPhysicalColumnBase(chunks []arrow.Array, numRows int, validityNulls 
bool) physicalColumnBase {
+       var rowMap logicalRowMap
+       if len(chunks) > 1 {
+               rowMap = newLogicalRowMap(chunks, numRows)
+       }
+       return physicalColumnBase{chunks: chunks, rowMap: rowMap, 
validityNulls: validityNulls}
+}
+
+// Pointer receivers: a value receiver would copy chunks + logicalRowMap slice 
headers on every
+// compare (pair/isNull/cell), which is measurable on large n log n sorts.
+func (b *physicalColumnBase) pair(i, j uint64) (arrI, arrJ arrow.Array, li, lj 
int) {
+       if len(b.chunks) == 1 {
+               arrI = b.chunks[0]
+               arrJ = arrI
+               li = int(i)
+               lj = int(j)
+               return
+       }
+       ci, li2, cj, lj2 := b.rowMap.pair(i, j)
+       arrI = b.chunks[ci]
+       arrJ = b.chunks[cj]
+       li, lj = li2, lj2
+       return
+}
+
+func (b *physicalColumnBase) isNullAtGlobal(row uint64) bool {
+       if len(b.chunks) == 1 {
+               return b.chunks[0].IsNull(int(row))
+       }
+       ci, li := b.rowMap.at(row)
+       return b.chunks[ci].IsNull(li)
+}
+
+func (b *physicalColumnBase) cell(row uint64) (ch arrow.Array, li int) {
+       if len(b.chunks) == 1 {
+               return b.chunks[0], int(row)
+       }
+       ci, li := b.rowMap.at(row)
+       return b.chunks[ci], li
+}
+
+func (b *physicalColumnBase) columnHasValidityNulls() bool { return 
b.validityNulls }
+
+// --- Monomorphic comparators (one concrete *array type each; mirrors C++ 
ConcreteColumnComparator<T>) ---
+
+type physicalSortInt8Column struct{ base physicalColumnBase }
+
+func newPhysicalSortInt8Column(chunks []arrow.Array, numRows int, vn bool) 
*physicalSortInt8Column {
+       return &physicalSortInt8Column{base: newPhysicalColumnBase(chunks, 
numRows, vn)}
+}
+
+func (c *physicalSortInt8Column) compareRowsForKey(i, j uint64, key SortKey) 
int {
+       ai, aj, li, lj := c.base.pair(i, j)
+       a := ai.(*array.Int8)
+       b := aj.(*array.Int8)
+       if c.base.validityNulls {
+               if v, stop := compareKeyedNulls(a.IsNull(li), b.IsNull(lj), 
key); stop {
+                       return v
+               }
+       }
+       return compareOrdered(key.Order, a.Value(li), b.Value(lj))
+}
+
+func (c *physicalSortInt8Column) isNullAt(row uint64) bool { return 
c.base.isNullAtGlobal(row) }
+func (c *physicalSortInt8Column) hasNullLikeValues() bool  { return false }
+func (c *physicalSortInt8Column) isNullLikeAt(uint64) bool { return false }
+func (c *physicalSortInt8Column) columnHasValidityNulls() bool {
+       return c.base.columnHasValidityNulls()
+}

Review Comment:
   Interesting, I'm surprised that there was that much of a slowdown using 
generics. I'll take a look at this, but for now I agree that performance is 
definitely the higher priority. Maybe we could set this up with codegen, as we 
do elsewhere? That way we can prevent future bugs if we change this logic.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to