This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 228ea5d5 fix(compute/kernels): map FSB byte-width to numeric memo type
for is_in (#797)
228ea5d5 is described below
commit 228ea5d579714fb2419a84b6417a61174a101e1c
Author: Sai Asish Y <[email protected]>
AuthorDate: Mon May 4 09:28:17 2026 -0700
fix(compute/kernels): map FSB byte-width to numeric memo type for is_in
(#797)
Closes #785.
`CreateSetLookupState` already routes `FixedSizeBinary` widths 1/2/4/8
onto the numeric fast path (`SetLookupState[uintN]` +
`visitNumeric[uintN]`), but `Init` then asked `newMemoTable` for
`arrow.FIXED_SIZE_BINARY` and got a `BinaryMemoTable`, which fails the
subsequent `TypedMemoTable[uintN]` assertion. The cleanup hook had the
same blind spot — it cast every state to `SetLookupState[[]byte]` and
would have panicked there too once Init was fixed.
Map the FSB byte-width to the matching uint type before the lookup, and
only release in `CleanupFn` when the state actually carries a
`BinaryMemoTable`. Adds a regression test exercising widths 1/2/4/8.
Signed-off-by: SAY-5 <[email protected]>
---
.../compute/internal/kernels/scalar_set_lookup.go | 19 +++++++++++++
arrow/compute/scalar_set_lookup.go | 10 +++++--
arrow/compute/scalar_set_lookup_test.go | 32 ++++++++++++++++++++++
3 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/arrow/compute/internal/kernels/scalar_set_lookup.go
b/arrow/compute/internal/kernels/scalar_set_lookup.go
index fe9841ac..7b1caeb7 100644
--- a/arrow/compute/internal/kernels/scalar_set_lookup.go
+++ b/arrow/compute/internal/kernels/scalar_set_lookup.go
@@ -169,6 +169,25 @@ func (s *SetLookupState[T]) Init(opts SetLookupOptions)
error {
if memoType == arrow.EXTENSION {
memoType =
s.ValueSetType.(arrow.ExtensionType).StorageType().ID()
}
+ // FixedSizeBinary with byte-widths 1/2/4/8 takes the numeric fast-path
+ // in CreateSetLookupState (SetLookupState[uintN] +
visitNumeric[uintN]),
+ // so the lookup table must be the matching TypedMemoTable[uintN], not
+ // the BinaryMemoTable that newMemoTable would otherwise return for
+ // FIXED_SIZE_BINARY.
+ if memoType == arrow.FIXED_SIZE_BINARY {
+ if fsb, ok := s.ValueSetType.(*arrow.FixedSizeBinaryType); ok {
+ switch fsb.ByteWidth {
+ case 1:
+ memoType = arrow.UINT8
+ case 2:
+ memoType = arrow.UINT16
+ case 4:
+ memoType = arrow.UINT32
+ case 8:
+ memoType = arrow.UINT64
+ }
+ }
+ }
lookup, err := newMemoTable(s.Alloc, memoType)
if err != nil {
return err
diff --git a/arrow/compute/scalar_set_lookup.go
b/arrow/compute/scalar_set_lookup.go
index 84305536..9323fc0a 100644
--- a/arrow/compute/scalar_set_lookup.go
+++ b/arrow/compute/scalar_set_lookup.go
@@ -207,8 +207,14 @@ func RegisterScalarSetLookup(reg FunctionRegistry) {
kn.MemAlloc = exec.MemPrealloc
kn.NullHandling = exec.NullComputedPrealloc
kn.CleanupFn = func(state exec.KernelState) error {
- s := state.(*kernels.SetLookupState[[]byte])
- s.Lookup.(*hashing.BinaryMemoTable).Release()
+ // FixedSizeBinary widths 1/2/4/8 use
SetLookupState[uintN] +
+ // the matching numeric memo table, which carries no
buffers
+ // to release. Only the BinaryMemoTable path needs
cleanup.
+ if s, ok := state.(*kernels.SetLookupState[[]byte]); ok
{
+ if memo, ok :=
s.Lookup.(*hashing.BinaryMemoTable); ok {
+ memo.Release()
+ }
+ }
return nil
}
diff --git a/arrow/compute/scalar_set_lookup_test.go
b/arrow/compute/scalar_set_lookup_test.go
index b28c3d75..010bb52c 100644
--- a/arrow/compute/scalar_set_lookup_test.go
+++ b/arrow/compute/scalar_set_lookup_test.go
@@ -18,6 +18,7 @@ package compute_test
import (
"context"
+ "encoding/base64"
"fmt"
"strings"
"sync"
@@ -265,6 +266,37 @@ func (ss *ScalarSetLookupSuite) TestIsInFixedSizeBinary() {
}
}
+func (ss *ScalarSetLookupSuite) TestIsInFixedSizeBinaryFastPaths() {
+ // ByteWidths 1, 2, 4, 8 take the SetLookupState[uintN] / visitNumeric
+ // fast-path. Before the fix in #785 this panicked because Init asked
+ // newMemoTable for a FIXED_SIZE_BINARY table (BinaryMemoTable) but the
+ // state expected a TypedMemoTable[uintN]; afterwards the cleanup hook
+ // also tried to type-assert all states as SetLookupState[[]byte].
+ pad := func(s string, w int) string {
+ buf := make([]byte, w)
+ copy(buf, s)
+ return string(buf)
+ }
+ enc := func(s string) string {
+ // scalar_set_lookup_test uses base64-encoded JSON entries via
+ // FixedSizeBinary's standard JSON shape; the helpers below
+ // already accept JSON arrays of base64 strings.
+ return base64.StdEncoding.EncodeToString([]byte(s))
+ }
+ for _, w := range []int{1, 2, 4, 8} {
+ w := w
+ ss.Run(fmt.Sprintf("ByteWidth=%d", w), func() {
+ typ := &arrow.FixedSizeBinaryType{ByteWidth: w}
+ input := fmt.Sprintf(`[%q, %q, %q]`,
+ enc(pad("a", w)), enc(pad("z", w)),
enc(pad("b", w)))
+ valueset := fmt.Sprintf(`[%q, %q]`,
+ enc(pad("a", w)), enc(pad("b", w)))
+ ss.checkIsInFromJSON(typ, input, valueset,
+ `[true, false, true]`,
compute.NullMatchingMatch)
+ })
+ }
+}
+
func (ss *ScalarSetLookupSuite) TestIsInDecimal() {
type testCase struct {
expected string