This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 228ea5d5 fix(compute/kernels): map FSB byte-width to numeric memo type 
for is_in (#797)
228ea5d5 is described below

commit 228ea5d579714fb2419a84b6417a61174a101e1c
Author: Sai Asish Y <[email protected]>
AuthorDate: Mon May 4 09:28:17 2026 -0700

    fix(compute/kernels): map FSB byte-width to numeric memo type for is_in 
(#797)
    
    Closes #785.
    
    `CreateSetLookupState` already routes `FixedSizeBinary` widths 1/2/4/8
    onto the numeric fast path (`SetLookupState[uintN]` +
    `visitNumeric[uintN]`), but `Init` then asked `newMemoTable` for
    `arrow.FIXED_SIZE_BINARY` and got a `BinaryMemoTable`, which fails the
    subsequent `TypedMemoTable[uintN]` assertion. The cleanup hook had the
    same blind spot — it cast every state to `SetLookupState[[]byte]` and
    would have panicked there too once Init was fixed.
    
    Map the FSB byte-width to the matching uint type before the lookup, and
    only release in `CleanupFn` when the state actually carries a
    `BinaryMemoTable`. Adds a regression test exercising widths 1/2/4/8.
    
    Signed-off-by: SAY-5 <[email protected]>
---
 .../compute/internal/kernels/scalar_set_lookup.go  | 19 +++++++++++++
 arrow/compute/scalar_set_lookup.go                 | 10 +++++--
 arrow/compute/scalar_set_lookup_test.go            | 32 ++++++++++++++++++++++
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/arrow/compute/internal/kernels/scalar_set_lookup.go 
b/arrow/compute/internal/kernels/scalar_set_lookup.go
index fe9841ac..7b1caeb7 100644
--- a/arrow/compute/internal/kernels/scalar_set_lookup.go
+++ b/arrow/compute/internal/kernels/scalar_set_lookup.go
@@ -169,6 +169,25 @@ func (s *SetLookupState[T]) Init(opts SetLookupOptions) 
error {
        if memoType == arrow.EXTENSION {
                memoType = 
s.ValueSetType.(arrow.ExtensionType).StorageType().ID()
        }
+       // FixedSizeBinary with byte-widths 1/2/4/8 takes the numeric fast-path
+       // in CreateSetLookupState (SetLookupState[uintN] + 
visitNumeric[uintN]),
+       // so the lookup table must be the matching TypedMemoTable[uintN], not
+       // the BinaryMemoTable that newMemoTable would otherwise return for
+       // FIXED_SIZE_BINARY.
+       if memoType == arrow.FIXED_SIZE_BINARY {
+               if fsb, ok := s.ValueSetType.(*arrow.FixedSizeBinaryType); ok {
+                       switch fsb.ByteWidth {
+                       case 1:
+                               memoType = arrow.UINT8
+                       case 2:
+                               memoType = arrow.UINT16
+                       case 4:
+                               memoType = arrow.UINT32
+                       case 8:
+                               memoType = arrow.UINT64
+                       }
+               }
+       }
        lookup, err := newMemoTable(s.Alloc, memoType)
        if err != nil {
                return err
diff --git a/arrow/compute/scalar_set_lookup.go 
b/arrow/compute/scalar_set_lookup.go
index 84305536..9323fc0a 100644
--- a/arrow/compute/scalar_set_lookup.go
+++ b/arrow/compute/scalar_set_lookup.go
@@ -207,8 +207,14 @@ func RegisterScalarSetLookup(reg FunctionRegistry) {
                kn.MemAlloc = exec.MemPrealloc
                kn.NullHandling = exec.NullComputedPrealloc
                kn.CleanupFn = func(state exec.KernelState) error {
-                       s := state.(*kernels.SetLookupState[[]byte])
-                       s.Lookup.(*hashing.BinaryMemoTable).Release()
+                       // FixedSizeBinary widths 1/2/4/8 use 
SetLookupState[uintN] +
+                       // the matching numeric memo table, which carries no 
buffers
+                       // to release. Only the BinaryMemoTable path needs 
cleanup.
+                       if s, ok := state.(*kernels.SetLookupState[[]byte]); ok 
{
+                               if memo, ok := 
s.Lookup.(*hashing.BinaryMemoTable); ok {
+                                       memo.Release()
+                               }
+                       }
                        return nil
                }
 
diff --git a/arrow/compute/scalar_set_lookup_test.go 
b/arrow/compute/scalar_set_lookup_test.go
index b28c3d75..010bb52c 100644
--- a/arrow/compute/scalar_set_lookup_test.go
+++ b/arrow/compute/scalar_set_lookup_test.go
@@ -18,6 +18,7 @@ package compute_test
 
 import (
        "context"
+       "encoding/base64"
        "fmt"
        "strings"
        "sync"
@@ -265,6 +266,37 @@ func (ss *ScalarSetLookupSuite) TestIsInFixedSizeBinary() {
        }
 }
 
+func (ss *ScalarSetLookupSuite) TestIsInFixedSizeBinaryFastPaths() {
+       // ByteWidths 1, 2, 4, 8 take the SetLookupState[uintN] / visitNumeric
+       // fast-path. Before the fix in #785 this panicked because Init asked
+       // newMemoTable for a FIXED_SIZE_BINARY table (BinaryMemoTable) but the
+       // state expected a TypedMemoTable[uintN]; afterwards the cleanup hook
+       // also tried to type-assert all states as SetLookupState[[]byte].
+       pad := func(s string, w int) string {
+               buf := make([]byte, w)
+               copy(buf, s)
+               return string(buf)
+       }
+       enc := func(s string) string {
+               // scalar_set_lookup_test uses base64-encoded JSON entries via
+               // FixedSizeBinary's standard JSON shape; the helpers below
+               // already accept JSON arrays of base64 strings.
+               return base64.StdEncoding.EncodeToString([]byte(s))
+       }
+       for _, w := range []int{1, 2, 4, 8} {
+               w := w
+               ss.Run(fmt.Sprintf("ByteWidth=%d", w), func() {
+                       typ := &arrow.FixedSizeBinaryType{ByteWidth: w}
+                       input := fmt.Sprintf(`[%q, %q, %q]`,
+                               enc(pad("a", w)), enc(pad("z", w)), 
enc(pad("b", w)))
+                       valueset := fmt.Sprintf(`[%q, %q]`,
+                               enc(pad("a", w)), enc(pad("b", w)))
+                       ss.checkIsInFromJSON(typ, input, valueset,
+                               `[true, false, true]`, 
compute.NullMatchingMatch)
+               })
+       }
+}
+
 func (ss *ScalarSetLookupSuite) TestIsInDecimal() {
        type testCase struct {
                expected string

Reply via email to