This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 3dd2d5e8 feat(arrow/compute/exprs): support 
expr.IntervalYearToMonthLiteral in literalToDatum (#825)
3dd2d5e8 is described below

commit 3dd2d5e8be7cd95916d25052ec3ace9e78471f27
Author: Matt Topol <[email protected]>
AuthorDate: Tue May 26 12:21:55 2026 -0400

    feat(arrow/compute/exprs): support expr.IntervalYearToMonthLiteral in 
literalToDatum (#825)
    
    ### Rationale for this change
    
    Closes #815.
    
    `arrow/compute/exprs.literalToDatum` handled interval year/month
    literals only through the `*expr.ProtoLiteral` path. The substrait-go
    library also surfaces these as the dedicated
    `expr.IntervalYearToMonthLiteral` type — that is the value
    
[`intervalYearToMonthLiteralFromProto`](https://github.com/substrait-io/substrait-go/blob/v8.1.1/expr/interval_year_to_month.go#L41-L47)
    returns when deserializing protobuf — and any caller reaching
    `literalToDatum` with this type previously fell through to
    `arrow.ErrNotImplemented`.
    
    ### What changes are included in this PR?
    
    1. **New type-switch cases for `expr.IntervalYearToMonthLiteral`**
    (value form, the canonical form returned by substrait-go's deserializer)
    **and `*expr.IntervalYearToMonthLiteral`** (pointer form, used in parts
    of substrait-go itself, e.g. `expr/string_test.go:86`). Both produce the
    same `FixedSizeList<int32>[2]` extension scalar that the existing
    `*expr.ProtoLiteral` path already produces, so callers reaching either
    form get a Datum that is `compute.Equals` to the one they would have
    gotten via the protobuf path.
    2. **New helper `intervalYearToMonthDatum(mem, years, months)`**. The
    existing `*types.IntervalYearToMonthType` branch inside the
    `*expr.ProtoLiteral` case now calls this helper instead of inlining the
    builder logic, so the two paths cannot drift.
    3. **Allocator threading**. The helper takes the `mem memory.Allocator`
    parameter that `literalToDatum` already receives, instead of hardcoding
    `memory.DefaultAllocator` (which the original inlined branch did). All
    three call sites forward `mem`.
    
    ### Are these changes tested?
    
    Yes. New `TestLiteralToDatumIntervalYearToMonth` in
    `arrow/compute/exprs/exec_internal_test.go` constructs a
    `*expr.ProtoLiteral` baseline and a parallel
    `expr.IntervalYearToMonthLiteral` (value) and
    `*expr.IntervalYearToMonthLiteral` (pointer), then asserts the resulting
    Datums are `compute.Equals`. Existing `parquet/file/...`,
    `parquet/pqarrow/...`, and `arrow/compute/exprs/...` test suites all
    continue to pass; full repo `go build ./...` is clean.
    
    The test deliberately uses `memory.DefaultAllocator` rather than
    `memory.NewCheckedAllocator` with `AssertSize(t, 0)`.
    `*scalar.Extension` does not implement `Release()` (see
    `arrow/scalar/scalar.go:397-466`), so an extension scalar's underlying
    storage is never released even when the wrapping Datum is — adding a
    checked-allocator assertion would chase a pre-existing leak that is
    unrelated to this issue. (Filing that as a separate follow-up issue is
    straightforward and out of scope here.)
    
    ### Are there any user-facing changes?
    
    Yes, but **purely additive**:
    
    - Callers passing an `expr.IntervalYearToMonthLiteral` (or pointer to
    one) to `literalToDatum`, `ExecuteScalarExpression`, or related entry
    points used to receive `arrow.ErrNotImplemented`; they now receive a
    properly-formed extension Datum identical to what the
    `*expr.ProtoLiteral` path produces.
    - No public API signatures change.
    - No existing call site behavior changes (the `*expr.ProtoLiteral` path
    produces a byte-identical Datum to before).
    
    ### Out of scope (separate follow-up)
    
    `*scalar.Extension` does not implement `Release()`, so extension scalars
    (including the ones produced by this code path, the existing
    `*types.IntervalYearToMonthType` path, the `*types.IntervalDayType`
    path, and `intervalDay()` / `intervalYear()` extension types in general)
    leak their storage through `compute.ScalarDatum.Release()`. Worth a
    separate issue.
---
 arrow/compute/exprs/exec.go               | 29 ++++++++++++-------
 arrow/compute/exprs/exec_internal_test.go | 48 +++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/arrow/compute/exprs/exec.go b/arrow/compute/exprs/exec.go
index d2b6e207..8dcdb098 100644
--- a/arrow/compute/exprs/exec.go
+++ b/arrow/compute/exprs/exec.go
@@ -179,6 +179,18 @@ func GetExtensionIDSet(ctx context.Context) ExtensionIDSet 
{
        return v
 }
 
+func intervalYearToMonthDatum(mem memory.Allocator, years, months int32) 
(compute.Datum, error) {
+       bldr := array.NewInt32Builder(mem)
+       defer bldr.Release()
+
+       bldr.Append(years)
+       bldr.Append(months)
+       arr := bldr.NewArray()
+       defer arr.Release()
+       return &compute.ScalarDatum{Value: scalar.NewExtensionScalar(
+               scalar.NewFixedSizeListScalar(arr), intervalYear())}, nil
+}
+
 func literalToDatum(mem memory.Allocator, lit expr.Literal, ext 
ExtensionIDSet) (compute.Datum, error) {
        switch v := lit.(type) {
        case *expr.PrimitiveLiteral[bool]:
@@ -329,6 +341,10 @@ func literalToDatum(mem memory.Allocator, lit 
expr.Literal, ext ExtensionIDSet)
 
                s, err := scalar.NewStructScalarWithNames(fields, names)
                return compute.NewDatum(s), err
+       case expr.IntervalYearToMonthLiteral:
+               return intervalYearToMonthDatum(mem, v.Years, v.Months)
+       case *expr.IntervalYearToMonthLiteral:
+               return intervalYearToMonthDatum(mem, v.Years, v.Months)
        case *expr.ProtoLiteral:
                switch t := v.Type.(type) {
                case *types.DecimalType:
@@ -353,19 +369,10 @@ func literalToDatum(mem memory.Allocator, lit 
expr.Literal, ext ExtensionIDSet)
                                &arrow.Decimal128Type{Precision: t.Precision, 
Scale: t.Scale})), nil
                case *types.UserDefinedType: // not yet implemented
                case *types.IntervalYearToMonthType:
-                       bldr := array.NewInt32Builder(memory.DefaultAllocator)
-                       defer bldr.Release()
-
                        val := v.Value.(*types.IntervalYearToMonth)
-                       typ := intervalYear()
-                       bldr.Append(val.Years)
-                       bldr.Append(val.Months)
-                       arr := bldr.NewArray()
-                       defer arr.Release()
-                       return &compute.ScalarDatum{Value: 
scalar.NewExtensionScalar(
-                               scalar.NewFixedSizeListScalar(arr), typ)}, nil
+                       return intervalYearToMonthDatum(mem, val.Years, 
val.Months)
                case *types.IntervalDayType:
-                       bldr := array.NewInt32Builder(memory.DefaultAllocator)
+                       bldr := array.NewInt32Builder(mem)
                        defer bldr.Release()
 
                        val := v.Value.(*types.IntervalDayToSecond)
diff --git a/arrow/compute/exprs/exec_internal_test.go 
b/arrow/compute/exprs/exec_internal_test.go
index 199f82ad..347c2517 100644
--- a/arrow/compute/exprs/exec_internal_test.go
+++ b/arrow/compute/exprs/exec_internal_test.go
@@ -29,6 +29,9 @@ import (
        "github.com/apache/arrow-go/v18/arrow/memory"
        "github.com/stretchr/testify/assert"
        "github.com/stretchr/testify/require"
+       "github.com/substrait-io/substrait-go/v8/expr"
+       "github.com/substrait-io/substrait-go/v8/extensions"
+       "github.com/substrait-io/substrait-go/v8/types"
 )
 
 var (
@@ -112,3 +115,48 @@ func TestMakeExecBatch(t *testing.T) {
                })
        }
 }
+
+func TestLiteralToDatumIntervalYearToMonth(t *testing.T) {
+       // memory.NewCheckedAllocator with AssertSize would fail here:
+       // *scalar.Extension does not implement Release() (see
+       // arrow/scalar/scalar.go), so an extension scalar's underlying
+       // storage is never released even when the wrapping Datum is.
+       // This will be fixable once 
https://github.com/apache/arrow-go/issues/827 is addressed.
+       mem := memory.DefaultAllocator
+
+       extSet := NewExtensionSetDefault(
+               
expr.NewEmptyExtensionRegistry(extensions.GetDefaultCollectionWithNoError()))
+
+       const (
+               years  int32 = 3
+               months int32 = 7
+       )
+
+       protoLitType := types.NewIntervalYearToMonthType()
+       protoLit := &expr.ProtoLiteral{
+               Value: &types.IntervalYearToMonth{Years: years, Months: months},
+               Type:  &protoLitType,
+       }
+       expected, err := literalToDatum(mem, protoLit, extSet)
+       require.NoError(t, err, "ProtoLiteral baseline failed")
+       defer expected.Release()
+
+       cases := []struct {
+               name string
+               lit  expr.Literal
+       }{
+               {"value", expr.IntervalYearToMonthLiteral{Years: years, Months: 
months}},
+               {"pointer", &expr.IntervalYearToMonthLiteral{Years: years, 
Months: months}},
+       }
+
+       for _, tc := range cases {
+               t.Run(tc.name, func(t *testing.T) {
+                       got, err := literalToDatum(mem, tc.lit, extSet)
+                       require.NoError(t, err)
+                       defer got.Release()
+                       assert.Truef(t, got.Equals(expected),
+                               "IntervalYearToMonthLiteral (%s) datum did not 
match ProtoLiteral baseline\nexpected: %s\ngot: %s",
+                               tc.name, expected, got)
+               })
+       }
+}

Reply via email to