(iceberg-go) branch main updated: refactor(evaluators): shift evaluator code into the table package for future development (#123)

etudenhoefner Sun, 18 Aug 2024 03:21:54 -0700

This is an automated email from the ASF dual-hosted git repository.

etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git



The following commit(s) were added to refs/heads/main by this push:
     new 607a2fd  refactor(evaluators): shift evaluator code into the table 
package for future development (#123)
607a2fd is described below

commit 607a2fd1ecb3f66e5b578cd438907ddb31aa2460
Author: Matt Topol <[email protected]>
AuthorDate: Sun Aug 18 06:21:45 2024 -0400

    refactor(evaluators): shift evaluator code into the table package for 
future development (#123)
---
 table/evaluators.go      | 490 +++++++++++++++++++++++++++++++++++++++++++++
 table/evaluators_test.go | 505 +++++++++++++++++++++++++++++++++++++++++++++++
 visitors.go              | 469 +------------------------------------------
 visitors_test.go         | 478 --------------------------------------------
 4 files changed, 996 insertions(+), 946 deletions(-)

diff --git a/table/evaluators.go b/table/evaluators.go
new file mode 100644
index 0000000..343c8ef
--- /dev/null
+++ b/table/evaluators.go
@@ -0,0 +1,490 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table
+
+import (
+       "github.com/apache/iceberg-go"
+       "github.com/google/uuid"
+)
+
+const (
+       rowsMightMatch, rowsMustMatch      = true, true
+       rowsCannotMatch, rowsMightNotMatch = false, false
+       inPredicateLimit                   = 200
+)
+
+// newManifestEvaluator returns a function that can be used to evaluate 
whether a particular
+// manifest file has rows that might or might not match a given partition 
filter by using
+// the stats provided in the partitions 
(UpperBound/LowerBound/ContainsNull/ContainsNaN).
+func newManifestEvaluator(spec iceberg.PartitionSpec, schema *iceberg.Schema, 
partitionFilter iceberg.BooleanExpression, caseSensitive bool) 
(func(iceberg.ManifestFile) (bool, error), error) {
+       partType := spec.PartitionType(schema)
+       partSchema := iceberg.NewSchema(0, partType.FieldList...)
+       filter, err := iceberg.RewriteNotExpr(partitionFilter)
+       if err != nil {
+               return nil, err
+       }
+
+       boundFilter, err := iceberg.BindExpr(partSchema, filter, caseSensitive)
+       if err != nil {
+               return nil, err
+       }
+
+       return (&manifestEvalVisitor{partitionFilter: boundFilter}).Eval, nil
+}
+
+type manifestEvalVisitor struct {
+       partitionFields []iceberg.FieldSummary
+       partitionFilter iceberg.BooleanExpression
+}
+
+func (m *manifestEvalVisitor) Eval(manifest iceberg.ManifestFile) (bool, 
error) {
+       if parts := manifest.Partitions(); len(parts) > 0 {
+               m.partitionFields = parts
+               return iceberg.VisitExpr(m.partitionFilter, m)
+       }
+
+       return rowsMightMatch, nil
+}
+
+func allBoundCmp[T iceberg.LiteralType](bound iceberg.Literal, set 
iceberg.Set[iceberg.Literal], want int) bool {
+       val := bound.(iceberg.TypedLiteral[T])
+       cmp := val.Comparator()
+
+       return set.All(func(e iceberg.Literal) bool {
+               return cmp(val.Value(), e.(iceberg.TypedLiteral[T]).Value()) == 
want
+       })
+}
+
+func allBoundCheck(bound iceberg.Literal, set iceberg.Set[iceberg.Literal], 
want int) bool {
+       switch bound.Type().(type) {
+       case iceberg.BooleanType:
+               return allBoundCmp[bool](bound, set, want)
+       case iceberg.Int32Type:
+               return allBoundCmp[int32](bound, set, want)
+       case iceberg.Int64Type:
+               return allBoundCmp[int64](bound, set, want)
+       case iceberg.Float32Type:
+               return allBoundCmp[float32](bound, set, want)
+       case iceberg.Float64Type:
+               return allBoundCmp[float64](bound, set, want)
+       case iceberg.DateType:
+               return allBoundCmp[iceberg.Date](bound, set, want)
+       case iceberg.TimeType:
+               return allBoundCmp[iceberg.Time](bound, set, want)
+       case iceberg.TimestampType, iceberg.TimestampTzType:
+               return allBoundCmp[iceberg.Timestamp](bound, set, want)
+       case iceberg.BinaryType, iceberg.FixedType:
+               return allBoundCmp[[]byte](bound, set, want)
+       case iceberg.StringType:
+               return allBoundCmp[string](bound, set, want)
+       case iceberg.UUIDType:
+               return allBoundCmp[uuid.UUID](bound, set, want)
+       case iceberg.DecimalType:
+               return allBoundCmp[iceberg.Decimal](bound, set, want)
+       }
+       panic(iceberg.ErrType)
+}
+
+func (m *manifestEvalVisitor) VisitIn(term iceberg.BoundTerm, literals 
iceberg.Set[iceberg.Literal]) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.LowerBound == nil {
+               return rowsCannotMatch
+       }
+
+       if literals.Len() > inPredicateLimit {
+               return rowsMightMatch
+       }
+
+       lower, err := iceberg.LiteralFromBytes(term.Type(), *field.LowerBound)
+       if err != nil {
+               panic(err)
+       }
+
+       if allBoundCheck(lower, literals, 1) {
+               return rowsCannotMatch
+       }
+
+       if field.UpperBound != nil {
+               upper, err := iceberg.LiteralFromBytes(term.Type(), 
*field.UpperBound)
+               if err != nil {
+                       panic(err)
+               }
+
+               if allBoundCheck(upper, literals, -1) {
+                       return rowsCannotMatch
+               }
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitNotIn(term iceberg.BoundTerm, literals 
iceberg.Set[iceberg.Literal]) bool {
+       // because the bounds are not necessarily a min or max value, this 
cannot be answered using them
+       // notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X is a value 
in col
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitIsNan(term iceberg.BoundTerm) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.ContainsNaN != nil && !*field.ContainsNaN {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitNotNan(term iceberg.BoundTerm) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.ContainsNaN != nil && *field.ContainsNaN && 
!field.ContainsNull && field.LowerBound == nil {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitIsNull(term iceberg.BoundTerm) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if !field.ContainsNull {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitNotNull(term iceberg.BoundTerm) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       // ContainsNull encodes whether at least one partition value is null
+       // lowerBound is null if all partition values are null
+       allNull := field.ContainsNull && field.LowerBound == nil
+       if allNull && (term.Ref().Type().Equals(iceberg.PrimitiveTypes.Float32) 
|| term.Ref().Type().Equals(iceberg.PrimitiveTypes.Float64)) {
+               // floating point types may include NaN values, which we check 
separately
+               // in case bounds don't include NaN values, ContainsNaN needs 
to be checked
+               allNull = field.ContainsNaN != nil && !*field.ContainsNaN
+       }
+
+       if allNull {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func getCmp[T iceberg.LiteralType](b iceberg.TypedLiteral[T]) 
func(iceberg.Literal, iceberg.Literal) int {
+       cmp := b.Comparator()
+       return func(l1, l2 iceberg.Literal) int {
+               return cmp(l1.(iceberg.TypedLiteral[T]).Value(), 
l2.(iceberg.TypedLiteral[T]).Value())
+       }
+}
+
+func getCmpLiteral(boundary iceberg.Literal) func(iceberg.Literal, 
iceberg.Literal) int {
+       switch l := boundary.(type) {
+       case iceberg.TypedLiteral[bool]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[int32]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[int64]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[float32]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[float64]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[iceberg.Date]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[iceberg.Time]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[iceberg.Timestamp]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[[]byte]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[string]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[uuid.UUID]:
+               return getCmp(l)
+       case iceberg.TypedLiteral[iceberg.Decimal]:
+               return getCmp(l)
+       }
+       panic(iceberg.ErrType)
+}
+
+func (m *manifestEvalVisitor) VisitEqual(term iceberg.BoundTerm, lit 
iceberg.Literal) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.LowerBound == nil || field.UpperBound == nil {
+               // values are all null and literal cannot contain null
+               return rowsCannotMatch
+       }
+
+       lower, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.LowerBound)
+       if err != nil {
+               panic(err)
+       }
+
+       cmp := getCmpLiteral(lower)
+       if cmp(lower, lit) == 1 {
+               return rowsCannotMatch
+       }
+
+       upper, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.UpperBound)
+       if err != nil {
+               panic(err)
+       }
+
+       if cmp(lit, upper) == 1 {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitNotEqual(term iceberg.BoundTerm, lit 
iceberg.Literal) bool {
+       // because bounds are not necessarily a min or max, this cannot be 
answered
+       // using them. notEq(col, X) with (X, Y) doesn't guarantee X is a value 
in col
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitGreaterEqual(term iceberg.BoundTerm, lit 
iceberg.Literal) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.UpperBound == nil {
+               return rowsCannotMatch
+       }
+
+       upper, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.UpperBound)
+       if err != nil {
+               panic(err)
+       }
+
+       if getCmpLiteral(upper)(lit, upper) == 1 {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitGreater(term iceberg.BoundTerm, lit 
iceberg.Literal) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.UpperBound == nil {
+               return rowsCannotMatch
+       }
+
+       upper, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.UpperBound)
+       if err != nil {
+               panic(err)
+       }
+
+       if getCmpLiteral(upper)(lit, upper) >= 0 {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitLessEqual(term iceberg.BoundTerm, lit 
iceberg.Literal) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.LowerBound == nil {
+               return rowsCannotMatch
+       }
+
+       lower, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.LowerBound)
+       if err != nil {
+               panic(err)
+       }
+
+       if getCmpLiteral(lower)(lit, lower) == -1 {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitLess(term iceberg.BoundTerm, lit 
iceberg.Literal) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.LowerBound == nil {
+               return rowsCannotMatch
+       }
+
+       lower, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.LowerBound)
+       if err != nil {
+               panic(err)
+       }
+
+       if getCmpLiteral(lower)(lit, lower) <= 0 {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitStartsWith(term iceberg.BoundTerm, lit 
iceberg.Literal) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       var prefix string
+       if val, ok := lit.(iceberg.TypedLiteral[string]); ok {
+               prefix = val.Value()
+       } else {
+               prefix = string(lit.(iceberg.TypedLiteral[[]byte]).Value())
+       }
+
+       lenPrefix := len(prefix)
+
+       if field.LowerBound == nil {
+               return rowsCannotMatch
+       }
+
+       lower, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.LowerBound)
+       if err != nil {
+               panic(err)
+       }
+
+       // truncate lower bound so that its length is not greater than the 
length of prefix
+       var v string
+       switch l := lower.(type) {
+       case iceberg.TypedLiteral[string]:
+               v = l.Value()
+               if len(v) > lenPrefix {
+                       v = v[:lenPrefix]
+               }
+       case iceberg.TypedLiteral[[]byte]:
+               v = string(l.Value())
+               if len(v) > lenPrefix {
+                       v = v[:lenPrefix]
+               }
+       }
+
+       if v > prefix {
+               return rowsCannotMatch
+       }
+
+       if field.UpperBound == nil {
+               return rowsCannotMatch
+       }
+
+       upper, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.UpperBound)
+       if err != nil {
+               panic(err)
+       }
+
+       switch u := upper.(type) {
+       case iceberg.TypedLiteral[string]:
+               v = u.Value()
+               if len(v) > lenPrefix {
+                       v = v[:lenPrefix]
+               }
+       case iceberg.TypedLiteral[[]byte]:
+               v = string(u.Value())
+               if len(v) > lenPrefix {
+                       v = v[:lenPrefix]
+               }
+       }
+
+       if v < prefix {
+               return rowsCannotMatch
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitNotStartsWith(term iceberg.BoundTerm, lit 
iceberg.Literal) bool {
+       pos := term.Ref().Pos()
+       field := m.partitionFields[pos]
+
+       if field.ContainsNull || field.LowerBound == nil || field.UpperBound == 
nil {
+               return rowsMightMatch
+       }
+
+       // NotStartsWith will match unless ALL values must start with the 
prefix.
+       // this happens when the lower and upper bounds BOTH start with the 
prefix
+       lower, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.LowerBound)
+       if err != nil {
+               panic(err)
+       }
+
+       upper, err := iceberg.LiteralFromBytes(term.Ref().Type(), 
*field.UpperBound)
+       if err != nil {
+               panic(err)
+       }
+
+       var (
+               prefix, lowerBound, upperBound string
+       )
+       if val, ok := lit.(iceberg.TypedLiteral[string]); ok {
+               prefix = val.Value()
+               lowerBound, upperBound = 
lower.(iceberg.TypedLiteral[string]).Value(), 
upper.(iceberg.TypedLiteral[string]).Value()
+       } else {
+               prefix = string(lit.(iceberg.TypedLiteral[[]byte]).Value())
+               lowerBound = 
string(lower.(iceberg.TypedLiteral[[]byte]).Value())
+               upperBound = 
string(upper.(iceberg.TypedLiteral[[]byte]).Value())
+       }
+
+       lenPrefix := len(prefix)
+       if len(lowerBound) < lenPrefix {
+               return rowsMightMatch
+       }
+
+       if lowerBound[:lenPrefix] == prefix {
+               // if upper is shorter than the prefix, it can't start with the prefix
+               if len(upperBound) < lenPrefix {
+                       return rowsMightMatch
+               }
+
+               if upperBound[:lenPrefix] == prefix {
+                       return rowsCannotMatch
+               }
+       }
+
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitTrue() bool {
+       return rowsMightMatch
+}
+
+func (m *manifestEvalVisitor) VisitFalse() bool {
+       return rowsCannotMatch
+}
+
+func (m *manifestEvalVisitor) VisitUnbound(iceberg.UnboundPredicate) bool {
+       panic("need bound predicate")
+}
+
+func (m *manifestEvalVisitor) VisitBound(pred iceberg.BoundPredicate) bool {
+       return iceberg.VisitBoundPredicate(pred, m)
+}
+
+func (m *manifestEvalVisitor) VisitNot(child bool) bool       { return !child }
+func (m *manifestEvalVisitor) VisitAnd(left, right bool) bool { return left && 
right }
+func (m *manifestEvalVisitor) VisitOr(left, right bool) bool  { return left || 
right }
diff --git a/table/evaluators_test.go b/table/evaluators_test.go
new file mode 100644
index 0000000..b8c2671
--- /dev/null
+++ b/table/evaluators_test.go
@@ -0,0 +1,505 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package table
+
+import (
+       "testing"
+
+       "github.com/apache/iceberg-go"
+       "github.com/stretchr/testify/assert"
+       "github.com/stretchr/testify/require"
+)
+
+const (
+       IntMinValue, IntMaxValue int32 = 30, 79
+)
+
+func TestManifestEvaluator(t *testing.T) {
+
+       var (
+               IntMin, IntMax       = []byte{byte(IntMinValue), 0x00, 0x00, 
0x00}, []byte{byte(IntMaxValue), 0x00, 0x00, 0x00}
+               StringMin, StringMax = []byte("a"), []byte("z")
+               FloatMin, _          = iceberg.Float32Literal(0).MarshalBinary()
+               FloatMax, _          = 
iceberg.Float32Literal(20).MarshalBinary()
+               DblMin, _            = iceberg.Float64Literal(0).MarshalBinary()
+               DblMax, _            = 
iceberg.Float64Literal(20).MarshalBinary()
+               NanTrue, NanFalse    = true, false
+
+               testSchema = iceberg.NewSchema(1,
+                       iceberg.NestedField{ID: 1, Name: "id",
+                               Type: iceberg.PrimitiveTypes.Int32, Required: 
true},
+                       iceberg.NestedField{ID: 2, Name: 
"all_nulls_missing_nan",
+                               Type: iceberg.PrimitiveTypes.String, Required: 
false},
+                       iceberg.NestedField{ID: 3, Name: "some_nulls",
+                               Type: iceberg.PrimitiveTypes.String, Required: 
false},
+                       iceberg.NestedField{ID: 4, Name: "no_nulls",
+                               Type: iceberg.PrimitiveTypes.String, Required: 
false},
+                       iceberg.NestedField{ID: 5, Name: "float",
+                               Type: iceberg.PrimitiveTypes.Float32, Required: 
false},
+                       iceberg.NestedField{ID: 6, Name: "all_nulls_double",
+                               Type: iceberg.PrimitiveTypes.Float64, Required: 
false},
+                       iceberg.NestedField{ID: 7, Name: "all_nulls_no_nans",
+                               Type: iceberg.PrimitiveTypes.Float32, Required: 
false},
+                       iceberg.NestedField{ID: 8, Name: "all_nans",
+                               Type: iceberg.PrimitiveTypes.Float64, Required: 
false},
+                       iceberg.NestedField{ID: 9, Name: "both_nan_and_null",
+                               Type: iceberg.PrimitiveTypes.Float32, Required: 
false},
+                       iceberg.NestedField{ID: 10, Name: "no_nan_or_null",
+                               Type: iceberg.PrimitiveTypes.Float64, Required: 
false},
+                       iceberg.NestedField{ID: 11, Name: 
"all_nulls_missing_nan_float",
+                               Type: iceberg.PrimitiveTypes.Float32, Required: 
false},
+                       iceberg.NestedField{ID: 12, Name: 
"all_same_value_or_null",
+                               Type: iceberg.PrimitiveTypes.String, Required: 
false},
+                       iceberg.NestedField{ID: 13, Name: 
"no_nulls_same_value_a",
+                               Type: iceberg.PrimitiveTypes.Binary, Required: 
false},
+               )
+       )
+
+       partFields := make([]iceberg.PartitionField, 0, testSchema.NumFields())
+       for _, f := range testSchema.Fields() {
+               partFields = append(partFields, iceberg.PartitionField{
+                       Name:      f.Name,
+                       SourceID:  f.ID,
+                       FieldID:   f.ID,
+                       Transform: iceberg.IdentityTransform{},
+               })
+       }
+
+       spec := iceberg.NewPartitionSpec(partFields...)
+       manifestNoStats := iceberg.NewManifestV1Builder("", 0, 0, 0).Build()
+       manifest := iceberg.NewManifestV1Builder("", 0, 0, 0).Partitions(
+               []iceberg.FieldSummary{
+                       { // id
+                               ContainsNull: false,
+                               ContainsNaN:  nil,
+                               LowerBound:   &IntMin,
+                               UpperBound:   &IntMax,
+                       },
+                       { // all_nulls_missing_nan
+                               ContainsNull: true,
+                               ContainsNaN:  nil,
+                               LowerBound:   nil,
+                               UpperBound:   nil,
+                       },
+                       { // some_nulls
+                               ContainsNull: true,
+                               ContainsNaN:  nil,
+                               LowerBound:   &StringMin,
+                               UpperBound:   &StringMax,
+                       },
+                       { // no_nulls
+                               ContainsNull: false,
+                               ContainsNaN:  nil,
+                               LowerBound:   &StringMin,
+                               UpperBound:   &StringMax,
+                       },
+                       { // float
+                               ContainsNull: true,
+                               ContainsNaN:  nil,
+                               LowerBound:   &FloatMin,
+                               UpperBound:   &FloatMax,
+                       },
+                       { // all_nulls_double
+                               ContainsNull: true,
+                               ContainsNaN:  nil,
+                               LowerBound:   nil,
+                               UpperBound:   nil,
+                       },
+                       { // all_nulls_no_nans
+                               ContainsNull: true,
+                               ContainsNaN:  &NanFalse,
+                               LowerBound:   nil,
+                               UpperBound:   nil,
+                       },
+                       { // all_nans
+                               ContainsNull: false,
+                               ContainsNaN:  &NanTrue,
+                               LowerBound:   nil,
+                               UpperBound:   nil,
+                       },
+                       { // both_nan_and_null
+                               ContainsNull: true,
+                               ContainsNaN:  &NanTrue,
+                               LowerBound:   nil,
+                               UpperBound:   nil,
+                       },
+                       { // no_nan_or_null
+                               ContainsNull: false,
+                               ContainsNaN:  &NanFalse,
+                               LowerBound:   &DblMin,
+                               UpperBound:   &DblMax,
+                       },
+                       { // all_nulls_missing_nan_float
+                               ContainsNull: true,
+                               ContainsNaN:  nil,
+                               LowerBound:   nil,
+                               UpperBound:   nil,
+                       },
+                       { // all_same_value_or_null
+                               ContainsNull: true,
+                               ContainsNaN:  nil,
+                               LowerBound:   &StringMin,
+                               UpperBound:   &StringMin,
+                       },
+                       { // no_nulls_same_value_a
+                               ContainsNull: false,
+                               ContainsNaN:  nil,
+                               LowerBound:   &StringMin,
+                               UpperBound:   &StringMin,
+                       },
+               }).Build()
+
+       t.Run("all nulls", func(t *testing.T) {
+               tests := []struct {
+                       field    string
+                       expected bool
+                       msg      string
+               }{
+                       {"all_nulls_missing_nan", false, "should skip: all 
nulls column with non-floating type contains all null"},
+                       {"all_nulls_missing_nan_float", true, "should read: no 
NaN information may indicate presence of NaN value"},
+                       {"some_nulls", true, "should read: column with some 
nulls contains a non-null value"},
+                       {"no_nulls", true, "should read: non-null column 
contains a non-null value"},
+               }
+
+               for _, tt := range tests {
+                       eval, err := newManifestEvaluator(spec, testSchema,
+                               iceberg.NotNull(iceberg.Reference(tt.field)), 
true)
+                       require.NoError(t, err)
+
+                       result, err := eval(manifest)
+                       require.NoError(t, err)
+                       assert.Equal(t, tt.expected, result, tt.msg)
+               }
+       })
+
+       t.Run("no nulls", func(t *testing.T) {
+               tests := []struct {
+                       field    string
+                       expected bool
+                       msg      string
+               }{
+                       {"all_nulls_missing_nan", true, "should read: at least 
one null value in all null column"},
+                       {"some_nulls", true, "should read: column with some 
nulls contains a null value"},
+                       {"no_nulls", false, "should skip: non-null column 
contains no null values"},
+                       {"both_nan_and_null", true, "should read: 
both_nan_and_null column contains no null values"},
+               }
+
+               for _, tt := range tests {
+                       eval, err := newManifestEvaluator(spec, testSchema,
+                               iceberg.IsNull(iceberg.Reference(tt.field)), 
true)
+                       require.NoError(t, err)
+
+                       result, err := eval(manifest)
+                       require.NoError(t, err)
+                       assert.Equal(t, tt.expected, result, tt.msg)
+               }
+       })
+
+       t.Run("is nan", func(t *testing.T) {
+               tests := []struct {
+                       field    string
+                       expected bool
+                       msg      string
+               }{
+                       {"float", true, "should read: no information on if 
there are nan values in float column"},
+                       {"all_nulls_double", true, "should read: no NaN 
information may indicate presence of NaN value"},
+                       {"all_nulls_missing_nan_float", true, "should read: no 
NaN information may indicate presence of NaN value"},
+                       {"all_nulls_no_nans", false, "should skip: no nan 
column doesn't contain nan value"},
+                       {"all_nans", true, "should read: all_nans column 
contains nan value"},
+                       {"both_nan_and_null", true, "should read: 
both_nan_and_null column contains nan value"},
+                       {"no_nan_or_null", false, "should skip: no_nan_or_null 
column doesn't contain nan value"},
+               }
+
+               for _, tt := range tests {
+                       eval, err := newManifestEvaluator(spec, testSchema,
+                               iceberg.IsNaN(iceberg.Reference(tt.field)), 
true)
+                       require.NoError(t, err)
+
+                       result, err := eval(manifest)
+                       require.NoError(t, err)
+                       assert.Equal(t, tt.expected, result, tt.msg)
+               }
+       })
+
+       t.Run("not nan", func(t *testing.T) {
+               tests := []struct {
+                       field    string
+                       expected bool
+                       msg      string
+               }{
+                       {"float", true, "should read: no information on if 
there are nan values in float column"},
+                       {"all_nulls_double", true, "should read: all null 
column contains non nan value"},
+                       {"all_nulls_no_nans", true, "should read: no_nans 
column contains non nan value"},
+                       {"all_nans", false, "should skip: all nans 
columndoesn't contain non nan value"},
+                       {"both_nan_and_null", true, "should read: 
both_nan_and_null nans column contains non nan value"},
+                       {"no_nan_or_null", true, "should read: no_nan_or_null 
column contains non nan value"},
+               }
+
+               for _, tt := range tests {
+                       eval, err := newManifestEvaluator(spec, testSchema,
+                               iceberg.NotNaN(iceberg.Reference(tt.field)), 
true)
+                       require.NoError(t, err)
+
+                       result, err := eval(manifest)
+                       require.NoError(t, err)
+                       assert.Equal(t, tt.expected, result, tt.msg)
+               }
+       })
+
+       t.Run("test missing stats", func(t *testing.T) {
+               exprs := []iceberg.BooleanExpression{
+                       iceberg.LessThan(iceberg.Reference("id"), int32(5)),
+                       iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(30)),
+                       iceberg.EqualTo(iceberg.Reference("id"), int32(70)),
+                       iceberg.GreaterThan(iceberg.Reference("id"), int32(78)),
+                       iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(90)),
+                       iceberg.NotEqualTo(iceberg.Reference("id"), int32(101)),
+                       iceberg.IsNull(iceberg.Reference("id")),
+                       iceberg.NotNull(iceberg.Reference("id")),
+                       iceberg.IsNaN(iceberg.Reference("float")),
+                       iceberg.NotNaN(iceberg.Reference("float")),
+               }
+
+               for _, tt := range exprs {
+                       eval, err := newManifestEvaluator(spec, testSchema, tt, 
true)
+                       require.NoError(t, err)
+
+                       result, err := eval(manifestNoStats)
+                       require.NoError(t, err)
+                       assert.Truef(t, result, "should read when missing stats 
for expr: %s", tt)
+               }
+       })
+
+       t.Run("test exprs", func(t *testing.T) {
+               tests := []struct {
+                       expr   iceberg.BooleanExpression
+                       expect bool
+                       msg    string
+               }{
+                       
{iceberg.NewNot(iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25))),
+                               true, "should read: not(false)"},
+                       
{iceberg.NewNot(iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMinValue-25))),
+                               false, "should skip: not(true)"},
+                       {iceberg.NewAnd(
+                               iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
+                               
iceberg.GreaterThanEqual(iceberg.Reference("id"), int32(IntMinValue-30))),
+                               false, "should skip: and(false, true)"},
+                       {iceberg.NewAnd(
+                               iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
+                               
iceberg.GreaterThanEqual(iceberg.Reference("id"), int32(IntMaxValue+1))),
+                               false, "should skip: and(false, false)"},
+                       {iceberg.NewAnd(
+                               iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
+                               iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMinValue))),
+                               true, "should read: and(true, true)"},
+                       {iceberg.NewOr(
+                               iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
+                               
iceberg.GreaterThanEqual(iceberg.Reference("id"), int32(IntMaxValue+1))),
+                               false, "should skip: or(false, false)"},
+                       {iceberg.NewOr(
+                               iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
+                               
iceberg.GreaterThanEqual(iceberg.Reference("id"), int32(IntMaxValue-19))),
+                               true, "should read: or(false, true)"},
+                       {iceberg.LessThan(iceberg.Reference("some_nulls"), 
"1"), false,
+                               "should not read: id range below lower bound"},
+                       {iceberg.LessThan(iceberg.Reference("some_nulls"), 
"b"), true,
+                               "should read: lower bound in range"},
+                       {iceberg.LessThan(iceberg.Reference("float"), 15.50), 
true,
+                               "should read: lower bound in range"},
+                       {iceberg.LessThan(iceberg.Reference("no_nan_or_null"), 
15.50), true,
+                               "should read: lower bound in range"},
+                       
{iceberg.LessThanEqual(iceberg.Reference("no_nulls_same_value_a"), "a"), true,
+                               "should read: lower bound in range"},
+                       {iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)), false,
+                               "should not read: id range below lower bound (5 
< 30)"},
+                       {iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue)), false,
+                               "should not read: id range below lower bound 
(30 is not < 30)"},
+                       {iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue+1)), true,
+                               "should read: one possible id"},
+                       {iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
+                               "should read: many possible ids"},
+                       {iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMinValue-25)), false,
+                               "should not read: id range below lower bound (5 
< 30)"},
+                       {iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMinValue-1)), false,
+                               "should not read: id range below lower bound 29 
< 30"},
+                       {iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMinValue)), true,
+                               "should read: one possible id"},
+                       {iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
+                               "should read: many possible ids"},
+                       {iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMaxValue+6)), false,
+                               "should not read: id range above upper bound 
(85 < 79)"},
+                       {iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMaxValue)), false,
+                               "should not read: id range above upper bound 
(79 is not > 79)"},
+                       {iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMaxValue-1)), true,
+                               "should read: one possible id"},
+                       {iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMaxValue-4)), true,
+                               "should read: many possible ids"},
+                       {iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue+6)), false,
+                               "should not read: id range is above upper bound 
(85 < 79)"},
+                       {iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue+1)), false,
+                               "should not read: id range above upper bound 
(80 > 79)"},
+                       {iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
+                               "should read: one possible id"},
+                       {iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
+                               "should read: many possible ids"},
+                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue-25)), false,
+                               "should not read: id below lower bound"},
+                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue-1)), false,
+                               "should not read: id below lower bound"},
+                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue)), true,
+                               "should read: id equal to lower bound"},
+                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue-4)), true,
+                               "should read: id between lower and upper 
bounds"},
+                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
+                               "should read: id equal to upper bound"},
+                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+1)), false,
+                               "should not read: id above upper bound"},
+                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+6)), false,
+                               "should not read: id above upper bound"},
+                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMinValue-25)), true,
+                               "should read: id below lower bound"},
+                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMinValue-1)), true,
+                               "should read: id below lower bound"},
+                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMinValue)), true,
+                               "should read: id equal to lower bound"},
+                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMaxValue-4)), true,
+                               "should read: id between lower and upper 
bounds"},
+                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
+                               "should read: id equal to upper bound"},
+                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+1)), true,
+                               "should read: id above upper bound"},
+                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+6)), true,
+                               "should read: id above upper bound"},
+                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue-25))), true,
+                               "should read: id below lower bound"},
+                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue-1))), true,
+                               "should read: id below lower bound"},
+                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), int32(IntMinValue))), 
true,
+                               "should read: id equal to lower bound"},
+                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue-4))), true,
+                               "should read: id between lower and upper 
bounds"},
+                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), int32(IntMaxValue))), 
true,
+                               "should read: id equal to upper bound"},
+                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+1))), true,
+                               "should read: id above upper bound"},
+                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+6))), true,
+                               "should read: id above upper bound"},
+                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMinValue-25), IntMinValue-24), false,
+                               "should not read: id below lower bound (5 < 30, 
6 < 30)"},
+                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMinValue-2), IntMinValue-1), false,
+                               "should not read: id below lower bound (28 < 
30, 29 < 30)"},
+                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMinValue-1), IntMinValue), true,
+                               "should read: id equal to lower bound (30 == 
30)"},
+                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMaxValue-4), IntMaxValue-3), true,
+                               "should read: id between lower and upper bounds 
(30 < 75 < 79, 30 < 76 < 79)"},
+                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMaxValue), IntMaxValue+1), true,
+                               "should read: id equal to upper bound (79 == 
79)"},
+                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMaxValue+1), IntMaxValue+2), false,
+                               "should not read: id above upper bound (80 > 
79, 81 > 79)"},
+                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMaxValue+6), IntMaxValue+7), false,
+                               "should not read: id above upper bound (85 > 
79, 86 > 79)"},
+                       
{iceberg.IsIn(iceberg.Reference("all_nulls_missing_nan"), "abc", "def"), false,
+                               "should skip: in on all nulls column"},
+                       {iceberg.IsIn(iceberg.Reference("some_nulls"), "abc", 
"def"), true,
+                               "should read: in on some nulls column"},
+                       {iceberg.IsIn(iceberg.Reference("no_nulls"), "abc", 
"def"), true,
+                               "should read: in on no nulls column"},
+                       
{iceberg.IsIn(iceberg.Reference("no_nulls_same_value_a"), "a", "b"), true,
+                               "should read: in on no nulls column"},
+                       {iceberg.IsIn(iceberg.Reference("float"), 0, -5.5), 
true,
+                               "should read: float equal to lower bound"},
+                       {iceberg.IsIn(iceberg.Reference("no_nan_or_null"), 0, 
-5.5), true,
+                               "should read: float equal to lower bound"},
+                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMinValue-25), IntMinValue-24), true,
+                               "should read: id below lower bound (5 < 30, 6 < 
30)"},
+                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMinValue-2), IntMinValue-1), true,
+                               "should read: id below lower bound (28 < 30, 29 
< 30)"},
+                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMinValue-1), IntMinValue), true,
+                               "should read: id equal to lower bound (30 == 
30)"},
+                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMaxValue-4), IntMaxValue-3), true,
+                               "should read: id between lower and upper bounds 
(30 < 75 < 79, 30 < 76 < 79)"},
+                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMaxValue), IntMaxValue+1), true,
+                               "should read: id equal to upper bound (79 == 
79)"},
+                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMaxValue+1), IntMaxValue+2), true,
+                               "should read: id above upper bound (80 > 79, 81 
> 79)"},
+                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMaxValue+6), IntMaxValue+7), true,
+                               "should read: id above upper bound (85 > 79, 86 
> 79)"},
+                       
{iceberg.NotIn(iceberg.Reference("all_nulls_missing_nan"), "abc", "def"), true,
+                               "should read: notIn on all nulls column"},
+                       {iceberg.NotIn(iceberg.Reference("some_nulls"), "abc", 
"def"), true,
+                               "should read: notIn on some nulls column"},
+                       {iceberg.NotIn(iceberg.Reference("no_nulls"), "abc", 
"def"), true,
+                               "should read: notIn on no nulls column"},
+                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"a"), true,
+                               "should read: range matches"},
+                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"aa"), true,
+                               "should read: range matches"},
+                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"dddd"), true,
+                               "should read: range matches"},
+                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"z"), true,
+                               "should read: range matches"},
+                       {iceberg.StartsWith(iceberg.Reference("no_nulls"), 
"a"), true,
+                               "should read: range matches"},
+                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"zzzz"), false,
+                               "should skip: range doesn't match"},
+                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"1"), false,
+                               "should skip: range doesn't match"},
+                       
{iceberg.StartsWith(iceberg.Reference("no_nulls_same_value_a"), "a"), true,
+                               "should read: all values start with the 
prefix"},
+                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"a"), true,
+                               "should read: range matches"},
+                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"aa"), true,
+                               "should read: range matches"},
+                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"dddd"), true,
+                               "should read: range matches"},
+                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"z"), true,
+                               "should read: range matches"},
+                       {iceberg.NotStartsWith(iceberg.Reference("no_nulls"), 
"a"), true,
+                               "should read: range matches"},
+                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"zzzz"), true,
+                               "should read: range matches"},
+                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"1"), true,
+                               "should read: range matches"},
+                       
{iceberg.NotStartsWith(iceberg.Reference("all_same_value_or_null"), "a"), true,
+                               "should read: range matches"},
+                       
{iceberg.NotStartsWith(iceberg.Reference("all_same_value_or_null"), "aa"), true,
+                               "should read: range matches"},
+                       
{iceberg.NotStartsWith(iceberg.Reference("all_same_value_or_null"), "A"), true,
+                               "should read: range matches"},
+                       // Iceberg does not implement SQL 3-way boolean logic, 
so the choice of an
+                       // all null column matching is by definition in order 
to surface more values
+                       // to the query engine to allow it to make its own 
decision
+                       
{iceberg.NotStartsWith(iceberg.Reference("all_nulls_missing_nan"), "A"), true,
+                               "should read: range matches"},
+                       
{iceberg.NotStartsWith(iceberg.Reference("no_nulls_same_value_a"), "a"), false,
+                               "should not read: all values start with the 
prefix"},
+               }
+
+               for _, tt := range tests {
+                       t.Run(tt.expr.String(), func(t *testing.T) {
+                               eval, err := newManifestEvaluator(spec, 
testSchema,
+                                       tt.expr, true)
+                               require.NoError(t, err)
+
+                               result, err := eval(manifest)
+                               require.NoError(t, err)
+                               assert.Equal(t, tt.expect, result, tt.msg)
+                       })
+               }
+       })
+}
diff --git a/visitors.go b/visitors.go
index 3428b2c..7525026 100644
--- a/visitors.go
+++ b/visitors.go
@@ -30,7 +30,7 @@ import (
 type BooleanExprVisitor[T any] interface {
        VisitTrue() T
        VisitFalse() T
-       VisitNot(childREsult T) T
+       VisitNot(childResult T) T
        VisitAnd(left, right T) T
        VisitOr(left, right T) T
        VisitUnbound(UnboundPredicate) T
@@ -395,470 +395,3 @@ func (rewriteNotVisitor) VisitUnbound(pred 
UnboundPredicate) BooleanExpression {
 func (rewriteNotVisitor) VisitBound(pred BoundPredicate) BooleanExpression {
        return pred
 }
-
-const (
-       rowsMightMatch, rowsMustMatch      = true, true
-       rowsCannotMatch, rowsMightNotMatch = false, false
-       inPredicateLimit                   = 200
-)
-
-// NewManifestEvaluator returns a function that can be used to evaluate 
whether a particular
-// manifest file has rows that might or might not match a given partition 
filter by using
-// the stats provided in the partitions 
(UpperBound/LowerBound/ContainsNull/ContainsNaN).
-func NewManifestEvaluator(spec PartitionSpec, schema *Schema, partitionFilter 
BooleanExpression, caseSensitive bool) (func(ManifestFile) (bool, error), 
error) {
-       partType := spec.PartitionType(schema)
-       partSchema := NewSchema(0, partType.FieldList...)
-       filter, err := RewriteNotExpr(partitionFilter)
-       if err != nil {
-               return nil, err
-       }
-
-       boundFilter, err := BindExpr(partSchema, filter, caseSensitive)
-       if err != nil {
-               return nil, err
-       }
-
-       return (&manifestEvalVisitor{partitionFilter: boundFilter}).Eval, nil
-}
-
-type manifestEvalVisitor struct {
-       partitionFields []FieldSummary
-       partitionFilter BooleanExpression
-}
-
-func (m *manifestEvalVisitor) Eval(manifest ManifestFile) (bool, error) {
-       if parts := manifest.Partitions(); len(parts) > 0 {
-               m.partitionFields = parts
-               return VisitExpr(m.partitionFilter, m)
-       }
-
-       return rowsMightMatch, nil
-}
-
-func allBoundCmp[T LiteralType](bound Literal, set Set[Literal], want int) 
bool {
-       val := bound.(TypedLiteral[T])
-       cmp := val.Comparator()
-
-       return set.All(func(e Literal) bool {
-               return cmp(val.Value(), e.(TypedLiteral[T]).Value()) == want
-       })
-}
-
-func allBoundCheck(bound Literal, set Set[Literal], want int) bool {
-       switch bound.Type().(type) {
-       case BooleanType:
-               return allBoundCmp[bool](bound, set, want)
-       case Int32Type:
-               return allBoundCmp[int32](bound, set, want)
-       case Int64Type:
-               return allBoundCmp[int64](bound, set, want)
-       case Float32Type:
-               return allBoundCmp[float32](bound, set, want)
-       case Float64Type:
-               return allBoundCmp[float64](bound, set, want)
-       case DateType:
-               return allBoundCmp[Date](bound, set, want)
-       case TimeType:
-               return allBoundCmp[Time](bound, set, want)
-       case TimestampType, TimestampTzType:
-               return allBoundCmp[Timestamp](bound, set, want)
-       case BinaryType, FixedType:
-               return allBoundCmp[[]byte](bound, set, want)
-       case StringType:
-               return allBoundCmp[string](bound, set, want)
-       case UUIDType:
-               return allBoundCmp[uuid.UUID](bound, set, want)
-       case DecimalType:
-               return allBoundCmp[Decimal](bound, set, want)
-       }
-       panic(ErrType)
-}
-
-func (m *manifestEvalVisitor) VisitIn(term BoundTerm, literals Set[Literal]) 
bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if field.LowerBound == nil {
-               return rowsCannotMatch
-       }
-
-       if literals.Len() > inPredicateLimit {
-               return rowsMightMatch
-       }
-
-       lower, err := LiteralFromBytes(term.Type(), *field.LowerBound)
-       if err != nil {
-               panic(err)
-       }
-
-       if allBoundCheck(lower, literals, 1) {
-               return rowsCannotMatch
-       }
-
-       if field.UpperBound != nil {
-               upper, err := LiteralFromBytes(term.Type(), *field.UpperBound)
-               if err != nil {
-                       panic(err)
-               }
-
-               if allBoundCheck(upper, literals, -1) {
-                       return rowsCannotMatch
-               }
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitNotIn(term BoundTerm, literals 
Set[Literal]) bool {
-       // because the bounds are not necessarily a min or max value, this 
cannot be answered using them
-       // notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X is a value 
in col
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitIsNan(term BoundTerm) bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if field.ContainsNaN != nil && !*field.ContainsNaN {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitNotNan(term BoundTerm) bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if field.ContainsNaN != nil && *field.ContainsNaN && 
!field.ContainsNull && field.LowerBound == nil {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitIsNull(term BoundTerm) bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if !field.ContainsNull {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitNotNull(term BoundTerm) bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       // ContainsNull encodes whether at least one partition value is null
-       // lowerBound is null if all partition values are null
-       allNull := field.ContainsNull && field.LowerBound == nil
-       if allNull && (term.Ref().Type().Equals(PrimitiveTypes.Float32) || 
term.Ref().Type().Equals(PrimitiveTypes.Float64)) {
-               // floating point types may include NaN values, which we check 
separately
-               // in case bounds don't include NaN values, ContainsNaN needs 
to be checked
-               allNull = field.ContainsNaN != nil && !*field.ContainsNaN
-       }
-
-       if allNull {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func getCmp[T LiteralType](b TypedLiteral[T]) func(Literal, Literal) int {
-       cmp := b.Comparator()
-       return func(l1, l2 Literal) int {
-               return cmp(l1.(TypedLiteral[T]).Value(), 
l2.(TypedLiteral[T]).Value())
-       }
-}
-
-func getCmpLiteral(boundary Literal) func(Literal, Literal) int {
-       switch l := boundary.(type) {
-       case TypedLiteral[bool]:
-               return getCmp(l)
-       case TypedLiteral[int32]:
-               return getCmp(l)
-       case TypedLiteral[int64]:
-               return getCmp(l)
-       case TypedLiteral[float32]:
-               return getCmp(l)
-       case TypedLiteral[float64]:
-               return getCmp(l)
-       case TypedLiteral[Date]:
-               return getCmp(l)
-       case TypedLiteral[Time]:
-               return getCmp(l)
-       case TypedLiteral[Timestamp]:
-               return getCmp(l)
-       case TypedLiteral[[]byte]:
-               return getCmp(l)
-       case TypedLiteral[string]:
-               return getCmp(l)
-       case TypedLiteral[uuid.UUID]:
-               return getCmp(l)
-       case TypedLiteral[Decimal]:
-               return getCmp(l)
-       }
-       panic(ErrType)
-}
-
-func (m *manifestEvalVisitor) VisitEqual(term BoundTerm, lit Literal) bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if field.LowerBound == nil || field.UpperBound == nil {
-               // values are all null and literal cannot contain null
-               return rowsCannotMatch
-       }
-
-       lower, err := LiteralFromBytes(term.Ref().Type(), *field.LowerBound)
-       if err != nil {
-               panic(err)
-       }
-
-       cmp := getCmpLiteral(lower)
-       if cmp(lower, lit) == 1 {
-               return rowsCannotMatch
-       }
-
-       upper, err := LiteralFromBytes(term.Ref().Type(), *field.UpperBound)
-       if err != nil {
-               panic(err)
-       }
-
-       if cmp(lit, upper) == 1 {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitNotEqual(term BoundTerm, lit Literal) bool {
-       // because bounds are not necessarily a min or max, this cannot be 
answered
-       // using them. notEq(col, X) with (X, Y) doesn't guarantee X is a value 
in col
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitGreaterEqual(term BoundTerm, lit Literal) 
bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if field.UpperBound == nil {
-               return rowsCannotMatch
-       }
-
-       upper, err := LiteralFromBytes(term.Ref().Type(), *field.UpperBound)
-       if err != nil {
-               panic(err)
-       }
-
-       if getCmpLiteral(upper)(lit, upper) == 1 {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitGreater(term BoundTerm, lit Literal) bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if field.UpperBound == nil {
-               return rowsCannotMatch
-       }
-
-       upper, err := LiteralFromBytes(term.Ref().Type(), *field.UpperBound)
-       if err != nil {
-               panic(err)
-       }
-
-       if getCmpLiteral(upper)(lit, upper) >= 0 {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitLessEqual(term BoundTerm, lit Literal) bool 
{
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if field.LowerBound == nil {
-               return rowsCannotMatch
-       }
-
-       lower, err := LiteralFromBytes(term.Ref().Type(), *field.LowerBound)
-       if err != nil {
-               panic(err)
-       }
-
-       if getCmpLiteral(lower)(lit, lower) == -1 {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitLess(term BoundTerm, lit Literal) bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       if field.LowerBound == nil {
-               return rowsCannotMatch
-       }
-
-       lower, err := LiteralFromBytes(term.Ref().Type(), *field.LowerBound)
-       if err != nil {
-               panic(err)
-       }
-
-       if getCmpLiteral(lower)(lit, lower) <= 0 {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-func (m *manifestEvalVisitor) VisitStartsWith(term BoundTerm, lit Literal) 
bool {
-       pos := term.Ref().Pos()
-       field := m.partitionFields[pos]
-
-       var prefix string
-       if val, ok := lit.(TypedLiteral[string]); ok {
-               prefix = val.Value()
-       } else {
-               prefix = string(lit.(TypedLiteral[[]byte]).Value())
-       }
-
-       lenPrefix := len(prefix)
-
-       if field.LowerBound == nil {
-               return rowsCannotMatch
-       }
-
-       lower, err := LiteralFromBytes(term.Ref().Type(), *field.LowerBound)
-       if err != nil {
-               panic(err)
-       }
-
-       // truncate lower bound so that its length is not greater than the 
length of prefix
-       var v string
-       switch l := lower.(type) {
-       case TypedLiteral[string]:
-               v = l.Value()
-               if len(v) > lenPrefix {
-                       v = v[:lenPrefix]
-               }
-       case TypedLiteral[[]byte]:
-               v = string(l.Value())
-               if len(v) > lenPrefix {
-                       v = v[:lenPrefix]
-               }
-       }
-
-       if v > prefix {
-               return rowsCannotMatch
-       }
-
-       if field.UpperBound == nil {
-               return rowsCannotMatch
-       }
-
-       upper, err := LiteralFromBytes(term.Ref().Type(), *field.UpperBound)
-       if err != nil {
-               panic(err)
-       }
-
-       switch u := upper.(type) {
-       case TypedLiteral[string]:
-               v = u.Value()
-               if len(v) > lenPrefix {
-                       v = v[:lenPrefix]
-               }
-       case TypedLiteral[[]byte]:
-               v = string(u.Value())
-               if len(v) > lenPrefix {
-                       v = v[:lenPrefix]
-               }
-       }
-
-       if v < prefix {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-// VisitNotStartsWith decides whether a manifest could hold partition values
-// that do not begin with the prefix carried by lit, using only the field
-// summary stored at the position of term's reference.
-//
-// The answer is rowsCannotMatch only when the summary reports no nulls, has
-// both bounds, and the lower and upper bound both begin with the prefix. Every
-// other case yields rowsMightMatch.
-//
-// Both bounds are decoded with LiteralFromBytes; a decode error panics. lit is
-// read as a string literal, or failing that as a []byte literal, and the
-// decoded bounds are asserted to the same literal type, so a mismatch panics.
-func (m *manifestEvalVisitor) VisitNotStartsWith(term BoundTerm, lit Literal) bool {
-       summary := m.partitionFields[term.Ref().Pos()]
-
-       // With nulls present or a bound missing, the bounds cannot rule anything out.
-       switch {
-       case summary.ContainsNull, summary.LowerBound == nil, summary.UpperBound == nil:
-               return rowsMightMatch
-       }
-
-       lower, err := LiteralFromBytes(term.Ref().Type(), *summary.LowerBound)
-       if err != nil {
-               panic(err)
-       }
-
-       upper, err := LiteralFromBytes(term.Ref().Type(), *summary.UpperBound)
-       if err != nil {
-               panic(err)
-       }
-
-       var prefix, lowerBound, upperBound string
-       switch p := lit.(type) {
-       case TypedLiteral[string]:
-               prefix = p.Value()
-               lowerBound = lower.(TypedLiteral[string]).Value()
-               upperBound = upper.(TypedLiteral[string]).Value()
-       default:
-               prefix = string(lit.(TypedLiteral[[]byte]).Value())
-               lowerBound = string(lower.(TypedLiteral[[]byte]).Value())
-               upperBound = string(upper.(TypedLiteral[[]byte]).Value())
-       }
-
-       // startsWithPrefix reports whether s is at least as long as the prefix
-       // and its leading bytes equal the prefix.
-       startsWithPrefix := func(s string) bool {
-               return len(s) >= len(prefix) && s[:len(prefix)] == prefix
-       }
-
-       // Only when both bounds begin with the prefix is the manifest skipped.
-       if startsWithPrefix(lowerBound) && startsWithPrefix(upperBound) {
-               return rowsCannotMatch
-       }
-
-       return rowsMightMatch
-}
-
-// VisitTrue always returns rowsMightMatch.
-func (m *manifestEvalVisitor) VisitTrue() bool { return rowsMightMatch }
-
-// VisitFalse always returns rowsCannotMatch.
-func (m *manifestEvalVisitor) VisitFalse() bool { return rowsCannotMatch }
-
-// VisitUnbound never returns: it panics unconditionally, ignoring the
-// predicate it is handed.
-func (m *manifestEvalVisitor) VisitUnbound(_ UnboundPredicate) bool {
-       panic("need bound predicate")
-}
-
-// VisitBound hands pred to VisitBoundPredicate with this visitor as the
-// second argument and returns its result.
-func (m *manifestEvalVisitor) VisitBound(pred BoundPredicate) bool { return VisitBoundPredicate(pred, m) }
-
-// VisitNot returns the negation of the child's result.
-func (m *manifestEvalVisitor) VisitNot(child bool) bool {
-       return !child
-}
-
-// VisitAnd returns true only when both child results are true.
-func (m *manifestEvalVisitor) VisitAnd(left, right bool) bool {
-       if !left {
-               return false
-       }
-       return right
-}
-
-// VisitOr returns true when at least one child result is true.
-func (m *manifestEvalVisitor) VisitOr(left, right bool) bool {
-       if left {
-               return true
-       }
-       return right
-}
diff --git a/visitors_test.go b/visitors_test.go
index 688c1cc..8b44236 100644
--- a/visitors_test.go
+++ b/visitors_test.go
@@ -576,484 +576,6 @@ func TestEvaluatorCmpTypes(t *testing.T) {
        }
 }
 
-func TestManifestEvaluator(t *testing.T) {
-       const (
-               IntMinValue, IntMaxValue = 30, 79
-       )
-
-       var (
-               IntMin, IntMax       = []byte{byte(IntMinValue), 0x00, 0x00, 
0x00}, []byte{byte(IntMaxValue), 0x00, 0x00, 0x00}
-               StringMin, StringMax = []byte("a"), []byte("z")
-               FloatMin, _          = iceberg.Float32Literal(0).MarshalBinary()
-               FloatMax, _          = 
iceberg.Float32Literal(20).MarshalBinary()
-               DblMin, _            = iceberg.Float64Literal(0).MarshalBinary()
-               DblMax, _            = 
iceberg.Float64Literal(20).MarshalBinary()
-               NanTrue, NanFalse    = true, false
-
-               testSchema = iceberg.NewSchema(1,
-                       iceberg.NestedField{ID: 1, Name: "id",
-                               Type: iceberg.PrimitiveTypes.Int32, Required: 
true},
-                       iceberg.NestedField{ID: 2, Name: 
"all_nulls_missing_nan",
-                               Type: iceberg.PrimitiveTypes.String, Required: 
false},
-                       iceberg.NestedField{ID: 3, Name: "some_nulls",
-                               Type: iceberg.PrimitiveTypes.String, Required: 
false},
-                       iceberg.NestedField{ID: 4, Name: "no_nulls",
-                               Type: iceberg.PrimitiveTypes.String, Required: 
false},
-                       iceberg.NestedField{ID: 5, Name: "float",
-                               Type: iceberg.PrimitiveTypes.Float32, Required: 
false},
-                       iceberg.NestedField{ID: 6, Name: "all_nulls_double",
-                               Type: iceberg.PrimitiveTypes.Float64, Required: 
false},
-                       iceberg.NestedField{ID: 7, Name: "all_nulls_no_nans",
-                               Type: iceberg.PrimitiveTypes.Float32, Required: 
false},
-                       iceberg.NestedField{ID: 8, Name: "all_nans",
-                               Type: iceberg.PrimitiveTypes.Float64, Required: 
false},
-                       iceberg.NestedField{ID: 9, Name: "both_nan_and_null",
-                               Type: iceberg.PrimitiveTypes.Float32, Required: 
false},
-                       iceberg.NestedField{ID: 10, Name: "no_nan_or_null",
-                               Type: iceberg.PrimitiveTypes.Float64, Required: 
false},
-                       iceberg.NestedField{ID: 11, Name: 
"all_nulls_missing_nan_float",
-                               Type: iceberg.PrimitiveTypes.Float32, Required: 
false},
-                       iceberg.NestedField{ID: 12, Name: 
"all_same_value_or_null",
-                               Type: iceberg.PrimitiveTypes.String, Required: 
false},
-                       iceberg.NestedField{ID: 13, Name: 
"no_nulls_same_value_a",
-                               Type: iceberg.PrimitiveTypes.Binary, Required: 
false},
-               )
-       )
-
-       partFields := make([]iceberg.PartitionField, 0, testSchema.NumFields())
-       for _, f := range testSchema.Fields() {
-               partFields = append(partFields, iceberg.PartitionField{
-                       Name:      f.Name,
-                       SourceID:  f.ID,
-                       FieldID:   f.ID,
-                       Transform: iceberg.IdentityTransform{},
-               })
-       }
-
-       spec := iceberg.NewPartitionSpec(partFields...)
-       manifestNoStats := iceberg.NewManifestV1Builder("", 0, 0, 0).Build()
-       manifest := iceberg.NewManifestV1Builder("", 0, 0, 0).Partitions(
-               []iceberg.FieldSummary{
-                       { // id
-                               ContainsNull: false,
-                               ContainsNaN:  nil,
-                               LowerBound:   &IntMin,
-                               UpperBound:   &IntMax,
-                       },
-                       { // all_nulls_missing_nan
-                               ContainsNull: true,
-                               ContainsNaN:  nil,
-                               LowerBound:   nil,
-                               UpperBound:   nil,
-                       },
-                       { // some_nulls
-                               ContainsNull: true,
-                               ContainsNaN:  nil,
-                               LowerBound:   &StringMin,
-                               UpperBound:   &StringMax,
-                       },
-                       { // no_nulls
-                               ContainsNull: false,
-                               ContainsNaN:  nil,
-                               LowerBound:   &StringMin,
-                               UpperBound:   &StringMax,
-                       },
-                       { // float
-                               ContainsNull: true,
-                               ContainsNaN:  nil,
-                               LowerBound:   &FloatMin,
-                               UpperBound:   &FloatMax,
-                       },
-                       { // all_nulls_double
-                               ContainsNull: true,
-                               ContainsNaN:  nil,
-                               LowerBound:   nil,
-                               UpperBound:   nil,
-                       },
-                       { // all_nulls_no_nans
-                               ContainsNull: true,
-                               ContainsNaN:  &NanFalse,
-                               LowerBound:   nil,
-                               UpperBound:   nil,
-                       },
-                       { // all_nans
-                               ContainsNull: false,
-                               ContainsNaN:  &NanTrue,
-                               LowerBound:   nil,
-                               UpperBound:   nil,
-                       },
-                       { // both_nan_and_null
-                               ContainsNull: true,
-                               ContainsNaN:  &NanTrue,
-                               LowerBound:   nil,
-                               UpperBound:   nil,
-                       },
-                       { // no_nan_or_null
-                               ContainsNull: false,
-                               ContainsNaN:  &NanFalse,
-                               LowerBound:   &DblMin,
-                               UpperBound:   &DblMax,
-                       },
-                       { // all_nulls_missing_nan_float
-                               ContainsNull: true,
-                               ContainsNaN:  nil,
-                               LowerBound:   nil,
-                               UpperBound:   nil,
-                       },
-                       { // all_same_value_or_null
-                               ContainsNull: true,
-                               ContainsNaN:  nil,
-                               LowerBound:   &StringMin,
-                               UpperBound:   &StringMin,
-                       },
-                       { // no_nulls_same_value_a
-                               ContainsNull: false,
-                               ContainsNaN:  nil,
-                               LowerBound:   &StringMin,
-                               UpperBound:   &StringMin,
-                       },
-               }).Build()
-
-       t.Run("all nulls", func(t *testing.T) {
-               tests := []struct {
-                       field    string
-                       expected bool
-                       msg      string
-               }{
-                       {"all_nulls_missing_nan", false, "should skip: all 
nulls column with non-floating type contains all null"},
-                       {"all_nulls_missing_nan_float", true, "should read: no 
NaN information may indicate presence of NaN value"},
-                       {"some_nulls", true, "should read: column with some 
nulls contains a non-null value"},
-                       {"no_nulls", true, "should read: non-null column 
contains a non-null value"},
-               }
-
-               for _, tt := range tests {
-                       eval, err := iceberg.NewManifestEvaluator(spec, 
testSchema,
-                               iceberg.NotNull(iceberg.Reference(tt.field)), 
true)
-                       require.NoError(t, err)
-
-                       result, err := eval(manifest)
-                       require.NoError(t, err)
-                       assert.Equal(t, tt.expected, result, tt.msg)
-               }
-       })
-
-       t.Run("no nulls", func(t *testing.T) {
-               tests := []struct {
-                       field    string
-                       expected bool
-                       msg      string
-               }{
-                       {"all_nulls_missing_nan", true, "should read: at least 
one null value in all null column"},
-                       {"some_nulls", true, "should read: column with some 
nulls contains a null value"},
-                       {"no_nulls", false, "should skip: non-null column 
contains no null values"},
-                       {"both_nan_and_null", true, "should read: 
both_nan_and_null column contains no null values"},
-               }
-
-               for _, tt := range tests {
-                       eval, err := iceberg.NewManifestEvaluator(spec, 
testSchema,
-                               iceberg.IsNull(iceberg.Reference(tt.field)), 
true)
-                       require.NoError(t, err)
-
-                       result, err := eval(manifest)
-                       require.NoError(t, err)
-                       assert.Equal(t, tt.expected, result, tt.msg)
-               }
-       })
-
-       t.Run("is nan", func(t *testing.T) {
-               tests := []struct {
-                       field    string
-                       expected bool
-                       msg      string
-               }{
-                       {"float", true, "should read: no information on if 
there are nan values in float column"},
-                       {"all_nulls_double", true, "should read: no NaN 
information may indicate presence of NaN value"},
-                       {"all_nulls_missing_nan_float", true, "should read: no 
NaN information may indicate presence of NaN value"},
-                       {"all_nulls_no_nans", false, "should skip: no nan 
column doesn't contain nan value"},
-                       {"all_nans", true, "should read: all_nans column 
contains nan value"},
-                       {"both_nan_and_null", true, "should read: 
both_nan_and_null column contains nan value"},
-                       {"no_nan_or_null", false, "should skip: no_nan_or_null 
column doesn't contain nan value"},
-               }
-
-               for _, tt := range tests {
-                       eval, err := iceberg.NewManifestEvaluator(spec, 
testSchema,
-                               iceberg.IsNaN(iceberg.Reference(tt.field)), 
true)
-                       require.NoError(t, err)
-
-                       result, err := eval(manifest)
-                       require.NoError(t, err)
-                       assert.Equal(t, tt.expected, result, tt.msg)
-               }
-       })
-
-       t.Run("not nan", func(t *testing.T) {
-               tests := []struct {
-                       field    string
-                       expected bool
-                       msg      string
-               }{
-                       {"float", true, "should read: no information on if 
there are nan values in float column"},
-                       {"all_nulls_double", true, "should read: all null 
column contains non nan value"},
-                       {"all_nulls_no_nans", true, "should read: no_nans 
column contains non nan value"},
-                       {"all_nans", false, "should skip: all nans 
columndoesn't contain non nan value"},
-                       {"both_nan_and_null", true, "should read: 
both_nan_and_null nans column contains non nan value"},
-                       {"no_nan_or_null", true, "should read: no_nan_or_null 
column contains non nan value"},
-               }
-
-               for _, tt := range tests {
-                       eval, err := iceberg.NewManifestEvaluator(spec, 
testSchema,
-                               iceberg.NotNaN(iceberg.Reference(tt.field)), 
true)
-                       require.NoError(t, err)
-
-                       result, err := eval(manifest)
-                       require.NoError(t, err)
-                       assert.Equal(t, tt.expected, result, tt.msg)
-               }
-       })
-
-       t.Run("test missing stats", func(t *testing.T) {
-               exprs := []iceberg.BooleanExpression{
-                       iceberg.LessThan(iceberg.Reference("id"), int32(5)),
-                       iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(30)),
-                       iceberg.EqualTo(iceberg.Reference("id"), int32(70)),
-                       iceberg.GreaterThan(iceberg.Reference("id"), int32(78)),
-                       iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(90)),
-                       iceberg.NotEqualTo(iceberg.Reference("id"), int32(101)),
-                       iceberg.IsNull(iceberg.Reference("id")),
-                       iceberg.NotNull(iceberg.Reference("id")),
-                       iceberg.IsNaN(iceberg.Reference("float")),
-                       iceberg.NotNaN(iceberg.Reference("float")),
-               }
-
-               for _, tt := range exprs {
-                       eval, err := iceberg.NewManifestEvaluator(spec, 
testSchema, tt, true)
-                       require.NoError(t, err)
-
-                       result, err := eval(manifestNoStats)
-                       require.NoError(t, err)
-                       assert.Truef(t, result, "should read when missing stats 
for expr: %s", tt)
-               }
-       })
-
-       t.Run("test exprs", func(t *testing.T) {
-               tests := []struct {
-                       expr   iceberg.BooleanExpression
-                       expect bool
-                       msg    string
-               }{
-                       
{iceberg.NewNot(iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25))),
-                               true, "should read: not(false)"},
-                       
{iceberg.NewNot(iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMinValue-25))),
-                               false, "should skip: not(true)"},
-                       {iceberg.NewAnd(
-                               iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
-                               
iceberg.GreaterThanEqual(iceberg.Reference("id"), int32(IntMinValue-30))),
-                               false, "should skip: and(false, true)"},
-                       {iceberg.NewAnd(
-                               iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
-                               
iceberg.GreaterThanEqual(iceberg.Reference("id"), int32(IntMaxValue+1))),
-                               false, "should skip: and(false, false)"},
-                       {iceberg.NewAnd(
-                               iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
-                               iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMinValue))),
-                               true, "should read: and(true, true)"},
-                       {iceberg.NewOr(
-                               iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
-                               
iceberg.GreaterThanEqual(iceberg.Reference("id"), int32(IntMaxValue+1))),
-                               false, "should skip: or(false, false)"},
-                       {iceberg.NewOr(
-                               iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)),
-                               
iceberg.GreaterThanEqual(iceberg.Reference("id"), int32(IntMaxValue-19))),
-                               true, "should read: or(false, true)"},
-                       {iceberg.LessThan(iceberg.Reference("some_nulls"), 
"1"), false,
-                               "should not read: id range below lower bound"},
-                       {iceberg.LessThan(iceberg.Reference("some_nulls"), 
"b"), true,
-                               "should read: lower bound in range"},
-                       {iceberg.LessThan(iceberg.Reference("float"), 15.50), 
true,
-                               "should read: lower bound in range"},
-                       {iceberg.LessThan(iceberg.Reference("no_nan_or_null"), 
15.50), true,
-                               "should read: lower bound in range"},
-                       
{iceberg.LessThanEqual(iceberg.Reference("no_nulls_same_value_a"), "a"), true,
-                               "should read: lower bound in range"},
-                       {iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue-25)), false,
-                               "should not read: id range below lower bound (5 
< 30)"},
-                       {iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue)), false,
-                               "should not read: id range below lower bound 
(30 is not < 30)"},
-                       {iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMinValue+1)), true,
-                               "should read: one possible id"},
-                       {iceberg.LessThan(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
-                               "should read: many possible ids"},
-                       {iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMinValue-25)), false,
-                               "should not read: id range below lower bound (5 
< 30)"},
-                       {iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMinValue-1)), false,
-                               "should not read: id range below lower bound 29 
< 30"},
-                       {iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMinValue)), true,
-                               "should read: one possible id"},
-                       {iceberg.LessThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
-                               "should read: many possible ids"},
-                       {iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMaxValue+6)), false,
-                               "should not read: id range above upper bound 
(85 < 79)"},
-                       {iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMaxValue)), false,
-                               "should not read: id range above upper bound 
(79 is not > 79)"},
-                       {iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMaxValue-1)), true,
-                               "should read: one possible id"},
-                       {iceberg.GreaterThan(iceberg.Reference("id"), 
int32(IntMaxValue-4)), true,
-                               "should read: many possible ids"},
-                       {iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue+6)), false,
-                               "should not read: id range is above upper bound 
(85 < 79)"},
-                       {iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue+1)), false,
-                               "should not read: id range above upper bound 
(80 > 79)"},
-                       {iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
-                               "should read: one possible id"},
-                       {iceberg.GreaterThanEqual(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
-                               "should read: many possible ids"},
-                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue-25)), false,
-                               "should not read: id below lower bound"},
-                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue-1)), false,
-                               "should not read: id below lower bound"},
-                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue)), true,
-                               "should read: id equal to lower bound"},
-                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue-4)), true,
-                               "should read: id between lower and upper 
bounds"},
-                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
-                               "should read: id equal to upper bound"},
-                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+1)), false,
-                               "should not read: id above upper bound"},
-                       {iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+6)), false,
-                               "should not read: id above upper bound"},
-                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMinValue-25)), true,
-                               "should read: id below lower bound"},
-                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMinValue-1)), true,
-                               "should read: id below lower bound"},
-                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMinValue)), true,
-                               "should read: id equal to lower bound"},
-                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMaxValue-4)), true,
-                               "should read: id between lower and upper 
bounds"},
-                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMaxValue)), true,
-                               "should read: id equal to upper bound"},
-                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+1)), true,
-                               "should read: id above upper bound"},
-                       {iceberg.NotEqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+6)), true,
-                               "should read: id above upper bound"},
-                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue-25))), true,
-                               "should read: id below lower bound"},
-                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMinValue-1))), true,
-                               "should read: id below lower bound"},
-                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), int32(IntMinValue))), 
true,
-                               "should read: id equal to lower bound"},
-                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue-4))), true,
-                               "should read: id between lower and upper 
bounds"},
-                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), int32(IntMaxValue))), 
true,
-                               "should read: id equal to upper bound"},
-                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+1))), true,
-                               "should read: id above upper bound"},
-                       
{iceberg.NewNot(iceberg.EqualTo(iceberg.Reference("id"), 
int32(IntMaxValue+6))), true,
-                               "should read: id above upper bound"},
-                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMinValue-25), IntMinValue-24), false,
-                               "should not read: id below lower bound (5 < 30, 
6 < 30)"},
-                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMinValue-2), IntMinValue-1), false,
-                               "should not read: id below lower bound (28 < 
30, 29 < 30)"},
-                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMinValue-1), IntMinValue), true,
-                               "should read: id equal to lower bound (30 == 
30)"},
-                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMaxValue-4), IntMaxValue-3), true,
-                               "should read: id between lower and upper bounds 
(30 < 75 < 79, 30 < 76 < 79)"},
-                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMaxValue), IntMaxValue+1), true,
-                               "should read: id equal to upper bound (79 == 
79)"},
-                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMaxValue+1), IntMaxValue+2), false,
-                               "should not read: id above upper bound (80 > 
79, 81 > 79)"},
-                       {iceberg.IsIn(iceberg.Reference("id"), 
int32(IntMaxValue+6), IntMaxValue+7), false,
-                               "should not read: id above upper bound (85 > 
79, 86 > 79)"},
-                       
{iceberg.IsIn(iceberg.Reference("all_nulls_missing_nan"), "abc", "def"), false,
-                               "should skip: in on all nulls column"},
-                       {iceberg.IsIn(iceberg.Reference("some_nulls"), "abc", 
"def"), true,
-                               "should read: in on some nulls column"},
-                       {iceberg.IsIn(iceberg.Reference("no_nulls"), "abc", 
"def"), true,
-                               "should read: in on no nulls column"},
-                       
{iceberg.IsIn(iceberg.Reference("no_nulls_same_value_a"), "a", "b"), true,
-                               "should read: in on no nulls column"},
-                       {iceberg.IsIn(iceberg.Reference("float"), 0, -5.5), 
true,
-                               "should read: float equal to lower bound"},
-                       {iceberg.IsIn(iceberg.Reference("no_nan_or_null"), 0, 
-5.5), true,
-                               "should read: float equal to lower bound"},
-                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMinValue-25), IntMinValue-24), true,
-                               "should read: id below lower bound (5 < 30, 6 < 
30)"},
-                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMinValue-2), IntMinValue-1), true,
-                               "should read: id below lower bound (28 < 30, 29 
< 30)"},
-                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMinValue-1), IntMinValue), true,
-                               "should read: id equal to lower bound (30 == 
30)"},
-                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMaxValue-4), IntMaxValue-3), true,
-                               "should read: id between lower and upper bounds 
(30 < 75 < 79, 30 < 76 < 79)"},
-                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMaxValue), IntMaxValue+1), true,
-                               "should read: id equal to upper bound (79 == 
79)"},
-                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMaxValue+1), IntMaxValue+2), true,
-                               "should read: id above upper bound (80 > 79, 81 
> 79)"},
-                       {iceberg.NotIn(iceberg.Reference("id"), 
int32(IntMaxValue+6), IntMaxValue+7), true,
-                               "should read: id above upper bound (85 > 79, 86 
> 79)"},
-                       
{iceberg.NotIn(iceberg.Reference("all_nulls_missing_nan"), "abc", "def"), true,
-                               "should read: notIn on all nulls column"},
-                       {iceberg.NotIn(iceberg.Reference("some_nulls"), "abc", 
"def"), true,
-                               "should read: notIn on some nulls column"},
-                       {iceberg.NotIn(iceberg.Reference("no_nulls"), "abc", 
"def"), true,
-                               "should read: notIn on no nulls column"},
-                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"a"), true,
-                               "should read: range matches"},
-                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"aa"), true,
-                               "should read: range matches"},
-                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"dddd"), true,
-                               "should read: range matches"},
-                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"z"), true,
-                               "should read: range matches"},
-                       {iceberg.StartsWith(iceberg.Reference("no_nulls"), 
"a"), true,
-                               "should read: range matches"},
-                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"zzzz"), false,
-                               "should skip: range doesn't match"},
-                       {iceberg.StartsWith(iceberg.Reference("some_nulls"), 
"1"), false,
-                               "should skip: range doesn't match"},
-                       
{iceberg.StartsWith(iceberg.Reference("no_nulls_same_value_a"), "a"), true,
-                               "should read: all values start with the 
prefix"},
-                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"a"), true,
-                               "should read: range matches"},
-                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"aa"), true,
-                               "should read: range matches"},
-                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"dddd"), true,
-                               "should read: range matches"},
-                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"z"), true,
-                               "should read: range matches"},
-                       {iceberg.NotStartsWith(iceberg.Reference("no_nulls"), 
"a"), true,
-                               "should read: range matches"},
-                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"zzzz"), true,
-                               "should read: range matches"},
-                       {iceberg.NotStartsWith(iceberg.Reference("some_nulls"), 
"1"), true,
-                               "should read: range matches"},
-                       
{iceberg.NotStartsWith(iceberg.Reference("all_same_value_or_null"), "a"), true,
-                               "should read: range matches"},
-                       
{iceberg.NotStartsWith(iceberg.Reference("all_same_value_or_null"), "aa"), true,
-                               "should read: range matches"},
-                       
{iceberg.NotStartsWith(iceberg.Reference("all_same_value_or_null"), "A"), true,
-                               "should read: range matches"},
-                       // Iceberg does not implement SQL 3-way boolean logic, 
so the choice of an
-                       // all null column matching is by definition in order 
to surface more values
-                       // to the query engine to allow it to make its own 
decision
-                       
{iceberg.NotStartsWith(iceberg.Reference("all_nulls_missing_nan"), "A"), true,
-                               "should read: range matches"},
-                       
{iceberg.NotStartsWith(iceberg.Reference("no_nulls_same_value_a"), "a"), false,
-                               "should not read: all values start with the 
prefix"},
-               }
-
-               for _, tt := range tests {
-                       t.Run(tt.expr.String(), func(t *testing.T) {
-                               eval, err := iceberg.NewManifestEvaluator(spec, 
testSchema,
-                                       tt.expr, true)
-                               require.NoError(t, err)
-
-                               result, err := eval(manifest)
-                               require.NoError(t, err)
-                               assert.Equal(t, tt.expect, result, tt.msg)
-                       })
-               }
-       })
-}
-
 func TestRewriteNot(t *testing.T) {
        tests := []struct {
                expr, expected iceberg.BooleanExpression

(iceberg-go) branch main updated: refactor(evaluators): shift evaluator code into the table package for future development (#123)

Reply via email to