This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 55bdfbf8 feat(table): export StructLike and GetPartitionRecord (#791)
55bdfbf8 is described below

commit 55bdfbf836359142f7c010b0b80f0b7e06af2dca
Author: Tobias Pütz <[email protected]>
AuthorDate: Tue Mar 17 18:18:27 2026 +0100

    feat(table): export StructLike and GetPartitionRecord (#791)
    
    PartitionRecord and GetPartitionRecord are the only way to go from a
    DataFile to something PartitionToPath accepts. Keeping them unexported
    forces external consumers to reimplement the same conversion.
    
    Also deduplicates the manual record construction in the pos delete
    writer.
---
 exprs.go                                      | 16 ++++++++--------
 partitions.go                                 |  2 +-
 table/partitioned_fanout_writer_test.go       |  2 +-
 table/pos_delete_partitioned_fanout_writer.go |  9 +--------
 table/scanner.go                              | 12 ++++++++----
 table/snapshots.go                            |  4 ++--
 utils.go                                      |  8 ++++----
 visitors.go                                   | 10 +++++-----
 8 files changed, 30 insertions(+), 33 deletions(-)

diff --git a/exprs.go b/exprs.go
index 68ced28c..ed868d4f 100644
--- a/exprs.go
+++ b/exprs.go
@@ -338,8 +338,8 @@ type BoundTerm interface {
        Ref() BoundReference
        Type() Type
 
-       evalToLiteral(structLike) Optional[Literal]
-       evalIsNull(structLike) bool
+       evalToLiteral(StructLike) Optional[Literal]
+       evalIsNull(StructLike) bool
 }
 
 // unbound is a generic interface representing something that is not yet bound
@@ -485,7 +485,7 @@ func (b *boundRef[T]) Ref() BoundReference { return b }
 func (b *boundRef[T]) Field() NestedField  { return b.field }
 func (b *boundRef[T]) Type() Type          { return b.field.Type }
 
-func (b *boundRef[T]) eval(st structLike) Optional[T] {
+func (b *boundRef[T]) eval(st StructLike) Optional[T] {
        switch v := b.acc.Get(st).(type) {
        case nil:
                return Optional[T]{}
@@ -506,7 +506,7 @@ func (b *boundRef[T]) eval(st structLike) Optional[T] {
        }
 }
 
-func (b *boundRef[T]) evalToLiteral(st structLike) Optional[Literal] {
+func (b *boundRef[T]) evalToLiteral(st StructLike) Optional[Literal] {
        v := b.eval(st)
        if !v.Valid {
                return Optional[Literal]{}
@@ -520,7 +520,7 @@ func (b *boundRef[T]) evalToLiteral(st structLike) Optional[Literal] {
        return Optional[Literal]{Val: lit, Valid: true}
 }
 
-func (b *boundRef[T]) evalIsNull(st structLike) bool {
+func (b *boundRef[T]) evalIsNull(st StructLike) bool {
        v := b.eval(st)
 
        return !v.Valid
@@ -605,7 +605,7 @@ type BoundUnaryPredicate interface {
 type bound[T LiteralType] interface {
        BoundTerm
 
-       eval(structLike) Optional[T]
+       eval(StructLike) Optional[T]
 }
 
 func newBoundUnaryPred[T LiteralType](op Operation, term BoundTerm) BoundUnaryPredicate {
@@ -1048,10 +1048,10 @@ func (b *BoundTransform) Equals(other BoundTerm) bool {
        return b.transform.Equals(rhs.transform) && b.term.Equals(rhs.term)
 }
 
-func (b *BoundTransform) evalToLiteral(st structLike) Optional[Literal] {
+func (b *BoundTransform) evalToLiteral(st StructLike) Optional[Literal] {
        return b.transform.Apply(b.term.evalToLiteral(st))
 }
 
-func (b *BoundTransform) evalIsNull(st structLike) bool {
+func (b *BoundTransform) evalIsNull(st StructLike) bool {
        return !b.evalToLiteral(st).Valid
 }
diff --git a/partitions.go b/partitions.go
index bd556cdd..0b92dae6 100644
--- a/partitions.go
+++ b/partitions.go
@@ -479,7 +479,7 @@ func (ps *PartitionSpec) PartitionType(schema *Schema) *StructType {
 //
 // This does not apply the transforms to the data, it is assumed the provided data
 // has already been transformed appropriately.
-func (ps *PartitionSpec) PartitionToPath(data structLike, sc *Schema) string {
+func (ps *PartitionSpec) PartitionToPath(data StructLike, sc *Schema) string {
        partType := ps.PartitionType(sc)
 
        if len(partType.FieldList) == 0 {
diff --git a/table/partitioned_fanout_writer_test.go b/table/partitioned_fanout_writer_test.go
index 9c95885c..57495094 100644
--- a/table/partitioned_fanout_writer_test.go
+++ b/table/partitioned_fanout_writer_test.go
@@ -151,7 +151,7 @@ func (s *FanoutWriterTestSuite) testTransformPartition(transform iceberg.Transfo
                fileCount++
                totalRecords += dataFile.Count()
 
-               partitionRec := getPartitionRecord(dataFile, spec.PartitionType(icebergSchema))
+               partitionRec := GetPartitionRecord(dataFile, spec.PartitionType(icebergSchema))
                partitionPath := spec.PartitionToPath(partitionRec, icebergSchema)
                partitionPaths[partitionPath] += dataFile.Count()
        }
diff --git a/table/pos_delete_partitioned_fanout_writer.go b/table/pos_delete_partitioned_fanout_writer.go
index f03c7019..fe54dba9 100644
--- a/table/pos_delete_partitioned_fanout_writer.go
+++ b/table/pos_delete_partitioned_fanout_writer.go
@@ -135,14 +135,7 @@ func (p *positionDeletePartitionedFanoutWriter) partitionPath(partitionContext p
                return "", fmt.Errorf("unexpected missing partition spec in metadata for spec id %d", partitionContext.specID)
        }
 
-       data := make(partitionRecord, spec.NumFields())
-       for i, field := range spec.Fields() {
-               val, ok := partitionContext.partitionData[field.FieldID]
-               if !ok {
-                       return "", fmt.Errorf("unexpected missing partition value for field id %d in spec id %d", field.FieldID, partitionContext.specID)
-               }
-               data[i] = val
-       }
+       data := newPartitionRecord(partitionContext.partitionData, spec.PartitionType(p.schema))
 
        return spec.PartitionToPath(data, p.schema), nil
 }
diff --git a/table/scanner.go b/table/scanner.go
index 0d1a0dbd..fde1fc52 100644
--- a/table/scanner.go
+++ b/table/scanner.go
@@ -119,9 +119,7 @@ func (m *manifestEntries) addPositionalDeleteEntry(e iceberg.ManifestEntry) {
        m.positionalDeleteEntries = append(m.positionalDeleteEntries, e)
 }
 
-func getPartitionRecord(dataFile iceberg.DataFile, partitionType *iceberg.StructType) partitionRecord {
-       partitionData := dataFile.Partition()
-
+func newPartitionRecord(partitionData map[int]any, partitionType *iceberg.StructType) partitionRecord {
        out := make(partitionRecord, len(partitionType.FieldList))
        for i, f := range partitionType.FieldList {
                out[i] = partitionData[f.ID]
@@ -130,6 +128,12 @@ func getPartitionRecord(dataFile iceberg.DataFile, partitionType *iceberg.Struct
        return out
 }
 
+// GetPartitionRecord converts a DataFile's partition map into a positional
+// record ordered by the fields of the given partition struct type.
+func GetPartitionRecord(dataFile iceberg.DataFile, partitionType *iceberg.StructType) iceberg.StructLike {
+       return newPartitionRecord(dataFile.Partition(), partitionType)
+}
+
 func openManifest(io io.IO, manifest iceberg.ManifestFile,
        partitionFilter, metricsEval func(iceberg.DataFile) (bool, error),
 ) ([]iceberg.ManifestEntry, error) {
@@ -284,7 +288,7 @@ func (scan *Scan) buildPartitionEvaluator(specID int) (func(iceberg.DataFile) (b
        }
 
        return func(d iceberg.DataFile) (bool, error) {
-               return fn(getPartitionRecord(d, partType))
+               return fn(GetPartitionRecord(d, partType))
        }, nil
 }
 
diff --git a/table/snapshots.go b/table/snapshots.go
index fbdd47de..1ae60e52 100644
--- a/table/snapshots.go
+++ b/table/snapshots.go
@@ -413,7 +413,7 @@ func (s *SnapshotSummaryCollector) addFile(df iceberg.DataFile, sc *iceberg.Sche
 
        if len(df.Partition()) > 0 {
                partitionPath := spec.PartitionToPath(
-                       getPartitionRecord(df, spec.PartitionType(sc)), sc)
+                       GetPartitionRecord(df, spec.PartitionType(sc)), sc)
 
                return s.updatePartitionMetrics(partitionPath, df, true)
        }
@@ -428,7 +428,7 @@ func (s *SnapshotSummaryCollector) removeFile(df iceberg.DataFile, sc *iceberg.S
 
        if len(df.Partition()) > 0 {
                partitionPath := spec.PartitionToPath(
-                       getPartitionRecord(df, spec.PartitionType(sc)), sc)
+                       GetPartitionRecord(df, spec.PartitionType(sc)), sc)
 
                return s.updatePartitionMetrics(partitionPath, df, false)
        }
diff --git a/utils.go b/utils.go
index a1dec28b..36a798db 100644
--- a/utils.go
+++ b/utils.go
@@ -43,8 +43,8 @@ type Optional[T any] struct {
        Valid bool
 }
 
-// represents a single row in a record
-type structLike interface {
+// StructLike represents a single row in a record.
+type StructLike interface {
        // Size returns the number of columns in this row
        Size() int
        // Get returns the value in the requested column,
@@ -64,11 +64,11 @@ func (a *accessor) String() string {
        return fmt.Sprintf("Accessor(position=%d, inner=%s)", a.pos, a.inner)
 }
 
-func (a *accessor) Get(s structLike) any {
+func (a *accessor) Get(s StructLike) any {
        val, inner := s.Get(a.pos), a
        for val != nil && inner.inner != nil {
                inner = inner.inner
-               val = val.(structLike).Get(inner.pos)
+               val = val.(StructLike).Get(inner.pos)
        }
 
        return val
diff --git a/visitors.go b/visitors.go
index 3debc2ea..9564a4ea 100644
--- a/visitors.go
+++ b/visitors.go
@@ -186,7 +186,7 @@ func (*bindVisitor) VisitBound(pred BoundPredicate) BooleanExpression {
 // ExpressionEvaluator returns a function which can be used to evaluate a given expression
 // as long as a structlike value is passed which operates like and matches the passed in
 // schema.
-func ExpressionEvaluator(s *Schema, unbound BooleanExpression, caseSensitive bool) (func(structLike) (bool, error), error) {
+func ExpressionEvaluator(s *Schema, unbound BooleanExpression, caseSensitive bool) (func(StructLike) (bool, error), error) {
        bound, err := BindExpr(s, unbound, caseSensitive)
        if err != nil {
                return nil, err
@@ -197,10 +197,10 @@ func ExpressionEvaluator(s *Schema, unbound BooleanExpression, caseSensitive boo
 
 type exprEvaluator struct {
        bound BooleanExpression
-       st    structLike
+       st    StructLike
 }
 
-func (e *exprEvaluator) Eval(st structLike) (bool, error) {
+func (e *exprEvaluator) Eval(st StructLike) (bool, error) {
        e.st = st
 
        return VisitExpr(e.bound, e)
@@ -283,7 +283,7 @@ func nullsFirstCmp[T LiteralType](cmp Comparator[T], v1, v2 Optional[T]) int {
        return cmp(v1.Val, v2.Val)
 }
 
-func typedCmp[T LiteralType](st structLike, term BoundTerm, lit Literal) int {
+func typedCmp[T LiteralType](st StructLike, term BoundTerm, lit Literal) int {
        v := term.(bound[T]).eval(st)
        var l Optional[T]
 
@@ -296,7 +296,7 @@ func typedCmp[T LiteralType](st structLike, term BoundTerm, lit Literal) int {
        return nullsFirstCmp(rhs.Comparator(), v, l)
 }
 
-func doCmp(st structLike, term BoundTerm, lit Literal) int {
+func doCmp(st StructLike, term BoundTerm, lit Literal) int {
        // we already properly casted and converted everything during binding
        // so we can type assert based on the term type
        switch term.Type().(type) {

Reply via email to