tanmayrauth commented on code in PR #1358:
URL: https://github.com/apache/iceberg-go/pull/1358#discussion_r3521720669


##########
table/scanner_internal_test.go:
##########
@@ -392,6 +393,133 @@ func TestBuildPartitionEvaluatorWithInvalidSpecID(t 
*testing.T) {
        assert.ErrorContains(t, err, "id 999")
 }
 
+func TestTimeTravelManifestPruningUsesSnapshotSchema(t *testing.T) {
+       spec := iceberg.NewPartitionSpecID(0, iceberg.PartitionField{
+               SourceIDs: []int{1},
+               FieldID:   1000,
+               Name:      "id",
+               Transform: iceberg.IdentityTransform{},
+       })
+       scan, _, snapshotID := newSchemaEvolutionScan(t, &spec, 
iceio.NewMemFS())
+
+       lower := int32Bound(10)
+       upper := int32Bound(20)
+       manifest := iceberg.NewManifestFile(2, 
"mem://default/table/metadata/manifest.avro", 100, int32(spec.ID()), 
snapshotID).
+               Partitions([]iceberg.FieldSummary{{
+                       ContainsNull: false,
+                       LowerBound:   &lower,
+                       UpperBound:   &upper,
+               }}).
+               Build()
+
+       eval, err := scan.buildManifestEvaluator(spec.ID())

Review Comment:
    This drives scan.buildManifestEvaluator directly — the dead method above — 
rather than the production manifest-pruning path in 
fetchPartitionSpecFilteredManifestsWithSchema. I checked the gap: forcing that 
production path to use CurrentSchema() (the exact bug this PR fixes) leaves 
this test and the entire ./table suite green. Routing this through PlanFiles 
(like the metrics test uses collectManifestEntries) would make it a real 
regression guard.



##########
table/scanner.go:
##########
@@ -403,24 +423,34 @@ func appendMissingLineageFields(s *iceberg.Schema, 
lineageFields []iceberg.Neste
 }
 
 func (scan *Scan) buildPartitionProjection(specID int) 
(iceberg.BooleanExpression, error) {
-       return buildPartitionProjection(specID, scan.metadata, scan.rowFilter, 
scan.caseSensitive)
+       schema, err := scan.effectiveSchema()
+       if err != nil {
+               return nil, err
+       }
+
+       return buildPartitionProjection(specID, scan.metadata, schema, 
scan.rowFilter, scan.caseSensitive)
 }
 
-func buildPartitionProjection(specID int, meta Metadata, rowFilter 
iceberg.BooleanExpression, caseSensitive bool) (iceberg.BooleanExpression, 
error) {
+func buildPartitionProjection(specID int, meta Metadata, schema 
*iceberg.Schema, rowFilter iceberg.BooleanExpression, caseSensitive bool) 
(iceberg.BooleanExpression, error) {
        spec := meta.PartitionSpecByID(specID)
        if spec == nil {
                return nil, fmt.Errorf("%w: id %d", ErrPartitionSpecNotFound, 
specID)
        }
-       project := newInclusiveProjection(meta.CurrentSchema(), *spec, 
caseSensitive)
+       project := newInclusiveProjection(schema, *spec, caseSensitive)
 
        return project(rowFilter)
 }
 
 func (scan *Scan) buildManifestEvaluator(specID int) 
(func(iceberg.ManifestFile) (bool, error), error) {

Review Comment:
    After this refactor buildManifestEvaluator and buildPartitionEvaluator 
(scanner.go:468) have no production callers left — PlanFiles goes through 
fetchPartitionSpecFilteredManifestsWithSchema / 
collectManifestEntriesWithSchema,  which build evaluators inline via 
partitionFiltersForSchema(schema). These two methods (and the 
scan.partitionFilters field at :243 they read) are reachable only from the 
internal tests now. Worth removing them, unless they're kept deliberately.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to