mzzz-zzm commented on code in PR #1039:
URL: https://github.com/apache/iceberg-go/pull/1039#discussion_r3212343435


##########
manifest_test.go:
##########
@@ -2115,6 +2115,67 @@ func (m *ManifestTestSuite) 
TestManifestRoundTripSortOrderID() {
        m.Equal(expectedSortOrderID, *got)
 }
 
+// writeManifestWithDistinctCounts builds a single-entry manifest at the given
+// version with distinct_counts populated for one column, round-trips it 
through
+// WriteManifest + ReadManifest, and returns the distinct counts observed by
+// the reader.
+func (m *ManifestTestSuite) writeManifestWithDistinctCounts(version int) 
map[int]int64 {
+       partitionSpec := NewPartitionSpecID(0)
+       snapshotID := int64(12345678)
+       seqNum := int64(9876)
+
+       dataFileBuilder, err := NewDataFileBuilder(
+               partitionSpec,
+               EntryContentData,
+               "s3://bucket/ns/table/data/distinct.parquet",
+               ParquetFile,
+               map[int]any{},
+               map[int]string{},
+               map[int]int{},
+               10,
+               10*1000,
+       )
+       m.Require().NoError(err)
+       dataFileBuilder.DistinctValueCounts(map[int]int64{1: 42})
+
+       var buf bytes.Buffer
+       file, err := WriteManifest(
+               "s3://bucket/ns/table/metadata/distinct.avro", &buf, version,
+               partitionSpec,
+               NewSchema(0,
+                       NestedField{ID: 1, Name: "id", Type: Int64Type{}, 
Required: true},
+               ),
+               snapshotID,
+               []ManifestEntry{NewManifestEntry(
+                       EntryStatusADDED,
+                       &snapshotID,
+                       &seqNum, &seqNum,
+                       dataFileBuilder.Build(),
+               )},
+       )
+       m.Require().NoError(err)
+
+       entries, err := ReadManifest(file, &buf, false)
+       m.Require().NoError(err)
+       m.Require().Len(entries, 1)
+
+       return entries[0].DataFile().DistinctValueCounts()
+}
+
+// TestWriteManifestV3OmitsDistinctCounts verifies that v3 manifest writers do
+// not surface data_file.distinct_counts on the read side (deprecated by the v3
+// spec; parity with Java apache/iceberg#12182). Today this passes because none
+// of the per-version Avro record schemas in internal/avro_schemas.go declare
+// a distinct_counts field, so the field is dropped at encode time on every
+// version. The v3writerImpl.prepareEntry guard that clears DistinctCounts is
+// forward-compatibility insurance: if the v2 schema is later fixed to declare
+// distinct_counts (the v2 spec requires it; tracked separately), this test
+// keeps the v3 path compliant without further code changes.
+func (m *ManifestTestSuite) TestWriteManifestV3OmitsDistinctCounts() {
+       got := m.writeManifestWithDistinctCounts(3)
+       m.Empty(got, "v3 manifests must omit distinct_counts")

Review Comment:
   Fixed. The test now asserts the new code path directly:
   
   ```go
   df, ok := entry.DataFile().(*dataFile)
   m.Require().True(ok)
   m.Nil(df.DistinctCounts, "v3 writer must clear DistinctCounts on the entry's 
*dataFile")



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to