This is an automated email from the ASF dual-hosted git repository.

timbrown pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git


The following commit(s) were added to refs/heads/main by this push:
     new 4db9bb86 add iceberg stats when reading snapshot
4db9bb86 is described below

commit 4db9bb868655eea3dcc15c4692323f68fef4f454
Author: Timothy Brown <[email protected]>
AuthorDate: Thu Jan 2 16:58:10 2025 -0800

    add iceberg stats when reading snapshot
---
 .../xtable/iceberg/IcebergConversionSource.java      |  3 ++-
 .../xtable/iceberg/TestIcebergConversionSource.java  | 20 +++++++++++---------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java b/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
index c84fb196..0d400e28 100644
--- a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
+++ b/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
@@ -145,7 +145,8 @@ public class IcebergConversionSource implements ConversionSource<Snapshot> {
     Snapshot currentSnapshot = iceTable.currentSnapshot();
     InternalTable irTable = getTable(currentSnapshot);
 
-    TableScan scan = iceTable.newScan().useSnapshot(currentSnapshot.snapshotId());
+    TableScan scan =
+        iceTable.newScan().useSnapshot(currentSnapshot.snapshotId()).includeColumnStats();
     PartitionSpec partitionSpec = iceTable.spec();
     List<PartitionFileGroup> partitionedDataFiles;
     try (CloseableIterable<FileScanTask> files = scan.planFiles()) {
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java b/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
index 12a02a11..4006b543 100644
--- a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
+++ b/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
@@ -160,8 +160,7 @@ class TestIcebergConversionSource {
       PartitionValue partitionEntry = partitionValues.iterator().next();
       assertEquals(
           "cs_sold_date_sk", partitionEntry.getPartitionField().getSourceField().getName());
-      // TODO generate test with column stats
-      assertEquals(0, internalDataFile.getColumnStats().size());
+      assertEquals(7, internalDataFile.getColumnStats().size());
     }
   }
 
@@ -202,12 +201,12 @@ class TestIcebergConversionSource {
     Snapshot snapshot5 = catalogSales.currentSnapshot();
     Snapshot snapshot4 = catalogSales.snapshot(snapshot5.parentId());
 
-    validateTableChangeDiffSize(catalogSales, snapshot1, 5, 0);
-    validateTableChangeDiffSize(catalogSales, snapshot2, 0, 3);
-    validateTableChangeDiffSize(catalogSales, snapshot3, 5, 0);
+    validateTableChangeDiffSize(catalogSales, snapshot1, 5, 0, 7);
+    validateTableChangeDiffSize(catalogSales, snapshot2, 0, 3, 7);
+    validateTableChangeDiffSize(catalogSales, snapshot3, 5, 0, 7);
     // transaction related snapshot verification
-    validateTableChangeDiffSize(catalogSales, snapshot4, 0, 1);
-    validateTableChangeDiffSize(catalogSales, snapshot5, 1, 0);
+    validateTableChangeDiffSize(catalogSales, snapshot4, 0, 1, 7);
+    validateTableChangeDiffSize(catalogSales, snapshot5, 1, 0, 7);
 
     assertEquals(4, catalogSales.history().size());
     catalogSales.expireSnapshots().expireSnapshotId(snapshot1.snapshotId()).commit();
@@ -242,7 +241,7 @@ class TestIcebergConversionSource {
     catalogSales.updateSpec().removeField("cs_sold_date_sk").commit();
     Snapshot snapshot8 = catalogSales.currentSnapshot();
 
-    validateTableChangeDiffSize(catalogSales, snapshot7, 1, 2);
+    validateTableChangeDiffSize(catalogSales, snapshot7, 1, 2, 7);
     assertEquals(snapshot7, snapshot8);
   }
 
@@ -311,10 +310,13 @@ class TestIcebergConversionSource {
   }
 
   private void validateTableChangeDiffSize(
-      Table table, Snapshot snapshot, int addedFiles, int removedFiles) {
+      Table table, Snapshot snapshot, int addedFiles, int removedFiles, int numberOfColumns) {
     IcebergConversionSource conversionSource = getIcebergConversionSource(table);
     TableChange tableChange = conversionSource.getTableChangeForCommit(snapshot);
     assertEquals(addedFiles, tableChange.getFilesDiff().getFilesAdded().size());
+    assertTrue(
+        tableChange.getFilesDiff().getFilesAdded().stream()
+            .allMatch(file -> file.getColumnStats().size() == numberOfColumns));
     assertEquals(removedFiles, tableChange.getFilesDiff().getFilesRemoved().size());
   }
 

Reply via email to