This is an automated email from the ASF dual-hosted git repository.
timbrown pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git
The following commit(s) were added to refs/heads/main by this push:
new 4db9bb86 add iceberg stats when reading snapshot
4db9bb86 is described below
commit 4db9bb868655eea3dcc15c4692323f68fef4f454
Author: Timothy Brown <[email protected]>
AuthorDate: Thu Jan 2 16:58:10 2025 -0800
add iceberg stats when reading snapshot
---
.../xtable/iceberg/IcebergConversionSource.java | 3 ++-
.../xtable/iceberg/TestIcebergConversionSource.java | 20 +++++++++++---------
2 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java b/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
index c84fb196..0d400e28 100644
--- a/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
+++ b/xtable-core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java
@@ -145,7 +145,8 @@ public class IcebergConversionSource implements ConversionSource<Snapshot> {
Snapshot currentSnapshot = iceTable.currentSnapshot();
InternalTable irTable = getTable(currentSnapshot);
- TableScan scan = iceTable.newScan().useSnapshot(currentSnapshot.snapshotId());
+ TableScan scan =
+ iceTable.newScan().useSnapshot(currentSnapshot.snapshotId()).includeColumnStats();
PartitionSpec partitionSpec = iceTable.spec();
List<PartitionFileGroup> partitionedDataFiles;
try (CloseableIterable<FileScanTask> files = scan.planFiles()) {
diff --git a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java b/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
index 12a02a11..4006b543 100644
--- a/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
+++ b/xtable-core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionSource.java
@@ -160,8 +160,7 @@ class TestIcebergConversionSource {
PartitionValue partitionEntry = partitionValues.iterator().next();
assertEquals(
"cs_sold_date_sk",
partitionEntry.getPartitionField().getSourceField().getName());
- // TODO generate test with column stats
- assertEquals(0, internalDataFile.getColumnStats().size());
+ assertEquals(7, internalDataFile.getColumnStats().size());
}
}
@@ -202,12 +201,12 @@ class TestIcebergConversionSource {
Snapshot snapshot5 = catalogSales.currentSnapshot();
Snapshot snapshot4 = catalogSales.snapshot(snapshot5.parentId());
- validateTableChangeDiffSize(catalogSales, snapshot1, 5, 0);
- validateTableChangeDiffSize(catalogSales, snapshot2, 0, 3);
- validateTableChangeDiffSize(catalogSales, snapshot3, 5, 0);
+ validateTableChangeDiffSize(catalogSales, snapshot1, 5, 0, 7);
+ validateTableChangeDiffSize(catalogSales, snapshot2, 0, 3, 7);
+ validateTableChangeDiffSize(catalogSales, snapshot3, 5, 0, 7);
// transaction related snapshot verification
- validateTableChangeDiffSize(catalogSales, snapshot4, 0, 1);
- validateTableChangeDiffSize(catalogSales, snapshot5, 1, 0);
+ validateTableChangeDiffSize(catalogSales, snapshot4, 0, 1, 7);
+ validateTableChangeDiffSize(catalogSales, snapshot5, 1, 0, 7);
assertEquals(4, catalogSales.history().size());
catalogSales.expireSnapshots().expireSnapshotId(snapshot1.snapshotId()).commit();
@@ -242,7 +241,7 @@ class TestIcebergConversionSource {
catalogSales.updateSpec().removeField("cs_sold_date_sk").commit();
Snapshot snapshot8 = catalogSales.currentSnapshot();
- validateTableChangeDiffSize(catalogSales, snapshot7, 1, 2);
+ validateTableChangeDiffSize(catalogSales, snapshot7, 1, 2, 7);
assertEquals(snapshot7, snapshot8);
}
@@ -311,10 +310,13 @@ class TestIcebergConversionSource {
}
private void validateTableChangeDiffSize(
- Table table, Snapshot snapshot, int addedFiles, int removedFiles) {
+ Table table, Snapshot snapshot, int addedFiles, int removedFiles, int numberOfColumns) {
IcebergConversionSource conversionSource =
getIcebergConversionSource(table);
TableChange tableChange =
conversionSource.getTableChangeForCommit(snapshot);
assertEquals(addedFiles,
tableChange.getFilesDiff().getFilesAdded().size());
+ assertTrue(
+ tableChange.getFilesDiff().getFilesAdded().stream()
+ .allMatch(file -> file.getColumnStats().size() == numberOfColumns));
assertEquals(removedFiles,
tableChange.getFilesDiff().getFilesRemoved().size());
}