This is an automated email from the ASF dual-hosted git repository.
aokolnychyi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 1898e62164 Core: Project data file stats only if there are equality deletes (#11013)
1898e62164 is described below
commit 1898e621642cd0779302ce2c2933f86c3b1f6f6a
Author: Anton Okolnychyi <[email protected]>
AuthorDate: Mon Aug 26 17:30:47 2024 -0700
Core: Project data file stats only if there are equality deletes (#11013)
---
.../main/java/org/apache/iceberg/DeleteFileIndex.java | 16 +++++++++++++---
core/src/main/java/org/apache/iceberg/ManifestGroup.java | 2 +-
.../java/org/apache/iceberg/DeleteFileIndexTestBase.java | 8 ++++++++
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
index c267164818..7940443734 100644
--- a/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
+++ b/core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
@@ -71,6 +71,8 @@ class DeleteFileIndex {
private final PartitionMap<EqualityDeletes> eqDeletesByPartition;
private final PartitionMap<PositionDeletes> posDeletesByPartition;
private final CharSequenceMap<PositionDeletes> posDeletesByPath;
+ private final boolean hasEqDeletes;
+ private final boolean hasPosDeletes;
private final boolean isEmpty;
private DeleteFileIndex(
@@ -82,15 +84,23 @@ class DeleteFileIndex {
this.eqDeletesByPartition = eqDeletesByPartition;
this.posDeletesByPartition = posDeletesByPartition;
this.posDeletesByPath = posDeletesByPath;
- boolean noEqDeletes = globalDeletes == null && eqDeletesByPartition == null;
- boolean noPosDeletes = posDeletesByPartition == null && posDeletesByPath == null;
- this.isEmpty = noEqDeletes && noPosDeletes;
+ this.hasEqDeletes = globalDeletes != null || eqDeletesByPartition != null;
+ this.hasPosDeletes = posDeletesByPartition != null || posDeletesByPath != null;
+ this.isEmpty = !hasEqDeletes && !hasPosDeletes;
}
public boolean isEmpty() {
return isEmpty;
}
+ public boolean hasEqualityDeletes() {
+ return hasEqDeletes;
+ }
+
+ public boolean hasPositionDeletes() {
+ return hasPosDeletes;
+ }
+
public Iterable<DeleteFile> referencedDeleteFiles() {
Iterable<DeleteFile> deleteFiles = Collections.emptyList();
diff --git a/core/src/main/java/org/apache/iceberg/ManifestGroup.java b/core/src/main/java/org/apache/iceberg/ManifestGroup.java
index 38ad2661a8..10cf6bd3bf 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestGroup.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestGroup.java
@@ -184,7 +184,7 @@ class ManifestGroup {
DeleteFileIndex deleteFiles =
deleteIndexBuilder.scanMetrics(scanMetrics).build();
boolean dropStats = ManifestReader.dropStats(columns);
- if (!deleteFiles.isEmpty()) {
+ if (deleteFiles.hasEqualityDeletes()) {
select(ManifestReader.withStatsColumns(columns));
}
diff --git a/core/src/test/java/org/apache/iceberg/DeleteFileIndexTestBase.java b/core/src/test/java/org/apache/iceberg/DeleteFileIndexTestBase.java
index 836a1ddd80..986e8608c0 100644
--- a/core/src/test/java/org/apache/iceberg/DeleteFileIndexTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/DeleteFileIndexTestBase.java
@@ -139,6 +139,8 @@ public abstract class DeleteFileIndexTestBase<
DataFile file = unpartitionedFile(partSpec);
+ assertThat(index.hasEqualityDeletes()).isTrue();
+ assertThat(index.hasPositionDeletes()).isFalse();
assertThat(index.forDataFile(0, file)).as("Only one delete file should apply").hasSize(1);
}
@@ -158,6 +160,9 @@ public abstract class DeleteFileIndexTestBase<
.specsById(ImmutableMap.of(partSpec.specId(), partSpec, 1, SPEC))
.build();
+ assertThat(index.hasEqualityDeletes()).isTrue();
+ assertThat(index.hasPositionDeletes()).isTrue();
+
DataFile unpartitionedFile = unpartitionedFile(partSpec);
assertThat(index.forDataFile(0, unpartitionedFile))
.as("All deletes should apply to seq 0")
@@ -213,6 +218,9 @@ public abstract class DeleteFileIndexTestBase<
.specsById(ImmutableMap.of(SPEC.specId(), SPEC, 1,
PartitionSpec.unpartitioned()))
.build();
+ assertThat(index.hasEqualityDeletes()).isTrue();
+ assertThat(index.hasPositionDeletes()).isTrue();
+
assertThat(index.forDataFile(0, FILE_A))
.as("All deletes should apply to seq 0")
.isEqualTo(deleteFiles);