This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new cf980650ed Core: Make totalRecordCount optional in PartitionStats
(#12226)
cf980650ed is described below
commit cf980650ede19efe2b9fe9aff521ecf8e7bd315c
Author: Ajantha Bhat <[email protected]>
AuthorDate: Wed Mar 19 13:47:07 2025 +0530
Core: Make totalRecordCount optional in PartitionStats (#12226)
---
.../java/org/apache/iceberg/PartitionStats.java | 22 +++-
.../org/apache/iceberg/TestPartitionStats.java | 135 +++++++++++++++++++++
.../org/apache/iceberg/TestPartitionStatsUtil.java | 52 ++++----
.../iceberg/data/TestPartitionStatsHandler.java | 24 ++--
4 files changed, 191 insertions(+), 42 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/PartitionStats.java
b/core/src/main/java/org/apache/iceberg/PartitionStats.java
index e4cbd1f6b9..d40b9f3e7f 100644
--- a/core/src/main/java/org/apache/iceberg/PartitionStats.java
+++ b/core/src/main/java/org/apache/iceberg/PartitionStats.java
@@ -33,7 +33,7 @@ public class PartitionStats implements StructLike {
private int positionDeleteFileCount;
private long equalityDeleteRecordCount;
private int equalityDeleteFileCount;
- private long totalRecordCount;
+ private Long totalRecordCount; // null by default
private Long lastUpdatedAt; // null by default
private Long lastUpdatedSnapshotId; // null by default
@@ -78,7 +78,15 @@ public class PartitionStats implements StructLike {
return equalityDeleteFileCount;
}
+ /**
+ * @deprecated since 1.9.0, will be removed in 1.10.0, use {@link
#totalRecords()} instead.
+ */
+ @Deprecated
public long totalRecordCount() {
+ return totalRecordCount == null ? 0L : totalRecordCount;
+ }
+
+ public Long totalRecords() {
return totalRecordCount;
}
@@ -150,7 +158,14 @@ public class PartitionStats implements StructLike {
this.positionDeleteFileCount += entry.positionDeleteFileCount;
this.equalityDeleteRecordCount += entry.equalityDeleteRecordCount;
this.equalityDeleteFileCount += entry.equalityDeleteFileCount;
- this.totalRecordCount += entry.totalRecordCount;
+
+ if (entry.totalRecordCount != null) {
+ if (totalRecordCount == null) {
+ this.totalRecordCount = entry.totalRecordCount;
+ } else {
+ this.totalRecordCount += entry.totalRecordCount;
+ }
+ }
if (entry.lastUpdatedAt != null) {
updateSnapshotInfo(entry.lastUpdatedSnapshotId, entry.lastUpdatedAt);
@@ -236,8 +251,7 @@ public class PartitionStats implements StructLike {
this.equalityDeleteFileCount = value == null ? 0 : (int) value;
break;
case 9:
- // optional field as per spec, implementation initialize to 0 for
counters
- this.totalRecordCount = value == null ? 0L : (long) value;
+ this.totalRecordCount = (Long) value;
break;
case 10:
this.lastUpdatedAt = (Long) value;
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionStats.java
b/core/src/test/java/org/apache/iceberg/TestPartitionStats.java
new file mode 100644
index 0000000000..c215fbcb80
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/TestPartitionStats.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.Test;
+
+public class TestPartitionStats {
+
+ private static final PartitionData PARTITION =
+ new PartitionData(
+ Types.StructType.of(Types.NestedField.required(1, "foo",
Types.IntegerType.get())));
+
+ @Test
+ public void testAppendWithAllValues() {
+ PartitionStats stats1 =
+ createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 15L, 1625077800000L,
12345L);
+ PartitionStats stats2 = createStats(200L, 7, 500L, 1L, 100, 0L, 50, 7L,
1625077900000L, 12346L);
+
+ stats1.appendStats(stats2);
+
+ validateStats(stats1, 300L, 22, 1500L, 3L, 600, 1L, 250, 22L,
1625077900000L, 12346L);
+ }
+
+ @Test
+ public void testAppendWithThisNullOptionalField() {
+ PartitionStats stats1 = createStats(100L, 15, 1000L, 2L, 500, 1L, 200,
null, null, null);
+ PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, 7L,
1625077900000L, 12346L);
+
+ stats1.appendStats(stats2);
+
+ validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, 7L,
1625077900000L, 12346L);
+ }
+
+ @Test
+ public void testAppendWithBothNullOptionalFields() {
+ PartitionStats stats1 = createStats(100L, 15, 1000L, 2L, 500, 1L, 200,
null, null, null);
+ PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, null,
null, null);
+
+ stats1.appendStats(stats2);
+
+ validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, null, null, null);
+ }
+
+ @Test
+ public void testAppendWithOtherNullOptionalFields() {
+ PartitionStats stats1 =
+ createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 15L, 1625077900000L,
12346L);
+ PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, null,
null, null);
+
+ stats1.appendStats(stats2);
+
+ validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, 15L,
1625077900000L, 12346L);
+ }
+
+ @Test
+ public void testAppendEmptyStats() {
+ PartitionStats stats1 = new PartitionStats(PARTITION, 1);
+ PartitionStats stats2 = new PartitionStats(PARTITION, 1);
+
+ stats1.appendStats(stats2);
+
+ validateStats(stats1, 0L, 0, 0L, 0L, 0, 0L, 0, null, null, null);
+ }
+
+ @Test
+ public void testAppendWithDifferentSpec() {
+ PartitionStats stats1 = new PartitionStats(PARTITION, 1);
+ PartitionStats stats2 = new PartitionStats(PARTITION, 2);
+
+ assertThatThrownBy(() -> stats1.appendStats(stats2))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Spec IDs must match");
+ }
+
+ private PartitionStats createStats(
+ long dataRecordCount,
+ int dataFileCount,
+ long totalDataFileSizeInBytes,
+ long positionDeleteRecordCount,
+ int positionDeleteFileCount,
+ long equalityDeleteRecordCount,
+ int equalityDeleteFileCount,
+ Long totalRecordCount,
+ Long lastUpdatedAt,
+ Long lastUpdatedSnapshotId) {
+
+ PartitionStats stats = new PartitionStats(PARTITION, 1);
+ stats.set(2, dataRecordCount);
+ stats.set(3, dataFileCount);
+ stats.set(4, totalDataFileSizeInBytes);
+ stats.set(5, positionDeleteRecordCount);
+ stats.set(6, positionDeleteFileCount);
+ stats.set(7, equalityDeleteRecordCount);
+ stats.set(8, equalityDeleteFileCount);
+ stats.set(9, totalRecordCount);
+ stats.set(10, lastUpdatedAt);
+ stats.set(11, lastUpdatedSnapshotId);
+
+ return stats;
+ }
+
+ private void validateStats(PartitionStats stats, Object... expectedValues) {
+ // Spec id and partition data should be unchanged
+ assertThat(stats.get(0, PartitionData.class)).isEqualTo(PARTITION);
+ assertThat(stats.get(1, Integer.class)).isEqualTo(1);
+
+ for (int i = 0; i < expectedValues.length; i++) {
+ if (expectedValues[i] == null) {
+ assertThat(stats.get(i + 2, Object.class)).isNull();
+ } else {
+ assertThat(stats.get(i + 2,
Object.class)).isEqualTo(expectedValues[i]);
+ }
+ }
+ }
+}
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
b/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
index bd4b4a2ff6..b4308ff28c 100644
--- a/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
+++ b/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
@@ -103,7 +103,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -116,7 +116,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -129,7 +129,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -142,7 +142,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));
@@ -168,7 +168,7 @@ public class TestPartitionStatsUtil {
1, // one position delete file
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(), // new snapshot from pos delete commit
snapshot2.snapshotId()),
Tuple.tuple(
@@ -181,7 +181,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -194,7 +194,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -207,7 +207,7 @@ public class TestPartitionStatsUtil {
0,
eqDelete.recordCount(),
1, // one equality delete file
- 0L,
+ null,
snapshot3.timestampMillis(), // new snapshot from equality delete
commit
snapshot3.snapshotId()));
}
@@ -247,7 +247,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -260,7 +260,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));
@@ -288,7 +288,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -301,7 +301,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -314,7 +314,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(), // new snapshot
snapshot2.snapshotId()),
Tuple.tuple(
@@ -327,7 +327,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(), // new snapshot
snapshot2.snapshotId()),
Tuple.tuple(
@@ -340,7 +340,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
@@ -353,7 +353,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
@@ -366,7 +366,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()));
}
@@ -410,7 +410,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -423,7 +423,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));
@@ -459,7 +459,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -472,7 +472,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -485,7 +485,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
@@ -498,7 +498,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
@@ -511,7 +511,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
@@ -524,7 +524,7 @@ public class TestPartitionStatsUtil {
0,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()));
}
@@ -572,7 +572,7 @@ public class TestPartitionStatsUtil {
PartitionStats::positionDeleteFileCount,
PartitionStats::equalityDeleteRecordCount,
PartitionStats::equalityDeleteFileCount,
- PartitionStats::totalRecordCount,
+ PartitionStats::totalRecords,
PartitionStats::lastUpdatedAt,
PartitionStats::lastUpdatedSnapshotId)
.containsExactlyInAnyOrder(expectedValues);
diff --git
a/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsHandler.java
b/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsHandler.java
index 1d84b8e229..d974887e6f 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsHandler.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsHandler.java
@@ -270,12 +270,12 @@ public class TestPartitionStatsHandler {
PartitionStats::positionDeleteFileCount,
PartitionStats::equalityDeleteRecordCount,
PartitionStats::equalityDeleteFileCount,
- PartitionStats::totalRecordCount,
+ PartitionStats::totalRecords,
PartitionStats::lastUpdatedAt,
PartitionStats::lastUpdatedSnapshotId)
.isEqualTo(
Arrays.asList(
- 0L, 0, 0L, 0, 0L, null, null)); // null counters must be
initialized to zero.
+ 0L, 0, 0L, 0, null, null, null)); // null counters must be
initialized to zero.
PartitionStatisticsFile statisticsFile =
PartitionStatsHandler.writePartitionStatsFile(testTable, 42L,
dataSchema, expected);
@@ -352,7 +352,7 @@ public class TestPartitionStatsHandler {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -365,7 +365,7 @@ public class TestPartitionStatsHandler {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -378,7 +378,7 @@ public class TestPartitionStatsHandler {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -391,7 +391,7 @@ public class TestPartitionStatsHandler {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));
@@ -416,7 +416,7 @@ public class TestPartitionStatsHandler {
0,
eqDeletes.recordCount(),
1,
- 0L,
+ null,
snapshot3.timestampMillis(),
snapshot3.snapshotId()),
Tuple.tuple(
@@ -429,7 +429,7 @@ public class TestPartitionStatsHandler {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
@@ -442,7 +442,7 @@ public class TestPartitionStatsHandler {
1,
0L,
0,
- 0L,
+ null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
@@ -455,7 +455,7 @@ public class TestPartitionStatsHandler {
0,
0L,
0,
- 0L,
+ null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));
}
@@ -517,7 +517,7 @@ public class TestPartitionStatsHandler {
PartitionStats::positionDeleteFileCount,
PartitionStats::equalityDeleteRecordCount,
PartitionStats::equalityDeleteFileCount,
- PartitionStats::totalRecordCount,
+ PartitionStats::totalRecords,
PartitionStats::lastUpdatedAt,
PartitionStats::lastUpdatedSnapshotId)
.containsExactlyInAnyOrder(expectedValues);
@@ -591,7 +591,7 @@ public class TestPartitionStatsHandler {
&& stats1.positionDeleteFileCount() == stats2.positionDeleteFileCount()
&& stats1.equalityDeleteRecordCount() ==
stats2.equalityDeleteRecordCount()
&& stats1.equalityDeleteFileCount() == stats2.equalityDeleteFileCount()
- && stats1.totalRecordCount() == stats2.totalRecordCount()
+ && Objects.equals(stats1.totalRecords(), stats2.totalRecords())
&& Objects.equals(stats1.lastUpdatedAt(), stats2.lastUpdatedAt())
&& Objects.equals(stats1.lastUpdatedSnapshotId(),
stats2.lastUpdatedSnapshotId());
}