This is an automated email from the ASF dual-hosted git repository.

etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new cf980650ed Core: Make totalRecordCount optional in PartitionStats 
(#12226)
cf980650ed is described below

commit cf980650ede19efe2b9fe9aff521ecf8e7bd315c
Author: Ajantha Bhat <[email protected]>
AuthorDate: Wed Mar 19 13:47:07 2025 +0530

    Core: Make totalRecordCount optional in PartitionStats (#12226)
---
 .../java/org/apache/iceberg/PartitionStats.java    |  22 +++-
 .../org/apache/iceberg/TestPartitionStats.java     | 135 +++++++++++++++++++++
 .../org/apache/iceberg/TestPartitionStatsUtil.java |  52 ++++----
 .../iceberg/data/TestPartitionStatsHandler.java    |  24 ++--
 4 files changed, 191 insertions(+), 42 deletions(-)

diff --git a/core/src/main/java/org/apache/iceberg/PartitionStats.java 
b/core/src/main/java/org/apache/iceberg/PartitionStats.java
index e4cbd1f6b9..d40b9f3e7f 100644
--- a/core/src/main/java/org/apache/iceberg/PartitionStats.java
+++ b/core/src/main/java/org/apache/iceberg/PartitionStats.java
@@ -33,7 +33,7 @@ public class PartitionStats implements StructLike {
   private int positionDeleteFileCount;
   private long equalityDeleteRecordCount;
   private int equalityDeleteFileCount;
-  private long totalRecordCount;
+  private Long totalRecordCount; // null by default
   private Long lastUpdatedAt; // null by default
   private Long lastUpdatedSnapshotId; // null by default
 
@@ -78,7 +78,15 @@ public class PartitionStats implements StructLike {
     return equalityDeleteFileCount;
   }
 
+  /**
+   * @deprecated since 1.9.0, will be removed in 1.10.0, use {@link 
#totalRecords()} instead.
+   */
+  @Deprecated
   public long totalRecordCount() {
+    return totalRecordCount == null ? 0L : totalRecordCount;
+  }
+
+  public Long totalRecords() {
     return totalRecordCount;
   }
 
@@ -150,7 +158,14 @@ public class PartitionStats implements StructLike {
     this.positionDeleteFileCount += entry.positionDeleteFileCount;
     this.equalityDeleteRecordCount += entry.equalityDeleteRecordCount;
     this.equalityDeleteFileCount += entry.equalityDeleteFileCount;
-    this.totalRecordCount += entry.totalRecordCount;
+
+    if (entry.totalRecordCount != null) {
+      if (totalRecordCount == null) {
+        this.totalRecordCount = entry.totalRecordCount;
+      } else {
+        this.totalRecordCount += entry.totalRecordCount;
+      }
+    }
 
     if (entry.lastUpdatedAt != null) {
       updateSnapshotInfo(entry.lastUpdatedSnapshotId, entry.lastUpdatedAt);
@@ -236,8 +251,7 @@ public class PartitionStats implements StructLike {
         this.equalityDeleteFileCount = value == null ? 0 : (int) value;
         break;
       case 9:
-        // optional field as per spec, implementation initialize to 0 for 
counters
-        this.totalRecordCount = value == null ? 0L : (long) value;
+        this.totalRecordCount = (Long) value;
         break;
       case 10:
         this.lastUpdatedAt = (Long) value;
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionStats.java 
b/core/src/test/java/org/apache/iceberg/TestPartitionStats.java
new file mode 100644
index 0000000000..c215fbcb80
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/TestPartitionStats.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.api.Test;
+
+public class TestPartitionStats {
+
+  private static final PartitionData PARTITION =
+      new PartitionData(
+          Types.StructType.of(Types.NestedField.required(1, "foo", 
Types.IntegerType.get())));
+
+  @Test
+  public void testAppendWithAllValues() {
+    PartitionStats stats1 =
+        createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 15L, 1625077800000L, 
12345L);
+    PartitionStats stats2 = createStats(200L, 7, 500L, 1L, 100, 0L, 50, 7L, 
1625077900000L, 12346L);
+
+    stats1.appendStats(stats2);
+
+    validateStats(stats1, 300L, 22, 1500L, 3L, 600, 1L, 250, 22L, 
1625077900000L, 12346L);
+  }
+
+  @Test
+  public void testAppendWithThisNullOptionalField() {
+    PartitionStats stats1 = createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 
null, null, null);
+    PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, 7L, 
1625077900000L, 12346L);
+
+    stats1.appendStats(stats2);
+
+    validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, 7L, 
1625077900000L, 12346L);
+  }
+
+  @Test
+  public void testAppendWithBothNullOptionalFields() {
+    PartitionStats stats1 = createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 
null, null, null);
+    PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, null, 
null, null);
+
+    stats1.appendStats(stats2);
+
+    validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, null, null, null);
+  }
+
+  @Test
+  public void testAppendWithOtherNullOptionalFields() {
+    PartitionStats stats1 =
+        createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 15L, 1625077900000L, 
12346L);
+    PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, null, 
null, null);
+
+    stats1.appendStats(stats2);
+
+    validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, 15L, 
1625077900000L, 12346L);
+  }
+
+  @Test
+  public void testAppendEmptyStats() {
+    PartitionStats stats1 = new PartitionStats(PARTITION, 1);
+    PartitionStats stats2 = new PartitionStats(PARTITION, 1);
+
+    stats1.appendStats(stats2);
+
+    validateStats(stats1, 0L, 0, 0L, 0L, 0, 0L, 0, null, null, null);
+  }
+
+  @Test
+  public void testAppendWithDifferentSpec() {
+    PartitionStats stats1 = new PartitionStats(PARTITION, 1);
+    PartitionStats stats2 = new PartitionStats(PARTITION, 2);
+
+    assertThatThrownBy(() -> stats1.appendStats(stats2))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Spec IDs must match");
+  }
+
+  private PartitionStats createStats(
+      long dataRecordCount,
+      int dataFileCount,
+      long totalDataFileSizeInBytes,
+      long positionDeleteRecordCount,
+      int positionDeleteFileCount,
+      long equalityDeleteRecordCount,
+      int equalityDeleteFileCount,
+      Long totalRecordCount,
+      Long lastUpdatedAt,
+      Long lastUpdatedSnapshotId) {
+
+    PartitionStats stats = new PartitionStats(PARTITION, 1);
+    stats.set(2, dataRecordCount);
+    stats.set(3, dataFileCount);
+    stats.set(4, totalDataFileSizeInBytes);
+    stats.set(5, positionDeleteRecordCount);
+    stats.set(6, positionDeleteFileCount);
+    stats.set(7, equalityDeleteRecordCount);
+    stats.set(8, equalityDeleteFileCount);
+    stats.set(9, totalRecordCount);
+    stats.set(10, lastUpdatedAt);
+    stats.set(11, lastUpdatedSnapshotId);
+
+    return stats;
+  }
+
+  private void validateStats(PartitionStats stats, Object... expectedValues) {
+    // Spec id and partition data should be unchanged
+    assertThat(stats.get(0, PartitionData.class)).isEqualTo(PARTITION);
+    assertThat(stats.get(1, Integer.class)).isEqualTo(1);
+
+    for (int i = 0; i < expectedValues.length; i++) {
+      if (expectedValues[i] == null) {
+        assertThat(stats.get(i + 2, Object.class)).isNull();
+      } else {
+        assertThat(stats.get(i + 2, 
Object.class)).isEqualTo(expectedValues[i]);
+      }
+    }
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java 
b/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
index bd4b4a2ff6..b4308ff28c 100644
--- a/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
+++ b/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
@@ -103,7 +103,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -116,7 +116,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -129,7 +129,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -142,7 +142,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()));
 
@@ -168,7 +168,7 @@ public class TestPartitionStatsUtil {
             1, // one position delete file
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(), // new snapshot from pos delete commit
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -181,7 +181,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -194,7 +194,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -207,7 +207,7 @@ public class TestPartitionStatsUtil {
             0,
             eqDelete.recordCount(),
             1, // one equality delete file
-            0L,
+            null,
             snapshot3.timestampMillis(), // new snapshot from equality delete 
commit
             snapshot3.snapshotId()));
   }
@@ -247,7 +247,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -260,7 +260,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()));
 
@@ -288,7 +288,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -301,7 +301,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -314,7 +314,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(), // new snapshot
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -327,7 +327,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(), // new snapshot
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -340,7 +340,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(),
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -353,7 +353,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(),
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -366,7 +366,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(),
             snapshot2.snapshotId()));
   }
@@ -410,7 +410,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -423,7 +423,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()));
 
@@ -459,7 +459,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -472,7 +472,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -485,7 +485,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(),
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -498,7 +498,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(),
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -511,7 +511,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(),
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -524,7 +524,7 @@ public class TestPartitionStatsUtil {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(),
             snapshot2.snapshotId()));
   }
@@ -572,7 +572,7 @@ public class TestPartitionStatsUtil {
             PartitionStats::positionDeleteFileCount,
             PartitionStats::equalityDeleteRecordCount,
             PartitionStats::equalityDeleteFileCount,
-            PartitionStats::totalRecordCount,
+            PartitionStats::totalRecords,
             PartitionStats::lastUpdatedAt,
             PartitionStats::lastUpdatedSnapshotId)
         .containsExactlyInAnyOrder(expectedValues);
diff --git 
a/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsHandler.java 
b/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsHandler.java
index 1d84b8e229..d974887e6f 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsHandler.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestPartitionStatsHandler.java
@@ -270,12 +270,12 @@ public class TestPartitionStatsHandler {
             PartitionStats::positionDeleteFileCount,
             PartitionStats::equalityDeleteRecordCount,
             PartitionStats::equalityDeleteFileCount,
-            PartitionStats::totalRecordCount,
+            PartitionStats::totalRecords,
             PartitionStats::lastUpdatedAt,
             PartitionStats::lastUpdatedSnapshotId)
         .isEqualTo(
             Arrays.asList(
-                0L, 0, 0L, 0, 0L, null, null)); // null counters must be 
initialized to zero.
+                0L, 0, 0L, 0, null, null, null)); // null counters must be 
initialized to zero.
 
     PartitionStatisticsFile statisticsFile =
         PartitionStatsHandler.writePartitionStatsFile(testTable, 42L, 
dataSchema, expected);
@@ -352,7 +352,7 @@ public class TestPartitionStatsHandler {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -365,7 +365,7 @@ public class TestPartitionStatsHandler {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -378,7 +378,7 @@ public class TestPartitionStatsHandler {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -391,7 +391,7 @@ public class TestPartitionStatsHandler {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()));
 
@@ -416,7 +416,7 @@ public class TestPartitionStatsHandler {
             0,
             eqDeletes.recordCount(),
             1,
-            0L,
+            null,
             snapshot3.timestampMillis(),
             snapshot3.snapshotId()),
         Tuple.tuple(
@@ -429,7 +429,7 @@ public class TestPartitionStatsHandler {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()),
         Tuple.tuple(
@@ -442,7 +442,7 @@ public class TestPartitionStatsHandler {
             1,
             0L,
             0,
-            0L,
+            null,
             snapshot2.timestampMillis(),
             snapshot2.snapshotId()),
         Tuple.tuple(
@@ -455,7 +455,7 @@ public class TestPartitionStatsHandler {
             0,
             0L,
             0,
-            0L,
+            null,
             snapshot1.timestampMillis(),
             snapshot1.snapshotId()));
   }
@@ -517,7 +517,7 @@ public class TestPartitionStatsHandler {
             PartitionStats::positionDeleteFileCount,
             PartitionStats::equalityDeleteRecordCount,
             PartitionStats::equalityDeleteFileCount,
-            PartitionStats::totalRecordCount,
+            PartitionStats::totalRecords,
             PartitionStats::lastUpdatedAt,
             PartitionStats::lastUpdatedSnapshotId)
         .containsExactlyInAnyOrder(expectedValues);
@@ -591,7 +591,7 @@ public class TestPartitionStatsHandler {
         && stats1.positionDeleteFileCount() == stats2.positionDeleteFileCount()
         && stats1.equalityDeleteRecordCount() == 
stats2.equalityDeleteRecordCount()
         && stats1.equalityDeleteFileCount() == stats2.equalityDeleteFileCount()
-        && stats1.totalRecordCount() == stats2.totalRecordCount()
+        && Objects.equals(stats1.totalRecords(), stats2.totalRecords())
         && Objects.equals(stats1.lastUpdatedAt(), stats2.lastUpdatedAt())
         && Objects.equals(stats1.lastUpdatedSnapshotId(), 
stats2.lastUpdatedSnapshotId());
   }

Reply via email to