This is an automated email from the ASF dual-hosted git repository.

amoghj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new be46d29d40 Core, Parquet, Orc: Don't write column sizes when metrics 
mode is None (#10440)
be46d29d40 is described below

commit be46d29d402e44ddc365381933c8770a98ce5fec
Author: Amogh Jahagirdar <[email protected]>
AuthorDate: Wed Jun 5 12:14:00 2024 -0600

    Core, Parquet, Orc: Don't write column sizes when metrics mode is None 
(#10440)
---
 core/src/test/java/org/apache/iceberg/TestMetrics.java            | 6 +++++-
 orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java          | 3 ++-
 parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java | 4 ++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/core/src/test/java/org/apache/iceberg/TestMetrics.java 
b/core/src/test/java/org/apache/iceberg/TestMetrics.java
index 424e0e0a7b..b95b92979f 100644
--- a/core/src/test/java/org/apache/iceberg/TestMetrics.java
+++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java
@@ -561,7 +561,7 @@ public abstract class TestMetrics {
             
MetricsConfig.fromProperties(ImmutableMap.of("write.metadata.metrics.default", 
"none")),
             buildNestedTestRecord());
     assertThat(metrics.recordCount()).isEqualTo(1L);
-    assertThat(metrics.columnSizes()).doesNotContainValue(null);
+    assertThat(metrics.columnSizes()).isEmpty();
     assertCounts(1, null, null, metrics);
     assertBounds(1, Types.IntegerType.get(), null, null, metrics);
     assertCounts(3, null, null, metrics);
@@ -584,6 +584,7 @@ public abstract class TestMetrics {
             buildNestedTestRecord());
     assertThat(metrics.recordCount()).isEqualTo(1L);
     assertThat(metrics.columnSizes()).doesNotContainValue(null);
+    assertThat(metrics.columnSizes()).isNotEmpty();
     assertCounts(1, 1L, 0L, metrics);
     assertBounds(1, Types.IntegerType.get(), null, null, metrics);
     assertCounts(3, 1L, 0L, metrics);
@@ -605,6 +606,7 @@ public abstract class TestMetrics {
             buildNestedTestRecord());
     assertThat(metrics.recordCount()).isEqualTo(1L);
     assertThat(metrics.columnSizes()).doesNotContainValue(null);
+    assertThat(metrics.columnSizes()).isNotEmpty();
     assertCounts(1, 1L, 0L, metrics);
     assertBounds(1, Types.IntegerType.get(), Integer.MAX_VALUE, 
Integer.MAX_VALUE, metrics);
     assertCounts(3, 1L, 0L, metrics);
@@ -642,6 +644,7 @@ public abstract class TestMetrics {
     CharBuffer expectedMaxBound = CharBuffer.wrap("Lorem ipsv");
     assertThat(metrics.recordCount()).isEqualTo(1L);
     assertThat(metrics.columnSizes()).doesNotContainValue(null);
+    assertThat(metrics.columnSizes()).isNotEmpty();
     assertCounts(1, 1L, 0L, metrics);
     assertBounds(1, Types.StringType.get(), expectedMinBound, 
expectedMaxBound, metrics);
   }
@@ -666,6 +669,7 @@ public abstract class TestMetrics {
     ByteBuffer expectedMaxBounds = ByteBuffer.wrap(new byte[] {0x1, 0x2, 0x3, 
0x4, 0x6});
     assertThat(metrics.recordCount()).isEqualTo(1L);
     assertThat(metrics.columnSizes()).doesNotContainValue(null);
+    assertThat(metrics.columnSizes()).isNotEmpty();
     assertCounts(1, 1L, 0L, metrics);
     assertBounds(1, Types.BinaryType.get(), expectedMinBounds, 
expectedMaxBounds, metrics);
   }
diff --git a/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java 
b/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
index 972591d53d..b057b265df 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
@@ -160,12 +160,13 @@ public class OrcMetrics {
 
         final MetricsMode metricsMode =
             MetricsUtil.metricsMode(schema, effectiveMetricsConfig, 
icebergCol.fieldId());
-        columnSizes.put(fieldId, colStat.getBytesOnDisk());
 
         if (metricsMode == MetricsModes.None.get()) {
           continue;
         }
 
+        columnSizes.put(fieldId, colStat.getBytesOnDisk());
+
         if (statsColumns.contains(fieldId)) {
           // Since ORC does not track null values nor repeated ones, the value 
count for columns in
           // containers (maps, list) may be larger than what it actually is, 
however these are not
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java 
b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java
index 2de423146a..63d5adbbd2 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java
@@ -124,12 +124,12 @@ public class ParquetUtil {
           continue;
         }
 
-        increment(columnSizes, fieldId, column.getTotalSize());
-
         MetricsMode metricsMode = MetricsUtil.metricsMode(fileSchema, 
metricsConfig, fieldId);
         if (metricsMode == MetricsModes.None.get()) {
           continue;
         }
+
+        increment(columnSizes, fieldId, column.getTotalSize());
         increment(valueCounts, fieldId, column.getValueCount());
 
         Statistics stats = column.getStatistics();

Reply via email to