This is an automated email from the ASF dual-hosted git repository.
amoghj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new be46d29d40 Core, Parquet, Orc: Don't write column sizes when metrics
mode is None (#10440)
be46d29d40 is described below
commit be46d29d402e44ddc365381933c8770a98ce5fec
Author: Amogh Jahagirdar <[email protected]>
AuthorDate: Wed Jun 5 12:14:00 2024 -0600
Core, Parquet, Orc: Don't write column sizes when metrics mode is None
(#10440)
---
core/src/test/java/org/apache/iceberg/TestMetrics.java | 6 +++++-
orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java | 3 ++-
parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java | 4 ++--
3 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/core/src/test/java/org/apache/iceberg/TestMetrics.java
b/core/src/test/java/org/apache/iceberg/TestMetrics.java
index 424e0e0a7b..b95b92979f 100644
--- a/core/src/test/java/org/apache/iceberg/TestMetrics.java
+++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java
@@ -561,7 +561,7 @@ public abstract class TestMetrics {
MetricsConfig.fromProperties(ImmutableMap.of("write.metadata.metrics.default",
"none")),
buildNestedTestRecord());
assertThat(metrics.recordCount()).isEqualTo(1L);
- assertThat(metrics.columnSizes()).doesNotContainValue(null);
+ assertThat(metrics.columnSizes()).isEmpty();
assertCounts(1, null, null, metrics);
assertBounds(1, Types.IntegerType.get(), null, null, metrics);
assertCounts(3, null, null, metrics);
@@ -584,6 +584,7 @@ public abstract class TestMetrics {
buildNestedTestRecord());
assertThat(metrics.recordCount()).isEqualTo(1L);
assertThat(metrics.columnSizes()).doesNotContainValue(null);
+ assertThat(metrics.columnSizes()).isNotEmpty();
assertCounts(1, 1L, 0L, metrics);
assertBounds(1, Types.IntegerType.get(), null, null, metrics);
assertCounts(3, 1L, 0L, metrics);
@@ -605,6 +606,7 @@ public abstract class TestMetrics {
buildNestedTestRecord());
assertThat(metrics.recordCount()).isEqualTo(1L);
assertThat(metrics.columnSizes()).doesNotContainValue(null);
+ assertThat(metrics.columnSizes()).isNotEmpty();
assertCounts(1, 1L, 0L, metrics);
assertBounds(1, Types.IntegerType.get(), Integer.MAX_VALUE,
Integer.MAX_VALUE, metrics);
assertCounts(3, 1L, 0L, metrics);
@@ -642,6 +644,7 @@ public abstract class TestMetrics {
CharBuffer expectedMaxBound = CharBuffer.wrap("Lorem ipsv");
assertThat(metrics.recordCount()).isEqualTo(1L);
assertThat(metrics.columnSizes()).doesNotContainValue(null);
+ assertThat(metrics.columnSizes()).isNotEmpty();
assertCounts(1, 1L, 0L, metrics);
assertBounds(1, Types.StringType.get(), expectedMinBound,
expectedMaxBound, metrics);
}
@@ -666,6 +669,7 @@ public abstract class TestMetrics {
ByteBuffer expectedMaxBounds = ByteBuffer.wrap(new byte[] {0x1, 0x2, 0x3,
0x4, 0x6});
assertThat(metrics.recordCount()).isEqualTo(1L);
assertThat(metrics.columnSizes()).doesNotContainValue(null);
+ assertThat(metrics.columnSizes()).isNotEmpty();
assertCounts(1, 1L, 0L, metrics);
assertBounds(1, Types.BinaryType.get(), expectedMinBounds,
expectedMaxBounds, metrics);
}
diff --git a/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
b/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
index 972591d53d..b057b265df 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
@@ -160,12 +160,13 @@ public class OrcMetrics {
final MetricsMode metricsMode =
MetricsUtil.metricsMode(schema, effectiveMetricsConfig,
icebergCol.fieldId());
- columnSizes.put(fieldId, colStat.getBytesOnDisk());
if (metricsMode == MetricsModes.None.get()) {
continue;
}
+ columnSizes.put(fieldId, colStat.getBytesOnDisk());
+
if (statsColumns.contains(fieldId)) {
// Since ORC does not track null values nor repeated ones, the value
count for columns in
// containers (maps, list) may be larger than what it actually is,
however these are not
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java
b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java
index 2de423146a..63d5adbbd2 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java
@@ -124,12 +124,12 @@ public class ParquetUtil {
continue;
}
- increment(columnSizes, fieldId, column.getTotalSize());
-
MetricsMode metricsMode = MetricsUtil.metricsMode(fileSchema,
metricsConfig, fieldId);
if (metricsMode == MetricsModes.None.get()) {
continue;
}
+
+ increment(columnSizes, fieldId, column.getTotalSize());
increment(valueCounts, fieldId, column.getValueCount());
Statistics stats = column.getStatistics();