This is an automated email from the ASF dual-hosted git repository.
openinx pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 333e2c1 Core: Add includeColumnStats option in FindFiles API (#2875)
333e2c1 is described below
commit 333e2c1daa35fd06df16305d749528aa16232a7d
Author: Flyangz <[email protected]>
AuthorDate: Thu Jul 29 18:40:05 2021 +0800
Core: Add includeColumnStats option in FindFiles API (#2875)
---
core/src/main/java/org/apache/iceberg/FindFiles.java | 9 ++++++++-
.../test/java/org/apache/iceberg/TableTestBase.java | 16 ++++++++++++++++
.../test/java/org/apache/iceberg/TestFindFiles.java | 19 +++++++++++++++++++
3 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/FindFiles.java b/core/src/main/java/org/apache/iceberg/FindFiles.java
index 9fa358b..1f466b5 100644
--- a/core/src/main/java/org/apache/iceberg/FindFiles.java
+++ b/core/src/main/java/org/apache/iceberg/FindFiles.java
@@ -44,6 +44,7 @@ public class FindFiles {
private final Table table;
private final TableOperations ops;
private boolean caseSensitive = true;
+ private boolean includeColumnStats = false;
private Long snapshotId = null;
private Expression rowFilter = Expressions.alwaysTrue();
private Expression fileFilter = Expressions.alwaysTrue();
@@ -64,6 +65,11 @@ public class FindFiles {
return this;
}
+ public Builder includeColumnStats() {
+ this.includeColumnStats = true;
+ return this;
+ }
+
/**
* Base results on the given snapshot.
*
@@ -206,7 +212,8 @@ public class FindFiles {
.caseSensitive(caseSensitive)
.entries();
- return CloseableIterable.transform(entries, entry -> entry.file().copyWithoutStats());
+ return CloseableIterable.transform(entries,
+ entry -> includeColumnStats ? entry.file().copy() : entry.file().copyWithoutStats());
}
}
}
diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java b/core/src/test/java/org/apache/iceberg/TableTestBase.java
index c0f60d4..16a6bfd 100644
--- a/core/src/test/java/org/apache/iceberg/TableTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java
@@ -28,11 +28,13 @@ import java.util.stream.LongStream;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.relocated.com.google.common.io.Files;
+import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Types;
import org.junit.After;
import org.junit.Assert;
@@ -130,6 +132,20 @@ public class TableTestBase {
.withPartition(TestHelpers.Row.of(3))
.withRecordCount(1)
.build();
+ static final DataFile FILE_WITH_STATS = DataFiles.builder(SPEC)
+ .withPath("/path/to/data-with-stats.parquet")
+ .withMetrics(new Metrics(10L,
+ ImmutableMap.of(3, 100L, 4, 200L), // column sizes
+ ImmutableMap.of(3, 90L, 4, 180L), // value counts
+ ImmutableMap.of(3, 10L, 4, 20L), // null value counts
+ ImmutableMap.of(3, 0L, 4, 0L), // nan value counts
+ ImmutableMap.of(3, Conversions.toByteBuffer(Types.IntegerType.get(), 1),
+ 4, Conversions.toByteBuffer(Types.IntegerType.get(), 2)), // lower bounds
+ ImmutableMap.of(3, Conversions.toByteBuffer(Types.IntegerType.get(), 5),
+ 4, Conversions.toByteBuffer(Types.IntegerType.get(), 10)) // upperbounds
+ ))
+ .withFileSizeInBytes(350)
+ .build();
static final FileIO FILE_IO = new TestTables.LocalFileIO();
diff --git a/core/src/test/java/org/apache/iceberg/TestFindFiles.java b/core/src/test/java/org/apache/iceberg/TestFindFiles.java
index 07118be..388534c 100644
--- a/core/src/test/java/org/apache/iceberg/TestFindFiles.java
+++ b/core/src/test/java/org/apache/iceberg/TestFindFiles.java
@@ -191,6 +191,25 @@ public class TestFindFiles extends TableTestBase {
}
@Test
+ public void testIncludeColumnStats() {
+ table.newAppend()
+ .appendFile(FILE_WITH_STATS)
+ .commit();
+
+ Iterable<DataFile> files = FindFiles.in(table)
+ .includeColumnStats()
+ .collect();
+ final DataFile file = files.iterator().next();
+
+ Assert.assertEquals(FILE_WITH_STATS.columnSizes(), file.columnSizes());
+ Assert.assertEquals(FILE_WITH_STATS.valueCounts(), file.valueCounts());
+ Assert.assertEquals(FILE_WITH_STATS.nullValueCounts(), file.nullValueCounts());
+ Assert.assertEquals(FILE_WITH_STATS.nanValueCounts(), file.nanValueCounts());
+ Assert.assertEquals(FILE_WITH_STATS.lowerBounds(), file.lowerBounds());
+ Assert.assertEquals(FILE_WITH_STATS.upperBounds(), file.upperBounds());
+ }
+
+ @Test
public void testNoSnapshot() {
// a table has no snapshot when it just gets created and no data is loaded yet