This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 333e2c1  Core:  Add includeColumnStats option in FindFiles API (#2875)
333e2c1 is described below

commit 333e2c1daa35fd06df16305d749528aa16232a7d
Author: Flyangz <[email protected]>
AuthorDate: Thu Jul 29 18:40:05 2021 +0800

    Core:  Add includeColumnStats option in FindFiles API (#2875)
---
 core/src/main/java/org/apache/iceberg/FindFiles.java  |  9 ++++++++-
 .../test/java/org/apache/iceberg/TableTestBase.java   | 16 ++++++++++++++++
 .../test/java/org/apache/iceberg/TestFindFiles.java   | 19 +++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/FindFiles.java b/core/src/main/java/org/apache/iceberg/FindFiles.java
index 9fa358b..1f466b5 100644
--- a/core/src/main/java/org/apache/iceberg/FindFiles.java
+++ b/core/src/main/java/org/apache/iceberg/FindFiles.java
@@ -44,6 +44,7 @@ public class FindFiles {
     private final Table table;
     private final TableOperations ops;
     private boolean caseSensitive = true;
+    private boolean includeColumnStats = false;
     private Long snapshotId = null;
     private Expression rowFilter = Expressions.alwaysTrue();
     private Expression fileFilter = Expressions.alwaysTrue();
@@ -64,6 +65,11 @@ public class FindFiles {
       return this;
     }
 
+    public Builder includeColumnStats() {
+      this.includeColumnStats = true;
+      return this;
+    }
+
     /**
      * Base results on the given snapshot.
      *
@@ -206,7 +212,8 @@ public class FindFiles {
           .caseSensitive(caseSensitive)
           .entries();
 
-      return CloseableIterable.transform(entries, entry -> entry.file().copyWithoutStats());
+      return CloseableIterable.transform(entries,
+          entry -> includeColumnStats ? entry.file().copy() : entry.file().copyWithoutStats());
     }
   }
 }
diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java b/core/src/test/java/org/apache/iceberg/TableTestBase.java
index c0f60d4..16a6bfd 100644
--- a/core/src/test/java/org/apache/iceberg/TableTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java
@@ -28,11 +28,13 @@ import java.util.stream.LongStream;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
 import org.apache.iceberg.relocated.com.google.common.collect.Iterators;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
 import org.apache.iceberg.relocated.com.google.common.io.Files;
+import org.apache.iceberg.types.Conversions;
 import org.apache.iceberg.types.Types;
 import org.junit.After;
 import org.junit.Assert;
@@ -130,6 +132,20 @@ public class TableTestBase {
       .withPartition(TestHelpers.Row.of(3))
       .withRecordCount(1)
       .build();
+  static final DataFile FILE_WITH_STATS = DataFiles.builder(SPEC)
+      .withPath("/path/to/data-with-stats.parquet")
+      .withMetrics(new Metrics(10L,
+          ImmutableMap.of(3, 100L, 4, 200L), // column sizes
+          ImmutableMap.of(3, 90L, 4, 180L), // value counts
+          ImmutableMap.of(3, 10L, 4, 20L), // null value counts
+          ImmutableMap.of(3, 0L, 4, 0L), // nan value counts
+          ImmutableMap.of(3, Conversions.toByteBuffer(Types.IntegerType.get(), 1),
+             4, Conversions.toByteBuffer(Types.IntegerType.get(), 2)),  // lower bounds
+          ImmutableMap.of(3, Conversions.toByteBuffer(Types.IntegerType.get(), 5),
+             4, Conversions.toByteBuffer(Types.IntegerType.get(), 10))  // upperbounds
+          ))
+      .withFileSizeInBytes(350)
+      .build();
 
   static final FileIO FILE_IO = new TestTables.LocalFileIO();
 
diff --git a/core/src/test/java/org/apache/iceberg/TestFindFiles.java b/core/src/test/java/org/apache/iceberg/TestFindFiles.java
index 07118be..388534c 100644
--- a/core/src/test/java/org/apache/iceberg/TestFindFiles.java
+++ b/core/src/test/java/org/apache/iceberg/TestFindFiles.java
@@ -191,6 +191,25 @@ public class TestFindFiles extends TableTestBase {
   }
 
   @Test
+  public void testIncludeColumnStats() {
+    table.newAppend()
+        .appendFile(FILE_WITH_STATS)
+        .commit();
+
+    Iterable<DataFile> files = FindFiles.in(table)
+        .includeColumnStats()
+        .collect();
+    final DataFile file = files.iterator().next();
+
+    Assert.assertEquals(FILE_WITH_STATS.columnSizes(), file.columnSizes());
+    Assert.assertEquals(FILE_WITH_STATS.valueCounts(), file.valueCounts());
+    Assert.assertEquals(FILE_WITH_STATS.nullValueCounts(), file.nullValueCounts());
+    Assert.assertEquals(FILE_WITH_STATS.nanValueCounts(), file.nanValueCounts());
+    Assert.assertEquals(FILE_WITH_STATS.lowerBounds(), file.lowerBounds());
+    Assert.assertEquals(FILE_WITH_STATS.upperBounds(), file.upperBounds());
+  }
+
+  @Test
   public void testNoSnapshot() {
     // a table has no snapshot when it just gets created and no data is loaded yet
 

Reply via email to