This is an automated email from the ASF dual-hosted git repository.

aokolnychyi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new 1e2a71398f Parquet: Add log entry when Bloom filters are used (#9010)
1e2a71398f is described below

commit 1e2a71398fb5564d1cd9f4e12b3d1acc568f8ef7
Author: Huaxin Gao <[email protected]>
AuthorDate: Thu Nov 16 16:16:15 2023 -0800

    Parquet: Add log entry when Bloom filters are used (#9010)
    
    Co-authored-by: Huaxin Gao <[email protected]>
---
 .../iceberg/parquet/ParquetBloomRowGroupFilter.java      | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java
index de7f46a018..11201bd88e 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java
@@ -48,8 +48,13 @@ import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.PrimitiveType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class ParquetBloomRowGroupFilter {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ParquetBloomRowGroupFilter.class);
+
   private final Schema schema;
   private final Expression expr;
   private final boolean caseSensitive;
@@ -114,10 +119,13 @@ public class ParquetBloomRowGroupFilter {
 
       Set<Integer> filterRefs =
           Binder.boundReferences(schema.asStruct(), ImmutableList.of(expr), caseSensitive);
-      // If the filter's column set doesn't overlap with any bloom filter columns, exit early with
-      // ROWS_MIGHT_MATCH
-      if (!filterRefs.isEmpty() && Sets.intersection(fieldsWithBloomFilter, filterRefs).isEmpty()) {
-        return ROWS_MIGHT_MATCH;
+      if (!filterRefs.isEmpty()) {
+        Set<Integer> overlappedBloomFilters = Sets.intersection(fieldsWithBloomFilter, filterRefs);
+        if (overlappedBloomFilters.isEmpty()) {
+          return ROWS_MIGHT_MATCH;
+        } else {
+          LOG.debug("Using Bloom filters for columns with IDs: {}", overlappedBloomFilters);
+        }
       }
 
       return ExpressionVisitors.visitEvaluator(expr, this);

Reply via email to