This is an automated email from the ASF dual-hosted git repository.
aokolnychyi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 1e2a71398f Parquet: Add log entry when Bloom filters are used (#9010)
1e2a71398f is described below
commit 1e2a71398fb5564d1cd9f4e12b3d1acc568f8ef7
Author: Huaxin Gao <[email protected]>
AuthorDate: Thu Nov 16 16:16:15 2023 -0800
Parquet: Add log entry when Bloom filters are used (#9010)
Co-authored-by: Huaxin Gao <[email protected]>
---
.../iceberg/parquet/ParquetBloomRowGroupFilter.java | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java
index de7f46a018..11201bd88e 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java
@@ -48,8 +48,13 @@ import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class ParquetBloomRowGroupFilter {
+
+ private static final Logger LOG = LoggerFactory.getLogger(ParquetBloomRowGroupFilter.class);
+
private final Schema schema;
private final Expression expr;
private final boolean caseSensitive;
@@ -114,10 +119,13 @@ public class ParquetBloomRowGroupFilter {
Set<Integer> filterRefs =
Binder.boundReferences(schema.asStruct(), ImmutableList.of(expr), caseSensitive);
- // If the filter's column set doesn't overlap with any bloom filter columns, exit early with
- // ROWS_MIGHT_MATCH
- if (!filterRefs.isEmpty() && Sets.intersection(fieldsWithBloomFilter, filterRefs).isEmpty()) {
- return ROWS_MIGHT_MATCH;
+ if (!filterRefs.isEmpty()) {
+ Set<Integer> overlappedBloomFilters = Sets.intersection(fieldsWithBloomFilter, filterRefs);
+ if (overlappedBloomFilters.isEmpty()) {
+ return ROWS_MIGHT_MATCH;
+ } else {
+ LOG.debug("Using Bloom filters for columns with IDs: {}", overlappedBloomFilters);
+ }
}
return ExpressionVisitors.visitEvaluator(expr, this);