This is an automated email from the ASF dual-hosted git repository.

arina pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 5d22d708252bd5a84fe2f277f2b8b9a6abc0d9ce Author: Arina Ielchiieva <[email protected]> AuthorDate: Thu May 17 18:24:35 2018 +0300 DRILL-6413: Update ParquetFilterBuilder.visitBooleanOperator to handle simplified boolean expression closes #1269 --- .../store/parquet/AbstractParquetGroupScan.java | 3 +-- .../exec/store/parquet/ParquetFilterBuilder.java | 19 +++++++++++++++---- .../store/parquet/TestParquetFilterPushDown.java | 22 +++++++++++----------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java index 1f8c535..33472bb 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java @@ -255,8 +255,7 @@ public abstract class AbstractParquetGroupScan extends AbstractFileGroupScan { logger.debug("materializedFilter : {}", ExpressionStringBuilder.toString(materializedFilter)); Set<LogicalExpression> constantBoundaries = ConstantExpressionIdentifier.getConstantExpressionSet(materializedFilter); - filterPredicate = (ParquetFilterPredicate) ParquetFilterBuilder.buildParquetFilterPredicate( - materializedFilter, constantBoundaries, udfUtilities); + filterPredicate = ParquetFilterBuilder.buildParquetFilterPredicate(materializedFilter, constantBoundaries, udfUtilities); if (filterPredicate == null) { return null; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java index e55425e..a8e101d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java +++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java @@ -42,6 +42,7 @@ import org.apache.drill.exec.expr.holders.ValueHolder; import org.apache.drill.exec.expr.holders.VarDecimalHolder; import org.apache.drill.exec.expr.stat.ParquetBooleanPredicates; import org.apache.drill.exec.expr.stat.ParquetComparisonPredicates; +import org.apache.drill.exec.expr.stat.ParquetFilterPredicate; import org.apache.drill.exec.expr.stat.ParquetIsPredicates; import org.apache.drill.exec.ops.UdfUtilities; import org.apache.drill.exec.util.DecimalUtility; @@ -54,7 +55,7 @@ import java.util.Set; /** * A visitor which visits a materialized logical expression, and build ParquetFilterPredicate - * If a visitXXX method returns null, that means the corresponding filter branch is not qualified for pushdown. + * If a visitXXX method returns null, that means the corresponding filter branch is not qualified for push down. */ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression, Set<LogicalExpression>, RuntimeException> { static final Logger logger = LoggerFactory.getLogger(ParquetFilterBuilder.class); @@ -66,12 +67,18 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression, * @param constantBoundaries set of constant expressions * @param udfUtilities udf utilities * - * @return logical expression + * @return parquet filter predicate */ - public static LogicalExpression buildParquetFilterPredicate(LogicalExpression expr, final Set<LogicalExpression> constantBoundaries, UdfUtilities udfUtilities) { - return expr.accept(new ParquetFilterBuilder(udfUtilities), constantBoundaries); + public static ParquetFilterPredicate buildParquetFilterPredicate(LogicalExpression expr, final Set<LogicalExpression> constantBoundaries, UdfUtilities udfUtilities) { + LogicalExpression logicalExpression = expr.accept(new ParquetFilterBuilder(udfUtilities), constantBoundaries); + if (logicalExpression instanceof 
ParquetFilterPredicate) { + return (ParquetFilterPredicate) logicalExpression; + } + logger.debug("Logical expression {} was not qualified for filter push down", logicalExpression); + return null; } + private ParquetFilterBuilder(UdfUtilities udfUtilities) { this.udfUtilities = udfUtilities; } @@ -150,6 +157,10 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression, return null; } } else { + if (childPredicate instanceof TypedFieldExpr) { + // Calcite simplifies `= true` expression to field name, wrap it with is true predicate + childPredicate = new ParquetIsPredicates.IsTruePredicate(childPredicate); + } childPredicates.add(childPredicate); } } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java index 83a4e8e..3bbd397 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java @@ -26,6 +26,7 @@ import org.apache.drill.exec.proto.BitControl; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.parquet.format.converter.ParquetMetadataConverter; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.junit.Assert; @@ -424,6 +425,9 @@ public class TestParquetFilterPushDown extends PlanTestBase { final String queryNotEqualFalse = "select col_bln from dfs.`parquetFilterPush/blnTbl` where not col_bln = false"; testParquetFilterPD(queryNotEqualFalse, 4, 2, false); + + final String queryEqualTrueWithAnd = "select col_bln from dfs.`parquetFilterPush/blnTbl` where col_bln = true and unk_col = 'a'"; + testParquetFilterPD(queryEqualTrueWithAnd, 0, 2, false); } @Test // DRILL-5359 @@ -445,10 +449,9 @@ 
public class TestParquetFilterPushDown extends PlanTestBase { public void testMultiRowGroup() throws Exception { // multirowgroup is a parquet file with 2 rowgroups inside. One with a = 1 and the other with a = 2; // FilterPushDown should be able to remove the rowgroup with a = 1 from the scan operator. - final String sql = String.format("select * from dfs.`parquet/multirowgroup.parquet` where a > 1"); + final String sql = "select * from dfs.`parquet/multirowgroup.parquet` where a > 1"; final String[] expectedPlan = {"numRowGroups=1"}; - final String[] excludedPlan = {}; - PlanTestBase.testPlanMatchingPatterns(sql, expectedPlan, excludedPlan); + PlanTestBase.testPlanMatchingPatterns(sql, expectedPlan); } ////////////////////////////////////////////////////////////////////////////////////////////////// @@ -461,23 +464,20 @@ public class TestParquetFilterPushDown extends PlanTestBase { String numFilesPattern = "numFiles=" + expectedNumFiles; String usedMetaPattern = "usedMetadataFile=" + usedMetadataFile; - testPlanMatchingPatterns(query, new String[]{numFilesPattern, usedMetaPattern}, new String[] {}); + testPlanMatchingPatterns(query, new String[]{numFilesPattern, usedMetaPattern}); } - private void testParquetRowGroupFilterEval(final ParquetMetadata footer, final String exprStr, - boolean canDropExpected) throws Exception{ + private void testParquetRowGroupFilterEval(final ParquetMetadata footer, final String exprStr, boolean canDropExpected) throws Exception{ final LogicalExpression filterExpr = parseExpr(exprStr); testParquetRowGroupFilterEval(footer, 0, filterExpr, canDropExpected); } - private void testParquetRowGroupFilterEval(final ParquetMetadata footer, final int rowGroupIndex, - final LogicalExpression filterExpr, boolean canDropExpected) throws Exception { - boolean canDrop = ParquetRGFilterEvaluator.evalFilter(filterExpr, footer, rowGroupIndex, - fragContext.getOptions(), fragContext); + private void testParquetRowGroupFilterEval(final ParquetMetadata 
footer, final int rowGroupIndex, final LogicalExpression filterExpr, boolean canDropExpected) { + boolean canDrop = ParquetRGFilterEvaluator.evalFilter(filterExpr, footer, rowGroupIndex, fragContext.getOptions(), fragContext); Assert.assertEquals(canDropExpected, canDrop); } private ParquetMetadata getParquetMetaData(File file) throws IOException{ - return ParquetFileReader.readFooter(new Configuration(fs.getConf()), new Path(file.toURI())); + return ParquetFileReader.readFooter(new Configuration(fs.getConf()), new Path(file.toURI()), ParquetMetadataConverter.NO_FILTER); } } -- To stop receiving notification emails like this one, please contact [email protected].
