[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16699198#comment-16699198 ] ASF GitHub Bot commented on DRILL-6865: --- asfgit closed pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java index 9bc969f035b..5699d4546f7 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java @@ -85,6 +85,8 @@ private List endpointAffinities; private ParquetGroupScanStatistics parquetGroupScanStatistics; + // whether all row groups of this group scan fully match the filter + private boolean matchAllRowGroups = false; protected AbstractParquetGroupScan(String userName, List columns, @@ -111,6 +113,7 @@ protected AbstractParquetGroupScan(AbstractParquetGroupScan that) { this.fileSet = that.fileSet == null ? null : new HashSet<>(that.fileSet); this.entries = that.entries == null ? 
null : new ArrayList<>(that.entries); this.readerConfig = that.readerConfig; +this.matchAllRowGroups = that.matchAllRowGroups; } @JsonProperty @@ -135,6 +138,11 @@ public ParquetReaderConfig getReaderConfig() { return readerConfig; } + @JsonIgnore + public boolean isMatchAllRowGroups() { +return matchAllRowGroups; + } + @JsonIgnore @Override public Collection getFiles() { @@ -229,15 +237,12 @@ public void setFilter(LogicalExpression filter) { } @Override - public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtilities, - FunctionImplementationRegistry functionImplementationRegistry, OptionManager optionManager) { - -if (rowGroupInfos.size() == 1 || -! (parquetTableMetadata.isRowGroupPrunable()) || -rowGroupInfos.size() > optionManager.getOption(PlannerSettings.PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING_THRESHOLD) -) { - // Stop pruning for 3 cases: - //- 1 single parquet file, + public AbstractParquetGroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtilities, + FunctionImplementationRegistry functionImplementationRegistry, OptionManager optionManager) { + +if (!parquetTableMetadata.isRowGroupPrunable() || +rowGroupInfos.size() > optionManager.getOption(PlannerSettings.PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING_THRESHOLD)) { + // Stop pruning for 2 cases: //- metadata does not have proper format to support row group level filter pruning, //- # of row groups is beyond PARQUET_ROWGROUP_FILTER_PUSHDOWN_PLANNING_THRESHOLD. return null; @@ -248,7 +253,13 @@ public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtili final List qualifiedRGs = new ArrayList<>(rowGroupInfos.size()); Set qualifiedFilePath = new HashSet<>(); // HashSet keeps a fileName unique. 
-ParquetFilterPredicate filterPredicate = null; +ParquetFilterPredicate filterPredicate = getParquetFilterPredicate(filterExpr, udfUtilities, functionImplementationRegistry, optionManager, true); + +if (filterPredicate == null) { + return null; +} + +boolean matchAllRowGroupsLocal = true; for (RowGroupInfo rowGroup : rowGroupInfos) { final ColumnExplorer columnExplorer = new ColumnExplorer(optionManager, columns); @@ -262,41 +273,33 @@ public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtili Map columnStatisticsMap = statCollector.collectColStat(schemaPathsInExpr); - if (filterPredicate == null) { -ErrorCollector errorCollector = new ErrorCollectorImpl(); -LogicalExpression materializedFilter = ExpressionTreeMaterializer.materializeFilterExpr( -filterExpr, columnStatisticsMap, errorCollector, functionImplementationRegistry); - -if (errorCollector.hasErrors()) { - logger.error("{} error(s) encountered when materialize filter expression : {}", - errorCollector.getErrorCount(), errorCollector.toErrorString()); - return null; -} -logger.debug("materializedFilter : {}", ExpressionStringBuilder.toString(materializedFilter)); - -Set constantBoundaries =
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16698804#comment-16698804 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on issue #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#issuecomment-441604891 +1, LGTM. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet > Affects Versions: 1.14.0 > Reporter: Volodymyr Vysotskyi > Assignee: Volodymyr Vysotskyi > Priority: Blocker > Fix For: 1.15.0 > > > DRILL-5796 implemented removing the filter from the plan when some (or > all) row groups of a parquet table fully match the filter. > When the filter contains predicates which the parquet filter predicate > does not support, they can in some cases be omitted from the resulting > filter predicate. When row groups fully match the predicates which are left in the > filter, the whole filter is removed from the plan and a wrong result is > returned. > Example of a query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. 
regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15 | MOROCCO | 0| rns. blithely bold > courts among the closely regular packages use furiously bold platelets? >| > | t1| 16 | MOZAMBIQUE | 0| s. ironic, unusual > asymptotes wake blithely r >| > | t1| 17 | PERU| 1| platelets. blithely > pending dependencies use fluffily across the even pinto beans. carefully > silent accoun | > | t1| 18 | CHINA | 2| c dependencies. > furiously express notornis sleep slyly regular accounts. ideas
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16698146#comment-16698146 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r236069655 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java ## @@ -155,39 +170,53 @@ protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel pro Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null; -final GroupScan newGroupScan = groupScan.applyFilter(conditionExp,optimizerContext, +AbstractParquetGroupScan newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, optimizerContext.getFunctionRegistry(), optimizerContext.getPlannerSettings().getOptions()); if (timer != null) { logger.debug("Took {} ms to apply filter on parquet row groups. ", timer.elapsed(TimeUnit.MILLISECONDS)); timer.stop(); } -if (newGroupScan == null ) { +if (newGroupScan == null) { + if (groupScan.isMatchAllRowGroups()) { +RelNode child = project == null ? scan : project; +// if current row group fully matches filter, +// but row group pruning wasn't happened, removes filter. +if (nonConvertedPredList.size() == 0) { + call.transformTo(child); +} else if (nonConvertedPredList.size() < predList.size()) { Review comment: In this case, a plain `else` would cover both the `nonConvertedPredList.size() < predList.size()` and the `nonConvertedPredList.size() == predList.size()` cases, but as I pointed out in the comment above, we shouldn't do anything in the latter case. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies.
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16698147#comment-16698147 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r236069944 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java ## @@ -63,6 +63,7 @@ static final Logger logger = LoggerFactory.getLogger(ParquetFilterBuilder.class); private final UdfUtilities udfUtilities; + private final boolean omitUnsupportedExprs; Review comment: Added a description of the case in which it should be used to its Javadoc. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. 
> Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15 | MOROCCO | 0| rns. blithely bold > courts among the closely regular packages use furiously bold platelets? >| > | t1| 16 | MOZAMBIQUE | 0| s. ironic, unusual > asymptotes wake blithely r
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697841#comment-16697841 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r236044637 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java ## @@ -63,6 +63,7 @@ static final Logger logger = LoggerFactory.getLogger(ParquetFilterBuilder.class); private final UdfUtilities udfUtilities; + private final boolean omitUnsupportedExprs; Review comment: Please describe cases when we need this flag. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. 
> Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15 | MOROCCO | 0| rns. blithely bold > courts among the closely regular packages use furiously bold platelets? >| > | t1| 16 | MOZAMBIQUE | 0| s. ironic, unusual > asymptotes wake blithely r
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697842#comment-16697842 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r236044696 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java ## @@ -155,39 +170,53 @@ protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel pro Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null; -final GroupScan newGroupScan = groupScan.applyFilter(conditionExp,optimizerContext, +AbstractParquetGroupScan newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, optimizerContext.getFunctionRegistry(), optimizerContext.getPlannerSettings().getOptions()); if (timer != null) { logger.debug("Took {} ms to apply filter on parquet row groups. ", timer.elapsed(TimeUnit.MILLISECONDS)); timer.stop(); } -if (newGroupScan == null ) { +if (newGroupScan == null) { + if (groupScan.isMatchAllRowGroups()) { +RelNode child = project == null ? scan : project; +// if current row group fully matches filter, +// but row group pruning wasn't happened, removes filter. +if (nonConvertedPredList.size() == 0) { + call.transformTo(child); +} else if (nonConvertedPredList.size() < predList.size()) { Review comment: This is clear; my concern was about the second condition, `} else if (nonConvertedPredList.size() < predList.size())`. Why can't we use a plain `else` instead? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 |
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697396#comment-16697396 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235994435 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java ## @@ -262,41 +273,31 @@ public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtili Map columnStatisticsMap = statCollector.collectColStat(schemaPathsInExpr); - if (filterPredicate == null) { -ErrorCollector errorCollector = new ErrorCollectorImpl(); -LogicalExpression materializedFilter = ExpressionTreeMaterializer.materializeFilterExpr( -filterExpr, columnStatisticsMap, errorCollector, functionImplementationRegistry); - -if (errorCollector.hasErrors()) { - logger.error("{} error(s) encountered when materialize filter expression : {}", - errorCollector.getErrorCount(), errorCollector.toErrorString()); - return null; -} -logger.debug("materializedFilter : {}", ExpressionStringBuilder.toString(materializedFilter)); - -Set constantBoundaries = ConstantExpressionIdentifier.getConstantExpressionSet(materializedFilter); -filterPredicate = ParquetFilterBuilder.buildParquetFilterPredicate(materializedFilter, constantBoundaries, udfUtilities); - -if (filterPredicate == null) { - return null; -} - } - - ParquetFilterPredicate.RowsMatch match = ParquetRGFilterEvaluator.matches(filterPredicate, columnStatisticsMap, rowGroup.getRowCount(), parquetTableMetadata, rowGroup.getColumns(), schemaPathsInExpr); + ParquetFilterPredicate.RowsMatch match = ParquetRGFilterEvaluator.matches(filterPredicate, + columnStatisticsMap, rowGroup.getRowCount(), parquetTableMetadata, rowGroup.getColumns(), schemaPathsInExpr); if (match == ParquetFilterPredicate.RowsMatch.NONE) { continue; // 
No row comply to the filter => drop the row group } - rowGroup.setRowsMatch(match); + if (matchAllRowGroupsLocal) { Review comment: Thanks, done This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco >
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697403#comment-16697403 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235997432 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java ## @@ -155,39 +170,53 @@ protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel pro Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null; -final GroupScan newGroupScan = groupScan.applyFilter(conditionExp,optimizerContext, +AbstractParquetGroupScan newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, optimizerContext.getFunctionRegistry(), optimizerContext.getPlannerSettings().getOptions()); if (timer != null) { logger.debug("Took {} ms to apply filter on parquet row groups. ", timer.elapsed(TimeUnit.MILLISECONDS)); timer.stop(); } -if (newGroupScan == null ) { +if (newGroupScan == null) { + if (groupScan.isMatchAllRowGroups()) { +RelNode child = project == null ? scan : project; +// if current row group fully matches filter, +// but row group pruning wasn't happened, removes filter. +if (nonConvertedPredList.size() == 0) { + call.transformTo(child); +} else if (nonConvertedPredList.size() < predList.size()) { Review comment: For the case when `nonConvertedPredList.size() == predList.size()`, none of the predicates participated in filter pushdown, so `call.transformTo()` shouldn't be called for this case. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. >
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697398#comment-16697398 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235997768 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java ## @@ -71,18 +72,24 @@ * * @return parquet filter predicate */ - public static ParquetFilterPredicate buildParquetFilterPredicate(LogicalExpression expr, final Set constantBoundaries, UdfUtilities udfUtilities) { -LogicalExpression logicalExpression = expr.accept(new ParquetFilterBuilder(udfUtilities), constantBoundaries); + public static ParquetFilterPredicate buildParquetFilterPredicate(LogicalExpression expr, + Set constantBoundaries, UdfUtilities udfUtilities, boolean omitUnsupportedExprs) { +LogicalExpression logicalExpression = +expr.accept(new ParquetFilterBuilder(udfUtilities, omitUnsupportedExprs), constantBoundaries); if (logicalExpression instanceof ParquetFilterPredicate) { return (ParquetFilterPredicate) logicalExpression; +} else if (logicalExpression instanceof TypedFieldExpr) { + // Calcite simplifies `= true` expression to field name, wrap it with is true predicate + return (ParquetFilterPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_TRUE, logicalExpression); Review comment: This change was added because we now try to convert every expression, in particular the arguments of the `AND` operator. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697402#comment-16697402 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235997054 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java ## @@ -172,14 +170,30 @@ protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel pro Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null; -final GroupScan newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, +AbstractParquetGroupScan newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, optimizerContext.getFunctionRegistry(), optimizerContext.getPlannerSettings().getOptions()); if (timer != null) { logger.debug("Took {} ms to apply filter on parquet row groups. ", timer.elapsed(TimeUnit.MILLISECONDS)); timer.stop(); } -if (newGroupScan == null ) { +if (newGroupScan == null) { + if (groupScan.isMatchAllRowGroups()) { +RelNode child = project == null ? scan : project; +// if current row group fully matches filter, +// but row group pruning wasn't happened, removes filter. Review comment: thanks, changed. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697397#comment-16697397 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235995816 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java ## @@ -310,13 +311,60 @@ public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtili AbstractParquetGroupScan cloneGroupScan = cloneWithFileSelection(qualifiedFilePath); cloneGroupScan.rowGroupInfos = qualifiedRGs; cloneGroupScan.parquetGroupScanStatistics.collect(cloneGroupScan.rowGroupInfos, cloneGroupScan.parquetTableMetadata); + cloneGroupScan.matchAllRowGroups = matchAllRowGroupsLocal; return cloneGroupScan; } catch (IOException e) { logger.warn("Could not apply filter prune due to Exception : {}", e); return null; } } + + /** + * Returns parquet filter predicate built from specified {@code filterExpr}. + * + * @param filterExpr filter expression to build + * @param udfUtilities udf utilities + * @param functionImplementationRegistry context to find drill function holder + * @param optionManager option manager + * @param omitUnsupportedExprs whether expressions which cannot be converted + * may be omitted from the resulting expression + * @return parquet filter predicate + */ + public ParquetFilterPredicate getParquetFilterPredicate(LogicalExpression filterExpr, + UdfUtilities udfUtilities, FunctionImplementationRegistry functionImplementationRegistry, + OptionManager optionManager, boolean omitUnsupportedExprs) { +// used first row group to receive fields list +RowGroupInfo rowGroup = rowGroupInfos.iterator().next(); Review comment: Thanks, done. This is an automated message from the Apache Git Service. 
To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697399#comment-16697399 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235995999 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java ## @@ -63,6 +63,7 @@ static final Logger logger = LoggerFactory.getLogger(ParquetFilterBuilder.class); private final UdfUtilities udfUtilities; + private final boolean omitUnsupportedExprs; Review comment: Thanks, added. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. 
> Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15 | MOROCCO | 0| rns. blithely bold > courts among the closely regular packages use furiously bold platelets? >| > | t1| 16 | MOZAMBIQUE | 0| s. ironic, unusual > asymptotes wake blithely r
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697401#comment-16697401 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235997009 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java ## @@ -134,12 +132,29 @@ protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel pro // get a conjunctions of the filter condition. For each conjunction, if it refers to ITEM or FLATTEN expression // then we could not pushed down. Otherwise, it's qualified to be pushed down. -final List predList = RelOptUtil.conjunctions(condition); +final List predList = RelOptUtil.conjunctions(RexUtil.toCnf(filter.getCluster().getRexBuilder(), condition)); Review comment: We need to convert the initial expression to conjunctive normal form, so that it will be split into predicates more precisely, and they will be divided into predicates which are supported by parquet filter pushdown and predicates which aren't. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter.
> For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa >
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697400#comment-16697400 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235995661 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java ## @@ -310,13 +311,60 @@ public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtili AbstractParquetGroupScan cloneGroupScan = cloneWithFileSelection(qualifiedFilePath); cloneGroupScan.rowGroupInfos = qualifiedRGs; cloneGroupScan.parquetGroupScanStatistics.collect(cloneGroupScan.rowGroupInfos, cloneGroupScan.parquetTableMetadata); + cloneGroupScan.matchAllRowGroups = matchAllRowGroupsLocal; return cloneGroupScan; } catch (IOException e) { logger.warn("Could not apply filter prune due to Exception : {}", e); return null; } } + + /** + * Returns parquet filter predicate built from specified {@code filterExpr}. + * + * @param filterExpr filter expression to build + * @param udfUtilities udf utilities + * @param functionImplementationRegistry context to find drill function holder + * @param optionManager option manager + * @param omitUnsupportedExprs whether expressions which cannot be converted + * may be omitted from the resulting expression + * @return parquet filter predicate + */ + public ParquetFilterPredicate getParquetFilterPredicate(LogicalExpression filterExpr, Review comment: `applyFilter()` method from the previous code returns `null` if the filter wasn't created from first row group. I agree with you that schema change may break filter pushdown, but currently, we cannot predict that the filter built from one row group will be suitable for other ones. This is an automated message from the Apache Git Service. 
To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697395#comment-16697395 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235994025 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java ## @@ -85,6 +85,8 @@ private List endpointAffinities; private ParquetGroupScanStatistics parquetGroupScanStatistics; + // whether all row groups of this group scan fully matches the filter Review comment: Thanks, fixed This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. 
> Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15 | MOROCCO | 0| rns. blithely bold > courts among the closely regular packages use furiously bold platelets? >| > | t1| 16 | MOZAMBIQUE | 0| s. ironic, unusual > asymptotes wake blithely r
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697273#comment-16697273 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235950497 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java ## @@ -85,6 +85,8 @@ private List endpointAffinities; private ParquetGroupScanStatistics parquetGroupScanStatistics; + // whether all row groups of this group scan fully matches the filter Review comment: fully match This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. 
> Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15 | MOROCCO | 0| rns. blithely bold > courts among the closely regular packages use furiously bold platelets? >| > | t1| 16 | MOZAMBIQUE | 0| s. ironic, unusual > asymptotes wake blithely r
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697274#comment-16697274 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235969168 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java ## @@ -71,18 +72,24 @@ * * @return parquet filter predicate */ - public static ParquetFilterPredicate buildParquetFilterPredicate(LogicalExpression expr, final Set constantBoundaries, UdfUtilities udfUtilities) { -LogicalExpression logicalExpression = expr.accept(new ParquetFilterBuilder(udfUtilities), constantBoundaries); + public static ParquetFilterPredicate buildParquetFilterPredicate(LogicalExpression expr, + Set constantBoundaries, UdfUtilities udfUtilities, boolean omitUnsupportedExprs) { +LogicalExpression logicalExpression = +expr.accept(new ParquetFilterBuilder(udfUtilities, omitUnsupportedExprs), constantBoundaries); if (logicalExpression instanceof ParquetFilterPredicate) { return (ParquetFilterPredicate) logicalExpression; +} else if (logicalExpression instanceof TypedFieldExpr) { + // Calcite simplifies `= true` expression to field name, wrap it with is true predicate + return (ParquetFilterPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_TRUE, logicalExpression); Review comment: Not sure about this change, initially it was added during `visitBooleanOperator`, now you do this for all. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697267#comment-16697267 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235960629 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java ## @@ -310,13 +311,60 @@ public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtili AbstractParquetGroupScan cloneGroupScan = cloneWithFileSelection(qualifiedFilePath); cloneGroupScan.rowGroupInfos = qualifiedRGs; cloneGroupScan.parquetGroupScanStatistics.collect(cloneGroupScan.rowGroupInfos, cloneGroupScan.parquetTableMetadata); + cloneGroupScan.matchAllRowGroups = matchAllRowGroupsLocal; return cloneGroupScan; } catch (IOException e) { logger.warn("Could not apply filter prune due to Exception : {}", e); return null; } } + + /** + * Returns parquet filter predicate built from specified {@code filterExpr}. + * + * @param filterExpr filter expression to build + * @param udfUtilities udf utilities + * @param functionImplementationRegistry context to find drill function holder + * @param optionManager option manager + * @param omitUnsupportedExprs whether expressions which cannot be converted + * may be omitted from the resulting expression + * @return parquet filter predicate + */ + public ParquetFilterPredicate getParquetFilterPredicate(LogicalExpression filterExpr, + UdfUtilities udfUtilities, FunctionImplementationRegistry functionImplementationRegistry, + OptionManager optionManager, boolean omitUnsupportedExprs) { +// used first row group to receive fields list +RowGroupInfo rowGroup = rowGroupInfos.iterator().next(); Review comment: At least add an assert that will ensure that we have at least one row group.
This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun >
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697272#comment-16697272 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235966100 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java ## @@ -262,41 +273,31 @@ public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtili Map columnStatisticsMap = statCollector.collectColStat(schemaPathsInExpr); - if (filterPredicate == null) { -ErrorCollector errorCollector = new ErrorCollectorImpl(); -LogicalExpression materializedFilter = ExpressionTreeMaterializer.materializeFilterExpr( -filterExpr, columnStatisticsMap, errorCollector, functionImplementationRegistry); - -if (errorCollector.hasErrors()) { - logger.error("{} error(s) encountered when materialize filter expression : {}", - errorCollector.getErrorCount(), errorCollector.toErrorString()); - return null; -} -logger.debug("materializedFilter : {}", ExpressionStringBuilder.toString(materializedFilter)); - -Set constantBoundaries = ConstantExpressionIdentifier.getConstantExpressionSet(materializedFilter); -filterPredicate = ParquetFilterBuilder.buildParquetFilterPredicate(materializedFilter, constantBoundaries, udfUtilities); - -if (filterPredicate == null) { - return null; -} - } - - ParquetFilterPredicate.RowsMatch match = ParquetRGFilterEvaluator.matches(filterPredicate, columnStatisticsMap, rowGroup.getRowCount(), parquetTableMetadata, rowGroup.getColumns(), schemaPathsInExpr); + ParquetFilterPredicate.RowsMatch match = ParquetRGFilterEvaluator.matches(filterPredicate, + columnStatisticsMap, rowGroup.getRowCount(), parquetTableMetadata, rowGroup.getColumns(), schemaPathsInExpr); if (match == ParquetFilterPredicate.RowsMatch.NONE) { 
continue; // No row comply to the filter => drop the row group } - rowGroup.setRowsMatch(match); + if (matchAllRowGroupsLocal) { Review comment: Please add comment above. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697268#comment-16697268 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235960462 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java ## @@ -63,6 +63,7 @@ static final Logger logger = LoggerFactory.getLogger(ParquetFilterBuilder.class); private final UdfUtilities udfUtilities; + private final boolean omitUnsupportedExprs; Review comment: Please add javadoc explaining cases when we want to omit unsupported expressions and when we don't. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. 
> Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15 | MOROCCO | 0| rns. blithely bold > courts among the closely regular packages use furiously bold platelets? >| > | t1| 16 | MOZAMBIQUE | 0
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697269#comment-16697269 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235964950 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java ## @@ -310,13 +311,60 @@ public GroupScan applyFilter(LogicalExpression filterExpr, UdfUtilities udfUtili AbstractParquetGroupScan cloneGroupScan = cloneWithFileSelection(qualifiedFilePath); cloneGroupScan.rowGroupInfos = qualifiedRGs; cloneGroupScan.parquetGroupScanStatistics.collect(cloneGroupScan.rowGroupInfos, cloneGroupScan.parquetTableMetadata); + cloneGroupScan.matchAllRowGroups = matchAllRowGroupsLocal; return cloneGroupScan; } catch (IOException e) { logger.warn("Could not apply filter prune due to Exception : {}", e); return null; } } + + /** + * Returns parquet filter predicate built from specified {@code filterExpr}. + * + * @param filterExpr filter expression to build + * @param udfUtilities udf utilities + * @param functionImplementationRegistry context to find drill function holder + * @param optionManager option manager + * @param omitUnsupportedExprs whether expressions which cannot be converted + * may be omitted from the resulting expression + * @return parquet filter predicate + */ + public ParquetFilterPredicate getParquetFilterPredicate(LogicalExpression filterExpr, Review comment: Maybe filter creation was previously done in a loop to handle the case when we could not build the filter from the first row group but were able to build it for the second (for example, if they came from different files)? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly.
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697275#comment-16697275 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235957943 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java ## @@ -155,39 +170,53 @@ protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel pro Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null; -final GroupScan newGroupScan = groupScan.applyFilter(conditionExp,optimizerContext, +AbstractParquetGroupScan newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, optimizerContext.getFunctionRegistry(), optimizerContext.getPlannerSettings().getOptions()); if (timer != null) { logger.debug("Took {} ms to apply filter on parquet row groups. ", timer.elapsed(TimeUnit.MILLISECONDS)); timer.stop(); } -if (newGroupScan == null ) { +if (newGroupScan == null) { + if (groupScan.isMatchAllRowGroups()) { +RelNode child = project == null ? scan : project; +// if current row group fully matches filter, +// but row group pruning wasn't happened, removes filter. +if (nonConvertedPredList.size() == 0) { + call.transformTo(child); +} else if (nonConvertedPredList.size() < predList.size()) { Review comment: Wouldn't a plain else be enough? Why check that the non-converted list is smaller? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697271#comment-16697271 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235949907 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java ## @@ -172,14 +170,30 @@ protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel pro Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null; -final GroupScan newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, +AbstractParquetGroupScan newGroupScan = groupScan.applyFilter(conditionExp, optimizerContext, optimizerContext.getFunctionRegistry(), optimizerContext.getPlannerSettings().getOptions()); if (timer != null) { logger.debug("Took {} ms to apply filter on parquet row groups. ", timer.elapsed(TimeUnit.MILLISECONDS)); timer.stop(); } -if (newGroupScan == null ) { +if (newGroupScan == null) { + if (groupScan.isMatchAllRowGroups()) { +RelNode child = project == null ? scan : project; +// if current row group fully matches filter, +// but row group pruning wasn't happened, removes filter. Review comment: did not happen, remove the filter This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697270#comment-16697270 ] ASF GitHub Bot commented on DRILL-6865: --- arina-ielchiieva commented on a change in pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552#discussion_r235952813 ## File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java ## @@ -134,12 +132,29 @@ protected void doOnMatch(RelOptRuleCall call, FilterPrel filter, ProjectPrel pro // get a conjunctions of the filter condition. For each conjunction, if it refers to ITEM or FLATTEN expression // then we could not pushed down. Otherwise, it's qualified to be pushed down. -final List predList = RelOptUtil.conjunctions(condition); +final List predList = RelOptUtil.conjunctions(RexUtil.toCnf(filter.getCluster().getRexBuilder(), condition)); Review comment: Why is this change needed? This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. 
When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697088#comment-16697088 ] ASF GitHub Bot commented on DRILL-6865: --- vvysotskyi opened a new pull request #1552: DRILL-6865: Query returns wrong result when filter pruning happens URL: https://github.com/apache/drill/pull/1552 This PR contains two commits: - The first commit contains changes to preserve predicates from the filter condition which weren't used for filter pushdown, to avoid the case when the whole filter is pruned. Instead of pruning the whole filter, only the predicates which were used in the row group filtering are removed. Please note that this problem happened only for the case when a row group fully matches the predicates which are used in the filter pushdown. - The second commit contains changes to remove the filter from the plan when a parquet table has a single row group which fully matches the filter. For the problem description please see [DRILL-6865](https://issues.apache.org/jira/browse/DRILL-6865). This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. 
When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1|
[jira] [Commented] (DRILL-6865) Query returns wrong result when filter pruning happens
[ https://issues.apache.org/jira/browse/DRILL-6865?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16697079#comment-16697079 ] Anton Gozhiy commented on DRILL-6865: - The issue is also reproduced with the following case: {code:sql} select * from dfs.tmp.`multy` where n_nationkey > 5 and n_nationkey/2 < 5 {code} > Query returns wrong result when filter pruning happens > -- > > Key: DRILL-6865 > URL: https://issues.apache.org/jira/browse/DRILL-6865 > Project: Apache Drill > Issue Type: Bug > Components: Storage - Parquet >Affects Versions: 1.14.0 >Reporter: Volodymyr Vysotskyi >Assignee: Volodymyr Vysotskyi >Priority: Blocker > Fix For: 1.15.0 > > > In DRILL-5796 was implemented removing the filter from the plan when some (or > all) row groups of parquet table fully match the filter. > For the case when filter has some predicates which parquet filter predicate > does not support, they can be omitted for some cases from the resulting > filter predicate. When row groups fully match predicates which left in the > filter, the whole filter is removed from the plan and the wrong result is > returned. > Example of the query for reproducing this bug: > {code:sql} > create table dfs.tmp.`multi/t1` as select * from cp.`tpch/nation.parquet` > where n_nationkey > 5; > create table dfs.tmp.`multi/t2` as select * from cp.`tpch/nation.parquet` > where n_nationkey < 5; > select * from dfs.tmp.`multi` where n_nationkey > 5 and n_nationkey like > '%10%'; > {code} > returns > {noformat} > +---+--+-+--+-+ > | dir0 | n_nationkey | n_name | n_regionkey | > n_comment > | > +---+--+-+--+-+ > | t1| 6| FRANCE | 3| refully final > requests. regular, ironi > | > | t1| 7| GERMANY | 3| l platelets. > regular accounts x-ray: unusual, regular acco > | > | t1| 8| INDIA | 2| ss excuses cajole > slyly across the packages. deposits print aroun > | > | t1| 9| INDONESIA | 2| slyly express > asymptotes. regular deposits haggle slyly. carefully ironic hockey players > sleep blithely. 
carefull | > | t1| 10 | IRAN| 4| efully alongside of > the slyly final dependencies. > | > | t1| 11 | IRAQ| 4| nic deposits boost > atop the quickly final requests? quickly regula >| > | t1| 12 | JAPAN | 2| ously. final, > express gifts cajole a > | > | t1| 13 | JORDAN | 4| ic deposits are > blithely about the carefully regular pa > | > | t1| 14 | KENYA | 0| pending excuses > haggle furiously deposits. pending, express pinto beans wake fluffily past t > | > | t1| 15 | MOROCCO | 0| rns. blithely bold > courts among the closely regular packages use furiously bold platelets? >| > | t1| 16 | MOZAMBIQUE | 0| s. ironic, unusual > asymptotes wake blithely r >| > | t1| 17 | PERU| 1| platelets. blithely > pending dependencies use fluffily across the even pinto beans. carefully > silent accoun | > | t1| 18 | CHINA | 2| c dependencies. > furiously express notornis sleep slyly regular accounts. ideas sleep. depos > | > | t1| 19 | ROMANIA | 3| ular asymptotes are > about the furious multipliers. express dependencies nag above the ironically > ironic account | > | t1| 20 | SAUDI ARABIA| 4| ts. silent requests > haggle. closely express packages sleep across the blithely