This is an automated email from the ASF dual-hosted git repository.
gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 0fa2967 DRILL-7245: Cap NDV at row count after applying filters
0fa2967 is described below
commit 0fa29677aea131727459c23602080fdf307c9ae1
Author: Gautam Parai <[email protected]>
AuthorDate: Tue May 7 17:44:44 2019 -0700
DRILL-7245: Cap NDV at row count after applying filters
closes #1786
---
.../drill/exec/planner/cost/DrillRelMdDistinctRowCount.java | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
index ae62449..d7f701e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
@@ -136,7 +136,7 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
*/
private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery
mq, DrillTable table,
ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
- double selectivity, rowCount;
+ double selectivity, gbyColPredSel, rowCount;
/* If predicate is present, determine its selectivity to estimate filtered rows.
* Thereafter, compute the number of distinct rows.
*/
@@ -172,16 +172,17 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
break;
}
estRowCnt *= ndv;
- selectivity = getPredSelectivityContainingInputRef(predicate, i, mq,
scan);
+ gbyColPredSel = getPredSelectivityContainingInputRef(predicate, i, mq,
scan);
/* If predicate is on group-by column, scale down the NDV by selectivity. Consider the query
* select a, b from t where a = 10 group by a, b. Here, NDV(a) will be scaled down by SEL(a)
* whereas NDV(b) will not.
*/
- if (selectivity > 0) {
- estRowCnt *= selectivity;
+ if (gbyColPredSel > 0) {
+ estRowCnt *= gbyColPredSel;
}
}
- estRowCnt = Math.min(estRowCnt, rowCount);
+ // Estimated NDV should not exceed number of rows after applying the filters
+ estRowCnt = Math.min(estRowCnt, selectivity*rowCount);
if (!allColsHaveNDV) {
if (logger.isDebugEnabled()) {
logger.debug(String.format("NDV not available for %s(%s). Using
default rowcount for group-by %s",