This is an automated email from the ASF dual-hosted git repository.

gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 0fa2967  DRILL-7245: Cap NDV at row count after applying filters
0fa2967 is described below

commit 0fa29677aea131727459c23602080fdf307c9ae1
Author: Gautam Parai <[email protected]>
AuthorDate: Tue May 7 17:44:44 2019 -0700

    DRILL-7245: Cap NDV at row count after applying filters
    
    closes #1786
---
 .../drill/exec/planner/cost/DrillRelMdDistinctRowCount.java   | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
index ae62449..d7f701e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
@@ -136,7 +136,7 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
    */
   private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery 
mq, DrillTable table,
       ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
-    double selectivity, rowCount;
+    double selectivity, gbyColPredSel, rowCount;
     /* If predicate is present, determine its selectivity to estimate filtered 
rows.
      * Thereafter, compute the number of distinct rows.
      */
@@ -172,16 +172,17 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
         break;
       }
       estRowCnt *= ndv;
-      selectivity = getPredSelectivityContainingInputRef(predicate, i, mq, 
scan);
+      gbyColPredSel = getPredSelectivityContainingInputRef(predicate, i, mq, 
scan);
       /* If predicate is on group-by column, scale down the NDV by 
selectivity. Consider the query
        * select a, b from t where a = 10 group by a, b. Here, NDV(a) will be 
scaled down by SEL(a)
        * whereas NDV(b) will not.
        */
-      if (selectivity > 0) {
-        estRowCnt *= selectivity;
+      if (gbyColPredSel > 0) {
+        estRowCnt *= gbyColPredSel;
       }
     }
-    estRowCnt = Math.min(estRowCnt, rowCount);
+    // Estimated NDV should not exceed number of rows after applying the 
filters
+    estRowCnt = Math.min(estRowCnt, selectivity*rowCount);
     if (!allColsHaveNDV) {
       if (logger.isDebugEnabled()) {
         logger.debug(String.format("NDV not available for %s(%s). Using 
default rowcount for group-by %s",

Reply via email to