Repository: incubator-impala
Updated Branches:
  refs/heads/master 3480c892c -> d06225746


IMPALA-5494: Fixes the selectivity of NOT IN predicates

This change modifies the logic of NOT IN predicate so that
the planner can calculate the correct node cardinality. Prior
to this change, both IN and NOT IN predicates shared the
same selectivity, which resulted in the same cardinality
during planning.

The selectivity is calculated by the following heuristic:

selectivity = 1 - (num of predicate children /
                num of distinct values)

Change-Id: I69e6217257b5618cb63e13b32ba3347fa0483b63
Reviewed-on: http://gerrit.cloudera.org:8080/7168
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d0622574
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d0622574
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d0622574

Branch: refs/heads/master
Commit: d062257462fcbdc399c28d724b2ad5b4cad0f216
Parents: 3480c89
Author: Vincent Tran <[email protected]>
Authored: Tue Jun 13 03:07:04 2017 -0400
Committer: Impala Public Jenkins <[email protected]>
Committed: Fri Jun 16 22:18:07 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/analysis/InPredicate.java | 12 ++--
 .../impala/analysis/ExprSelectivityTest.java    | 63 ++++++++++++++++++++
 .../queries/PlannerTest/kudu-selectivity.test   |  4 +-
 3 files changed, 73 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0622574/fe/src/main/java/org/apache/impala/analysis/InPredicate.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/InPredicate.java 
b/fe/src/main/java/org/apache/impala/analysis/InPredicate.java
index d95fb3a..de12167 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InPredicate.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InPredicate.java
@@ -173,11 +173,15 @@ public class InPredicate extends Predicate {
     // TODO: Fix selectivity_ for nested predicate
     Reference<SlotRef> slotRefRef = new Reference<SlotRef>();
     Reference<Integer> idxRef = new Reference<Integer>();
-    if (isSingleColumnPredicate(slotRefRef, idxRef)
-        && idxRef.getRef() == 0
+    if (isSingleColumnPredicate(slotRefRef, idxRef) && idxRef.getRef() == 0
         && slotRefRef.getRef().getNumDistinctValues() > 0) {
-      selectivity_ = (double) (getChildren().size() - 1)
-          / (double) slotRefRef.getRef().getNumDistinctValues();
+      if (isNotIn()) {
+        selectivity_ = 1.0 - ((double) (getChildren().size() - 1)
+            / (double) slotRefRef.getRef().getNumDistinctValues());
+      } else {
+        selectivity_ = (double) (getChildren().size() - 1)
+            / (double) slotRefRef.getRef().getNumDistinctValues();
+      }
       selectivity_ = Math.max(0.0, Math.min(1.0, selectivity_));
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0622574/fe/src/test/java/org/apache/impala/analysis/ExprSelectivityTest.java
----------------------------------------------------------------------
diff --git 
a/fe/src/test/java/org/apache/impala/analysis/ExprSelectivityTest.java 
b/fe/src/test/java/org/apache/impala/analysis/ExprSelectivityTest.java
new file mode 100644
index 0000000..de46a07
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprSelectivityTest.java
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FrontendTestBase;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests selectivity estimates for Exprs
+ */
+public class ExprSelectivityTest extends FrontendTestBase {
+
+  public void verifySelectivityStmt(String stmtStr, double expectedSel)
+      throws AnalysisException {
+    SelectStmt stmt = (SelectStmt) AnalyzesOk(stmtStr);
+    Expr selectClause = stmt.getSelectList().getItems().get(0).getExpr();
+    double calculatedSel = selectClause.getSelectivity();
+    assertEquals(calculatedSel, expectedSel);
+  }
+
+  public void verifySel(String predicate, double expectedSel)
+      throws AnalysisException {
+    String stmtStr = "select " + predicate + " from functional.alltypes";
+    verifySelectivityStmt(stmtStr, expectedSel);
+  }
+
+  /**
+   * Helper for prettier error messages than what JUnit.Assert provides.
+   */
+  private void assertEquals(double actual, double expected) {
+    // Direct comparison of a calculated double and a hard-coded double proves
+    // to be difficult. So we are setting the delta to essentially ignores
+    // precision beyond 6.
+    if (Math.abs(actual - expected) > 0.000001) {
+      Assert.fail(
+          String.format("\nActual: %.7f\nExpected: %.7f\n", actual, expected));
+    }
+  }
+
+  @Test
+  public void TestBasicPredicateSel() throws AnalysisException {
+    // Testing selectivity of IN an NOT IN predicates
+    verifySel("id in (1,3,5,7)", 0.000548);
+    verifySel("id not in (1,3,9)", 0.999589);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0622574/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
index 2ab2ba9..3dd5168 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
@@ -138,10 +138,10 @@ F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
   |  mem-estimate=0B mem-reservation=0B
   |
   00:SCAN KUDU [functional_kudu.alltypes]
-     predicates: id IN (int_col), string_col NOT IN ('bar'), bigint_col IN 
(9999999999999999999), double_col IN (CAST('inf' AS DOUBLE)), float_col IN 
(CAST('NaN' AS FLOAT)), int_col IN (9999999999), smallint_col IN (99999, 2), 
tinyint_col IN (1, 999), bool_col IN (1)
+     predicates: id IN (int_col), bigint_col IN (9999999999999999999), 
double_col IN (CAST('inf' AS DOUBLE)), float_col IN (CAST('NaN' AS FLOAT)), 
int_col IN (9999999999), smallint_col IN (99999, 2), tinyint_col IN (1, 999), 
bool_col IN (1), string_col NOT IN ('bar')
      kudu predicates: double_col IN (0.0), float_col IN (0.0), bigint_col IN 
(1, 2), int_col IN (1, 2), smallint_col IN (0, 2), string_col IN ('foo', 'foo   
    '), tinyint_col IN (1, 2), bool_col IN (TRUE)
      mem-estimate=0B mem-reservation=0B
-     tuple-ids=0 row-size=126B cardinality=4
+     tuple-ids=0 row-size=126B cardinality=5
 ====
 select * from functional_kudu.alltypes where
 tinyint_col is not null and

Reply via email to