This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4ce689e58a000e78f641ecf544ca7f8c23345abb
Author: paul-rogers <prog...@cloudera.com>
AuthorDate: Sun Jan 20 11:48:30 2019 -0800

    IMPALA-8095: Detailed expression cardinality tests
    
    Cardinality is a critical input to the query planning process,
    especially join planning. Impala has many high-level end-to-end tests
    that implicitly test cardinality at the "wholesale" level: A test will
    produce a wrong result if the cardinality is badly wrong.
    
    This patch adds detailed unit tests for cardinality:
    
    * Table cardinality, NDV values and null count in metadata retrieved from
      HMS.
    * Table cardinality, NDV values and null counts in metadata presented to
      the query.
    * Expression NDV and selectivity values (which derive from table
      cardinality and column NDV).
    
    The tests illustrate a number of bugs. This patch simply identifies the
    bugs, comments out the tests that fail because of the bugs, and
    substitutes tests that pass with the current, incorrect, behavior.
    Future patches will fix the bugs. Reviewers can note the difference
    between the original, incorrect behavior shown here, and the revised
    behavior in those additional patches.
    
    Since none of the existing "functional" tables provide the level of
    detail needed for these tests, this patch adds a new test table
    specifically for this task.
    
    This set of tests provided a good opportunity to extend the test
    "fixture" framework created earlier. The FrontendTestBase class was refactored to use a new
    FrontendFixture which represents a (simulated) Impala and HMS cluster.
    The previous SessionFixture represents a single user session (with
    session options) and the QueryFixture represents a single query.
    
    As part of this refactoring, the fixture classes moved into "common"
    alongside FrontendTestBase.
    
    Testing: This patch includes only tests: no "production" code was
    changed.
    
    Change-Id: I3da58ee9b0beebeffb170b9430bd36d20dcd2401
    Reviewed-on: http://gerrit.cloudera.org:8080/12248
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 .../org/apache/impala/analysis/SelectListItem.java |   6 +-
 .../org/apache/impala/analysis/SlotDescriptor.java |   3 +
 .../impala/analysis/AnalyzeAuthStmtsTest.java      |  26 +-
 .../impala/analysis/ExprCardinalityTest.java       | 596 +++++++++++++++++++++
 .../org/apache/impala/analysis/ExprNdvTest.java    | 101 +++-
 .../impala/analysis/ExprRewriteRulesTest.java      |  24 +-
 .../apache/impala/common/AbstractFrontendTest.java |  58 ++
 .../impala/common/AnalysisSessionFixture.java      |  89 +++
 ...{FrontendTestBase.java => FrontendFixture.java} | 354 +++++-------
 .../org/apache/impala/common/FrontendTestBase.java | 207 +------
 .../QueryFixture.java}                             | 181 ++-----
 .../org/apache/impala/planner/CardinalityTest.java |  87 +++
 testdata/NullRows/data.csv                         |  26 +
 testdata/bin/compute-table-stats.sh                |   2 +-
 .../functional/functional_schema_template.sql      |  24 +
 .../datasets/functional/schema_constraints.csv     |   1 +
 16 files changed, 1250 insertions(+), 535 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java 
b/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
index dd3e414..4849543 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectListItem.java
@@ -17,14 +17,14 @@
 
 package org.apache.impala.analysis;
 
+import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
+
 import java.util.List;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 
-import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
-
-class SelectListItem {
+public class SelectListItem {
   private Expr expr_;
   private String alias_;
 
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java 
b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
index f203d70..5be2303 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
@@ -307,4 +307,7 @@ public class SlotDescriptor {
         .add("stats", stats_)
         .toString();
   }
+
+  @Override
+  public String toString() { return debugString(); }
 }
diff --git 
a/fe/src/test/java/org/apache/impala/analysis/AnalyzeAuthStmtsTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeAuthStmtsTest.java
index 18c663d..be47a08 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeAuthStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeAuthStmtsTest.java
@@ -24,12 +24,17 @@ import org.apache.impala.catalog.Catalog;
 import org.apache.impala.catalog.Role;
 import org.apache.impala.catalog.User;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.testutil.TestUtils;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.util.EventSequence;
 import org.junit.Test;
 
-public class AnalyzeAuthStmtsTest extends AnalyzerTest {
+public class AnalyzeAuthStmtsTest extends FrontendTestBase {
+
+  // TODO: Change this to a @BeforeClass method. Then, clean up these
+  // items in @AfterClass, else we've made a global change that may affect
+  // other tests in random ways.
   public AnalyzeAuthStmtsTest() {
     catalog_.getAuthPolicy().addPrincipal(
         new Role("myRole", new HashSet<>()));
@@ -37,6 +42,25 @@ public class AnalyzeAuthStmtsTest extends AnalyzerTest {
         new User("myUser", new HashSet<>()));
   }
 
+  // TODO: Switch to use a fixture with custom settings rather than the
+  // current patchwork of base and derived class methods.
+  /**
+   * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
+   */
+  @Override
+  public ParseNode AnalyzesOk(String stmt) {
+    return AnalyzesOk(stmt, createAnalysisCtx(Catalog.DEFAULT_DB), null);
+  }
+
+  /**
+   * Asserts if stmt passes analysis or the error string doesn't match and it
+   * is non-null.
+   */
+  @Override
+  public void AnalysisError(String stmt, String expectedErrorString) {
+    AnalysisError(stmt, createAnalysisCtx(Catalog.DEFAULT_DB), 
expectedErrorString);
+  }
+
   @Override
   protected AnalysisContext createAnalysisCtx(String defaultDb) {
     TQueryCtx queryCtx = TestUtils.createQueryContext(
diff --git 
a/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java 
b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
new file mode 100644
index 0000000..b30db8f
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprCardinalityTest.java
@@ -0,0 +1,596 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.analysis;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.util.Set;
+
+import org.apache.curator.shaded.com.google.common.collect.Sets;
+import org.apache.impala.catalog.Catalog;
+import org.apache.impala.catalog.Column;
+import org.apache.impala.catalog.ColumnStats;
+import org.apache.impala.catalog.DatabaseNotFoundException;
+import org.apache.impala.catalog.Db;
+import org.apache.impala.catalog.Table;
+import org.apache.impala.common.AnalysisSessionFixture;
+import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.InternalException;
+import org.apache.impala.common.QueryFixture.SelectFixture;
+import org.apache.impala.planner.CardinalityTest;
+import org.junit.Test;
+
+/**
+ * Tests expression cardinality and selectivity, both of which are
+ * important inputs to scan and join cardinality estimates.
+ *
+ * In the comments below, the notation |x| means the cardinality of
+ * x. If x is a table, then it is the row count of x. If x is a column,
+ * then it is the number of distinct values (the cardinality of the
+ * domain of the column), also known as NDV.
+ *
+ * This test focuses on cardinality and the selectivity that determines
+ * derived cardinality. If |T| is the cardinality of table T, then
+ * |T'| is defined as the cardinality of table T after applying a selection s.
+ * Selectivity is defined as:
+ *
+ * sel(s) = |T'|/|T|
+ *
+ * Or
+ *
+ * |T'| = |T| * sel(s)
+ *
+ * Though not used here, it can be helpful to think of the selectivity as
+ * the probability p that some row r appears in the output after selection:
+ *
+ * sel(s) = p(r in |T'|)
+ *
+ * Tests here focus on the entire cardinality and NDV lifecycle up to an
+ * expression, ensuring that we produce proper overall estimates. See also:
+ *
+ * * {@link ExprNdvTest} which focuses on the actual NDV calculation
+ *   method,
+ * * {@link CardinalityTest} which examines cardinality output from the
+ *   planner.
+ *
+ * The tests here illustrate a number of known bugs, typically marked by
+ * their ticket number. IMPALA-7601 is a roll-up for the general case that
+ * Impala does not estimate selectivity except in the narrow (col = const)
+ * case.
+ */
+public class ExprCardinalityTest {
+  private static AnalysisSessionFixture session_ = new 
AnalysisSessionFixture();
+
+  private void verifyTableCol(Table table, String colName,
+      long expectedNdv, long expectedNullCount) {
+    Column col = table.getColumn(colName);
+    assertNotNull(col);
+    ColumnStats stats = col.getStats();
+    assertNotNull(stats);
+    assertEquals(expectedNdv, stats.getNumDistinctValues());
+    assertEquals(expectedNullCount, stats.getNumNulls());
+  }
+
+  /**
+   * Baseline test of metadata cardinality, NDVs and null count.
+   * Locks down the values used in later tests to catch external changes
+   * easily.
+   *
+   * Cases:
+   * - With stats
+   *   - Columns without nulls
+   *   - Columns with nulls
+   * - Without stats, estimated from file size and schema
+   *
+   * (The last bit is not yet available.)
+   */
+
+  @Test
+  public void testMetadata() throws DatabaseNotFoundException, 
InternalException {
+    Catalog catalog = session_.catalog();
+    Db db = catalog.getDb("functional");
+    StmtMetadataLoader mdLoader =
+        new StmtMetadataLoader(session_.frontend(), "functional", null);
+    Set<TableName> tables = Sets.newHashSet(
+        new TableName("functional", "alltypes"),
+        new TableName("functional", "nullrows"),
+        new TableName("functional", "manynulls"));
+    mdLoader.loadTables(tables);
+
+    // Table with stats, no nulls
+    Table allTypes = db.getTable("alltypes");
+    assertEquals(7300, allTypes.getTTableStats().getNum_rows());
+    verifyTableCol(allTypes, "id", 7300, 0);
+    verifyTableCol(allTypes, "bool_col", 2, 0);
+    verifyTableCol(allTypes, "int_col", 10, 0);
+    // Bug: NDV of partition columns is -1 though it is listed as
+    // 2 in the shell with: SHOW COLUMN STATS alltypes
+    //verifyTableCol(allTypes, "year", 2, 0);
+    // Bug: When tests are run in Eclipse we get the result above.
+    // But, when the same test is run using maven from the command line,
+    // we get the result shown below.
+    // Unit test in Eclipse see the above, unit tests run from the
+    // Disabling both to avoid a flaky test,
+    // Same issue for the next three tests.
+    //verifyTableCol(allTypes, "year", -1, -1);
+    //verifyTableCol(allTypes, "month", 12, 0);
+    //verifyTableCol(allTypes, "month", -1, -1);
+
+    // Table with stats and nulls
+    Table nullrows = db.getTable("nullrows");
+    assertEquals(26, nullrows.getTTableStats().getNum_rows());
+    verifyTableCol(nullrows, "id", 26, 0);
+    // Bug: NDV should be 1 to include nulls
+    verifyTableCol(nullrows, "null_str", 0, 26);
+    verifyTableCol(nullrows, "group_str", 6, 0);
+    verifyTableCol(nullrows, "some_nulls", 6, 20);
+    // Oddly, boolean columns DO include nulls in NDV.
+    verifyTableCol(nullrows, "bool_nulls", 3, 15);
+
+    // Table without stats
+    Table manynulls = db.getTable("manynulls");
+    // Bug: Table cardinality should be guessed from schema & file size.
+    assertEquals(-1, manynulls.getTTableStats().getNum_rows());
+    verifyTableCol(manynulls, "id", -1, -1);
+  }
+
+  public void verifySelectCol(String table, String col,
+      long expectedNdv, long expectedNullCount) throws ImpalaException {
+    SelectFixture select = new SelectFixture(session_)
+        .table("functional." + table)
+        .exprSql(col);
+    Expr expr = select.analyzeExpr();
+    SlotRef colRef = (SlotRef) expr;
+    assertEquals(expectedNdv, expr.getNumDistinctValues());
+    assertEquals(expectedNullCount, colRef.getDesc().getStats().getNumNulls());
+    // Columns don't have selectivity, only expressions on columns
+    assertEquals(-1, expr.getSelectivity(), 0.001);
+  }
+
+  /**
+   * Test cardinality of the column references within an AST.
+   * Ensures that the metadata cardinality was propagated into the
+   * AST, along with possible adjustments.
+   *
+   * Cases:
+   * - With stats
+   *   - Normal NDV
+   *   - Small NDV
+   *   - Small NDV with nulls
+   *   - NDV with all nulls
+   *   - Constants
+   * - Without stats
+   * @throws ImpalaException
+   */
+  @Test
+  public void testColumnCardinality() throws ImpalaException {
+    // Stats, no null values
+    verifySelectCol("alltypes", "id", 7300, 0);
+    verifySelectCol("alltypes", "bool_col", 2, 0);
+    verifySelectCol("alltypes", "int_col", 10, 0);
+    // Bug: Stats not available for partition columns
+    //verifySelectExpr("alltypes", "year", 2, 0);
+    // Bug: Unit test in Eclipse see the above, unit tests run from the
+    // command line see the below. Disabling to avoid a flaky test,
+    // here and below.
+    //verifySelectExpr("alltypes", "year", -1, -1);
+    //verifySelectExpr("alltypes", "month", 12, 0);
+    //verifySelectExpr("alltypes", "month", -1, -1);
+
+    // Stats, with null values
+    verifySelectCol("nullrows", "id", 26, 0);
+    // Bug: NDV should be 1 to include nulls
+    verifySelectCol("nullrows", "null_str", 0, 26);
+    verifySelectCol("nullrows", "group_str", 6, 0);
+    verifySelectCol("nullrows", "some_nulls", 6, 20);
+    // Oddly, boolean columns DO include nulls in NDV.
+    verifySelectCol("nullrows", "bool_nulls", 3, 15);
+
+    // No stats
+    verifySelectCol("manynulls", "id", -1, -1);
+  }
+
+  public void verifySelectExpr(String table, String exprSql,
+      long expectedNdv, double expectedSel) throws ImpalaException {
+    SelectFixture select = new SelectFixture(session_)
+        .table("functional." + table)
+        .exprSql(exprSql);
+    Expr expr = select.analyzeExpr();
+    assertEquals(expectedNdv, expr.getNumDistinctValues());
+    assertEquals(expectedSel, expr.getSelectivity(), 0.00001);
+  }
+
+  /**
+   * Constants have an NDV of 1, selectivity of -1.
+   */
+  @Test
+  public void testConstants() throws ImpalaException {
+    verifySelectExpr("alltypes", "10", 1, -1);
+    verifySelectExpr("allTypes", "'foo'", 1, -1);
+    // Note that the constant NULL has an NDV = 1, but
+    // Null-only columns have an NDV=0...
+    // See IMPALA-8058
+    verifySelectExpr("alltypes", "NULL", 1, -1);
+    verifySelectExpr("alltypes", "true", 1, -1);
+  }
+
+  // Expression selectivity
+  // - Test for each expression type
+  // - Test for variety of situations
+  //   - Valid/invalid table cardinality
+  //   - Valid/invalid NDV
+  //   - Valid/invalid null count
+
+  /**
+   * Test col = const
+   *
+   * selectivity = 1 / |col|
+   */
+  @Test
+  public void testEqSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "id = 10", 3, 1.0/7300);
+    verifySelectExpr("alltypes", "bool_col = true", 3, 1.0/2);
+    verifySelectExpr("alltypes", "int_col = 10", 3, 1.0/10);
+
+    verifySelectExpr("nullrows", "id = 'foo'", 3, 1.0/26);
+    // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1
+    //verifySelectExpr("nullrows", "c = 'foo'", 3, 1.0/1);
+    verifySelectExpr("nullrows", "null_str = 'foo'", 3, -1);
+    verifySelectExpr("nullrows", "group_str = 'foo'", 3, 1.0/6);
+    //verifySelectExpr("nullrows", "some_nulls = 'foo'", 3, 1.0/7);
+    verifySelectExpr("nullrows", "some_nulls = 'foo'", 3, 1.0/6);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id = 10", 3, -1);
+  }
+
+  /**
+   * Test col IS NOT DISTINCT FROM x
+   *
+   * Sel should be same as = if x is non-null, otherwise
+   * same as IS NULL
+   */
+  @Test
+  public void testNotDistinctSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "id is not distinct from 10", 3, 1.0/7300);
+    // Bug: does not treat NULL specially
+    // Bug: NDV sould be 2 since IS NOT DISTINCT won't return NULL
+    //verifySelectExpr("alltypes", "id is not distinct from null", 2, 0);
+    verifySelectExpr("alltypes", "id is not distinct from null", 3, 1.0/7300);
+    verifySelectExpr("alltypes", "bool_col is not distinct from true", 3, 
1.0/2);
+    //verifySelectExpr("alltypes", "bool_col is not distinct from null", 2, 0);
+    verifySelectExpr("alltypes", "bool_col is not distinct from null", 3, 
1.0/2);
+    verifySelectExpr("alltypes", "int_col is not distinct from 10", 3, 1.0/10);
+    //verifySelectExpr("alltypes", "int_col is not distinct from null", 2, 0);
+    verifySelectExpr("alltypes", "int_col is not distinct from null", 3, 
1.0/10);
+
+    verifySelectExpr("nullrows", "id is not distinct from 'foo'", 3, 1.0/26);
+    //verifySelectExpr("nullrows", "id is not distinct from null", 2, 0);
+    verifySelectExpr("nullrows", "id is not distinct from null", 3, 1.0/26);
+    // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1
+    //verifySelectExpr("nullrows", "null_str is not distinct from 'foo'", 2, 
1.0/1);
+    verifySelectExpr("nullrows", "null_str is not distinct from 'foo'", 3, -1);
+    verifySelectExpr("nullrows", "null_str is not distinct from null", 3, -1);
+    verifySelectExpr("nullrows", "group_str is not distinct from 'foo'", 3, 
1.0/6);
+    //verifySelectExpr("nullrows", "group_str is not distinct from null", 2, 
1);
+    verifySelectExpr("nullrows", "group_str is not distinct from null", 3, 
1.0/6);
+    //verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 
1.0/7);
+    verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 
1.0/6);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id is not distinct from 10", 3, -1);
+  }
+
+  /**
+   * Test col != const
+   */
+  @Test
+  public void testNeSelectivity() throws ImpalaException {
+    // Bug: No estimated selectivity for != (IMPALA-8039)
+    //verifySelectExpr("alltypes", "id != 10", 3, 1 - 1.0/7300);
+    verifySelectExpr("alltypes", "id != 10", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col != true", 3, 1 - 1.0/2);
+    verifySelectExpr("alltypes", "bool_col != true", 3, -1);
+    //verifySelectExpr("alltypes", "int_col != 10", 3, 1 - 1.0/10);
+    verifySelectExpr("alltypes", "int_col != 10", 3, -1);
+
+    //verifySelectExpr("nullrows", "id != 'foo'", 3, 1 - 1.0/26);
+    verifySelectExpr("nullrows", "id != 'foo'", 3, -1);
+    // Bug: All nulls, so NDV should = 1, so Sel should be 1 - 1.0/1
+    //verifySelectExpr("nullrows", "null_str != 'foo'", 3, 1 - 1.0/1);
+    verifySelectExpr("nullrows", "null_str != 'foo'", 3, -1);
+    //verifySelectExpr("nullrows", "group_str != 'foo'", 3, 1 - 1.0/6);
+    verifySelectExpr("nullrows", "group_str != 'foo'", 3, -1);
+    //verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, 1 - 1.0/7);
+    verifySelectExpr("nullrows", "some_nulls != 'foo'", 3, -1);
+
+    // Bug: Sel should default to 1 - good old 0.1
+    verifySelectExpr("manynulls", "id != 10", 3, -1);
+  }
+
+  /**
+   * Test col IS DISTINCT FROM x
+   *
+   * Sel should be 1 - Sel(col IS NOT DISTINCT FROM x)
+   */
+  @Test
+  public void testDistinctSelectivity() throws ImpalaException {
+    // BUG: IS DISTINCT has no selectivity
+    //verifySelectExpr("alltypes", "id is distinct from 10", 3, 1 - 1.0/7300);
+    verifySelectExpr("alltypes", "id is distinct from 10", 3, -1);
+    // Bug: does not treat NULL specially
+    // Bug: NDV sould be 2 since IS DISTINCT won't return NULL
+    //verifySelectExpr("alltypes", "id is distinct from null", 2, 1);
+    verifySelectExpr("alltypes", "id is distinct from null", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col is distinct from true", 3, 1 - 
1.0/2);
+    verifySelectExpr("alltypes", "bool_col is distinct from true", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col is distinct from null", 2, 1);
+    verifySelectExpr("alltypes", "bool_col is distinct from null", 3, -1);
+    //verifySelectExpr("alltypes", "int_col is distinct from 10", 3, 1 - 
1.0/10);
+    verifySelectExpr("alltypes", "int_col is distinct from 10", 3, -1);
+    //verifySelectExpr("alltypes", "int_col is distinct from null", 2, 1);
+    verifySelectExpr("alltypes", "int_col is distinct from null", 3, -1);
+
+    //verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, 1 - 1.0/26);
+    verifySelectExpr("nullrows", "id is distinct from 'foo'", 3, -1);
+    //verifySelectExpr("nullrows", "id is distinct from null", 2, 1);
+    verifySelectExpr("nullrows", "id is distinct from null", 3, -1);
+    // Bug: All nulls, so NDV should = 1, so Sel should be 1.0/1
+    //verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 2, 1 - 
1.0/1);
+    verifySelectExpr("nullrows", "null_str is distinct from 'foo'", 3, -1);
+    verifySelectExpr("nullrows", "null_str is distinct from null", 3, -1);
+    //verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, 1 - 
1.0/6);
+    verifySelectExpr("nullrows", "group_str is distinct from 'foo'", 3, -1);
+    //verifySelectExpr("nullrows", "group_str is distinct from null", 2, 0);
+    verifySelectExpr("nullrows", "group_str is distinct from null", 3, -1);
+    //verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 
1 - 1.0/7);
+    verifySelectExpr("nullrows", "some_nulls is not distinct from 'foo'", 3, 
1.0/6);
+
+    // Bug: Sel should default to 1 - good old 0.1
+    verifySelectExpr("manynulls", "id is distinct from 10", 3, -1);
+  }
+
+  public static final double INEQUALITY_SEL = 0.33;
+
+  private void verifyInequalitySel(String table, String col, String value)
+      throws ImpalaException {
+    for (String op : new String[] { "<", "<=", ">", ">="}) {
+      // Bug: No estimated selectivity for >, >=, <, <= (IMPALA-7603)
+      //verifySelectExpr(table, col + " " + op + " " + value, 3, 
INEQUALITY_SEL);
+      verifySelectExpr(table, col + " " + op + " " + value, 3, -1);
+    }
+  }
+
+  @Test
+  public void testInequalitySelectivity() throws ImpalaException {
+    verifyInequalitySel("alltypes", "id", "10");
+    verifyInequalitySel("alltypes", "int_col", "10");
+
+    verifyInequalitySel("nullrows", "id", "'foo'");
+    verifyInequalitySel("nullrows", "null_str", "'foo'");
+    verifyInequalitySel("nullrows", "group_str", "'foo'");
+    verifyInequalitySel("nullrows", "some_nulls", "'foo'");
+
+    // Bug: Sel should default to 1 - good old 0.1
+    verifyInequalitySel("manynulls", "id", "10");
+  }
+
+  /**
+   * Test col IS NULL
+   * Selectivity should be null_count / |table|
+   */
+  @Test
+  public void testIsNullSelectivity() throws ImpalaException {
+    // Bug: No estimated selectivity for IS NULL (IMPALA-8050)
+    // Should be null count / |table|
+    // Bug: NDV of IS NULL is 3, should be 2 since IS NULL will
+    // never itself return NULL
+    //verifySelectExpr("alltypes", "id is null", 2, 0);
+    verifySelectExpr("alltypes", "id is null", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col is null", 2, 0);
+    verifySelectExpr("alltypes", "bool_col is null", 3, -1);
+    //verifySelectExpr("alltypes", "int_col is null", 2, 0);
+    verifySelectExpr("alltypes", "int_col is null", 3, -1);
+
+    //verifySelectExpr("nullrows", "id is null", 2, 0);
+    verifySelectExpr("nullrows", "id is null", 3, -1);
+     //verifySelectExpr("nullrows", "null_str is null", 2, 1);
+    verifySelectExpr("nullrows", "null_str is null", 3, 1);
+    //verifySelectExpr("nullrows", "group_str is null", 2, 0);
+    verifySelectExpr("nullrows", "group_str is null", 3, -1);
+    //verifySelectExpr("nullrows", "some_nulls is null", 2, 20.0/26);
+    verifySelectExpr("nullrows", "some_nulls is null", 3, 20.0/26);
+    verifySelectExpr("nullrows", "bool_nulls is not null", 3, 1 - 15.0/26);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id is null", 3, -1);
+  }
+
+  /**
+   * Test col IS NOT NULL
+   * Selectivity should be 1 - null_count / |table|
+   */
+  @Test
+  public void testNotNullSelectivity() throws ImpalaException {
+    // Bug: No estimated selectivity for IS NOT NULL (IMPALA-8050)
+    // Should be 1 - null count / |table|
+    // Bug: NDV of IS NULL is 3, should be 2 since IS NOT NULL will
+    // never itself return NULL
+    //verifySelectExpr("alltypes", "id is not null", 2, 1);
+    verifySelectExpr("alltypes", "id is null", 3, -1);
+    //verifySelectExpr("alltypes", "bool_col is not null", 2, 1);
+    verifySelectExpr("alltypes", "bool_col is null", 3, -1);
+    //verifySelectExpr("alltypes", "int_col is not null", 2, 1);
+    verifySelectExpr("alltypes", "int_col is not null", 3, -1);
+
+    //verifySelectExpr("nullrows", "id is not null", 2, 1);
+    verifySelectExpr("nullrows", "id is not null", 3, -1);
+     //verifySelectExpr("nullrows", "null_str is not null", 2, 0);
+    verifySelectExpr("nullrows", "null_str is not null", 3, 0);
+    //verifySelectExpr("nullrows", "group_str is not null", 2, 1);
+    verifySelectExpr("nullrows", "group_str is not null", 3, -1);
+    //verifySelectExpr("nullrows", "some_nulls is not null", 2, 1 - 20.0/26);
+    verifySelectExpr("nullrows", "some_nulls is not null", 3, 1 - 20.0/26);
+    verifySelectExpr("nullrows", "bool_nulls is not null", 3, 1 - 15.0/26);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id is not null", 3, -1);
+  }
+
+  /**
+   * Test col IN (a, b, c)
+   *
+   * The code should check only distinct values, so that
+   * |in| = NDV(in clause)
+   *
+   * Expected selectivity is |in| / |col|
+   *
+   * Where |col| = ndv(col)
+   *
+   * Estimate should be based on the "Containment" assumption: that the
+   * in-clause values are contained in the set of column values
+   */
+  @Test
+  public void testInSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "id in (1, 2, 3)", 3, 3.0/7300);
+    // Bug: Does not use NDV, just simple value count
+    //verifySelectExpr("alltypes", "id in (1, 2, 3, 2, 3, 1)", 3, 3.0/7300);
+    verifySelectExpr("alltypes", "id in (1, 2, 3, 2, 3, 1)", 3, 6.0/7300);
+    verifySelectExpr("alltypes", "bool_col in (true)", 3, 1.0/2);
+    verifySelectExpr("alltypes", "bool_col in (true, false)", 3, 2.0/2);
+    verifySelectExpr("alltypes", "int_col in (1, 2, 3)", 3, 3.0/10);
+    verifySelectExpr("alltypes",
+        "int_col in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)", 3, 1);
+
+    verifySelectExpr("nullrows", "id in ('a', 'b', 'c')", 3, 3.0/26);
+    // Bug: Why -1?
+    //verifySelectExpr("nullrows", "null_str in ('a', 'b', 'c')", 3, 1);
+    verifySelectExpr("nullrows", "null_str in ('a', 'b', 'c')", 3, -1);
+    verifySelectExpr("nullrows", "group_str in ('a', 'b', 'c')", 3, 3.0/6);
+    //verifySelectExpr("nullrows", "some_nulls in ('a', 'b', 'c')", 3, 3.0/7);
+    verifySelectExpr("nullrows", "some_nulls in ('a', 'b', 'c')", 3, 3.0/6);
+
+    // Bug: Sel should default to good old 0.1
+    verifySelectExpr("manynulls", "id in (1, 3, 3)", 3, -1);
+  }
+
+  /**
+   * Test col NOT IN (a, b, c)
+   *
+   * Should be 1 = sel(col IN (a, b, c))
+   */
+  @Test
+  public void testNotInSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "id not in (1, 2, 3)", 3, 1 - 3.0/7300);
+    // Bug: Does not use NDV, just simple value count
+    //verifySelectExpr("alltypes", "id not in (1, 2, 3, 2, 3, 1)", 3, 1 - 
3.0/7300);
+    verifySelectExpr("alltypes", "id not in (1, 2, 3, 2, 3, 1)", 3, 1 - 
6.0/7300);
+    verifySelectExpr("alltypes", "bool_col not in (true)", 3, 1 - 1.0/2);
+    verifySelectExpr("alltypes", "bool_col not in (true, false)", 3, 1 - 
2.0/2);
+    verifySelectExpr("alltypes", "int_col not in (1, 2, 3)", 3, 1 - 3.0/10);
+    verifySelectExpr("alltypes",
+        "int_col not in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)", 3, 0);
+
+    verifySelectExpr("nullrows", "id not in ('a', 'b', 'c')", 3, 1 - 3.0/26);
+    // Bug: Why -1?
+    //verifySelectExpr("nullrows", "null_str not in ('a', 'b', 'c')", 3, 1);
+    verifySelectExpr("nullrows", "null_str not in ('a', 'b', 'c')", 3, -1);
+    verifySelectExpr("nullrows", "group_str not in ('a', 'b', 'c')", 3, 1 - 
3.0/6);
+    //verifySelectExpr("nullrows", "some_nulls not in ('a', 'b', 'c')", 3, 1 - 
3.0/7);
+    verifySelectExpr("nullrows", "some_nulls not in ('a', 'b', 'c')", 3, 1 - 
3.0/6);
+
+    // Bug: Sel should default to 1 - good old 0.1
+    verifySelectExpr("manynulls", "id not in (1, 3, 3)", 3, -1);
+  }
+
+  @Test
+  public void testNotSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "not id in (1, 2, 3)", 3, 1 - 3.0/7300);
+    verifySelectExpr("alltypes", "not int_col in (1, 2)", 3, 1 - 2.0/10);
+    verifySelectExpr("alltypes", "not int_col = 10", 3, 1 - 1.0/10);
+
+    // Bug: Sel should default to 1 - good old 0.1
+    //verifySelectExpr("manynulls", "not id = 10", 3, 0.9);
+    verifySelectExpr("manynulls", "not id = 10", 3, -1);
+  }
+
+  @Test
+  public void testAndSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "bool_col = true", 3, 1.0/2);
+    verifySelectExpr("alltypes", "int_col = 10", 3, 1.0/10);
+    // Note: This is NOT the logic used in plan nodes!
+    verifySelectExpr("alltypes", "bool_col = true and int_col = 10", 3, 1.0/2 
* 1.0/10);
+    // Bug: should be something like (1/3)^2
+    //verifySelectExpr("alltypes", "int_col >= 10 and int_col <= 20", 3, 0.11);
+    verifySelectExpr("alltypes", "int_col >= 10 and int_col <= 20", 3, -1);
+
+    // Bug: Should be a product of two estimates.
+    // But, the -1 from the inequality poisons the whole expression
+    //verifySelectExpr("alltypes", "int_col = 10 AND smallint_col > 20",
+    //      3, 1.0/10 * 0.33);
+    verifySelectExpr("alltypes", "int_col = 10 AND smallint_col > 20", 3, -1);
+  }
+
+  @Test
+  public void testOrSelectivity() throws ImpalaException {
+    verifySelectExpr("alltypes", "bool_col = true or int_col = 10",
+        3, 1.0/2 + 1.0/10 - 1.0/2 * 1.0/10);
+    // Chain of OR rewritten to IN
+    verifySelectExpr("alltypes", "int_col = 10 or int_col = 20", 3, 2.0/10);
+  }
+
+  /**
+   * Test col BETWEEN x and y. Rewritten to
+   * col >= x AND col <= y. Inequality should have an estimate. Since
+   * the expression is an AND, we multipley the two estimates.
+   * So, regardless of NDV and null count, selectivity should be
+   * something like 0.33^2.
+   */
+  @Test
+  public void testBetweenSelectivity() throws ImpalaException {
+    // Bug: NO selectivity for Between because it is rewritten to
+    // use inequalities, and there no selectivities for those
+    // See IMPALA-8042
+    //verifySelectExpr("alltypes", "id between 30 and 60", 3, 0.33 * 0.33);
+    verifySelectExpr("alltypes", "id between 30 and 60", 3, -1);
+    //verifySelectExpr("alltypes", "int_col between 30 and 60", 3, 0.33 * 
0.33);
+    verifySelectExpr("alltypes", "int_col between 30 and 60", 3, -1);
+
+    // Should not matter that there are no stats
+    //verifySelectExpr("manynulls", "id between 30 and 60", 3, 0.33 * 0.33);
+    verifySelectExpr("manynulls", "id between 30 and 60", 3, -1);
+  }
+
+  /**
+   * Test col NOT BETWEEN x and y. Should be 1 - sel(col BETWEEN x and y).
+   */
+  @Test
+  public void testNotBetweenSelectivity() throws ImpalaException {
+    // Bug: NO selectivity for Not Between because it is rewritten to
+    // use inequalities, and there no selectivities for those
+    //verifySelectExpr("alltypes", "id not between 30 and 60", 3, 1 - 0.33 * 
0.33);
+    verifySelectExpr("alltypes", "id not between 30 and 60", 3, -1);
+    //verifySelectExpr("alltypes", "int_col not between 30 and 60", 3, 1 - 
0.33 * 0.33);
+    verifySelectExpr("alltypes", "int_col not between 30 and 60", 3, -1);
+
+    // Should not matter that there are no stats
+    //verifySelectExpr("manynulls", "id not between 30 and 60", 3, 1 - 0.33 * 
0.33);
+    verifySelectExpr("manynulls", "id not between 30 and 60", 3, -1);
+  }
+}
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprNdvTest.java 
b/fe/src/test/java/org/apache/impala/analysis/ExprNdvTest.java
index 88e1160..8a573a6 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprNdvTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprNdvTest.java
@@ -64,6 +64,29 @@ public class ExprNdvTest extends FrontendTestBase {
     }
   }
 
+  /**
+   * Constants have an NDV.
+   */
+  @Test
+  public void testConsts() throws ImpalaException {
+    // Would expect 1, but is 2.
+    verifyNdv("case when 0 = 1 then 'yes' else 'no' end", 2);
+
+    // Constants have NDV=1. This is set in the base LiteralExpr class,
+    // so only an INT constant is tested, all others are the same.
+    verifyNdv("10", 1);
+
+    // Propagation of const NDV. All expressions save CASE use
+    // the same max logic.
+    verifyNdv("10 * 3", 1);
+
+    // Planner defines NDV as "number of distinct values
+    // including nulls", but the NDV function (and the stats
+    // from tables) define it as "number of distinct non-null
+    // values".
+    verifyNdv("null", 1);
+  }
+
   @Test
   public void TestCaseExprBasic() throws ImpalaException {
     // All constants tests
@@ -91,8 +114,35 @@ public class ExprNdvTest extends FrontendTestBase {
   }
 
   @Test
-  public void TestCaseExprMissingStats() throws ImpalaException {
+  public void testExprBasic() throws ImpalaException {
+    // Baseline
+    verifyNdv("id", 7300);
+
+    // Produces a constant, but not worth worrying about.
+    // Actual NDV = 1 (or 2 if nullable)
+    verifyNdv("id * 0", 7300);
+
+    // Should not change NDV
+    verifyNdv("CAST(id AS VARCHAR)", 7300);
+
+    // All expressions save CASE use the max logic.
+    verifyNdv("id + 2", 7300);
+    verifyNdv("id * 2", 7300);
+
+    // IMPALA-7603: Should multiply NDVs, but does Max instead
+    verifyNdv("id + int_col", 7300);
+    verifyNdv("id * int_col", 7300);
 
+    // nullValue returns a boolean, so should be NDV=2
+    // Actual is wrong because it uses a generic calc:
+    // NDV(f(x)) = NDV(x).
+    // Should be:
+    // NDV(f(x)) = max(NDV(x), NDV(type(f)))
+    verifyNdv("nullValue(id)", 7300);
+  }
+
+  @Test
+  public void TestCaseExprMissingStats() throws ImpalaException {
     // Consts still work
     verifyNdvTwoTable("case when a.id = 1 then 'yes' " +
                       "when tiny.a = 'whatever' then 'maybe' " +
@@ -109,4 +159,53 @@ public class ExprNdvTest extends FrontendTestBase {
     verifyNdvTwoTable("case when tiny.a = 'whatever' then a.id " +
                       "else 0 end", 7301);
   }
+
+  /**
+   * Test null count handling. After IMPALA-7659, Impala computes a null count,
+   * when gathering stats, but the NDV does not include nulls (except for 
Boolean
+   * columns) if stats are computed by Impala, but does include nulls if stats 
are
+   * computed by Hive. This leads to rather bizarre outcomes such as the NDV 
of a
+   * column = 0 when the null count is greater than zero. This is clearly a 
bug to
+   * be fixed, but a complex one because of Hive and backward compatibility
+   * considerations. This test simply illustrates the current (unfortunate)
+   * behavior. See IMPALA-8094.
+   */
+  @Test
+  public void testNulls() throws ImpalaException {
+    // A table with nulls for which stats have been computed
+    // NDV(a) = 26
+    verifyNdvStmt("SELECT id FROM functional.nullrows", 26);
+    // NDV(f) = 6
+    verifyNdvStmt("SELECT some_nulls FROM functional.nullrows", 6);
+    // NDV(c) = 0 (all nulls), but add 1 for nulls
+    // Bug: See IMPALA-7310, IMPALA-8094
+    //verifyNdvStmt("SELECT null_str FROM functional.nullrows", 1);
+    verifyNdvStmt("SELECT null_str FROM functional.nullrows", 0);
+    // NDV(b) = 1, add 1 for nulls
+    // Bug: Same as above
+    //verifyNdvStmt("SELECT blanks FROM functional.nullrows", 2);
+    verifyNdvStmt("SELECT blank FROM functional.nullrows", 1);
+
+    // Same schema, one row
+    verifyNdvStmt("SELECT a FROM functional.nulltable", 1);
+    // Bug: Same as above
+    //verifyNdvStmt("SELECT c FROM functional.nulltable", 1);
+    verifyNdvStmt("SELECT c FROM functional.nulltable", 0);
+
+    // 11K rows, no stats
+    // Bug: Should come up with some estimate from size
+    verifyNdvStmt("SELECT id FROM functional.manynulls", -1);
+
+    // Table with 8 rows, NDV(year) = 1,
+    // null count for year is 0, so no adjustment.
+    verifyNdvStmt("SELECT year FROM functional.alltypestiny", 1);
+
+    // Test with non-nullable columns.
+    // NDV value from stats not increased by one here.
+    verifyNdvStmt("SELECT id FROM functional_kudu.alltypestiny", 8);
+    // But, is increased for a nullable column.
+    // Bug: Same as above
+    //verifyNdvStmt("SELECT year FROM functional_kudu.alltypestiny", 2);
+    verifyNdvStmt("SELECT year FROM functional_kudu.alltypestiny", 1);
+  }
 }
diff --git 
a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java 
b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index b91c526..7536fbb 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -26,8 +26,10 @@ import java.util.List;
 
 import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.AnalysisSessionFixture;
 import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.QueryFixture;
 import org.apache.impala.common.SqlCastException;
 import org.apache.impala.rewrite.BetweenToCompoundRule;
 import org.apache.impala.rewrite.EqualityDisjunctsToInRule;
@@ -51,13 +53,31 @@ import com.google.common.collect.Lists;
  * Tests ExprRewriteRules.
  */
 public class ExprRewriteRulesTest extends FrontendTestBase {
+  /**
+   * Wraps an ExprRewriteRule to count how many times it's been applied.
+   */
+  public static class CountingRewriteRuleWrapper implements ExprRewriteRule {
+    int rewrites_;
+    final ExprRewriteRule wrapped_;
+
+    CountingRewriteRuleWrapper(ExprRewriteRule wrapped) {
+      this.wrapped_ = wrapped;
+    }
+
+    @Override
+    public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+      Expr ret = wrapped_.apply(expr, analyzer);
+      if (expr != ret) { rewrites_++; }
+      return ret;
+    }
+  }
 
   /**
    * Specialized form of the Select fixture which analyzes a query without
    * rewrites. Use this to invoke the rewrite engine within the test itself.
    * Note: no analysis context is created in this case.
    */
-  public static class SelectRewriteFixture extends 
AnalysisSessionFixture.SelectFixture {
+  public static class SelectRewriteFixture extends QueryFixture.SelectFixture {
     private Analyzer analyzer_;
 
     public SelectRewriteFixture(AnalysisSessionFixture analysisFixture) {
@@ -136,7 +156,7 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
     }
   }
 
-  public static AnalysisSessionFixture session = new 
AnalysisSessionFixture(frontend_);
+  public static AnalysisSessionFixture session = new AnalysisSessionFixture();
 
   @BeforeClass
   public static void setup() {
diff --git 
a/fe/src/test/java/org/apache/impala/common/AbstractFrontendTest.java 
b/fe/src/test/java/org/apache/impala/common/AbstractFrontendTest.java
new file mode 100644
index 0000000..ff18c19
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/common/AbstractFrontendTest.java
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.common;
+
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/**
+ * Base Unit test class that manages the Frontend fixture
+ * to initialize and shut down the front-end, and to remove any
+ * temporary tables created by the test. Derive tests from this class
+ * if the test does anything "special." Derive from
+ * {@link FrontendTestBase} for routine tests that can leverage the
+ * many default functions available.
+ *
+ * A special test is one that:
+ *
+ * * Needs specialized query options.
+ * * Needs specialized query handling, such as inspecting bits of the
+ *   AST, decorated AST or query plan.
+ *
+ * In these cases, use the fixtures directly as they provide more control
+ * than do the generic methods in FrontendTestBase.
+ */
+public abstract class AbstractFrontendTest {
+  protected static FrontendFixture feFixture_ = FrontendFixture.instance();
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    feFixture_.setUp();
+  }
+
+  @AfterClass
+  public static void cleanUp() throws Exception {
+    feFixture_.cleanUp();
+  }
+
+  @After
+  public void tearDown() {
+    feFixture_.tearDown();
+  }
+}
diff --git 
a/fe/src/test/java/org/apache/impala/common/AnalysisSessionFixture.java 
b/fe/src/test/java/org/apache/impala/common/AnalysisSessionFixture.java
new file mode 100644
index 0000000..b7ff0fe
--- /dev/null
+++ b/fe/src/test/java/org/apache/impala/common/AnalysisSessionFixture.java
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala.common;
+
+import org.apache.impala.analysis.ExprRewriterTest;
+import org.apache.impala.catalog.Catalog;
+import org.apache.impala.service.Frontend;
+import org.apache.impala.testutil.ImpaladTestCatalog;
+import org.apache.impala.testutil.TestUtils;
+import org.apache.impala.thrift.TQueryCtx;
+import org.apache.impala.thrift.TQueryOptions;
+
+/**
+ * Session fixture for analyzer tests. Holds state shared across test cases 
such
+ * as the front-end, the user, the database, and query options. Queries created
+ * from this fixture start with these defaults, but each query can change them
+ * as needed for that particular test case.
+ *
+ * This fixture is analogous to a user session. Though, unlike a real session,
+ * test can change the database, options and user per-query without changing
+ * the session settings.
+ *
+ * The session fixture is created once per test file, then query fixtures 
perform
+ * the work needed for each particular query. It is often helpful to wrap the
+ * query fixtures in a function if the same setup is used over and over.
+ * See {@link ExprRewriterTest} for example usage.
+ */
+public class AnalysisSessionFixture {
+
+  private final FrontendFixture feFixture_ = FrontendFixture.instance();
+  // Query options to be used for all queries. Can be overridden per-query.
+  private final TQueryOptions queryOptions_;
+  // Default database for all queries.
+  private String db_ = Catalog.DEFAULT_DB;
+  // Default user for all queries.
+  private String user_ = System.getProperty("user.name");
+
+  public AnalysisSessionFixture() {
+    queryOptions_ = new TQueryOptions();
+  }
+
+  public AnalysisSessionFixture setDB(String db) {
+    db_ = db;
+    return this;
+  }
+
+  public AnalysisSessionFixture setUser(String user) {
+    user_ = user;
+    return this;
+  }
+
+  public TQueryOptions options() { return queryOptions_; }
+  public String db() { return db_; }
+  public String user() { return user_; }
+  public Frontend frontend() { return feFixture_.frontend(); }
+  public ImpaladTestCatalog catalog() { return feFixture_.catalog(); }
+
+  /**
+   * Disable the optional expression rewrites.
+   */
+  public AnalysisSessionFixture disableExprRewrite() {
+    queryOptions_.setEnable_expr_rewrites(false);
+    return this;
+  }
+
+  public TQueryOptions cloneOptions() {
+    return new TQueryOptions(queryOptions_);
+  }
+
+  public TQueryCtx queryContext() {
+    return TestUtils.createQueryContext(db_, user_, cloneOptions());
+  }
+
+}
diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java 
b/fe/src/test/java/org/apache/impala/common/FrontendFixture.java
similarity index 59%
copy from fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
copy to fe/src/test/java/org/apache/impala/common/FrontendFixture.java
index 2b6a640..1f696f0 100644
--- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
+++ b/fe/src/test/java/org/apache/impala/common/FrontendFixture.java
@@ -26,12 +26,10 @@ import java.util.List;
 
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
-import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.ColumnDef;
 import org.apache.impala.analysis.CreateTableStmt;
 import org.apache.impala.analysis.CreateViewStmt;
 import org.apache.impala.analysis.FunctionName;
-import org.apache.impala.analysis.InsertStmt;
 import org.apache.impala.analysis.ParseNode;
 import org.apache.impala.analysis.Parser;
 import org.apache.impala.analysis.QueryStmt;
@@ -60,86 +58,100 @@ import org.apache.impala.thrift.TFunctionBinaryType;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.util.EventSequence;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.BeforeClass;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
 /**
- * Base class for most frontend tests. Contains common functions for unit 
testing
- * various components, e.g., ParsesOk(), ParserError(), AnalyzesOk(), 
AnalysisError(),
- * as well as helper functions for creating test-local tables/views and 
UDF/UDAs.
+ * Test fixture for the front-end as a whole. Logically equivalent to a running
+ * Impala and HMS cluster. Manages the test metadata catalog.
+ * Use {@link AnalysisSessionFixture} to represent a user session (with a user name,
+ * session options, and so on), and a {@link QueryFixture} to represent a
+ * single query.
+ *
+ * While this fixture provides methods to parse and analyze a query, these
+ * actions are done with default options and handle the general case. Use
+ * the above fixtures for greater control, and to get at multiple bits of a
+ * query.
+ *
+ * {@link AbstractFrontendTest} manages a front-end fixture including setup
+ * and teardown. Use it as the base class for new tests that wish to use the
+ * test fixtures. {@link FrontendTestBase} extends AbstractFrontendTest and
+ * wraps the fixture in a set of functions which act as shims for legacy tests.
  */
-// TODO: Revise to leverage AnalysisFixure
-public class FrontendTestBase {
-  protected static ImpaladTestCatalog catalog_ = new ImpaladTestCatalog();
-  protected static Frontend frontend_ = new Frontend(
+
+public class FrontendFixture {
+  // Single instance used for all tests. Logically equivalent to a
+  // single Impala cluster used by many clients.
+  protected static final FrontendFixture instance_ = new FrontendFixture();
+
+  // The test catalog that can hold test-only tables.
+  protected final ImpaladTestCatalog catalog_ = new ImpaladTestCatalog();
+
+  // The actual Impala frontend that backs this fixture.
+  protected final Frontend frontend_ = new Frontend(
       AuthorizationConfig.createAuthDisabledConfig(), catalog_);
 
-  // Test-local list of test databases and tables. These are cleaned up in 
@After.
-  protected final List<Db> testDbs_ = Lists.newArrayList();
-  protected final List<Table> testTables_ = Lists.newArrayList();
-  protected final String[][] hintStyles_ = new String[][] {
-      new String[] { "/* +", "*/" }, // traditional commented hint
-      new String[] { "-- +", "\n" }, // eol commented hint
-      new String[] { "[", "]" } // legacy style
-  };
-
-  @BeforeClass
-  public static void setUp() throws Exception {
-    RuntimeEnv.INSTANCE.setTestEnv(true);
-  }
+  // Test-local list of test databases and tables.
+  protected final List<Db> testDbs_ = new ArrayList<>();
+  protected final List<Table> testTables_ = new ArrayList<>();
 
-  @AfterClass
-  public static void cleanUp() throws Exception {
-    RuntimeEnv.INSTANCE.reset();
-    catalog_.close();
+  protected final AnalysisSessionFixture defaultSession_;
+
+  public static FrontendFixture instance() {
+    return instance_;
   }
 
-  // Adds a Udf: default.name(args) to the catalog.
-  // TODO: we could consider having this be the sql to run instead but that 
requires
-  // connecting to the BE.
-  protected Function addTestFunction(String name,
-      ArrayList<ScalarType> args, boolean varArgs) {
-    return addTestFunction("default", name, args, varArgs);
+  /**
+   * Private constructor. Use {@link #instance()} to get access to
+   * the front-end fixture.
+   */
+  private FrontendFixture() {
+    defaultSession_ = new AnalysisSessionFixture();
   }
 
-  protected Function addTestFunction(String name,
-      ScalarType arg, boolean varArgs) {
-    return addTestFunction("default", name, Lists.newArrayList(arg), varArgs);
+  /**
+   * Call this from the test's @BeforeClass method.
+   */
+  public void setUp() throws Exception {
+    RuntimeEnv.INSTANCE.setTestEnv(true);
   }
 
-  protected Function addTestFunction(String db, String fnName,
-      ArrayList<ScalarType> args, boolean varArgs) {
-    ArrayList<Type> argTypes = Lists.newArrayList();
-    argTypes.addAll(args);
-    Function fn = ScalarFunction.createForTesting(
-        db, fnName, argTypes, Type.INT, "/Foo", "Foo.class", null,
-        null, TFunctionBinaryType.NATIVE);
-    fn.setHasVarArgs(varArgs);
-    catalog_.addFunction(fn);
-    return fn;
+  /**
+   * Call this from the test's @AfterClass method.
+   */
+  public void cleanUp() throws Exception {
+    RuntimeEnv.INSTANCE.reset();
+    catalog_.close();
   }
 
-  protected void addTestUda(String name, Type retType, Type... argTypes) {
-    FunctionName fnName = new FunctionName("default", name);
-    catalog_.addFunction(
-        AggregateFunction.createForTesting(
-            fnName, Lists.newArrayList(argTypes), retType, retType,
-            null, "init_fn_symbol", "update_fn_symbol", null, null,
-            null, null, null, TFunctionBinaryType.NATIVE));
+  /**
+   * Call this from the test's @After method.
+   */
+  public void tearDown() {
+    clearTestTables();
+    clearTestDbs();
   }
 
+  public Frontend frontend() { return frontend_; }
+  public ImpaladTestCatalog catalog() { return catalog_; }
+
+  /**
+   * Returns the default session with default options. Create your own
+   * instance if your test needs to change any of the options. Any number
+   * of sessions can be active at once.
+   *
+   * @return the default session with default options
+   */
+  public AnalysisSessionFixture session() { return defaultSession_; }
+
   /**
    * Add a new dummy database with the given name to the catalog.
    * Returns the new dummy database.
    * The database is registered in testDbs_ and removed in the @After method.
    */
-  protected Db addTestDb(String dbName, String comment) {
+  public Db addTestDb(String dbName, String comment) {
     Db db = catalog_.getDb(dbName);
     Preconditions.checkState(db == null, "Test db must not already exist.");
     db = new Db(dbName, new org.apache.hadoop.hive.metastore.api.Database(
@@ -162,8 +174,8 @@ public class FrontendTestBase {
    * Returns the new dummy table.
    * The test tables are registered in testTables_ and removed in the @After 
method.
    */
-  protected Table addTestTable(String createTableSql) {
-    CreateTableStmt createTableStmt = (CreateTableStmt) 
AnalyzesOk(createTableSql);
+  public Table addTestTable(String createTableSql) {
+    CreateTableStmt createTableStmt = (CreateTableStmt) 
analyzeStmt(createTableSql);
     Db db = catalog_.getDb(createTableStmt.getDb());
     Preconditions.checkNotNull(db, "Test tables must be created in an existing 
db.");
     org.apache.hadoop.hive.metastore.api.Table msTbl =
@@ -205,12 +217,18 @@ public class FrontendTestBase {
     return dummyTable;
   }
 
+  protected void clearTestTables() {
+    for (Table testTable: testTables_) {
+      testTable.getDb().removeTable(testTable.getName());
+    }
+  }
+
   /**
    * Adds a test-local view to the catalog based on the given CREATE VIEW sql.
    * The test views are registered in testTables_ and removed in the @After 
method.
    * Returns the new view.
    */
-  protected Table addTestView(String createViewSql) {
+  public Table addTestView(String createViewSql) {
     return addTestView(catalog_, createViewSql);
   }
 
@@ -219,94 +237,58 @@ public class FrontendTestBase {
    * The test views are registered in testTables_ and removed in the @After 
method.
    * Returns the new view.
    */
-  protected Table addTestView(Catalog catalog, String createViewSql) {
-    CreateViewStmt createViewStmt = (CreateViewStmt) AnalyzesOk(createViewSql);
+  public Table addTestView(Catalog catalog, String createViewSql) {
+    CreateViewStmt createViewStmt = (CreateViewStmt) 
analyzeStmt(createViewSql);
     Db db = catalog.getDb(createViewStmt.getDb());
     Preconditions.checkNotNull(db, "Test views must be created in an existing 
db.");
     // Do not analyze the stmt to avoid applying rewrites that would alter the 
view
     // definition. We want to model real views as closely as possible.
-    QueryStmt viewStmt = (QueryStmt) 
ParsesOk(createViewStmt.getInlineViewDef());
+    QueryStmt viewStmt = (QueryStmt) 
parseStmt(createViewStmt.getInlineViewDef());
     View dummyView = View.createTestView(db, createViewStmt.getTbl(), 
viewStmt);
     db.addTable(dummyView);
     testTables_.add(dummyView);
     return dummyView;
   }
 
-  protected Table addAllScalarTypesTestTable() {
-    addTestDb("allscalartypesdb", "");
-    return addTestTable("create table allscalartypes (" +
-      "bool_col boolean, tinyint_col tinyint, smallint_col smallint, int_col 
int, " +
-      "bigint_col bigint, float_col float, double_col double, dec1 
decimal(9,0), " +
-      "d2 decimal(10, 0), d3 decimal(20, 10), d4 decimal(38, 38), d5 
decimal(10, 5), " +
-      "timestamp_col timestamp, string_col string, varchar_col varchar(50), " +
-      "char_col char (30))");
-  }
-
-  protected void clearTestTables() {
-    for (Table testTable: testTables_) {
-      testTable.getDb().removeTable(testTable.getName());
-    }
-  }
-
-  /**
-   * Inject the hint into the pattern using hint location.
-   *
-   * Example:
-   *   pattern: insert %s into t %s select * from t
-   *   hint: <token_hint_begin> hint_with_args(a) <token_hint_end>
-   *   loc: Start(=oracle style) | End(=traditional style)
-   */
-  protected String InjectInsertHint(String pattern, String hint,
-      InsertStmt.HintLocation loc) {
-    final String oracleHint = (loc == InsertStmt.HintLocation.Start) ? hint : 
"";
-    final String defaultHint  = (loc == InsertStmt.HintLocation.End) ? hint : 
"";
-    return String.format(pattern, oracleHint, defaultHint);
-  }
-
-  @After
-  public void tearDown() {
-    clearTestTables();
-    clearTestDbs();
-  }
-
-  /**
-   * Parse 'stmt' and return the root StatementBase.
-   */
-  public StatementBase ParsesOk(String stmt) {
-    try {
-      StatementBase node = Parser.parse(stmt);
-      assertNotNull(node);
-      return node;
-    } catch (AnalysisException e) {
-      fail("\nParser error:\n" + e.getMessage());
-      throw new IllegalStateException(); // Keep compiler happy
-    }
+  // Adds a Udf: default.name(args) to the catalog.
+  // TODO: we could consider having this be the sql to run instead but that 
requires
+  // connecting to the BE.
+  public Function addTestFunction(String name,
+      ArrayList<ScalarType> args, boolean varArgs) {
+    return addTestFunction("default", name, args, varArgs);
   }
 
-  /**
-   * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
-   */
-  public ParseNode AnalyzesOk(String stmt) {
-    return AnalyzesOk(stmt, createAnalysisCtx(), null);
+  public Function addTestFunction(String name,
+      ScalarType arg, boolean varArgs) {
+    return addTestFunction("default", name, Lists.newArrayList(arg), varArgs);
   }
 
-  public ParseNode AnalyzesOk(String stmt, AnalysisContext analysisCtx) {
-    return AnalyzesOk(stmt, analysisCtx, null);
+  public Function addTestFunction(String db, String fnName,
+      ArrayList<ScalarType> args, boolean varArgs) {
+    List<Type> argTypes = new ArrayList<>();
+    argTypes.addAll(args);
+    Function fn = ScalarFunction.createForTesting(
+        db, fnName, argTypes, Type.INT, "/Foo", "Foo.class", null,
+        null, TFunctionBinaryType.NATIVE);
+    fn.setHasVarArgs(varArgs);
+    catalog_.addFunction(fn);
+    return fn;
   }
 
-  /**
-   * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
-   * If 'expectedWarning' is not null, asserts that a warning is produced.
-   */
-  public ParseNode AnalyzesOk(String stmt, String expectedWarning) {
-    return AnalyzesOk(stmt, createAnalysisCtx(), expectedWarning);
+  public void addTestUda(String name, Type retType, Type... argTypes) {
+    FunctionName fnName = new FunctionName("default", name);
+    catalog_.addFunction(
+        AggregateFunction.createForTesting(
+            fnName, Lists.newArrayList(argTypes), retType, retType,
+            null, "init_fn_symbol", "update_fn_symbol", null, null,
+            null, null, null, TFunctionBinaryType.NATIVE));
   }
 
-  protected AnalysisContext createAnalysisCtx() {
+  public AnalysisContext createAnalysisCtx() {
     return createAnalysisCtx(Catalog.DEFAULT_DB);
   }
 
-  protected AnalysisContext createAnalysisCtx(String defaultDb) {
+  public AnalysisContext createAnalysisCtx(String defaultDb) {
     TQueryCtx queryCtx = TestUtils.createQueryContext(
         defaultDb, System.getProperty("user.name"));
     EventSequence timeline = new EventSequence("Frontend Test Timeline");
@@ -315,7 +297,7 @@ public class FrontendTestBase {
     return analysisCtx;
   }
 
-  protected AnalysisContext createAnalysisCtx(TQueryOptions queryOptions) {
+  public AnalysisContext createAnalysisCtx(TQueryOptions queryOptions) {
     TQueryCtx queryCtx = TestUtils.createQueryContext();
     queryCtx.client_request.query_options = queryOptions;
     EventSequence timeline = new EventSequence("Frontend Test Timeline");
@@ -324,11 +306,11 @@ public class FrontendTestBase {
     return analysisCtx;
   }
 
-  protected AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig) 
{
+  public AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig) {
     return createAnalysisCtx(authzConfig, System.getProperty("user.name"));
   }
 
-  protected AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig,
+  public AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig,
       String user) {
     TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB, 
user);
     EventSequence timeline = new EventSequence("Frontend Test Timeline");
@@ -336,17 +318,36 @@ public class FrontendTestBase {
     return analysisCtx;
   }
 
-  protected AnalysisContext createAnalysisCtxUsingHiveColLabels() {
-    AnalysisContext analysisCtx = createAnalysisCtx();
-    analysisCtx.setUseHiveColLabels(true);
-    return analysisCtx;
+  /**
+   * Parse 'stmt' and return the root StatementBase.
+   */
+  public StatementBase parseStmt(String stmt) {
+    try {
+      StatementBase node = Parser.parse(stmt);
+      assertNotNull(node);
+      return node;
+    } catch (AnalysisException e) {
+      fail("Parser error:\n" + e.getMessage());
+      throw new IllegalStateException(); // Keep compiler happy
+    }
+  }
+
+  public AnalysisResult parseAndAnalyze(String stmt, AnalysisContext ctx)
+      throws ImpalaException {
+    StatementBase parsedStmt = Parser.parse(stmt, ctx.getQueryOptions());
+    StmtMetadataLoader mdLoader =
+        new StmtMetadataLoader(frontend_, ctx.getQueryCtx().session.database, 
null);
+    StmtTableCache stmtTableCache = mdLoader.loadTables(parsedStmt);
+    return ctx.analyzeAndAuthorize(parsedStmt, stmtTableCache,
+        frontend_.getAuthzChecker());
   }
 
   /**
    * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
    * If 'expectedWarning' is not null, asserts that a warning is produced.
    */
-  public ParseNode AnalyzesOk(String stmt, AnalysisContext ctx, String 
expectedWarning) {
+  public ParseNode analyzeStmt(String stmt, AnalysisContext ctx,
+      String expectedWarning) {
     try {
       AnalysisResult analysisResult = parseAndAnalyze(stmt, ctx);
       if (expectedWarning != null) {
@@ -369,88 +370,15 @@ public class FrontendTestBase {
     } catch (Exception e) {
       e.printStackTrace();
       fail("Error during analysis:\n" + e.toString() + "\nsql:\n" + stmt);
+      throw new IllegalStateException(); // Keep compiler happy
     }
-    return null;
-  }
-
-  /**
-   * Analyzes the given statement without performing rewrites or authorization.
-   */
-  public StatementBase AnalyzesOkNoRewrite(StatementBase stmt) throws 
ImpalaException {
-    AnalysisContext ctx = createAnalysisCtx();
-    StmtMetadataLoader mdLoader =
-        new StmtMetadataLoader(frontend_, ctx.getQueryCtx().session.database, 
null);
-    StmtTableCache loadedTables = mdLoader.loadTables(stmt);
-    Analyzer analyzer = ctx.createAnalyzer(loadedTables);
-    stmt.analyze(analyzer);
-    return stmt;
-  }
-
-  /**
-   * Asserts if stmt passes analysis.
-   */
-  public void AnalysisError(String stmt) {
-    AnalysisError(stmt, null);
-  }
-
-  /**
-   * Asserts if stmt passes analysis or the error string doesn't match and it
-   * is non-null.
-   */
-  public void AnalysisError(String stmt, String expectedErrorString) {
-    AnalysisError(stmt, createAnalysisCtx(), expectedErrorString);
-  }
-
-  /**
-   * Asserts if stmt passes analysis or the error string doesn't match and it
-   * is non-null.
-   */
-  public void AnalysisError(String stmt, AnalysisContext ctx, String 
expectedErrorString) {
-    Preconditions.checkNotNull(expectedErrorString, "No expected error message 
given.");
-    try {
-      AnalysisResult analysisResult = parseAndAnalyze(stmt, ctx);
-      Preconditions.checkNotNull(analysisResult.getStmt());
-    } catch (Exception e) {
-      String errorString = e.getMessage();
-      Preconditions.checkNotNull(errorString, "Stack trace lost during 
exception.");
-      String msg = "got error:\n" + errorString + "\nexpected:\n" + 
expectedErrorString;
-      // TODO: This logic can be removed.
-      // Different versions of Hive have slightly different error messages;
-      // we normalize here as follows:
-      // 'No FileSystem for Scheme "x"' -> 'No FileSystem for scheme: x'
-      if (errorString.contains("No FileSystem for scheme ")) {
-        errorString = errorString.replace("\"", "");
-        errorString = errorString.replace("No FileSystem for scheme ",
-            "No FileSystem for scheme: ");
-      }
-      Assert.assertTrue(msg, errorString.startsWith(expectedErrorString));
-      return;
-    }
-    fail("Stmt didn't result in analysis error: " + stmt);
-  }
-
-  protected AnalysisResult parseAndAnalyze(String stmt, AnalysisContext ctx)
-      throws ImpalaException {
-    return parseAndAnalyze(stmt, ctx, frontend_);
-  }
-
-  protected AnalysisResult parseAndAnalyze(String stmt, AnalysisContext ctx, 
Frontend fe)
-      throws ImpalaException {
-    StatementBase parsedStmt = Parser.parse(stmt, ctx.getQueryOptions());
-    StmtMetadataLoader mdLoader =
-        new StmtMetadataLoader(fe, ctx.getQueryCtx().session.database, null);
-    StmtTableCache stmtTableCache = mdLoader.loadTables(parsedStmt);
-    return ctx.analyzeAndAuthorize(parsedStmt, stmtTableCache, 
fe.getAuthzChecker());
   }
 
   /**
-   * Creates an authorization config for creating an AnalysisContext with
-   * authorization enabled.
+   * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error.
+   * Uses default options; use {@link QueryFixture} for greater control.
    */
-  protected AuthorizationConfig createAuthorizationConfig() {
-    AuthorizationConfig authzConfig = 
AuthorizationConfig.createHadoopGroupAuthConfig(
-        "server1", null, System.getenv("IMPALA_HOME") +
-            "/fe/src/test/resources/sentry-site.xml");
-    return authzConfig;
+  public ParseNode analyzeStmt(String stmt) {
+    return analyzeStmt(stmt, createAnalysisCtx(), null);
   }
 }
diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java 
b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
index 2b6a640..837fd8c 100644
--- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
+++ b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
@@ -17,55 +17,31 @@
 
 package org.apache.impala.common;
 
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.fail;
 
 import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
 
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
 import org.apache.impala.analysis.Analyzer;
-import org.apache.impala.analysis.ColumnDef;
-import org.apache.impala.analysis.CreateTableStmt;
-import org.apache.impala.analysis.CreateViewStmt;
-import org.apache.impala.analysis.FunctionName;
 import org.apache.impala.analysis.InsertStmt;
 import org.apache.impala.analysis.ParseNode;
 import org.apache.impala.analysis.Parser;
-import org.apache.impala.analysis.QueryStmt;
 import org.apache.impala.analysis.StatementBase;
 import org.apache.impala.analysis.StmtMetadataLoader;
 import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
 import org.apache.impala.authorization.AuthorizationConfig;
-import org.apache.impala.catalog.AggregateFunction;
 import org.apache.impala.catalog.Catalog;
-import org.apache.impala.catalog.CatalogException;
-import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.Db;
 import org.apache.impala.catalog.Function;
-import org.apache.impala.catalog.HdfsTable;
-import org.apache.impala.catalog.KuduTable;
-import org.apache.impala.catalog.ScalarFunction;
 import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.catalog.Type;
-import org.apache.impala.catalog.View;
-import org.apache.impala.service.CatalogOpExecutor;
 import org.apache.impala.service.Frontend;
 import org.apache.impala.testutil.ImpaladTestCatalog;
-import org.apache.impala.testutil.TestUtils;
-import org.apache.impala.thrift.TFunctionBinaryType;
-import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
-import org.apache.impala.util.EventSequence;
-import org.junit.After;
-import org.junit.AfterClass;
 import org.junit.Assert;
-import org.junit.BeforeClass;
 
-import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
@@ -73,33 +49,23 @@ import com.google.common.collect.Lists;
  * Base class for most frontend tests. Contains common functions for unit 
testing
  * various components, e.g., ParsesOk(), ParserError(), AnalyzesOk(), 
AnalysisError(),
  * as well as helper functions for creating test-local tables/views and 
UDF/UDAs.
+ *
+ * Extend "typical" tests from this class. For deeper, or more specialized 
tests,
+ * extend from {@link AbstractFrontendTest} and use the various fixtures 
directly.
+ * This class is also used for "legacy" tests that used the many functions here
+ * rather than the newer fixtures.
  */
-// TODO: Revise to leverage AnalysisFixure
-public class FrontendTestBase {
-  protected static ImpaladTestCatalog catalog_ = new ImpaladTestCatalog();
-  protected static Frontend frontend_ = new Frontend(
-      AuthorizationConfig.createAuthDisabledConfig(), catalog_);
-
-  // Test-local list of test databases and tables. These are cleaned up in 
@After.
-  protected final List<Db> testDbs_ = Lists.newArrayList();
-  protected final List<Table> testTables_ = Lists.newArrayList();
+public class FrontendTestBase extends AbstractFrontendTest {
+  // Temporary shim until tests are updated to use the
+  // frontend fixture.
+  protected static Frontend frontend_ = feFixture_.frontend();
+  protected static ImpaladTestCatalog catalog_ = feFixture_.catalog();
   protected final String[][] hintStyles_ = new String[][] {
       new String[] { "/* +", "*/" }, // traditional commented hint
       new String[] { "-- +", "\n" }, // eol commented hint
       new String[] { "[", "]" } // legacy style
   };
 
-  @BeforeClass
-  public static void setUp() throws Exception {
-    RuntimeEnv.INSTANCE.setTestEnv(true);
-  }
-
-  @AfterClass
-  public static void cleanUp() throws Exception {
-    RuntimeEnv.INSTANCE.reset();
-    catalog_.close();
-  }
-
   // Adds a Udf: default.name(args) to the catalog.
   // TODO: we could consider having this be the sql to run instead but that 
requires
   // connecting to the BE.
@@ -115,23 +81,11 @@ public class FrontendTestBase {
 
   protected Function addTestFunction(String db, String fnName,
       ArrayList<ScalarType> args, boolean varArgs) {
-    ArrayList<Type> argTypes = Lists.newArrayList();
-    argTypes.addAll(args);
-    Function fn = ScalarFunction.createForTesting(
-        db, fnName, argTypes, Type.INT, "/Foo", "Foo.class", null,
-        null, TFunctionBinaryType.NATIVE);
-    fn.setHasVarArgs(varArgs);
-    catalog_.addFunction(fn);
-    return fn;
+    return feFixture_.addTestFunction(db, fnName, args, varArgs);
   }
 
   protected void addTestUda(String name, Type retType, Type... argTypes) {
-    FunctionName fnName = new FunctionName("default", name);
-    catalog_.addFunction(
-        AggregateFunction.createForTesting(
-            fnName, Lists.newArrayList(argTypes), retType, retType,
-            null, "init_fn_symbol", "update_fn_symbol", null, null,
-            null, null, null, TFunctionBinaryType.NATIVE));
+    feFixture_.addTestUda(name, retType, argTypes);
   }
 
   /**
@@ -140,19 +94,7 @@ public class FrontendTestBase {
    * The database is registered in testDbs_ and removed in the @After method.
    */
   protected Db addTestDb(String dbName, String comment) {
-    Db db = catalog_.getDb(dbName);
-    Preconditions.checkState(db == null, "Test db must not already exist.");
-    db = new Db(dbName, new org.apache.hadoop.hive.metastore.api.Database(
-        dbName, comment, "", Collections.<String, String>emptyMap()));
-    catalog_.addDb(db);
-    testDbs_.add(db);
-    return db;
-  }
-
-  protected void clearTestDbs() {
-    for (Db testDb: testDbs_) {
-      catalog_.removeDb(testDb.getName());
-    }
+    return feFixture_.addTestDb(dbName, comment);
   }
 
   /**
@@ -163,46 +105,7 @@ public class FrontendTestBase {
    * The test tables are registered in testTables_ and removed in the @After 
method.
    */
   protected Table addTestTable(String createTableSql) {
-    CreateTableStmt createTableStmt = (CreateTableStmt) 
AnalyzesOk(createTableSql);
-    Db db = catalog_.getDb(createTableStmt.getDb());
-    Preconditions.checkNotNull(db, "Test tables must be created in an existing 
db.");
-    org.apache.hadoop.hive.metastore.api.Table msTbl =
-        CatalogOpExecutor.createMetaStoreTable(createTableStmt.toThrift());
-    Table dummyTable = Table.fromMetastoreTable(db, msTbl);
-    if (dummyTable instanceof HdfsTable) {
-      List<ColumnDef> columnDefs = Lists.newArrayList(
-          createTableStmt.getPartitionColumnDefs());
-      dummyTable.setNumClusteringCols(columnDefs.size());
-      columnDefs.addAll(createTableStmt.getColumnDefs());
-      for (int i = 0; i < columnDefs.size(); ++i) {
-        ColumnDef colDef = columnDefs.get(i);
-        dummyTable.addColumn(
-            new Column(colDef.getColName(), colDef.getType(), 
colDef.getComment(), i));
-      }
-      try {
-        HdfsTable hdfsTable = (HdfsTable) dummyTable;
-        hdfsTable.setPrototypePartition(msTbl.getSd());
-      } catch (CatalogException e) {
-        e.printStackTrace();
-        fail("Failed to add test table:\n" + createTableSql);
-      }
-    } else if (dummyTable instanceof KuduTable) {
-      if (!Table.isExternalTable(msTbl)) {
-        fail("Failed to add table, external kudu table expected:\n" + 
createTableSql);
-      }
-      try {
-        KuduTable kuduTable = (KuduTable) dummyTable;
-        kuduTable.loadSchemaFromKudu();
-      } catch (ImpalaRuntimeException e) {
-        e.printStackTrace();
-        fail("Failed to add test table:\n" + createTableSql);
-      }
-    } else {
-      fail("Test table type not supported:\n" + createTableSql);
-    }
-    db.addTable(dummyTable);
-    testTables_.add(dummyTable);
-    return dummyTable;
+    return feFixture_.addTestTable(createTableSql);
   }
 
   /**
@@ -211,7 +114,7 @@ public class FrontendTestBase {
    * Returns the new view.
    */
   protected Table addTestView(String createViewSql) {
-    return addTestView(catalog_, createViewSql);
+    return feFixture_.addTestView(createViewSql);
   }
 
   /**
@@ -220,16 +123,7 @@ public class FrontendTestBase {
    * Returns the new view.
    */
   protected Table addTestView(Catalog catalog, String createViewSql) {
-    CreateViewStmt createViewStmt = (CreateViewStmt) AnalyzesOk(createViewSql);
-    Db db = catalog.getDb(createViewStmt.getDb());
-    Preconditions.checkNotNull(db, "Test views must be created in an existing 
db.");
-    // Do not analyze the stmt to avoid applying rewrites that would alter the 
view
-    // definition. We want to model real views as closely as possible.
-    QueryStmt viewStmt = (QueryStmt) 
ParsesOk(createViewStmt.getInlineViewDef());
-    View dummyView = View.createTestView(db, createViewStmt.getTbl(), 
viewStmt);
-    db.addTable(dummyView);
-    testTables_.add(dummyView);
-    return dummyView;
+    return feFixture_.addTestView(catalog, createViewSql);
   }
 
   protected Table addAllScalarTypesTestTable() {
@@ -242,12 +136,6 @@ public class FrontendTestBase {
       "char_col char (30))");
   }
 
-  protected void clearTestTables() {
-    for (Table testTable: testTables_) {
-      testTable.getDb().removeTable(testTable.getName());
-    }
-  }
-
   /**
    * Inject the hint into the pattern using hint location.
    *
@@ -263,24 +151,11 @@ public class FrontendTestBase {
     return String.format(pattern, oracleHint, defaultHint);
   }
 
-  @After
-  public void tearDown() {
-    clearTestTables();
-    clearTestDbs();
-  }
-
   /**
    * Parse 'stmt' and return the root StatementBase.
    */
   public StatementBase ParsesOk(String stmt) {
-    try {
-      StatementBase node = Parser.parse(stmt);
-      assertNotNull(node);
-      return node;
-    } catch (AnalysisException e) {
-      fail("\nParser error:\n" + e.getMessage());
-      throw new IllegalStateException(); // Keep compiler happy
-    }
+    return feFixture_.parseStmt(stmt);
   }
 
   /**
@@ -303,37 +178,24 @@ public class FrontendTestBase {
   }
 
   protected AnalysisContext createAnalysisCtx() {
-    return createAnalysisCtx(Catalog.DEFAULT_DB);
+    return feFixture_.createAnalysisCtx();
   }
 
   protected AnalysisContext createAnalysisCtx(String defaultDb) {
-    TQueryCtx queryCtx = TestUtils.createQueryContext(
-        defaultDb, System.getProperty("user.name"));
-    EventSequence timeline = new EventSequence("Frontend Test Timeline");
-    AnalysisContext analysisCtx = new AnalysisContext(queryCtx,
-        AuthorizationConfig.createAuthDisabledConfig(), timeline);
-    return analysisCtx;
+    return feFixture_.createAnalysisCtx(defaultDb);
   }
 
   protected AnalysisContext createAnalysisCtx(TQueryOptions queryOptions) {
-    TQueryCtx queryCtx = TestUtils.createQueryContext();
-    queryCtx.client_request.query_options = queryOptions;
-    EventSequence timeline = new EventSequence("Frontend Test Timeline");
-    AnalysisContext analysisCtx = new AnalysisContext(queryCtx,
-        AuthorizationConfig.createAuthDisabledConfig(), timeline);
-    return analysisCtx;
+    return feFixture_.createAnalysisCtx(queryOptions);
   }
 
   protected AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig) 
{
-    return createAnalysisCtx(authzConfig, System.getProperty("user.name"));
+    return feFixture_.createAnalysisCtx(authzConfig);
   }
 
   protected AnalysisContext createAnalysisCtx(AuthorizationConfig authzConfig,
       String user) {
-    TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB, 
user);
-    EventSequence timeline = new EventSequence("Frontend Test Timeline");
-    AnalysisContext analysisCtx = new AnalysisContext(queryCtx, authzConfig, 
timeline);
-    return analysisCtx;
+    return feFixture_.createAnalysisCtx(authzConfig, user);
   }
 
   protected AnalysisContext createAnalysisCtxUsingHiveColLabels() {
@@ -347,30 +209,7 @@ public class FrontendTestBase {
    * If 'expectedWarning' is not null, asserts that a warning is produced.
    */
   public ParseNode AnalyzesOk(String stmt, AnalysisContext ctx, String 
expectedWarning) {
-    try {
-      AnalysisResult analysisResult = parseAndAnalyze(stmt, ctx);
-      if (expectedWarning != null) {
-        List<String> actualWarnings = 
analysisResult.getAnalyzer().getWarnings();
-        boolean matchedWarning = false;
-        for (String actualWarning: actualWarnings) {
-          if (actualWarning.startsWith(expectedWarning)) {
-            matchedWarning = true;
-            break;
-          }
-        }
-        if (!matchedWarning) {
-          fail(String.format("Did not produce expected warning.\n"
-                  + "Expected warning:\n%s.\nActual warnings:\n%s\nsql:\n%s",
-              expectedWarning, Joiner.on("\n").join(actualWarnings), stmt));
-        }
-      }
-      Preconditions.checkNotNull(analysisResult.getStmt());
-      return analysisResult.getStmt();
-    } catch (Exception e) {
-      e.printStackTrace();
-      fail("Error during analysis:\n" + e.toString() + "\nsql:\n" + stmt);
-    }
-    return null;
+    return feFixture_.analyzeStmt(stmt, ctx, expectedWarning);
   }
 
   /**
diff --git 
a/fe/src/test/java/org/apache/impala/analysis/AnalysisSessionFixture.java 
b/fe/src/test/java/org/apache/impala/common/QueryFixture.java
similarity index 59%
rename from 
fe/src/test/java/org/apache/impala/analysis/AnalysisSessionFixture.java
rename to fe/src/test/java/org/apache/impala/common/QueryFixture.java
index 900604a..88c9d57 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalysisSessionFixture.java
+++ b/fe/src/test/java/org/apache/impala/common/QueryFixture.java
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-package org.apache.impala.analysis;
+package org.apache.impala.common;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
@@ -23,15 +23,17 @@ import static org.junit.Assert.fail;
 import java.io.StringReader;
 import java.util.List;
 
+import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
+import org.apache.impala.analysis.Analyzer;
+import org.apache.impala.analysis.Expr;
+import org.apache.impala.analysis.SelectStmt;
+import org.apache.impala.analysis.SqlParser;
+import org.apache.impala.analysis.SqlScanner;
+import org.apache.impala.analysis.StatementBase;
+import org.apache.impala.analysis.StmtMetadataLoader;
 import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
 import org.apache.impala.authorization.AuthorizationConfig;
-import org.apache.impala.catalog.Catalog;
-import org.apache.impala.common.AnalysisException;
-import org.apache.impala.common.ImpalaException;
-import org.apache.impala.common.InternalException;
-import org.apache.impala.rewrite.ExprRewriteRule;
-import org.apache.impala.service.Frontend;
 import org.apache.impala.testutil.TestUtils;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
@@ -41,73 +43,17 @@ import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 
 /**
- * Session fixture for analyzer tests. Holds state shared across test cases 
such
- * as the frontend, the user, the database, and query options. Queries created
- * from this fixture start with these defaults, but each query can change them
- * as needed for that particular test case.
+ * Base class for per-query processing. This base class encapsulates all the 
inputs
+ * to a query: the session, context, options, db and user, as well as the input
+ * SQL. All inputs, except for the SQL, "inherit" from the session fixture, 
but can
+ * be overridden here. For example, if most tests use the "functional" DB, 
that
+ * in the session fixture. But, if one particular test needs a different DB, 
you can
+ * set that here.
  *
- * This fixture is analogous to a user session. Though, unlike a real session,
- * test can change the database, options and user per-query without changing
- * the session settings.
- *
- * The session fixture is created once per test file, then query fixtures 
perform
- * the work needed for each particular query. It is often helpful to wrap the
- * query fixtures in a function if the same setup is used over and over.
- * See {@link ExprRewriterTest} for  example usage.
+ * Provides the parse step. Use this class directly for parse-only tests.
+ * Subclasses implement various kinds of analysis operations.
  */
-public class AnalysisSessionFixture {
-
-  /**
-   * Base class for per-query processing. This base class encapsulates all the 
inputs
-   * to a query: the session, context, options, db and user, as well as the 
input
-   * SQL. All inputs, except for the SQL, "inherit" from the session fixture, 
but can
-   * be overriden here. For example, if most tests use the "functional" DB, 
set that
-   * in the session fixture. But, if one particular test needs a different DB, 
you can
-   * set that here.
-   *
-   * Provides the parse step. Use this class directory for parse-only tests.
-   * Subclasses implement various kinds of analysis operations.
-   */
-  public static class QueryFixture {
-    protected final AnalysisSessionFixture session_;
-    protected final TQueryCtx queryCtx_;
-    protected final TQueryOptions queryOptions_;
-    protected String stmtSql_;
-    protected String db_;
-    protected String user_;
-
-    public QueryFixture(AnalysisSessionFixture session, String stmtSql) {
-      session_ = session;
-      stmtSql_ = stmtSql;
-      queryCtx_ = session_.queryContext();
-      queryOptions_ = session_.cloneOptions();
-      db_ = session_.db();
-      user_ = session_.user();
-    }
-
-    public void setDb(String db) { db_ = db; }
-    public void setUser(String user) { user_ = user; }
-    public TQueryCtx context() { return queryCtx_; }
-    public String stmtSql() { return stmtSql_; }
-    public TQueryOptions options() { return queryOptions_; }
-
-    protected TQueryCtx queryContext() {
-      return TestUtils.createQueryContext(db_, user_, queryOptions_);
-    }
-
-    public StatementBase parse() {
-      // TODO: Use the parser class when available
-      SqlScanner input = new SqlScanner(new StringReader(stmtSql_));
-      SqlParser parser = new SqlParser(input);
-      parser.setQueryOptions(queryOptions_);
-      try {
-        return (StatementBase) parser.parse().value;
-      } catch (Exception e) {
-        throw new IllegalStateException(e);
-      }
-    }
-  }
-
+public class QueryFixture {
   /**
    * Full query analysis, including rewrites. Use this for most tests. The
    * {@link #analyze()} method provides the decorated AST after analysis.
@@ -129,7 +75,7 @@ public class AnalysisSessionFixture {
         stmt_ = parse();
         analysisCtx_ = makeAnalysisContext();
         analysisResult_ = analysisCtx_.analyzeAndAuthorize(stmt_,
-            makeTableCache(stmt_), session_.frontend_.getAuthzChecker());
+            makeTableCache(stmt_), session_.frontend().getAuthzChecker());
         Preconditions.checkNotNull(analysisResult_.getStmt());
         return stmt_;
       } catch (AnalysisException e) {
@@ -158,7 +104,7 @@ public class AnalysisSessionFixture {
      */
     protected StmtTableCache makeTableCache(StatementBase stmt) {
       StmtMetadataLoader mdLoader =
-         new StmtMetadataLoader(session_.frontend_, db_, null);
+         new StmtMetadataLoader(session_.frontend(), db_, null);
       try {
         return mdLoader.loadTables(stmt);
       } catch (InternalException e) {
@@ -211,26 +157,6 @@ public class AnalysisSessionFixture {
    * functional.alltypes.
    */
   public static class SelectFixture extends AnalysisFixture {
-
-    /**
-     * Wraps an ExprRewriteRule to count how many times it's been applied.
-     */
-    static class CountingRewriteRuleWrapper implements ExprRewriteRule {
-      int rewrites_;
-      final ExprRewriteRule wrapped_;
-
-      CountingRewriteRuleWrapper(ExprRewriteRule wrapped) {
-        this.wrapped_ = wrapped;
-      }
-
-      @Override
-      public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException 
{
-        Expr ret = wrapped_.apply(expr, analyzer);
-        if (expr != ret) { rewrites_++; }
-        return ret;
-      }
-    }
-
     public String table_ = "functional.alltypes";
     public String exprSql_;
 
@@ -296,46 +222,41 @@ public class AnalysisSessionFixture {
     }
   }
 
-  private final Frontend frontend_;
-  // Query options to be used for all queries. Can be overriden per-query.
-  private final TQueryOptions queryOptions_;
-  // Default database for all queries.
-  private String db_ = Catalog.DEFAULT_DB;
-  // Default user for all queries.
-  private String user_ = System.getProperty("user.name");
-
-  public AnalysisSessionFixture(Frontend frontend) {
-    frontend_ = frontend;
-    queryOptions_ = new TQueryOptions();
-  }
-
-  public AnalysisSessionFixture setDB(String db) {
-    db_ = db;
-    return this;
-  }
-
-  public AnalysisSessionFixture setUser(String user) {
-    user_ = user;
-    return this;
+  protected final AnalysisSessionFixture session_;
+  protected final TQueryCtx queryCtx_;
+  protected final TQueryOptions queryOptions_;
+  protected String stmtSql_;
+  protected String db_;
+  protected String user_;
+
+  public QueryFixture(AnalysisSessionFixture session, String stmtSql) {
+    session_ = session;
+    stmtSql_ = stmtSql;
+    queryCtx_ = session_.queryContext();
+    queryOptions_ = session_.cloneOptions();
+    db_ = session_.db();
+    user_ = session_.user();
   }
 
+  public void setDb(String db) { db_ = db; }
+  public void setUser(String user) { user_ = user; }
+  public TQueryCtx context() { return queryCtx_; }
+  public String stmtSql() { return stmtSql_; }
   public TQueryOptions options() { return queryOptions_; }
-  public String db() { return db_; }
-  public String user() { return user_; }
 
-  /**
-   * Disable the optional expression rewrites.
-   */
-  public AnalysisSessionFixture disableExprRewrite() {
-    queryOptions_.setEnable_expr_rewrites(false);
-    return this;
-  }
-
-  public TQueryOptions cloneOptions() {
-    return new TQueryOptions(queryOptions_);
+  protected TQueryCtx queryContext() {
+    return TestUtils.createQueryContext(db_, user_, queryOptions_);
   }
 
-  public TQueryCtx queryContext() {
-    return TestUtils.createQueryContext(db_, user_, cloneOptions());
+  public StatementBase parse() {
+    // TODO: Use the parser class when available
+    SqlScanner input = new SqlScanner(new StringReader(stmtSql_));
+    SqlParser parser = new SqlParser(input);
+    parser.setQueryOptions(queryOptions_);
+    try {
+      return (StatementBase) parser.parse().value;
+    } catch (Exception e) {
+      throw new IllegalStateException(e);
+    }
   }
-}
+}
\ No newline at end of file
diff --git a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java 
b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
index 3bbb903..e1e4433 100644
--- a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java
@@ -93,6 +93,56 @@ public class CardinalityTest extends PlannerTestBase {
         "SELECT COUNT(*) FROM functional.alltypes GROUP BY bool_col", 2);
   }
 
+  /**
+   * Test tables with all-null columns. Tests the need for IMPALA-7310: NDV of an
+   * all-null column should be 1.
+   */
+  @Test
+  public void testNulls() {
+    verifyCardinality("SELECT null_int FROM functional.nullrows", 26);
+    // id has unique values, so NDV = 26, card = 26/26 = 1
+    verifyCardinality("SELECT null_int FROM functional.nullrows WHERE id = 
'x'", 1);
+    // group_str repeats each value for 5 rows (6 distinct values), so 26/6 =~ 4
+    verifyCardinality("SELECT null_int FROM functional.nullrows WHERE 
group_str = 'x'",
+        4);
+    // Revised use of nulls per IMPALA-7310
+    // null_str is all nulls, NDV = 1, selectivity = 1/1, cardinality = 26
+    // BUG: At present selectivity is assumed to be 0.1
+    //verifyCardinality(
+    //      "SELECT null_int FROM functional.nullrows WHERE null_str = 'x'", 
26);
+    verifyCardinality("SELECT null_int FROM functional.nullrows WHERE null_str 
= 'x'",
+        3);
+  }
+
+  @Test
+  public void testGroupBy() {
+    String baseStmt = "SELECT COUNT(*) " +
+                      "FROM functional.nullrows " +
+                      "GROUP BY ";
+    // NDV(id) = 26
+    verifyCardinality(baseStmt + "id", 26);
+    // group_str has NDV=6
+    verifyCardinality(baseStmt + "group_str", 6);
+    // blank has NDV=1 (plus 1 for nulls)
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "blank", 2);
+    verifyCardinality(baseStmt + "blank", 1);
+    // null_str is all nulls
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "null_str", 1);
+    verifyCardinality(baseStmt + "null_str", 0);
+    // NDV(id) * NDV(null_str) = 26 * 1 = 26
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "id, null_str", 26);
+    verifyCardinality(baseStmt + "id, null_str", 0);
+    // NDV(id) * NDV(group_str) = 26 * 6 = 156, capped at row count = 26
+    verifyCardinality(baseStmt + "id, group_str", 26);
+  }
+
+  /**
+   * Compute join cardinality using a table without stats. We estimate row 
count.
+   * Combine with an all-nulls column.
+   */
   @Test
   public void testNullColumnJoinCardinality() throws ImpalaException {
     // IMPALA-7565: Make sure there is no division by zero during cardinality 
calculation
@@ -103,6 +153,43 @@ public class CardinalityTest extends PlannerTestBase {
   }
 
   /**
+   * Compute join cardinality using a table with stats.
+   * Focus on an all-nulls column.
+   */
+  @Test
+  public void testJoinWithStats() {
+    // Cross join cardinality is the product of the input cardinalities
+    verifyCardinality(
+        "SELECT null_int FROM functional.alltypes, functional.nullrows", 7300 
* 26);
+    // With that as the basis, add a GROUP BY
+    String baseStmt = "SELECT COUNT(*) " +
+                      "FROM functional.alltypes, functional.nullrows " +
+                      "GROUP BY ";
+    // Unique values, one group per row
+    verifyCardinality(baseStmt + "alltypes.id", 7300);
+    // NDV(id) = 26
+    verifyCardinality(baseStmt + "nullrows.id", 26);
+    // blank has NDV=1, but adjust for nulls
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "blank", 2);
+    verifyCardinality(baseStmt + "blank", 1);
+    // group_str has NDV=6
+    verifyCardinality(baseStmt + "group_str", 6);
+    // null_str is all nulls
+    // Bug: Nulls not counted in NDV
+    //verifyCardinality(baseStmt + "null_str", 1);
+    verifyCardinality(baseStmt + "null_str", 0);
+    // NDV(id) * NDV(null_str) = 26 * 1 = 26
+    // Bug: Nulls not counted in NDV
+    // Here and for similar bugs: see IMPALA-7310 and IMPALA-8094
+    //verifyCardinality(baseStmt + "id, null_str", 26);
+    verifyCardinality(baseStmt + "nullrows.id, null_str", 0);
+    // NDV(id) * NDV(group_str) = 26 * 6 = 156
+    // Planner does not know that id determines group_str
+    verifyCardinality(baseStmt + "nullrows.id, group_str", 156);
+  }
+
+  /**
    * Joins should multiply out cardinalities.
    */
   @Test
diff --git a/testdata/NullRows/data.csv b/testdata/NullRows/data.csv
new file mode 100644
index 0000000..9d43668
--- /dev/null
+++ b/testdata/NullRows/data.csv
@@ -0,0 +1,26 @@
+a,,\N,\N,\N,a,a,true
+b,,\N,\N,\N,a,\N,false
+c,,\N,\N,\N,a,\N,\N
+d,,\N,\N,\N,a,\N,\N
+e,,\N,\N,\N,a,\N,\N
+f,,\N,\N,\N,f,f,true
+g,,\N,\N,\N,f,\N,false
+h,,\N,\N,\N,f,\N,\N
+i,,\N,\N,\N,f,\N,\N
+j,,\N,\N,\N,f,\N,\N
+k,,\N,\N,\N,k,k,true
+l,,\N,\N,\N,k,\N,false
+m,,\N,\N,\N,k,\N,\N
+n,,\N,\N,\N,k,\N,\N
+o,,\N,\N,\N,k,\N,\N
+p,,\N,\N,\N,p,p,true
+q,,\N,\N,\N,p,\N,false
+r,,\N,\N,\N,p,\N,\N
+s,,\N,\N,\N,p,\N,\N
+t,,\N,\N,\N,p,\N,\N
+u,,\N,\N,\N,u,u,true
+v,,\N,\N,\N,u,\N,false
+w,,\N,\N,\N,u,\N,\N
+x,,\N,\N,\N,u,\N,\N
+y,,\N,\N,\N,u,\N,\N
+z,,\N,\N,\N,z,z,true
diff --git a/testdata/bin/compute-table-stats.sh 
b/testdata/bin/compute-table-stats.sh
index d6e6d22..5bff6b1 100755
--- a/testdata/bin/compute-table-stats.sh
+++ b/testdata/bin/compute-table-stats.sh
@@ -33,7 +33,7 @@ 
COMPUTE_STATS_SCRIPT="${IMPALA_HOME}/tests/util/compute_table_stats.py --impalad
 # Run compute stats over as many of the tables used in the Planner tests as 
possible.
 ${COMPUTE_STATS_SCRIPT} --db_names=functional\
     
--table_names="alltypes,alltypesagg,alltypesaggmultifilesnopart,alltypesaggnonulls,
-    alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey,nulltable"
+    
alltypessmall,alltypestiny,jointbl,dimtbl,stringpartitionkey,nulltable,nullrows"
 
 # We cannot load HBase on s3 and isilon yet.
 if [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
diff --git a/testdata/datasets/functional/functional_schema_template.sql 
b/testdata/datasets/functional/functional_schema_template.sql
index a21bdc8..9cdf3d1 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -1350,6 +1350,30 @@ create table {db_name}{db_suffix}.{table_name} (
 partition by hash(a) partitions 3 stored as kudu;
 ====
 ---- DATASET
+-- Table with varying ratios of nulls. Used to test NDV with nulls
+-- Also useful to test null counts as the count varies from 0 to
+-- some to all rows.
+functional
+---- BASE_TABLE_NAME
+nullrows
+---- COLUMNS
+id string
+blank string
+null_str string
+null_int int
+null_double double
+group_str string
+some_nulls string
+bool_nulls boolean
+---- ROW_FORMAT
+delimited fields terminated by ','
+---- DEPENDENT_LOAD
+INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} select * from 
functional.nullrows;
+---- LOAD
+LOAD DATA LOCAL INPATH '{impala_home}/testdata/NullRows/data.csv'
+OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name};
+====
+---- DATASET
 functional
 ---- BASE_TABLE_NAME
 nullescapedtable
diff --git a/testdata/datasets/functional/schema_constraints.csv 
b/testdata/datasets/functional/schema_constraints.csv
index 1ce6ce3..bc43297 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -203,6 +203,7 @@ table_name:tinytable, constraint:only, 
table_format:kudu/none/none
 table_name:tinyinttable, constraint:only, table_format:kudu/none/none
 table_name:zipcode_incomes, constraint:only, table_format:kudu/none/none
 table_name:nulltable, constraint:only, table_format:kudu/none/none
+table_name:nullrows, constraint:only, table_format:kudu/none/none
 table_name:nullescapedtable, constraint:only, table_format:kudu/none/none
 table_name:decimal_tbl, constraint:only, table_format:kudu/none/none
 table_name:decimal_rtf_tbl, constraint:only, table_format:kudu/none/none

Reply via email to