IMPALA-3940: Fix getting column stats through views. The bug: During join ordering we rely on the column stats of join predicates for estimating the join cardinality. We have code that tries to find the stats of a column through views, but there was a bug in identifying slots that belong to base-table scans. The bug led us to incorrectly accept slots of view references, which do not have stats.
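To make the one-line fix in SlotDescriptor.java (shown in the diff below) concrete, here is a minimal, self-contained Java sketch. The simplified Path and SlotDescriptor classes are illustrative stand-ins, not the real Impala classes; only the method names isScanSlot() and isRootedAtTable() are taken from the patch. It shows why checking path_ != null alone is insufficient: a slot that resolves through a view reference still has a non-null path, but that path is not rooted at a base table and therefore carries no base-table column stats.

  // Simplified stand-in for com.cloudera.impala.analysis.Path.
  class Path {
    private final boolean rootedAtTable_;
    Path(boolean rootedAtTable) { rootedAtTable_ = rootedAtTable; }
    boolean isRootedAtTable() { return rootedAtTable_; }
  }

  // Simplified stand-in for com.cloudera.impala.analysis.SlotDescriptor.
  class SlotDescriptor {
    private final Path path_;
    SlotDescriptor(Path path) { path_ = path; }

    // Before the fix: any slot with a resolved path was treated as a scan slot,
    // including slots that resolve against a view reference.
    boolean isScanSlotOld() { return path_ != null; }

    // After the fix: only slots whose path is rooted at a base table count as
    // scan slots, so join-cardinality estimation only uses real column stats.
    boolean isScanSlot() { return path_ != null && path_.isRootedAtTable(); }
  }

  class IsScanSlotDemo {
    public static void main(String[] args) {
      SlotDescriptor baseTableSlot = new SlotDescriptor(new Path(true));
      SlotDescriptor viewSlot = new SlotDescriptor(new Path(false));
      System.out.println(baseTableSlot.isScanSlot()); // true: backed by base-table stats
      System.out.println(viewSlot.isScanSlotOld());   // true (the bug): accepted without stats
      System.out.println(viewSlot.isScanSlot());      // false: correctly rejected
    }
  }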
This patch fixes the above issue and adds new test infrastructure for creating test-local views. It adds a TPCH-equivalent database that contains views of the form "select * from tpch_basetbl" for all TPCH tables and add tests the plans of all TPCH queries on the view database. Change-Id: Ie3b62a5e7e7d0e84850749108c13991647cedce6 Reviewed-on: http://gerrit.cloudera.org:8080/3865 Reviewed-by: Alex Behm <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/286da592 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/286da592 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/286da592 Branch: refs/heads/master Commit: 286da59219f322ce99563537214e3bb30c0fa8c5 Parents: 88b89b8 Author: Alex Behm <[email protected]> Authored: Mon Aug 8 10:27:09 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Thu Aug 11 08:22:30 2016 +0000 ---------------------------------------------------------------------- .../impala/analysis/SlotDescriptor.java | 2 +- .../java/com/cloudera/impala/catalog/View.java | 15 +- .../impala/analysis/AnalyzeAuthStmtsTest.java | 3 +- .../impala/analysis/AnalyzeDDLTest.java | 3 +- .../cloudera/impala/analysis/AnalyzerTest.java | 334 +--- .../com/cloudera/impala/analysis/ToSqlTest.java | 3 +- .../impala/common/FrontendTestBase.java | 316 ++++ .../cloudera/impala/planner/PlannerTest.java | 16 +- .../impala/planner/PlannerTestBase.java | 10 +- .../queries/PlannerTest/joins.test | 50 +- .../queries/PlannerTest/tpcds-all.test | 335 ++-- .../queries/PlannerTest/tpch-all.test | 160 +- .../queries/PlannerTest/tpch-views.test | 1473 ++++++++++++++++++ 13 files changed, 2145 insertions(+), 575 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/main/java/com/cloudera/impala/analysis/SlotDescriptor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SlotDescriptor.java b/fe/src/main/java/com/cloudera/impala/analysis/SlotDescriptor.java index dea7789..7850a0e 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/SlotDescriptor.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/SlotDescriptor.java @@ -141,7 +141,7 @@ public class SlotDescriptor { public Path getPath() { return path_; } - public boolean isScanSlot() { return path_ != null; } + public boolean isScanSlot() { return path_ != null && path_.isRootedAtTable(); } public Column getColumn() { return !isScanSlot() ? null : path_.destColumn(); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/main/java/com/cloudera/impala/catalog/View.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/catalog/View.java b/fe/src/main/java/com/cloudera/impala/catalog/View.java index 24f79b7..f062172 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/View.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/View.java @@ -18,7 +18,6 @@ package com.cloudera.impala.catalog; import java.io.StringReader; -import java.util.ArrayList; import java.util.List; import java.util.Set; @@ -89,6 +88,16 @@ public class View extends Table { colLabels_ = colLabels; } + /** + * Creates a view for testig purposes. 
+ */ + private View(Db db, String name, QueryStmt queryStmt) { + super(null, null, db, name, null); + isLocalView_ = false; + queryStmt_ = queryStmt; + colLabels_ = null; + } + @Override public void load(boolean reuseMetadata, HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException { @@ -191,4 +200,8 @@ public class View extends Table { view.setTable_type(TTableType.VIEW); return view; } + + public static View createTestView(Db db, String name, QueryStmt viewDefStmt) { + return new View(db, name, viewDefStmt); + } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeAuthStmtsTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeAuthStmtsTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeAuthStmtsTest.java index 2f54608..ec12ab8 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeAuthStmtsTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeAuthStmtsTest.java @@ -30,8 +30,7 @@ import com.cloudera.impala.thrift.TQueryCtx; public class AnalyzeAuthStmtsTest extends AnalyzerTest { public AnalyzeAuthStmtsTest() throws AnalysisException { - analyzer_ = createAnalyzer(Catalog.DEFAULT_DB); - analyzer_.getCatalog().getAuthPolicy().addRole( + catalog_.getAuthPolicy().addRole( new Role("myRole", new HashSet<String>())); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java index f79f246..e89a7e0 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeDDLTest.java @@ -46,6 +46,7 @@ import com.cloudera.impala.catalog.StructType; import com.cloudera.impala.catalog.Type; import com.cloudera.impala.common.AnalysisException; import com.cloudera.impala.common.FileSystemUtil; +import com.cloudera.impala.common.FrontendTestBase; import com.cloudera.impala.common.RuntimeEnv; import com.cloudera.impala.testutil.TestUtils; import com.cloudera.impala.util.MetaStoreUtil; @@ -53,7 +54,7 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -public class AnalyzeDDLTest extends AnalyzerTest { +public class AnalyzeDDLTest extends FrontendTestBase { @Test public void TestAlterTableAddDropPartition() throws CatalogException { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java index 8b65c7c..815279c 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java @@ -17,55 +17,27 @@ package com.cloudera.impala.analysis; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.fail; - -import java.io.StringReader; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.List; import 
java.util.Map; -import org.junit.After; import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.cloudera.impala.authorization.AuthorizationConfig; -import com.cloudera.impala.catalog.AggregateFunction; -import com.cloudera.impala.catalog.Catalog; -import com.cloudera.impala.catalog.Column; -import com.cloudera.impala.catalog.Db; import com.cloudera.impala.catalog.Function; -import com.cloudera.impala.catalog.HdfsTable; -import com.cloudera.impala.catalog.ImpaladCatalog; import com.cloudera.impala.catalog.PrimitiveType; -import com.cloudera.impala.catalog.ScalarFunction; import com.cloudera.impala.catalog.ScalarType; -import com.cloudera.impala.catalog.Table; import com.cloudera.impala.catalog.Type; import com.cloudera.impala.common.AnalysisException; -import com.cloudera.impala.testutil.ImpaladTestCatalog; -import com.cloudera.impala.testutil.TestUtils; +import com.cloudera.impala.common.FrontendTestBase; import com.cloudera.impala.thrift.TExpr; -import com.cloudera.impala.thrift.TFunctionBinaryType; -import com.cloudera.impala.thrift.TQueryCtx; -import com.cloudera.impala.thrift.TQueryOptions; -import com.google.common.base.Joiner; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; -public class AnalyzerTest { +public class AnalyzerTest extends FrontendTestBase { protected final static Logger LOG = LoggerFactory.getLogger(AnalyzerTest.class); - protected static ImpaladCatalog catalog_ = new ImpaladTestCatalog(); - - // Test-local list of test databases and tables. These are cleaned up in @After. - protected final List<Db> testDbs_ = Lists.newArrayList(); - protected final List<Table> testTables_ = Lists.newArrayList(); - - protected Analyzer analyzer_; // maps from type to string that will result in literal of that type protected static Map<ScalarType, String> typeToLiteralValue_ = @@ -86,117 +58,6 @@ public class AnalyzerTest { typeToLiteralValue_.put(Type.NULL, "NULL"); } - protected Analyzer createAnalyzer(String defaultDb) { - TQueryCtx queryCtx = - TestUtils.createQueryContext(defaultDb, System.getProperty("user.name")); - return new Analyzer(catalog_, queryCtx, - AuthorizationConfig.createAuthDisabledConfig()); - } - - protected Analyzer createAnalyzer(TQueryOptions queryOptions) { - TQueryCtx queryCtx = TestUtils.createQueryContext(); - queryCtx.request.query_options = queryOptions; - return new Analyzer(catalog_, queryCtx, - AuthorizationConfig.createAuthDisabledConfig()); - } - - protected Analyzer createAnalyzerUsingHiveColLabels() { - Analyzer analyzer = createAnalyzer(Catalog.DEFAULT_DB); - analyzer.setUseHiveColLabels(true); - return analyzer; - } - - // Adds a Udf: default.name(args) to the catalog. - // TODO: we could consider having this be the sql to run instead but that requires - // connecting to the BE. 
- protected Function addTestFunction(String name, - ArrayList<ScalarType> args, boolean varArgs) { - return addTestFunction("default", name, args, varArgs); - } - - protected Function addTestFunction(String name, - ScalarType arg, boolean varArgs) { - return addTestFunction("default", name, Lists.newArrayList(arg), varArgs); - } - - protected Function addTestFunction(String db, String fnName, - ArrayList<ScalarType> args, boolean varArgs) { - ArrayList<Type> argTypes = Lists.newArrayList(); - argTypes.addAll(args); - Function fn = ScalarFunction.createForTesting( - db, fnName, argTypes, Type.INT, "/Foo", "Foo.class", null, - null, TFunctionBinaryType.NATIVE); - fn.setHasVarArgs(varArgs); - catalog_.addFunction(fn); - return fn; - } - - protected void addTestUda(String name, Type retType, Type... argTypes) { - FunctionName fnName = new FunctionName("default", name); - catalog_.addFunction( - AggregateFunction.createForTesting( - fnName, Lists.newArrayList(argTypes), retType, retType, - null, "init_fn_symbol", "update_fn_symbol", null, null, - null, null, null, TFunctionBinaryType.NATIVE)); - } - - /** - * Add a new dummy database with the given name to the catalog. - * Returns the new dummy database. - * The database is registered in testDbs_ and removed in the @After method. - */ - protected Db addTestDb(String dbName, String comment) { - Db db = catalog_.getDb(dbName); - Preconditions.checkState(db == null, "Test db must not already exist."); - db = new Db(dbName, catalog_, new org.apache.hadoop.hive.metastore.api.Database( - dbName, comment, "", Collections.<String, String>emptyMap())); - catalog_.addDb(db); - testDbs_.add(db); - return db; - } - - protected void clearTestDbs() { - for (Db testDb: testDbs_) { - catalog_.removeDb(testDb.getName()); - } - } - - /** - * Add a new dummy table to the catalog based on the given CREATE TABLE sql. - * The dummy table only has the column definitions and no other metadata. - * Returns the new dummy table. - * The test tables are registered in testTables_ and removed in the @After method. - */ - protected Table addTestTable(String createTableSql) { - CreateTableStmt createTableStmt = (CreateTableStmt) AnalyzesOk(createTableSql); - // Currently does not support partitioned tables. - Preconditions.checkState(createTableStmt.getPartitionColumnDefs().isEmpty()); - Db db = catalog_.getDb(createTableStmt.getDb()); - Preconditions.checkNotNull(db, "Test tables must be created in an existing db."); - HdfsTable dummyTable = new HdfsTable(null, null, db, createTableStmt.getTbl(), - createTableStmt.getOwner()); - List<ColumnDef> columnDefs = createTableStmt.getColumnDefs(); - for (int i = 0; i < columnDefs.size(); ++i) { - ColumnDef colDef = columnDefs.get(i); - dummyTable.addColumn(new Column(colDef.getColName(), colDef.getType(), i)); - } - db.addTable(dummyTable); - testTables_.add(dummyTable); - return dummyTable; - } - - protected void clearTestTables() { - for (Table testTable: testTables_) { - testTable.getDb().removeTable(testTable.getName()); - } - } - - @After - public void tearDown() { - clearTestTables(); - clearTestDbs(); - } - /** * Check whether SelectStmt components can be converted to thrift. */ @@ -239,123 +100,6 @@ public class AnalyzerTest { } /** - * Parse 'stmt' and return the root ParseNode. 
- */ - public ParseNode ParsesOk(String stmt) { - SqlScanner input = new SqlScanner(new StringReader(stmt)); - SqlParser parser = new SqlParser(input); - ParseNode node = null; - try { - node = (ParseNode) parser.parse().value; - } catch (Exception e) { - e.printStackTrace(); - fail("\nParser error:\n" + parser.getErrorMsg(stmt)); - } - assertNotNull(node); - return node; - } - - /** - * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error. - */ - public ParseNode AnalyzesOk(String stmt) { - return AnalyzesOk(stmt, createAnalyzer(Catalog.DEFAULT_DB), null); - } - - /** - * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error. - * If 'expectedWarning' is not null, asserts that a warning is produced. - */ - public ParseNode AnalyzesOk(String stmt, String expectedWarning) { - return AnalyzesOk(stmt, createAnalyzer(Catalog.DEFAULT_DB), expectedWarning); - } - - /** - * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error. - * If 'expectedWarning' is not null, asserts that a warning is produced. - */ - public ParseNode AnalyzesOk(String stmt, Analyzer analyzer, String expectedWarning) { - try { - analyzer_ = analyzer; - AnalysisContext analysisCtx = new AnalysisContext(catalog_, - TestUtils.createQueryContext(Catalog.DEFAULT_DB, - System.getProperty("user.name")), - AuthorizationConfig.createAuthDisabledConfig()); - analysisCtx.analyze(stmt, analyzer); - AnalysisContext.AnalysisResult analysisResult = analysisCtx.getAnalysisResult(); - if (expectedWarning != null) { - List<String> actualWarnings = analysisResult.getAnalyzer().getWarnings(); - boolean matchedWarning = false; - for (String actualWarning: actualWarnings) { - if (actualWarning.startsWith(expectedWarning)) { - matchedWarning = true; - break; - } - } - if (!matchedWarning) { - fail(String.format("Did not produce expected warning.\n" + - "Expected warning:\n%s.\nActual warnings:\n%s", - expectedWarning, Joiner.on("\n").join(actualWarnings))); - } - } - Preconditions.checkNotNull(analysisResult.getStmt()); - return analysisResult.getStmt(); - } catch (Exception e) { - e.printStackTrace(); - fail("Error:\n" + e.toString()); - } - return null; - } - - /** - * Asserts if stmt passes analysis. - */ - public void AnalysisError(String stmt) { - AnalysisError(stmt, null); - } - - /** - * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error. - */ - public ParseNode AnalyzesOk(String stmt, Analyzer analyzer) { - return AnalyzesOk(stmt, analyzer, null); - } - - /** - * Asserts if stmt passes analysis or the error string doesn't match and it - * is non-null. - */ - public void AnalysisError(String stmt, String expectedErrorString) { - AnalysisError(stmt, createAnalyzer(Catalog.DEFAULT_DB), expectedErrorString); - } - - /** - * Asserts if stmt passes analysis or the error string doesn't match and it - * is non-null. 
- */ - public void AnalysisError(String stmt, Analyzer analyzer, String expectedErrorString) { - Preconditions.checkNotNull(expectedErrorString, "No expected error message given."); - LOG.info("processing " + stmt); - try { - AnalysisContext analysisCtx = new AnalysisContext(catalog_, - TestUtils.createQueryContext(Catalog.DEFAULT_DB, - System.getProperty("user.name")), - AuthorizationConfig.createAuthDisabledConfig()); - analysisCtx.analyze(stmt, analyzer); - AnalysisContext.AnalysisResult analysisResult = analysisCtx.getAnalysisResult(); - Preconditions.checkNotNull(analysisResult.getStmt()); - } catch (Exception e) { - String errorString = e.getMessage(); - Preconditions.checkNotNull(errorString, "Stack trace lost during exception."); - Assert.assertTrue( - "got error:\n" + errorString + "\nexpected:\n" + expectedErrorString, - errorString.startsWith(expectedErrorString)); - return; - } - fail("Stmt didn't result in analysis error: " + stmt); - } - - /** * Generates and analyzes two variants of the given query by replacing all occurrences * of "$TBL" in the query string with the unqualified and fully-qualified version of * the given table name. The unqualified variant is analyzed using an analyzer that has @@ -426,28 +170,30 @@ public class AnalyzerTest { } private void testSelectStar() throws AnalysisException { - AnalyzesOk("select * from functional.AllTypes"); - DescriptorTable descTbl = analyzer_.getDescTbl(); + SelectStmt stmt = (SelectStmt) AnalyzesOk("select * from functional.AllTypes"); + Analyzer analyzer = stmt.getAnalyzer(); + DescriptorTable descTbl = analyzer.getDescTbl(); TupleDescriptor tupleD = descTbl.getTupleDesc(new TupleId(0)); for (SlotDescriptor slotD: tupleD.getSlots()) { slotD.setIsMaterialized(true); } descTbl.computeMemLayout(); Assert.assertEquals(97.0f, tupleD.getAvgSerializedSize(), 0.0); - checkLayoutParams("functional.alltypes.bool_col", 1, 2, 0, 0); - checkLayoutParams("functional.alltypes.tinyint_col", 1, 3, 0, 1); - checkLayoutParams("functional.alltypes.smallint_col", 2, 4, 0, 2); - checkLayoutParams("functional.alltypes.id", 4, 8, 0, 3); - checkLayoutParams("functional.alltypes.int_col", 4, 12, 0, 4); - checkLayoutParams("functional.alltypes.float_col", 4, 16, 0, 5); - checkLayoutParams("functional.alltypes.year", 4, 20, 0, 6); - checkLayoutParams("functional.alltypes.month", 4, 24, 0, 7); - checkLayoutParams("functional.alltypes.bigint_col", 8, 32, 1, 0); - checkLayoutParams("functional.alltypes.double_col", 8, 40, 1, 1); + checkLayoutParams("functional.alltypes.bool_col", 1, 2, 0, 0, analyzer); + checkLayoutParams("functional.alltypes.tinyint_col", 1, 3, 0, 1, analyzer); + checkLayoutParams("functional.alltypes.smallint_col", 2, 4, 0, 2, analyzer); + checkLayoutParams("functional.alltypes.id", 4, 8, 0, 3, analyzer); + checkLayoutParams("functional.alltypes.int_col", 4, 12, 0, 4, analyzer); + checkLayoutParams("functional.alltypes.float_col", 4, 16, 0, 5, analyzer); + checkLayoutParams("functional.alltypes.year", 4, 20, 0, 6, analyzer); + checkLayoutParams("functional.alltypes.month", 4, 24, 0, 7, analyzer); + checkLayoutParams("functional.alltypes.bigint_col", 8, 32, 1, 0, analyzer); + checkLayoutParams("functional.alltypes.double_col", 8, 40, 1, 1, analyzer); int strSlotSize = PrimitiveType.STRING.getSlotSize(); - checkLayoutParams("functional.alltypes.date_string_col", strSlotSize, 48, 1, 2); + checkLayoutParams("functional.alltypes.date_string_col", + strSlotSize, 48, 1, 2, analyzer); checkLayoutParams("functional.alltypes.string_col", - 
strSlotSize, 48 + strSlotSize, 1, 3); + strSlotSize, 48 + strSlotSize, 1, 3, analyzer); } private void testNonNullable() throws AnalysisException { @@ -455,8 +201,9 @@ public class AnalyzerTest { // (byte range : data) // 0 - 7: count(int_col) // 8 - 15: count(*) - AnalyzesOk("select count(int_col), count(*) from functional.AllTypes"); - DescriptorTable descTbl = analyzer_.getDescTbl(); + SelectStmt stmt = (SelectStmt) AnalyzesOk( + "select count(int_col), count(*) from functional.AllTypes"); + DescriptorTable descTbl = stmt.getAnalyzer().getDescTbl(); TupleDescriptor aggDesc = descTbl.getTupleDesc(new TupleId(1)); for (SlotDescriptor slotD: aggDesc.getSlots()) { slotD.setIsMaterialized(true); @@ -475,8 +222,9 @@ public class AnalyzerTest { // 1 - 7: padded bytes // 8 - 15: sum(int_col) // 16 - 23: count(*) - AnalyzesOk("select sum(int_col), count(*) from functional.AllTypes"); - DescriptorTable descTbl = analyzer_.getDescTbl(); + SelectStmt stmt = (SelectStmt) AnalyzesOk( + "select sum(int_col), count(*) from functional.AllTypes"); + DescriptorTable descTbl = stmt.getAnalyzer().getDescTbl(); TupleDescriptor aggDesc = descTbl.getTupleDesc(new TupleId(1)); for (SlotDescriptor slotD: aggDesc.getSlots()) { slotD.setIsMaterialized(true); @@ -492,8 +240,9 @@ public class AnalyzerTest { * Tests that computeMemLayout() ignores non-materialized slots. */ private void testNonMaterializedSlots() throws AnalysisException { - AnalyzesOk("select * from functional.alltypes"); - DescriptorTable descTbl = analyzer_.getDescTbl(); + SelectStmt stmt = (SelectStmt) AnalyzesOk("select * from functional.alltypes"); + Analyzer analyzer = stmt.getAnalyzer(); + DescriptorTable descTbl = analyzer.getDescTbl(); TupleDescriptor tupleD = descTbl.getTupleDesc(new TupleId(0)); ArrayList<SlotDescriptor> slots = tupleD.getSlots(); for (SlotDescriptor slotD: slots) { @@ -507,20 +256,21 @@ public class AnalyzerTest { descTbl.computeMemLayout(); Assert.assertEquals(68.0f, tupleD.getAvgSerializedSize(), 0.0); // Check non-materialized slots. - checkLayoutParams("functional.alltypes.id", 0, -1, 0, 0); - checkLayoutParams("functional.alltypes.double_col", 0, -1, 0, 0); - checkLayoutParams("functional.alltypes.string_col", 0, -1, 0, 0); + checkLayoutParams("functional.alltypes.id", 0, -1, 0, 0, analyzer); + checkLayoutParams("functional.alltypes.double_col", 0, -1, 0, 0, analyzer); + checkLayoutParams("functional.alltypes.string_col", 0, -1, 0, 0, analyzer); // Check materialized slots. 
- checkLayoutParams("functional.alltypes.bool_col", 1, 2, 0, 0); - checkLayoutParams("functional.alltypes.tinyint_col", 1, 3, 0, 1); - checkLayoutParams("functional.alltypes.smallint_col", 2, 4, 0, 2); - checkLayoutParams("functional.alltypes.int_col", 4, 8, 0, 3); - checkLayoutParams("functional.alltypes.float_col", 4, 12, 0, 4); - checkLayoutParams("functional.alltypes.year", 4, 16, 0, 5); - checkLayoutParams("functional.alltypes.month", 4, 20, 0, 6); - checkLayoutParams("functional.alltypes.bigint_col", 8, 24, 0, 7); + checkLayoutParams("functional.alltypes.bool_col", 1, 2, 0, 0, analyzer); + checkLayoutParams("functional.alltypes.tinyint_col", 1, 3, 0, 1, analyzer); + checkLayoutParams("functional.alltypes.smallint_col", 2, 4, 0, 2, analyzer); + checkLayoutParams("functional.alltypes.int_col", 4, 8, 0, 3, analyzer); + checkLayoutParams("functional.alltypes.float_col", 4, 12, 0, 4, analyzer); + checkLayoutParams("functional.alltypes.year", 4, 16, 0, 5, analyzer); + checkLayoutParams("functional.alltypes.month", 4, 20, 0, 6, analyzer); + checkLayoutParams("functional.alltypes.bigint_col", 8, 24, 0, 7, analyzer); int strSlotSize = PrimitiveType.STRING.getSlotSize(); - checkLayoutParams("functional.alltypes.date_string_col", strSlotSize, 32, 1, 0); + checkLayoutParams("functional.alltypes.date_string_col", + strSlotSize, 32, 1, 0, analyzer); } private void checkLayoutParams(SlotDescriptor d, int byteSize, int byteOffset, @@ -532,8 +282,8 @@ public class AnalyzerTest { } private void checkLayoutParams(String colAlias, int byteSize, int byteOffset, - int nullIndicatorByte, int nullIndicatorBit) { - SlotDescriptor d = analyzer_.getSlotDescriptor(colAlias); + int nullIndicatorByte, int nullIndicatorBit, Analyzer analyzer) { + SlotDescriptor d = analyzer.getSlotDescriptor(colAlias); checkLayoutParams(d, byteSize, byteOffset, nullIndicatorByte, nullIndicatorBit); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java index c264041..6b502a1 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java @@ -24,12 +24,13 @@ import org.junit.Test; import com.cloudera.impala.authorization.AuthorizationConfig; import com.cloudera.impala.common.AnalysisException; +import com.cloudera.impala.common.FrontendTestBase; import com.cloudera.impala.testutil.TestUtils; import com.google.common.base.Preconditions; // TODO: Expand this test, in particular, because view creation relies // on producing correct SQL. -public class ToSqlTest extends AnalyzerTest { +public class ToSqlTest extends FrontendTestBase { // Helpers for templated join tests. private static final String[] joinConditions_ = http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/test/java/com/cloudera/impala/common/FrontendTestBase.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/common/FrontendTestBase.java b/fe/src/test/java/com/cloudera/impala/common/FrontendTestBase.java new file mode 100644 index 0000000..2d54182 --- /dev/null +++ b/fe/src/test/java/com/cloudera/impala/common/FrontendTestBase.java @@ -0,0 +1,316 @@ +// Copyright (c) 2016 Cloudera, Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.cloudera.impala.common; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; + +import com.cloudera.impala.analysis.AnalysisContext; +import com.cloudera.impala.analysis.Analyzer; +import com.cloudera.impala.analysis.ColumnDef; +import com.cloudera.impala.analysis.CreateTableStmt; +import com.cloudera.impala.analysis.CreateViewStmt; +import com.cloudera.impala.analysis.FunctionName; +import com.cloudera.impala.analysis.ParseNode; +import com.cloudera.impala.analysis.QueryStmt; +import com.cloudera.impala.analysis.SqlParser; +import com.cloudera.impala.analysis.SqlScanner; +import com.cloudera.impala.authorization.AuthorizationConfig; +import com.cloudera.impala.catalog.AggregateFunction; +import com.cloudera.impala.catalog.Catalog; +import com.cloudera.impala.catalog.Column; +import com.cloudera.impala.catalog.Db; +import com.cloudera.impala.catalog.Function; +import com.cloudera.impala.catalog.HdfsTable; +import com.cloudera.impala.catalog.ImpaladCatalog; +import com.cloudera.impala.catalog.ScalarFunction; +import com.cloudera.impala.catalog.ScalarType; +import com.cloudera.impala.catalog.Table; +import com.cloudera.impala.catalog.Type; +import com.cloudera.impala.catalog.View; +import com.cloudera.impala.service.Frontend; +import com.cloudera.impala.testutil.ImpaladTestCatalog; +import com.cloudera.impala.testutil.TestUtils; +import com.cloudera.impala.thrift.TFunctionBinaryType; +import com.cloudera.impala.thrift.TQueryCtx; +import com.cloudera.impala.thrift.TQueryOptions; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * Base class for most frontend tests. Contains common functions for unit testing + * various components, e.g., ParsesOk(), ParserError(), AnalyzesOk(), AnalysisError(), + * as well as helper functions for creating test-local tables/views and UDF/UDAs. + */ +public class FrontendTestBase { + protected static ImpaladCatalog catalog_ = new ImpaladTestCatalog(); + protected static Frontend frontend_ = new Frontend( + AuthorizationConfig.createAuthDisabledConfig(), catalog_); + + // Test-local list of test databases and tables. These are cleaned up in @After. 
+ protected final List<Db> testDbs_ = Lists.newArrayList(); + protected final List<Table> testTables_ = Lists.newArrayList(); + + protected Analyzer createAnalyzer(String defaultDb) { + TQueryCtx queryCtx = + TestUtils.createQueryContext(defaultDb, System.getProperty("user.name")); + return new Analyzer(catalog_, queryCtx, + AuthorizationConfig.createAuthDisabledConfig()); + } + + protected Analyzer createAnalyzer(TQueryOptions queryOptions) { + TQueryCtx queryCtx = TestUtils.createQueryContext(); + queryCtx.request.query_options = queryOptions; + return new Analyzer(catalog_, queryCtx, + AuthorizationConfig.createAuthDisabledConfig()); + } + + protected Analyzer createAnalyzerUsingHiveColLabels() { + Analyzer analyzer = createAnalyzer(Catalog.DEFAULT_DB); + analyzer.setUseHiveColLabels(true); + return analyzer; + } + + // Adds a Udf: default.name(args) to the catalog. + // TODO: we could consider having this be the sql to run instead but that requires + // connecting to the BE. + protected Function addTestFunction(String name, + ArrayList<ScalarType> args, boolean varArgs) { + return addTestFunction("default", name, args, varArgs); + } + + protected Function addTestFunction(String name, + ScalarType arg, boolean varArgs) { + return addTestFunction("default", name, Lists.newArrayList(arg), varArgs); + } + + protected Function addTestFunction(String db, String fnName, + ArrayList<ScalarType> args, boolean varArgs) { + ArrayList<Type> argTypes = Lists.newArrayList(); + argTypes.addAll(args); + Function fn = ScalarFunction.createForTesting( + db, fnName, argTypes, Type.INT, "/Foo", "Foo.class", null, + null, TFunctionBinaryType.NATIVE); + fn.setHasVarArgs(varArgs); + catalog_.addFunction(fn); + return fn; + } + + protected void addTestUda(String name, Type retType, Type... argTypes) { + FunctionName fnName = new FunctionName("default", name); + catalog_.addFunction( + AggregateFunction.createForTesting( + fnName, Lists.newArrayList(argTypes), retType, retType, + null, "init_fn_symbol", "update_fn_symbol", null, null, + null, null, null, TFunctionBinaryType.NATIVE)); + } + + /** + * Add a new dummy database with the given name to the catalog. + * Returns the new dummy database. + * The database is registered in testDbs_ and removed in the @After method. + */ + protected Db addTestDb(String dbName, String comment) { + Db db = catalog_.getDb(dbName); + Preconditions.checkState(db == null, "Test db must not already exist."); + db = new Db(dbName, catalog_, new org.apache.hadoop.hive.metastore.api.Database( + dbName, comment, "", Collections.<String, String>emptyMap())); + catalog_.addDb(db); + testDbs_.add(db); + return db; + } + + protected void clearTestDbs() { + for (Db testDb: testDbs_) { + catalog_.removeDb(testDb.getName()); + } + } + + /** + * Add a new dummy table to the catalog based on the given CREATE TABLE sql. + * The dummy table only has the column definitions and no other metadata. + * Returns the new dummy table. + * The test tables are registered in testTables_ and removed in the @After method. + */ + protected Table addTestTable(String createTableSql) { + CreateTableStmt createTableStmt = (CreateTableStmt) AnalyzesOk(createTableSql); + // Currently does not support partitioned tables. 
+ Preconditions.checkState(createTableStmt.getPartitionColumnDefs().isEmpty()); + Db db = catalog_.getDb(createTableStmt.getDb()); + Preconditions.checkNotNull(db, "Test tables must be created in an existing db."); + HdfsTable dummyTable = new HdfsTable(null, null, db, createTableStmt.getTbl(), + createTableStmt.getOwner()); + List<ColumnDef> columnDefs = createTableStmt.getColumnDefs(); + for (int i = 0; i < columnDefs.size(); ++i) { + ColumnDef colDef = columnDefs.get(i); + dummyTable.addColumn(new Column(colDef.getColName(), colDef.getType(), i)); + } + db.addTable(dummyTable); + testTables_.add(dummyTable); + return dummyTable; + } + + /** + * Adds a test-local view to the catalog based on the given CREATE VIEW sql. + * The test views are registered in testTables_ and removed in the @After method. + * Returns the new view. + */ + protected Table addTestView(String createViewSql) { + CreateViewStmt createViewStmt = (CreateViewStmt) AnalyzesOk(createViewSql); + Db db = catalog_.getDb(createViewStmt.getDb()); + Preconditions.checkNotNull(db, "Test views must be created in an existing db."); + QueryStmt viewStmt = (QueryStmt) AnalyzesOk(createViewStmt.getInlineViewDef()); + View dummyView = View.createTestView(db, createViewStmt.getTbl(), viewStmt); + db.addTable(dummyView); + testTables_.add(dummyView); + return dummyView; + } + + protected void clearTestTables() { + for (Table testTable: testTables_) { + testTable.getDb().removeTable(testTable.getName()); + } + } + + @After + public void tearDown() { + clearTestTables(); + clearTestDbs(); + } + + /** + * Parse 'stmt' and return the root ParseNode. + */ + public ParseNode ParsesOk(String stmt) { + SqlScanner input = new SqlScanner(new StringReader(stmt)); + SqlParser parser = new SqlParser(input); + ParseNode node = null; + try { + node = (ParseNode) parser.parse().value; + } catch (Exception e) { + e.printStackTrace(); + fail("\nParser error:\n" + parser.getErrorMsg(stmt)); + } + assertNotNull(node); + return node; + } + + /** + * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error. + */ + public ParseNode AnalyzesOk(String stmt) { + return AnalyzesOk(stmt, createAnalyzer(Catalog.DEFAULT_DB), null); + } + + /** + * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error. + * If 'expectedWarning' is not null, asserts that a warning is produced. + */ + public ParseNode AnalyzesOk(String stmt, String expectedWarning) { + return AnalyzesOk(stmt, createAnalyzer(Catalog.DEFAULT_DB), expectedWarning); + } + + /** + * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error. + * If 'expectedWarning' is not null, asserts that a warning is produced. 
+ */ + public ParseNode AnalyzesOk(String stmt, Analyzer analyzer, String expectedWarning) { + try { + AnalysisContext analysisCtx = new AnalysisContext(catalog_, + TestUtils.createQueryContext(Catalog.DEFAULT_DB, + System.getProperty("user.name")), + AuthorizationConfig.createAuthDisabledConfig()); + analysisCtx.analyze(stmt, analyzer); + AnalysisContext.AnalysisResult analysisResult = analysisCtx.getAnalysisResult(); + if (expectedWarning != null) { + List<String> actualWarnings = analysisResult.getAnalyzer().getWarnings(); + boolean matchedWarning = false; + for (String actualWarning: actualWarnings) { + if (actualWarning.startsWith(expectedWarning)) { + matchedWarning = true; + break; + } + } + if (!matchedWarning) { + fail(String.format("Did not produce expected warning.\n" + + "Expected warning:\n%s.\nActual warnings:\n%s", + expectedWarning, Joiner.on("\n").join(actualWarnings))); + } + } + Preconditions.checkNotNull(analysisResult.getStmt()); + return analysisResult.getStmt(); + } catch (Exception e) { + e.printStackTrace(); + fail("Error:\n" + e.toString()); + } + return null; + } + + /** + * Asserts if stmt passes analysis. + */ + public void AnalysisError(String stmt) { + AnalysisError(stmt, null); + } + + /** + * Analyze 'stmt', expecting it to pass. Asserts in case of analysis error. + */ + public ParseNode AnalyzesOk(String stmt, Analyzer analyzer) { + return AnalyzesOk(stmt, analyzer, null); + } + + /** + * Asserts if stmt passes analysis or the error string doesn't match and it + * is non-null. + */ + public void AnalysisError(String stmt, String expectedErrorString) { + AnalysisError(stmt, createAnalyzer(Catalog.DEFAULT_DB), expectedErrorString); + } + + /** + * Asserts if stmt passes analysis or the error string doesn't match and it + * is non-null. 
+ */ + public void AnalysisError(String stmt, Analyzer analyzer, String expectedErrorString) { + Preconditions.checkNotNull(expectedErrorString, "No expected error message given."); + try { + AnalysisContext analysisCtx = new AnalysisContext(catalog_, + TestUtils.createQueryContext(Catalog.DEFAULT_DB, + System.getProperty("user.name")), + AuthorizationConfig.createAuthDisabledConfig()); + analysisCtx.analyze(stmt, analyzer); + AnalysisContext.AnalysisResult analysisResult = analysisCtx.getAnalysisResult(); + Preconditions.checkNotNull(analysisResult.getStmt()); + } catch (Exception e) { + String errorString = e.getMessage(); + Preconditions.checkNotNull(errorString, "Stack trace lost during exception."); + Assert.assertTrue( + "got error:\n" + errorString + "\nexpected:\n" + expectedErrorString, + errorString.startsWith(expectedErrorString)); + return; + } + fail("Stmt didn't result in analysis error: " + stmt); + } +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java b/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java index 153ba97..7472da0 100644 --- a/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java +++ b/fe/src/test/java/com/cloudera/impala/planner/PlannerTest.java @@ -19,6 +19,7 @@ package com.cloudera.impala.planner; import org.junit.Test; +import com.cloudera.impala.catalog.Db; import com.cloudera.impala.thrift.TQueryOptions; import com.cloudera.impala.thrift.TRuntimeFilterMode; @@ -174,7 +175,20 @@ public class PlannerTest extends PlannerTestBase { @Test public void testTpch() { - runPlannerTestFile("tpch-all"); + runPlannerTestFile("tpch-all", "tpch"); + } + + @Test + public void testTpchViews() { + // Re-create TPCH with views on the base tables. Used for testing + // that plan generation works as expected through views. 
+ addTestDb("tpch_views", "Test DB for TPCH with views."); + Db tpchDb = catalog_.getDb("tpch"); + for (String tblName: tpchDb.getAllTableNames()) { + addTestView(String.format( + "create view tpch_views.%s as select * from tpch.%s", tblName, tblName)); + } + runPlannerTestFile("tpch-views", "tpch_views"); } @Test http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/fe/src/test/java/com/cloudera/impala/planner/PlannerTestBase.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/com/cloudera/impala/planner/PlannerTestBase.java b/fe/src/test/java/com/cloudera/impala/planner/PlannerTestBase.java index 04b5b5f..328d151 100644 --- a/fe/src/test/java/com/cloudera/impala/planner/PlannerTestBase.java +++ b/fe/src/test/java/com/cloudera/impala/planner/PlannerTestBase.java @@ -39,13 +39,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.cloudera.impala.analysis.ColumnLineageGraph; -import com.cloudera.impala.authorization.AuthorizationConfig; import com.cloudera.impala.catalog.CatalogException; +import com.cloudera.impala.common.FrontendTestBase; import com.cloudera.impala.common.ImpalaException; import com.cloudera.impala.common.NotImplementedException; import com.cloudera.impala.common.RuntimeEnv; -import com.cloudera.impala.service.Frontend; -import com.cloudera.impala.testutil.ImpaladTestCatalog; import com.cloudera.impala.testutil.TestFileParser; import com.cloudera.impala.testutil.TestFileParser.Section; import com.cloudera.impala.testutil.TestFileParser.TestCase; @@ -60,8 +58,8 @@ import com.cloudera.impala.thrift.THdfsPartition; import com.cloudera.impala.thrift.THdfsPartitionLocation; import com.cloudera.impala.thrift.THdfsScanNode; import com.cloudera.impala.thrift.THdfsTable; -import com.cloudera.impala.thrift.TLineageGraph; import com.cloudera.impala.thrift.TKuduKeyRange; +import com.cloudera.impala.thrift.TLineageGraph; import com.cloudera.impala.thrift.TNetworkAddress; import com.cloudera.impala.thrift.TPlanFragment; import com.cloudera.impala.thrift.TPlanNode; @@ -79,11 +77,9 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; -public class PlannerTestBase { +public class PlannerTestBase extends FrontendTestBase { private final static Logger LOG = LoggerFactory.getLogger(PlannerTest.class); private final static boolean GENERATE_OUTPUT_FILE = true; - private static Frontend frontend_ = new Frontend( - AuthorizationConfig.createAuthDisabledConfig(), new ImpaladTestCatalog()); private final String testDir_ = "functional-planner/queries/PlannerTest"; private final String outDir_ = "/tmp/PlannerTest/"; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/testdata/workloads/functional-planner/queries/PlannerTest/joins.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test index 5d0e892..f14363f 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test @@ -727,8 +727,15 @@ inner join [shuffle] on (b.int_col = c.int_col and c.bool_col = b.bool_col) ---- PLAN 05:HASH JOIN [INNER JOIN] -| hash predicates: b.int_col = int_col, b.bool_col = bool_col -| runtime filters: RF000 <- int_col, RF001 <- bool_col +| hash predicates: a.int_col = b.int_col, 
a.bool_col = b.bool_col +| runtime filters: RF000 <- b.int_col, RF001 <- b.bool_col +| +|--01:SCAN HDFS [functional.alltypes b] +| partitions=24/24 files=24 size=478.45KB +| +04:HASH JOIN [INNER JOIN] +| hash predicates: a.int_col = int_col, a.bool_col = bool_col +| runtime filters: RF002 <- int_col, RF003 <- bool_col | |--03:AGGREGATE [FINALIZE] | | output: count(*) @@ -736,14 +743,7 @@ on (b.int_col = c.int_col and c.bool_col = b.bool_col) | | | 02:SCAN HDFS [functional.alltypes] | partitions=24/24 files=24 size=478.45KB -| -04:HASH JOIN [INNER JOIN] -| hash predicates: a.int_col = b.int_col, a.bool_col = b.bool_col -| runtime filters: RF002 <- b.int_col, RF003 <- b.bool_col -| -|--01:SCAN HDFS [functional.alltypes b] -| partitions=24/24 files=24 size=478.45KB -| runtime filters: RF000 -> b.int_col, RF001 -> b.bool_col +| runtime filters: RF000 -> functional.alltypes.int_col, RF001 -> functional.alltypes.bool_col | 00:SCAN HDFS [functional.alltypes a] partitions=24/24 files=24 size=478.45KB @@ -752,14 +752,23 @@ on (b.int_col = c.int_col and c.bool_col = b.bool_col) 10:EXCHANGE [UNPARTITIONED] | 05:HASH JOIN [INNER JOIN, PARTITIONED] -| hash predicates: b.int_col = int_col, b.bool_col = bool_col -| runtime filters: RF000 <- int_col, RF001 <- bool_col +| hash predicates: a.int_col = b.int_col, a.bool_col = b.bool_col +| runtime filters: RF000 <- b.int_col, RF001 <- b.bool_col | -|--09:AGGREGATE [FINALIZE] +|--09:EXCHANGE [HASH(b.int_col,b.bool_col)] +| | +| 01:SCAN HDFS [functional.alltypes b] +| partitions=24/24 files=24 size=478.45KB +| +04:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: a.int_col = int_col, a.bool_col = bool_col +| runtime filters: RF002 <- int_col, RF003 <- bool_col +| +|--07:AGGREGATE [FINALIZE] | | output: count:merge(*) | | group by: int_col, bool_col | | -| 08:EXCHANGE [HASH(int_col,bool_col)] +| 06:EXCHANGE [HASH(int_col,bool_col)] | | | 03:AGGREGATE [STREAMING] | | output: count(*) @@ -767,18 +776,9 @@ on (b.int_col = c.int_col and c.bool_col = b.bool_col) | | | 02:SCAN HDFS [functional.alltypes] | partitions=24/24 files=24 size=478.45KB +| runtime filters: RF000 -> functional.alltypes.int_col, RF001 -> functional.alltypes.bool_col | -04:HASH JOIN [INNER JOIN, PARTITIONED] -| hash predicates: a.int_col = b.int_col, a.bool_col = b.bool_col -| runtime filters: RF002 <- b.int_col, RF003 <- b.bool_col -| -|--07:EXCHANGE [HASH(b.int_col,b.bool_col)] -| | -| 01:SCAN HDFS [functional.alltypes b] -| partitions=24/24 files=24 size=478.45KB -| runtime filters: RF000 -> b.int_col, RF001 -> b.bool_col -| -06:EXCHANGE [HASH(a.int_col,a.bool_col)] +08:EXCHANGE [HASH(a.int_col,a.bool_col)] | 00:SCAN HDFS [functional.alltypes a] partitions=24/24 files=24 size=478.45KB http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test index 22032ab..6baefb1 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test @@ -2512,20 +2512,20 @@ limit 100 | runtime filters: RF000 <- d_week_seq - 52, RF001 <- s_store_id | |--15:HASH JOIN [INNER JOIN] +| | hash predicates: ss_store_sk = s_store_sk +| | runtime filters: RF005 <- s_store_sk +| | +| |--12:SCAN HDFS 
[tpcds.store] +| | partitions=1/1 files=1 size=3.08KB +| | +| 14:HASH JOIN [INNER JOIN] | | hash predicates: d_week_seq = d.d_week_seq -| | runtime filters: RF005 <- d.d_week_seq +| | runtime filters: RF006 <- d.d_week_seq | | | |--13:SCAN HDFS [tpcds.date_dim d] | | partitions=1/1 files=1 size=9.84MB | | predicates: d_month_seq >= 1185 + 12, d_month_seq <= 1185 + 23 | | -| 14:HASH JOIN [INNER JOIN] -| | hash predicates: ss_store_sk = s_store_sk -| | runtime filters: RF006 <- s_store_sk -| | -| |--12:SCAN HDFS [tpcds.store] -| | partitions=1/1 files=1 size=3.08KB -| | | 11:AGGREGATE [FINALIZE] | | output: sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) | | group by: d_week_seq, ss_store_sk @@ -2536,29 +2536,29 @@ limit 100 | | | |--09:SCAN HDFS [tpcds.date_dim] | | partitions=1/1 files=1 size=9.84MB -| | runtime filters: RF005 -> tpcds.date_dim.d_week_seq +| | runtime filters: RF006 -> tpcds.date_dim.d_week_seq | | | 08:SCAN HDFS [tpcds.store_sales] | partitions=120/120 files=120 size=21.31MB -| runtime filters: RF006 -> tpcds.store_sales.ss_store_sk, RF007 -> ss_sold_date_sk +| runtime filters: RF005 -> tpcds.store_sales.ss_store_sk, RF007 -> ss_sold_date_sk | 07:HASH JOIN [INNER JOIN] +| hash predicates: ss_store_sk = s_store_sk +| runtime filters: RF002 <- s_store_sk +| +|--04:SCAN HDFS [tpcds.store] +| partitions=1/1 files=1 size=3.08KB +| runtime filters: RF001 -> s_store_id +| +06:HASH JOIN [INNER JOIN] | hash predicates: d_week_seq = d.d_week_seq -| runtime filters: RF002 <- d.d_week_seq +| runtime filters: RF003 <- d.d_week_seq | |--05:SCAN HDFS [tpcds.date_dim d] | partitions=1/1 files=1 size=9.84MB | predicates: d_month_seq >= 1185, d_month_seq <= 1185 + 11 | runtime filters: RF000 -> d.d_week_seq | -06:HASH JOIN [INNER JOIN] -| hash predicates: ss_store_sk = s_store_sk -| runtime filters: RF003 <- s_store_sk -| -|--04:SCAN HDFS [tpcds.store] -| partitions=1/1 files=1 size=3.08KB -| runtime filters: RF001 -> s_store_id -| 03:AGGREGATE [FINALIZE] | output: sum(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) | group by: d_week_seq, ss_store_sk @@ -2569,11 +2569,11 @@ limit 100 | |--01:SCAN HDFS [tpcds.date_dim] | partitions=1/1 files=1 size=9.84MB -| runtime filters: RF000 -> tpcds.date_dim.d_week_seq, RF002 -> tpcds.date_dim.d_week_seq +| runtime filters: RF000 -> tpcds.date_dim.d_week_seq, RF003 -> tpcds.date_dim.d_week_seq | 00:SCAN HDFS [tpcds.store_sales] partitions=120/120 files=120 size=21.31MB - runtime filters: RF003 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk + runtime filters: RF002 -> tpcds.store_sales.ss_store_sk, RF004 -> 
ss_sold_date_sk ---- DISTRIBUTEDPLAN 32:MERGING-EXCHANGE [UNPARTITIONED] | order by: s_store_name1 ASC, s_store_id1 ASC, d_week_seq1 ASC @@ -2589,23 +2589,23 @@ limit 100 |--31:EXCHANGE [HASH(d_week_seq - 52,s_store_id)] | | | 15:HASH JOIN [INNER JOIN, BROADCAST] -| | hash predicates: d_week_seq = d.d_week_seq -| | runtime filters: RF005 <- d.d_week_seq +| | hash predicates: ss_store_sk = s_store_sk +| | runtime filters: RF005 <- s_store_sk | | | |--29:EXCHANGE [BROADCAST] | | | -| | 13:SCAN HDFS [tpcds.date_dim d] -| | partitions=1/1 files=1 size=9.84MB -| | predicates: d_month_seq >= 1185 + 12, d_month_seq <= 1185 + 23 +| | 12:SCAN HDFS [tpcds.store] +| | partitions=1/1 files=1 size=3.08KB | | | 14:HASH JOIN [INNER JOIN, BROADCAST] -| | hash predicates: ss_store_sk = s_store_sk -| | runtime filters: RF006 <- s_store_sk +| | hash predicates: d_week_seq = d.d_week_seq +| | runtime filters: RF006 <- d.d_week_seq | | | |--28:EXCHANGE [BROADCAST] | | | -| | 12:SCAN HDFS [tpcds.store] -| | partitions=1/1 files=1 size=3.08KB +| | 13:SCAN HDFS [tpcds.date_dim d] +| | partitions=1/1 files=1 size=9.84MB +| | predicates: d_month_seq >= 1185 + 12, d_month_seq <= 1185 + 23 | | | 27:AGGREGATE [FINALIZE] | | output: sum:merge(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) @@ -2625,36 +2625,36 @@ limit 100 | | | | | 09:SCAN HDFS [tpcds.date_dim] | | partitions=1/1 files=1 size=9.84MB -| | runtime filters: RF005 -> tpcds.date_dim.d_week_seq +| | runtime filters: RF006 -> tpcds.date_dim.d_week_seq | | | 24:EXCHANGE [HASH(ss_sold_date_sk)] | | | 08:SCAN HDFS [tpcds.store_sales] | partitions=120/120 files=120 size=21.31MB -| runtime filters: RF006 -> tpcds.store_sales.ss_store_sk, RF007 -> ss_sold_date_sk +| runtime filters: RF005 -> tpcds.store_sales.ss_store_sk, RF007 -> ss_sold_date_sk | 30:EXCHANGE [HASH(d_week_seq,s_store_id)] | 07:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: d_week_seq = d.d_week_seq -| runtime filters: RF002 <- d.d_week_seq +| hash predicates: ss_store_sk = s_store_sk +| runtime filters: RF002 <- s_store_sk | |--23:EXCHANGE [BROADCAST] | | -| 05:SCAN HDFS [tpcds.date_dim d] -| partitions=1/1 files=1 size=9.84MB -| predicates: d_month_seq >= 1185, d_month_seq <= 1185 + 11 -| runtime filters: RF000 -> d.d_week_seq +| 04:SCAN HDFS [tpcds.store] +| partitions=1/1 files=1 size=3.08KB +| runtime filters: RF001 -> s_store_id | 06:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: ss_store_sk = s_store_sk -| runtime filters: RF003 <- s_store_sk +| hash predicates: d_week_seq = d.d_week_seq +| runtime filters: RF003 <- d.d_week_seq | |--22:EXCHANGE [BROADCAST] | | -| 04:SCAN HDFS [tpcds.store] -| partitions=1/1 files=1 size=3.08KB -| runtime filters: RF001 -> s_store_id +| 05:SCAN HDFS [tpcds.date_dim d] +| partitions=1/1 files=1 size=9.84MB +| predicates: d_month_seq >= 1185, d_month_seq <= 1185 + 11 +| runtime filters: RF000 -> d.d_week_seq | 21:AGGREGATE [FINALIZE] | output: sum:merge(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN 
(d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) @@ -2674,13 +2674,13 @@ limit 100 | | | 01:SCAN HDFS [tpcds.date_dim] | partitions=1/1 files=1 size=9.84MB -| runtime filters: RF000 -> tpcds.date_dim.d_week_seq, RF002 -> tpcds.date_dim.d_week_seq +| runtime filters: RF000 -> tpcds.date_dim.d_week_seq, RF003 -> tpcds.date_dim.d_week_seq | 18:EXCHANGE [HASH(ss_sold_date_sk)] | 00:SCAN HDFS [tpcds.store_sales] partitions=120/120 files=120 size=21.31MB - runtime filters: RF003 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk + runtime filters: RF002 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk ---- PARALLELPLANS 32:MERGING-EXCHANGE [UNPARTITIONED] | order by: s_store_name1 ASC, s_store_id1 ASC, d_week_seq1 ASC @@ -2700,31 +2700,31 @@ limit 100 | 31:EXCHANGE [HASH(d_week_seq - 52,s_store_id)] | | | 15:HASH JOIN [INNER JOIN, BROADCAST] -| | hash predicates: d_week_seq = d.d_week_seq -| | runtime filters: RF005 <- d.d_week_seq +| | hash predicates: ss_store_sk = s_store_sk +| | runtime filters: RF005 <- s_store_sk | | | |--JOIN BUILD | | | join-table-id=01 plan-id=02 cohort-id=02 -| | | build expressions: d.d_week_seq +| | | build expressions: s_store_sk | | | | | 29:EXCHANGE [BROADCAST] | | | -| | 13:SCAN HDFS [tpcds.date_dim d] -| | partitions=1/1 files=1 size=9.84MB -| | predicates: d_month_seq >= 1185 + 12, d_month_seq <= 1185 + 23 +| | 12:SCAN HDFS [tpcds.store] +| | partitions=1/1 files=1 size=3.08KB | | | 14:HASH JOIN [INNER JOIN, BROADCAST] -| | hash predicates: ss_store_sk = s_store_sk -| | runtime filters: RF006 <- s_store_sk +| | hash predicates: d_week_seq = d.d_week_seq +| | runtime filters: RF006 <- d.d_week_seq | | | |--JOIN BUILD | | | join-table-id=02 plan-id=03 cohort-id=02 -| | | build expressions: s_store_sk +| | | build expressions: d.d_week_seq | | | | | 28:EXCHANGE [BROADCAST] | | | -| | 12:SCAN HDFS [tpcds.store] -| | partitions=1/1 files=1 size=3.08KB +| | 13:SCAN HDFS [tpcds.date_dim d] +| | partitions=1/1 files=1 size=9.84MB +| | predicates: d_month_seq >= 1185 + 12, d_month_seq <= 1185 + 23 | | | 27:AGGREGATE [FINALIZE] | | output: sum:merge(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) @@ -2748,44 +2748,44 @@ limit 100 | | | | | 09:SCAN HDFS [tpcds.date_dim] | | partitions=1/1 files=1 size=9.84MB -| | runtime filters: RF005 -> tpcds.date_dim.d_week_seq +| | runtime filters: RF006 -> tpcds.date_dim.d_week_seq | | | 24:EXCHANGE [HASH(ss_sold_date_sk)] | | | 08:SCAN HDFS [tpcds.store_sales] | partitions=120/120 files=120 size=21.31MB -| runtime filters: RF006 -> tpcds.store_sales.ss_store_sk, RF007 -> ss_sold_date_sk +| runtime filters: 
RF005 -> tpcds.store_sales.ss_store_sk, RF007 -> ss_sold_date_sk | 30:EXCHANGE [HASH(d_week_seq,s_store_id)] | 07:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: d_week_seq = d.d_week_seq -| runtime filters: RF002 <- d.d_week_seq +| hash predicates: ss_store_sk = s_store_sk +| runtime filters: RF002 <- s_store_sk | |--JOIN BUILD | | join-table-id=04 plan-id=05 cohort-id=01 -| | build expressions: d.d_week_seq +| | build expressions: s_store_sk | | | 23:EXCHANGE [BROADCAST] | | -| 05:SCAN HDFS [tpcds.date_dim d] -| partitions=1/1 files=1 size=9.84MB -| predicates: d_month_seq >= 1185, d_month_seq <= 1185 + 11 -| runtime filters: RF000 -> d.d_week_seq +| 04:SCAN HDFS [tpcds.store] +| partitions=1/1 files=1 size=3.08KB +| runtime filters: RF001 -> s_store_id | 06:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: ss_store_sk = s_store_sk -| runtime filters: RF003 <- s_store_sk +| hash predicates: d_week_seq = d.d_week_seq +| runtime filters: RF003 <- d.d_week_seq | |--JOIN BUILD | | join-table-id=05 plan-id=06 cohort-id=01 -| | build expressions: s_store_sk +| | build expressions: d.d_week_seq | | | 22:EXCHANGE [BROADCAST] | | -| 04:SCAN HDFS [tpcds.store] -| partitions=1/1 files=1 size=3.08KB -| runtime filters: RF001 -> s_store_id +| 05:SCAN HDFS [tpcds.date_dim d] +| partitions=1/1 files=1 size=9.84MB +| predicates: d_month_seq >= 1185, d_month_seq <= 1185 + 11 +| runtime filters: RF000 -> d.d_week_seq | 21:AGGREGATE [FINALIZE] | output: sum:merge(CASE WHEN (d_day_name = 'Sunday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Monday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Tuesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Wednesday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Thursday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Friday') THEN ss_sales_price ELSE NULL END), sum:merge(CASE WHEN (d_day_name = 'Saturday') THEN ss_sales_price ELSE NULL END) @@ -2809,13 +2809,13 @@ limit 100 | | | 01:SCAN HDFS [tpcds.date_dim] | partitions=1/1 files=1 size=9.84MB -| runtime filters: RF000 -> tpcds.date_dim.d_week_seq, RF002 -> tpcds.date_dim.d_week_seq +| runtime filters: RF000 -> tpcds.date_dim.d_week_seq, RF003 -> tpcds.date_dim.d_week_seq | 18:EXCHANGE [HASH(ss_sold_date_sk)] | 00:SCAN HDFS [tpcds.store_sales] partitions=120/120 files=120 size=21.31MB - runtime filters: RF003 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk + runtime filters: RF002 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk ==== # TPCDS-Q63 select @@ -3094,9 +3094,23 @@ limit 100 | order by: s_store_name ASC, i_item_desc ASC | 13:HASH JOIN [INNER JOIN] +| hash predicates: ss_item_sk = i_item_sk +| runtime filters: RF000 <- i_item_sk +| +|--01:SCAN HDFS [tpcds.item] +| partitions=1/1 files=1 size=4.82MB +| +12:HASH JOIN [INNER JOIN] +| hash predicates: ss_store_sk = s_store_sk +| runtime filters: RF001 <- s_store_sk +| +|--00:SCAN HDFS [tpcds.store] +| partitions=1/1 files=1 size=3.08KB +| +11:HASH JOIN [INNER JOIN] | hash predicates: ss_store_sk = ss_store_sk | other predicates: sum(ss_sales_price) <= 0.1 * avg(revenue) -| runtime filters: RF000 <- ss_store_sk +| runtime filters: RF002 <- ss_store_sk | |--06:AGGREGATE [FINALIZE] | | output: avg(sum(ss_sales_price)) @@ -3116,22 +3130,7 @@ limit 100 | | | 02:SCAN HDFS [tpcds.store_sales] | partitions=120/120 files=120 size=21.31MB -| runtime filters: RF004 -> ss_sold_date_sk -| -12:HASH JOIN [INNER 
JOIN] -| hash predicates: ss_item_sk = i_item_sk -| runtime filters: RF001 <- i_item_sk -| -|--01:SCAN HDFS [tpcds.item] -| partitions=1/1 files=1 size=4.82MB -| -11:HASH JOIN [INNER JOIN] -| hash predicates: ss_store_sk = s_store_sk -| runtime filters: RF002 <- s_store_sk -| -|--00:SCAN HDFS [tpcds.store] -| partitions=1/1 files=1 size=3.08KB -| runtime filters: RF000 -> tpcds.store.s_store_sk +| runtime filters: RF001 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk | 10:AGGREGATE [FINALIZE] | output: sum(ss_sales_price) @@ -3147,37 +3146,59 @@ limit 100 | 07:SCAN HDFS [tpcds.store_sales] partitions=120/120 files=120 size=21.31MB - runtime filters: RF000 -> tpcds.store_sales.ss_store_sk, RF001 -> tpcds.store_sales.ss_item_sk, RF002 -> tpcds.store_sales.ss_store_sk, RF003 -> ss_sold_date_sk + runtime filters: RF000 -> tpcds.store_sales.ss_item_sk, RF001 -> tpcds.store_sales.ss_store_sk, RF002 -> tpcds.store_sales.ss_store_sk, RF003 -> ss_sold_date_sk ---- DISTRIBUTEDPLAN -26:MERGING-EXCHANGE [UNPARTITIONED] +28:MERGING-EXCHANGE [UNPARTITIONED] | order by: s_store_name ASC, i_item_desc ASC | limit: 100 | 14:TOP-N [LIMIT=100] | order by: s_store_name ASC, i_item_desc ASC | -13:HASH JOIN [INNER JOIN, BROADCAST] +13:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: ss_item_sk = i_item_sk +| runtime filters: RF000 <- i_item_sk +| +|--27:EXCHANGE [HASH(i_item_sk)] +| | +| 01:SCAN HDFS [tpcds.item] +| partitions=1/1 files=1 size=4.82MB +| +26:EXCHANGE [HASH(ss_item_sk)] +| +12:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: ss_store_sk = s_store_sk +| runtime filters: RF001 <- s_store_sk +| +|--25:EXCHANGE [HASH(s_store_sk)] +| | +| 00:SCAN HDFS [tpcds.store] +| partitions=1/1 files=1 size=3.08KB +| +24:EXCHANGE [HASH(ss_store_sk)] +| +11:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: ss_store_sk = ss_store_sk | other predicates: sum(ss_sales_price) <= 0.1 * avg(revenue) -| runtime filters: RF000 <- ss_store_sk +| runtime filters: RF002 <- ss_store_sk | -|--25:EXCHANGE [BROADCAST] +|--23:EXCHANGE [BROADCAST] | | -| 24:AGGREGATE [FINALIZE] +| 22:AGGREGATE [FINALIZE] | | output: avg:merge(revenue) | | group by: ss_store_sk | | -| 23:EXCHANGE [HASH(ss_store_sk)] +| 21:EXCHANGE [HASH(ss_store_sk)] | | | 06:AGGREGATE [STREAMING] | | output: avg(sum(ss_sales_price)) | | group by: ss_store_sk | | -| 22:AGGREGATE [FINALIZE] +| 20:AGGREGATE [FINALIZE] | | output: sum:merge(ss_sales_price) | | group by: ss_store_sk, ss_item_sk | | -| 21:EXCHANGE [HASH(ss_store_sk,ss_item_sk)] +| 19:EXCHANGE [HASH(ss_store_sk,ss_item_sk)] | | | 05:AGGREGATE [STREAMING] | | output: sum(ss_sales_price) @@ -3187,7 +3208,7 @@ limit 100 | | hash predicates: ss_sold_date_sk = d_date_sk | | runtime filters: RF004 <- d_date_sk | | -| |--20:EXCHANGE [BROADCAST] +| |--18:EXCHANGE [BROADCAST] | | | | | 03:SCAN HDFS [tpcds.date_dim] | | partitions=1/1 files=1 size=9.84MB @@ -3195,26 +3216,7 @@ limit 100 | | | 02:SCAN HDFS [tpcds.store_sales] | partitions=120/120 files=120 size=21.31MB -| runtime filters: RF004 -> ss_sold_date_sk -| -12:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: ss_item_sk = i_item_sk -| runtime filters: RF001 <- i_item_sk -| -|--19:EXCHANGE [BROADCAST] -| | -| 01:SCAN HDFS [tpcds.item] -| partitions=1/1 files=1 size=4.82MB -| -11:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: ss_store_sk = s_store_sk -| runtime filters: RF002 <- s_store_sk -| -|--18:EXCHANGE [BROADCAST] -| | -| 00:SCAN HDFS [tpcds.store] -| partitions=1/1 files=1 size=3.08KB -| runtime filters: RF000 -> 
tpcds.store.s_store_sk +| runtime filters: RF001 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk | 17:AGGREGATE [FINALIZE] | output: sum:merge(ss_sales_price) @@ -3238,41 +3240,71 @@ limit 100 | 07:SCAN HDFS [tpcds.store_sales] partitions=120/120 files=120 size=21.31MB - runtime filters: RF000 -> tpcds.store_sales.ss_store_sk, RF001 -> tpcds.store_sales.ss_item_sk, RF002 -> tpcds.store_sales.ss_store_sk, RF003 -> ss_sold_date_sk + runtime filters: RF000 -> tpcds.store_sales.ss_item_sk, RF001 -> tpcds.store_sales.ss_store_sk, RF002 -> tpcds.store_sales.ss_store_sk, RF003 -> ss_sold_date_sk ---- PARALLELPLANS -26:MERGING-EXCHANGE [UNPARTITIONED] +28:MERGING-EXCHANGE [UNPARTITIONED] | order by: s_store_name ASC, i_item_desc ASC | limit: 100 | 14:TOP-N [LIMIT=100] | order by: s_store_name ASC, i_item_desc ASC | -13:HASH JOIN [INNER JOIN, BROADCAST] +13:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: ss_item_sk = i_item_sk +| runtime filters: RF000 <- i_item_sk +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: i_item_sk +| | +| 27:EXCHANGE [HASH(i_item_sk)] +| | +| 01:SCAN HDFS [tpcds.item] +| partitions=1/1 files=1 size=4.82MB +| +26:EXCHANGE [HASH(ss_item_sk)] +| +12:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: ss_store_sk = s_store_sk +| runtime filters: RF001 <- s_store_sk +| +|--JOIN BUILD +| | join-table-id=01 plan-id=02 cohort-id=01 +| | build expressions: s_store_sk +| | +| 25:EXCHANGE [HASH(s_store_sk)] +| | +| 00:SCAN HDFS [tpcds.store] +| partitions=1/1 files=1 size=3.08KB +| +24:EXCHANGE [HASH(ss_store_sk)] +| +11:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: ss_store_sk = ss_store_sk | other predicates: sum(ss_sales_price) <= 0.1 * avg(revenue) -| runtime filters: RF000 <- ss_store_sk +| runtime filters: RF002 <- ss_store_sk | |--JOIN BUILD -| | join-table-id=00 plan-id=01 cohort-id=01 +| | join-table-id=02 plan-id=03 cohort-id=01 | | build expressions: ss_store_sk | | -| 25:EXCHANGE [BROADCAST] +| 23:EXCHANGE [BROADCAST] | | -| 24:AGGREGATE [FINALIZE] +| 22:AGGREGATE [FINALIZE] | | output: avg:merge(revenue) | | group by: ss_store_sk | | -| 23:EXCHANGE [HASH(ss_store_sk)] +| 21:EXCHANGE [HASH(ss_store_sk)] | | | 06:AGGREGATE [STREAMING] | | output: avg(sum(ss_sales_price)) | | group by: ss_store_sk | | -| 22:AGGREGATE [FINALIZE] +| 20:AGGREGATE [FINALIZE] | | output: sum:merge(ss_sales_price) | | group by: ss_store_sk, ss_item_sk | | -| 21:EXCHANGE [HASH(ss_store_sk,ss_item_sk)] +| 19:EXCHANGE [HASH(ss_store_sk,ss_item_sk)] | | | 05:AGGREGATE [STREAMING] | | output: sum(ss_sales_price) @@ -3283,10 +3315,10 @@ limit 100 | | runtime filters: RF004 <- d_date_sk | | | |--JOIN BUILD -| | | join-table-id=01 plan-id=02 cohort-id=02 +| | | join-table-id=03 plan-id=04 cohort-id=02 | | | build expressions: d_date_sk | | | -| | 20:EXCHANGE [BROADCAST] +| | 18:EXCHANGE [BROADCAST] | | | | | 03:SCAN HDFS [tpcds.date_dim] | | partitions=1/1 files=1 size=9.84MB @@ -3294,34 +3326,7 @@ limit 100 | | | 02:SCAN HDFS [tpcds.store_sales] | partitions=120/120 files=120 size=21.31MB -| runtime filters: RF004 -> ss_sold_date_sk -| -12:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: ss_item_sk = i_item_sk -| runtime filters: RF001 <- i_item_sk -| -|--JOIN BUILD -| | join-table-id=02 plan-id=03 cohort-id=01 -| | build expressions: i_item_sk -| | -| 19:EXCHANGE [BROADCAST] -| | -| 01:SCAN HDFS [tpcds.item] -| partitions=1/1 files=1 size=4.82MB -| -11:HASH JOIN [INNER JOIN, BROADCAST] -| hash predicates: ss_store_sk = s_store_sk 
-| runtime filters: RF002 <- s_store_sk -| -|--JOIN BUILD -| | join-table-id=03 plan-id=04 cohort-id=01 -| | build expressions: s_store_sk -| | -| 18:EXCHANGE [BROADCAST] -| | -| 00:SCAN HDFS [tpcds.store] -| partitions=1/1 files=1 size=3.08KB -| runtime filters: RF000 -> tpcds.store.s_store_sk +| runtime filters: RF001 -> tpcds.store_sales.ss_store_sk, RF004 -> ss_sold_date_sk | 17:AGGREGATE [FINALIZE] | output: sum:merge(ss_sales_price) @@ -3349,7 +3354,7 @@ limit 100 | 07:SCAN HDFS [tpcds.store_sales] partitions=120/120 files=120 size=21.31MB - runtime filters: RF000 -> tpcds.store_sales.ss_store_sk, RF001 -> tpcds.store_sales.ss_item_sk, RF002 -> tpcds.store_sales.ss_store_sk, RF003 -> ss_sold_date_sk + runtime filters: RF000 -> tpcds.store_sales.ss_item_sk, RF001 -> tpcds.store_sales.ss_store_sk, RF002 -> tpcds.store_sales.ss_store_sk, RF003 -> ss_sold_date_sk ==== # TPCDS-Q68 select @@ -5129,21 +5134,21 @@ with v1 as ( partitions=120/120 files=120 size=21.31MB runtime filters: RF000 -> ss_store_sk, RF001 -> ss_sold_date_sk, RF002 -> ss_item_sk ---- PARALLELPLANS -53:MERGING-EXCHANGE [UNPARTITIONED] +54:MERGING-EXCHANGE [UNPARTITIONED] | order by: sum_sales - avg_monthly_sales ASC, d_year ASC | limit: 100 | 35:TOP-N [LIMIT=100] | order by: sum_sales - avg_monthly_sales ASC, d_year ASC | -34:HASH JOIN [INNER JOIN, BROADCAST] +34:HASH JOIN [INNER JOIN, PARTITIONED] | hash predicates: rank() = rank() - 1, s_store_name = s_store_name, i_category = i_category, s_company_name = s_company_name, i_brand = i_brand | |--JOIN BUILD | | join-table-id=00 plan-id=01 cohort-id=01 | | build expressions: rank() - 1, s_store_name, i_category, s_company_name, i_brand | | -| 52:EXCHANGE [BROADCAST] +| 53:EXCHANGE [HASH(rank() - 1,s_store_name,i_category,s_company_name,i_brand)] | | | 32:ANALYTIC | | functions: rank() @@ -5208,6 +5213,8 @@ with v1 as ( | partitions=120/120 files=120 size=21.31MB | runtime filters: RF006 -> ss_store_sk, RF007 -> ss_sold_date_sk, RF008 -> ss_item_sk | +52:EXCHANGE [HASH(rank(),s_store_name,i_category,s_company_name,i_brand)] +| 33:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: rank() + 1 = rank(), s_store_name = s_store_name, i_category = i_category, s_company_name = s_company_name, i_brand = i_brand | http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/286da592/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test index b91cc04..e5948eb 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test @@ -12,7 +12,7 @@ select avg(l_discount) as avg_disc, count(*) as count_order from - tpch.lineitem + lineitem where l_shipdate <= '1998-09-02' group by @@ -85,11 +85,11 @@ select s_phone, s_comment from - tpch.part, - tpch.supplier, - tpch.partsupp, - tpch.nation, - tpch.region + part, + supplier, + partsupp, + nation, + region where p_partkey = ps_partkey and s_suppkey = ps_suppkey @@ -441,9 +441,9 @@ select o_orderdate, o_shippriority from - tpch.customer, - tpch.orders, - tpch.lineitem + customer, + orders, + lineitem where c_mktsegment = 'BUILDING' and c_custkey = o_custkey @@ -588,7 +588,7 @@ select o_orderpriority, count(*) as order_count from - tpch.orders + orders where o_orderdate >= '1993-07-01' and o_orderdate < 
'1993-10-01' @@ -596,7 +596,7 @@ where select * from - tpch.lineitem + lineitem where l_orderkey = o_orderkey and l_commitdate < l_receiptdate @@ -702,12 +702,12 @@ select n_name, sum(l_extendedprice * (1 - l_discount)) as revenue from - tpch.customer, - tpch.orders, - tpch.lineitem, - tpch.supplier, - tpch.nation, - tpch.region + customer, + orders, + lineitem, + supplier, + nation, + region where c_custkey = o_custkey and l_orderkey = o_orderkey @@ -942,7 +942,7 @@ order by select sum(l_extendedprice * l_discount) as revenue from - tpch.lineitem + lineitem where l_shipdate >= '1994-01-01' and l_shipdate < '1995-01-01' @@ -994,12 +994,12 @@ from ( year(l_shipdate) as l_year, l_extendedprice * (1 - l_discount) as volume from - tpch.supplier, - tpch.lineitem, - tpch.orders, - tpch.customer, - tpch.nation n1, - tpch.nation n2 + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 where s_suppkey = l_suppkey and o_orderkey = l_orderkey @@ -1251,14 +1251,14 @@ from ( l_extendedprice * (1 - l_discount) as volume, n2.n_name as nation from - tpch.part, - tpch.supplier, - tpch.lineitem, - tpch.orders, - tpch.customer, - tpch.nation n1, - tpch.nation n2, - tpch.region + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region where p_partkey = l_partkey and s_suppkey = l_suppkey @@ -1571,12 +1571,12 @@ from( year(o_orderdate) as o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from - tpch.part, - tpch.supplier, - tpch.lineitem, - tpch.partsupp, - tpch.orders, - tpch.nation + part, + supplier, + lineitem, + partsupp, + orders, + nation where s_suppkey = l_suppkey and ps_suppkey = l_suppkey @@ -1815,10 +1815,10 @@ select c_phone, c_comment from - tpch.customer, - tpch.orders, - tpch.lineitem, - tpch.nation + customer, + orders, + lineitem, + nation where c_custkey = o_custkey and l_orderkey = o_orderkey @@ -2004,9 +2004,9 @@ from ( ps_partkey, sum(ps_supplycost * ps_availqty) as value from - tpch.partsupp, - tpch.supplier, - tpch.nation + partsupp, + supplier, + nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey @@ -2019,9 +2019,9 @@ where select sum(ps_supplycost * ps_availqty) * 0.0001 from - tpch.partsupp, - tpch.supplier, - tpch.nation + partsupp, + supplier, + nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey @@ -2274,8 +2274,8 @@ select else 0 end) as low_line_count from - tpch.orders, - tpch.lineitem + orders, + lineitem where o_orderkey = l_orderkey and l_shipmode in ('MAIL', 'SHIP') @@ -2381,7 +2381,7 @@ from ( c_custkey, count(o_orderkey) as c_count from - tpch.customer left outer join tpch.orders on ( + customer left outer join tpch.orders on ( c_custkey = o_custkey and o_comment not like '%special%requests%' ) @@ -2502,8 +2502,8 @@ select else 0.0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from - tpch.lineitem, - tpch.part + lineitem, + part where l_partkey = p_partkey and l_shipdate >= '1995-09-01' @@ -2579,7 +2579,7 @@ with revenue_view as ( l_suppkey as supplier_no, sum(l_extendedprice * (1 - l_discount)) as total_revenue from - tpch.lineitem + lineitem where l_shipdate >= '1996-01-01' and l_shipdate < '1996-04-01' @@ -2592,7 +2592,7 @@ select s_phone, total_revenue from - tpch.supplier, + supplier, revenue_view where s_suppkey = supplier_no @@ -2768,8 +2768,8 @@ select p_size, count(distinct ps_suppkey) as supplier_cnt from - tpch.partsupp, - tpch.part + partsupp, + part where p_partkey = ps_partkey and p_brand <> 'Brand#45' @@ -2779,7 +2779,7 @@ where 
select s_suppkey from - tpch.supplier + supplier where s_comment like '%Customer%Complaints%' ) @@ -2917,8 +2917,8 @@ order by select sum(l_extendedprice) / 7.0 as avg_yearly from - tpch.lineitem, - tpch.part + lineitem, + part where p_partkey = l_partkey and p_brand = 'Brand#23' @@ -2927,7 +2927,7 @@ where select 0.2 * avg(l_quantity) from - tpch.lineitem + lineitem where l_partkey = p_partkey ) @@ -3064,15 +3064,15 @@ select o_totalprice, sum(l_quantity) from - tpch.customer, - tpch.orders, - tpch.lineitem + customer, + orders, + lineitem where o_orderkey in ( select l_orderkey from - tpch.lineitem + lineitem group by l_orderkey having @@ -3266,8 +3266,8 @@ limit 100 select sum(l_extendedprice * (1 - l_discount)) as revenue from - tpch.lineitem, - tpch.part + lineitem, + part where p_partkey = l_partkey and ( @@ -3368,20 +3368,20 @@ select s_name, s_address from - tpch.supplier, - tpch.nation + supplier, + nation where s_suppkey in ( select ps_suppkey from - tpch.partsupp + partsupp where ps_partkey in ( select p_partkey from - tpch.part + part where p_name like 'forest%' ) @@ -3389,7 +3389,7 @@ where select 0.5 * sum(l_quantity) from - tpch.lineitem + lineitem where l_partkey = ps_partkey and l_suppkey = ps_suppkey @@ -3597,9 +3597,9 @@ select s_name, count(*) as numwait from - tpch.supplier, - tpch.lineitem l1, - tpch.orders, + supplier, + lineitem l1, + orders, tpch.nation where s_suppkey = l1.l_suppkey @@ -3610,7 +3610,7 @@ where select * from - tpch.lineitem l2 + lineitem l2 where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey <> l1.l_suppkey @@ -3619,7 +3619,7 @@ where select * from - tpch.lineitem l3 + lineitem l3 where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey <> l1.l_suppkey @@ -3868,14 +3868,14 @@ from ( substr(c_phone, 1, 2) as cntrycode, c_acctbal from - tpch.customer + customer where substr(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') and c_acctbal > ( select avg(c_acctbal) from - tpch.customer + customer where c_acctbal > 0.00 and substr(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') @@ -3884,7 +3884,7 @@ from ( select * from - tpch.orders + orders where o_custkey = c_custkey )
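
For context on the tpch-all.test changes above: dropping the explicit "tpch." prefix lets the same query text be planned either against the base tables or against a database of simple pass-through views (the new tpch-views.test expectations). The following is a minimal sketch of that setup, not code from the patch; the database name tpch_views and the session setup are assumptions for illustration only.

    -- Hypothetical database of pass-through views over the TPCH base tables
    -- (illustrative names, not taken from the patch).
    CREATE DATABASE IF NOT EXISTS tpch_views;
    CREATE VIEW tpch_views.lineitem AS SELECT * FROM tpch.lineitem;

    -- With tpch_views as the current database, the unqualified table
    -- reference resolves to the view, and the planner has to look through
    -- the view to reach the base table's column stats when estimating
    -- join cardinality.
    USE tpch_views;
    SELECT sum(l_extendedprice * l_discount) AS revenue
    FROM lineitem
    WHERE l_shipdate >= '1994-01-01' AND l_shipdate < '1995-01-01';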
