IMPALA-7842: Expose physical plan for unit testing The FrontEnd class uses a functional model to generate a plan: pass in SQL text to the createExecRequest() method and get back a Thrift plan ready for serialization.
For unit testing, however, we need access to the intermediate physical plan tree produced by the planner. Inspecting only the Thrift version results in loss of important details. This fix introduces a new planner context (PlanCtx) class that passes information into the planner, and passses the physical plan back out. Code that used the prior version (pass in a query context and explain string) are changed to use the new form. Testing: * There is no functional change, just a refactoring of the existing code. Ran all FE test to verify no regressions. * Introduces a new test case that uses this feature to verify plan cardinality. Change-Id: I4c615dbc1d14685a6966c5ca7538777cdc80b74d Reviewed-on: http://gerrit.cloudera.org:8080/11920 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7dbeb695 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7dbeb695 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7dbeb695 Branch: refs/heads/master Commit: 7dbeb6950f4d21248dc71b89add1aa14b8c95a8f Parents: 9af2fdc Author: Paul Rogers <prog...@cloudera.com> Authored: Sat Nov 10 20:32:59 2018 -0800 Committer: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Committed: Thu Dec 6 07:56:01 2018 +0000 ---------------------------------------------------------------------- .../org/apache/impala/service/Frontend.java | 92 ++++++++--- .../org/apache/impala/service/JniFrontend.java | 10 +- .../apache/impala/planner/CardinalityTest.java | 159 +++++++++++++++++++ .../org/apache/impala/planner/PlannerTest.java | 12 +- .../apache/impala/planner/PlannerTestBase.java | 23 +-- 5 files changed, 258 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/7dbeb695/fe/src/main/java/org/apache/impala/service/Frontend.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java index d67d1e0..57f4488 100644 --- a/fe/src/main/java/org/apache/impala/service/Frontend.java +++ b/fe/src/main/java/org/apache/impala/service/Frontend.java @@ -175,6 +175,53 @@ public class Frontend { private static final int INCONSISTENT_METADATA_NUM_RETRIES = BackendConfig.INSTANCE.getLocalCatalogMaxFetchRetries(); + /** + * Plan-time context that allows capturing various artifacts created + * during the process. + * + * The context gathers an optional describe string for display to the + * user, and the optional plan fragment nodes for use in unit tests. + */ + public static class PlanCtx { + // The query context. + protected final TQueryCtx queryCtx_; + // The explain string built from the query plan. + protected final StringBuilder explainBuf_; + // Flag to indicate whether to capture (return) the plan. + protected boolean capturePlan_; + // The physical plan, divided by fragment, before conversion to + // Thrift. For unit testing. + protected List<PlanFragment> plan_; + + public PlanCtx(TQueryCtx qCtx) { + queryCtx_ = qCtx; + explainBuf_ = new StringBuilder(); + } + + public PlanCtx(TQueryCtx qCtx, StringBuilder describe) { + queryCtx_ = qCtx; + explainBuf_ = describe; + } + + /** + * Request to capture the plan tree for unit tests. + */ + public void requestPlanCapture() { capturePlan_ = true; } + public boolean planCaptureRequested() { return capturePlan_; } + public TQueryCtx getQueryContext() { return queryCtx_; } + + /** + * @return the captured plan tree. Used only for unit tests + */ + @VisibleForTesting + public List<PlanFragment> getPlan() { return plan_; } + + /** + * @return the captured describe string + */ + public String getExplainString() { return explainBuf_.toString(); } + } + private final FeCatalogManager catalogManager_; private final AuthorizationConfig authzConfig_; /** @@ -653,9 +700,9 @@ public class Frontend { * not increase the query id counter. */ public String getExplainString(TQueryCtx queryCtx) throws ImpalaException { - StringBuilder stringBuilder = new StringBuilder(); - createExecRequest(queryCtx, stringBuilder); - return stringBuilder.toString(); + PlanCtx planCtx = new PlanCtx(queryCtx); + createExecRequest(planCtx); + return planCtx.getExplainString(); } public TGetCatalogMetricsResult getCatalogMetrics() throws ImpalaException { @@ -1100,7 +1147,7 @@ public class Frontend { * Create a populated TQueryExecRequest, corresponding to the supplied planner. */ private TQueryExecRequest createExecRequest( - Planner planner, StringBuilder explainString) throws ImpalaException { + Planner planner, PlanCtx planCtx) throws ImpalaException { TQueryCtx queryCtx = planner.getQueryCtx(); AnalysisResult analysisResult = planner.getAnalysisResult(); boolean isMtExec = analysisResult.isQueryStmt() @@ -1116,6 +1163,9 @@ public class Frontend { LOG.trace("create plan"); planRoots.add(planner.createPlan().get(0)); } + if (planCtx.planCaptureRequested()) { + planCtx.plan_ = planRoots; + } // Compute resource requirements of the final plans. planner.computeResourceReqs(planRoots, queryCtx, result); @@ -1146,8 +1196,8 @@ public class Frontend { // create EXPLAIN output after setting everything else result.setQuery_ctx(queryCtx); // needed by getExplainString() List<PlanFragment> allFragments = planRoots.get(0).getNodesPreOrder(); - explainString.append(planner.getExplainString(allFragments, result)); - result.setQuery_plan(explainString.toString()); + planCtx.explainBuf_.append(planner.getExplainString(allFragments, result)); + result.setQuery_plan(planCtx.getExplainString()); return result; } @@ -1171,15 +1221,16 @@ public class Frontend { } /** - * Create a populated TExecRequest corresponding to the supplied TQueryCtx. + * Create a TExecRequest for the query and query context provided in the plan + * context. Fills in the EXPLAIN string and optionally the internal plan tree. */ - public TExecRequest createExecRequest(TQueryCtx queryCtx, StringBuilder explainString) + public TExecRequest createExecRequest(PlanCtx planCtx) throws ImpalaException { // Timeline of important events in the planning process, used for debugging // and profiling. try (FrontendProfile.Scope scope = FrontendProfile.createNewWithScope()) { EventSequence timeline = new EventSequence("Query Compilation"); - TExecRequest result = getTExecRequest(queryCtx, timeline, explainString); + TExecRequest result = getTExecRequest(planCtx, timeline); timeline.markEvent("Planning finished"); result.setTimeline(timeline.toThrift()); result.setProfile(FrontendProfile.getCurrent().emitAsThrift()); @@ -1199,15 +1250,16 @@ public class Frontend { + "%s of %s times: ",numRetries, INCONSISTENT_METADATA_NUM_RETRIES) + msg); } - private TExecRequest getTExecRequest(TQueryCtx queryCtx, EventSequence timeline, - StringBuilder explainString) throws ImpalaException { + private TExecRequest getTExecRequest(PlanCtx planCtx, EventSequence timeline) + throws ImpalaException { + TQueryCtx queryCtx = planCtx.getQueryContext(); LOG.info("Analyzing query: " + queryCtx.client_request.stmt); int attempt = 0; String retryMsg = ""; while (true) { try { - TExecRequest req = doCreateExecRequest(queryCtx, timeline, explainString); + TExecRequest req = doCreateExecRequest(planCtx, timeline); markTimelineRetries(attempt, retryMsg, timeline); return req; } catch (InconsistentMetadataFetchException e) { @@ -1227,8 +1279,9 @@ public class Frontend { } } - private TExecRequest doCreateExecRequest(TQueryCtx queryCtx, EventSequence timeline, - StringBuilder explainString) throws ImpalaException { + private TExecRequest doCreateExecRequest(PlanCtx planCtx, + EventSequence timeline) throws ImpalaException { + TQueryCtx queryCtx = planCtx.getQueryContext(); // Parse stmt and collect/load metadata to populate a stmt-local table cache StatementBase stmt = parse(queryCtx.client_request.stmt, queryCtx.client_request.query_options); @@ -1285,7 +1338,7 @@ public class Frontend { // create TQueryExecRequest TQueryExecRequest queryExecRequest = - getPlannedExecRequest(queryCtx, analysisResult, timeline, explainString); + getPlannedExecRequest(planCtx, analysisResult, timeline); TLineageGraph thriftLineageGraph = analysisResult.getThriftLineageGraph(); if (thriftLineageGraph != null && thriftLineageGraph.isSetQuery_text()) { @@ -1299,7 +1352,7 @@ public class Frontend { if (analysisResult.isExplainStmt()) { // Return the EXPLAIN request - createExplainRequest(explainString.toString(), result); + createExplainRequest(planCtx.getExplainString(), result); return result; } @@ -1398,14 +1451,15 @@ public class Frontend { /** * Get the TQueryExecRequest and use it to populate the query context */ - private TQueryExecRequest getPlannedExecRequest(TQueryCtx queryCtx, - AnalysisResult analysisResult, EventSequence timeline, StringBuilder explainString) + private TQueryExecRequest getPlannedExecRequest(PlanCtx planCtx, + AnalysisResult analysisResult, EventSequence timeline) throws ImpalaException { Preconditions.checkState(analysisResult.isQueryStmt() || analysisResult.isDmlStmt() || analysisResult.isCreateTableAsSelectStmt() || analysisResult.isUpdateStmt() || analysisResult.isDeleteStmt()); + TQueryCtx queryCtx = planCtx.getQueryContext(); Planner planner = new Planner(analysisResult, queryCtx, timeline); - TQueryExecRequest queryExecRequest = createExecRequest(planner, explainString); + TQueryExecRequest queryExecRequest = createExecRequest(planner, planCtx); queryCtx.setDesc_tbl( planner.getAnalysisResult().getAnalyzer().getDescTbl().toThrift()); queryExecRequest.setQuery_ctx(queryCtx); http://git-wip-us.apache.org/repos/asf/impala/blob/7dbeb695/fe/src/main/java/org/apache/impala/service/JniFrontend.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java index 6a1d7e9..d9f3f06 100644 --- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java +++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java @@ -58,6 +58,7 @@ import org.apache.impala.common.FileSystemUtil; import org.apache.impala.common.ImpalaException; import org.apache.impala.common.InternalException; import org.apache.impala.common.JniUtil; +import org.apache.impala.service.Frontend.PlanCtx; import org.apache.impala.thrift.TBackendGflags; import org.apache.impala.thrift.TBuildTestDescriptorTableParams; import org.apache.impala.thrift.TCatalogObject; @@ -163,10 +164,11 @@ public class JniFrontend { TQueryCtx queryCtx = new TQueryCtx(); JniUtil.deserializeThrift(protocolFactory_, queryCtx, thriftQueryContext); - StringBuilder explainString = new StringBuilder(); - TExecRequest result = frontend_.createExecRequest(queryCtx, explainString); - if (explainString.length() > 0 && LOG.isTraceEnabled()) { - LOG.trace(explainString.toString()); + PlanCtx planCtx = new PlanCtx(queryCtx); + TExecRequest result = frontend_.createExecRequest(planCtx); + if (LOG.isTraceEnabled()) { + String explainStr = planCtx.getExplainString(); + if (!explainStr.isEmpty()) LOG.trace(explainStr); } // TODO: avoid creating serializer for each query? http://git-wip-us.apache.org/repos/asf/impala/blob/7dbeb695/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java new file mode 100644 index 0000000..d6ccfb6 --- /dev/null +++ b/fe/src/test/java/org/apache/impala/planner/CardinalityTest.java @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.planner; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.util.List; + +import org.apache.impala.common.ImpalaException; +import org.apache.impala.service.Frontend.PlanCtx; +import org.apache.impala.testutil.TestUtils; +import org.apache.impala.thrift.TQueryCtx; +import org.apache.impala.thrift.TQueryOptions; +import org.junit.Test; + +/** + * Test the planner's inference of tuple cardinality from metadata NDV and + * resulting selectivity. + */ +public class CardinalityTest extends PlannerTestBase { + + /** + * Test the happy path: table with stats, no all-null cols. + */ + @Test + public void testBasicsWithStats() { + + // Return all rows. Cardinality is row count; + verifyCardinality("SELECT id FROM functional.alltypes", 7300); + + // Return all rows. Cardinality is row count, + // should not be influenced by limited NDV of selected + // column. + verifyCardinality("SELECT bool_col FROM functional.alltypes", 7300); + + // Result cardinality reduced by limited NDV. + // Boolean column has cardinality 3 (true, false, null). + // Since we have metadata, and know the column is non-null, + // NDV is 2. We select one of them. + verifyCardinality( + "SELECT id FROM functional.alltypes WHERE bool_col = TRUE", 7300/2); + + // Result cardinality reduced by NDV. + // NDV should be 10 (from metadata). + verifyCardinality( + "SELECT id FROM functional.alltypes WHERE int_col = 1", 7300/10); + + // Assume classic 0.1 selectivity for other operators + // IMPALA-7560 says this should be revised. + verifyCardinality( + "SELECT id FROM functional.alltypes WHERE int_col != 1", 730); + + // IMPALA-7601 says the following should be revised. + verifyCardinality( + "SELECT id FROM functional.alltypes WHERE int_col > 1", 730); + verifyCardinality( + "SELECT id FROM functional.alltypes WHERE int_col > 1", 730); + + // Not using NDV of int_col, instead uses default 0.1 value. + // Since NULL is one of the NDV's, nullvalue() is true + // 1/NDV of the time, on average. + verifyCardinality( + "SELECT id FROM functional.alltypes WHERE nullvalue(int_col)", 730); + + // Grouping should reduce cardinality + verifyCardinality( + "SELECT COUNT(*) FROM functional.alltypes GROUP BY int_col", 10); + verifyCardinality( + "SELECT COUNT(*) FROM functional.alltypes GROUP BY id", 7300); + verifyCardinality( + "SELECT COUNT(*) FROM functional.alltypes GROUP BY bool_col", 2); + } + + /** + * Joins should multiply out cardinalities. + */ + @Test + public void testJoins() { + // Cartesian product + String joinClause = " FROM functional.alltypes t1, functional.alltypes t2 "; + verifyCardinality("SELECT t1.id" + joinClause, 7300 * 7300); + // Cartesian product, reduced by NDV of group key + verifyCardinality( + "SELECT COUNT(*)" + joinClause + "GROUP BY t1.id", 7300); + verifyCardinality( + "SELECT COUNT(*)" + joinClause + "GROUP BY t1.id, t1.int_col", 7300 * 10); + } + + @Test + public void testBasicsWithoutStats() { + // IMPALA-7608: no cardinality is available (result is -1) + verifyCardinality("SELECT a FROM functional.tinytable", -1); + } + + /** + * Given a query and an expected cardinality, checks that the root + * node of the single-fragment plan has the expected cardinality. + * Verifies that cardinality is propagated upward through the plan + * as expected. + * + * @param query query to test + * @param expected expected cardinality at the root node + */ + protected void verifyCardinality(String query, long expected) { + List<PlanFragment> plan = getPlan(query); + PlanNode planRoot = plan.get(0).getPlanRoot(); + assertEquals("Cardinality error for: " + query, + expected, planRoot.getCardinality()); + } + + /** + * Given a query, run the planner and extract the physical plan prior + * to conversion to Thrift. Extract the first (or, more typically, only + * fragment) and return it for inspection. + * + * @param query the query to run + * @return the first (or only) fragment plan node + */ + private List<PlanFragment> getPlan(String query) { + // Create a query context with rewrites disabled + // TODO: Should probably turn them on, or run a test + // both with and without rewrites. + TQueryCtx queryCtx = TestUtils.createQueryContext( + "default", System.getProperty("user.name")); + queryCtx.client_request.setStmt(query); + + // Force the plan to run on a single node so it + // resides in a single fragment. + TQueryOptions queryOptions = queryCtx.client_request.getQuery_options(); + queryOptions.setNum_nodes(1); + + // Plan the query, discard the actual execution plan, and + // return the plan tree. + PlanCtx planCtx = new PlanCtx(queryCtx); + planCtx.requestPlanCapture(); + try { + frontend_.createExecRequest(planCtx); + } catch (ImpalaException e) { + fail(e.getMessage()); + } + return planCtx.getPlan(); + } +} http://git-wip-us.apache.org/repos/asf/impala/blob/7dbeb695/fe/src/test/java/org/apache/impala/planner/PlannerTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 53af678..c8dbf1f 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -29,6 +29,7 @@ import org.apache.impala.catalog.HBaseColumn; import org.apache.impala.catalog.Type; import org.apache.impala.common.ImpalaException; import org.apache.impala.common.RuntimeEnv; +import org.apache.impala.service.Frontend.PlanCtx; import org.apache.impala.testutil.TestUtils; import org.apache.impala.thrift.TExecRequest; import org.apache.impala.thrift.TExplainLevel; @@ -471,10 +472,10 @@ public class PlannerTest extends PlannerTestBase { queryCtx.client_request.setStmt(stmt); queryCtx.client_request.query_options = defaultQueryOptions(); if (userMtDop != -1) queryCtx.client_request.query_options.setMt_dop(userMtDop); - StringBuilder explainBuilder = new StringBuilder(); TExecRequest request = null; try { - request = frontend_.createExecRequest(queryCtx, explainBuilder); + PlanCtx planCtx = new PlanCtx(queryCtx); + request = frontend_.createExecRequest(planCtx); } catch (ImpalaException e) { Assert.fail("Failed to create exec request for '" + stmt + "': " + e.getMessage()); } @@ -561,14 +562,15 @@ public class PlannerTest extends PlannerTestBase { // Setting up a table with computed stats queryCtx.client_request.setStmt("compute stats functional.alltypes"); queryCtx.client_request.query_options = defaultQueryOptions(); - StringBuilder explainBuilder = new StringBuilder(); - frontend_.createExecRequest(queryCtx, explainBuilder); + PlanCtx planCtx = new PlanCtx(queryCtx); + frontend_.createExecRequest(planCtx); // Setting up an arbitrary query involving a table with stats. queryCtx.client_request.setStmt("select * from functional.alltypes"); // Setting disable_unsafe_spills = true to verify that it no longer // throws a NPE with computed stats (IMPALA-5524) queryCtx.client_request.query_options.setDisable_unsafe_spills(true); - requestWithDisableSpillOn = frontend_.createExecRequest(queryCtx, explainBuilder); + planCtx = new PlanCtx(queryCtx); + requestWithDisableSpillOn = frontend_.createExecRequest(planCtx); Assert.assertNotNull(requestWithDisableSpillOn); } http://git-wip-us.apache.org/repos/asf/impala/blob/7dbeb695/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java index 713a300..7db5d37 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java @@ -41,6 +41,7 @@ import org.apache.impala.common.FrontendTestBase; import org.apache.impala.common.ImpalaException; import org.apache.impala.common.RuntimeEnv; import org.apache.impala.datagenerator.HBaseTestDataRegionAssignment; +import org.apache.impala.service.Frontend.PlanCtx; import org.apache.impala.testutil.TestFileParser; import org.apache.impala.testutil.TestFileParser.Section; import org.apache.impala.testutil.TestFileParser.TestCase; @@ -505,11 +506,13 @@ public class PlannerTestBase extends FrontendTestBase { boolean sectionExists = expectedPlan != null && !expectedPlan.isEmpty(); String expectedErrorMsg = getExpectedErrorMessage(expectedPlan); - StringBuilder explainBuilder = new StringBuilder(); TExecRequest execRequest = null; if (sectionExists) actualOutput.append(section.getHeader() + "\n"); + String explainStr = ""; try { - execRequest = frontend_.createExecRequest(queryCtx, explainBuilder); + PlanCtx planCtx = new PlanCtx(queryCtx); + execRequest = frontend_.createExecRequest(planCtx); + explainStr = planCtx.getExplainString(); } catch (Exception e) { if (!sectionExists) return null; handleException(query, expectedErrorMsg, errorLog, actualOutput, e); @@ -519,9 +522,7 @@ public class PlannerTestBase extends FrontendTestBase { // Failed to produce an exec request. if (execRequest == null) return null; - String explainStr = explainBuilder.toString(); - explainStr = removeExplainHeader(explainBuilder.toString(), testOptions); - + explainStr = removeExplainHeader(explainStr, testOptions); actualOutput.append(explainStr); LOG.info(section.toString() + ":" + explainStr); if (expectedErrorMsg != null) { @@ -553,13 +554,15 @@ public class PlannerTestBase extends FrontendTestBase { * if an error occurred while creating the plan. */ private String getVerboseExplainPlan(TQueryCtx queryCtx) { - StringBuilder explainBuilder = new StringBuilder(); + String explainStr; TExecRequest execRequest = null; TExplainLevel origExplainLevel = queryCtx.client_request.getQuery_options().getExplain_level(); try { queryCtx.client_request.getQuery_options().setExplain_level(TExplainLevel.VERBOSE); - execRequest = frontend_.createExecRequest(queryCtx, explainBuilder); + PlanCtx planCtx = new PlanCtx(queryCtx); + execRequest = frontend_.createExecRequest(planCtx); + explainStr = planCtx.getExplainString(); } catch (ImpalaException e) { return ExceptionUtils.getStackTrace(e); } finally { @@ -567,7 +570,7 @@ public class PlannerTestBase extends FrontendTestBase { } Preconditions.checkNotNull(execRequest); return removeExplainHeader( - explainBuilder.toString(), Collections.<PlannerTestOption>emptySet()); + explainStr, Collections.<PlannerTestOption>emptySet()); } private void checkScanRangeLocations(TestCase testCase, TExecRequest execRequest, @@ -676,8 +679,8 @@ public class PlannerTestBase extends FrontendTestBase { TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB, System.getProperty("user.name")); queryCtx.client_request.setStmt(query); - StringBuilder explainBuilder = new StringBuilder(); - TExecRequest execRequest = frontend_.createExecRequest(queryCtx, explainBuilder); + PlanCtx planCtx = new PlanCtx(queryCtx); + TExecRequest execRequest = frontend_.createExecRequest(planCtx); if (!execRequest.isSetQuery_exec_request() || execRequest.query_exec_request == null