Repository: impala Updated Branches: refs/heads/master 2e6a63e31 -> 1e79f1479
IMPALA-6314: Add run time scalar subquery check for uncorrelated subqueries If a scalar subquery is used with a binary predicate, or, used in an arithmetic expression, it must return only one row/column to be valid. If this cannot be guaranteed at parse time through a single row aggregate or limit clause, Impala fails the query like such. E.g., currently the following query is not allowed: SELECT bigint_col FROM alltypesagg WHERE id = (SELECT id FROM alltypesagg WHERE id = 1) However, it would be allowed if the query contained a LIMIT 1 clause, or instead of id it was max(id). This commit makes the example valid by introducing a runtime check to test if the subquery returns a single row. If the subquery returns more than one row, it aborts the query with an error. I added a new node type, called CardinalityCheckNode. It is created during planning on top of the subquery when needed, then during execution it checks if its child only returns a single row. I extended the frontend tests and e2e tests as well. Change-Id: I0f52b93a60eeacedd242a2f17fa6b99c4fc38e06 Reviewed-on: http://gerrit.cloudera.org:8080/9005 Reviewed-by: Alex Behm <alex.b...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1e79f147 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1e79f147 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1e79f147 Branch: refs/heads/master Commit: 1e79f14798a8f742bbf17f79a4666627dcef3faf Parents: 2e6a63e Author: Zoltan Borok-Nagy <borokna...@cloudera.com> Authored: Wed Apr 4 17:38:59 2018 +0200 Committer: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Committed: Fri Apr 27 20:06:56 2018 +0000 ---------------------------------------------------------------------- be/src/exec/CMakeLists.txt | 1 + be/src/exec/cardinality-check-node.cc | 110 +++++ be/src/exec/cardinality-check-node.h | 61 +++ be/src/exec/exec-node.cc | 4 + common/thrift/PlanNodes.thrift | 10 +- .../apache/impala/analysis/BinaryPredicate.java | 16 +- .../impala/analysis/ComputeStatsStmt.java | 2 +- .../apache/impala/analysis/CreateViewStmt.java | 6 +- .../apache/impala/analysis/ExistsPredicate.java | 6 + .../java/org/apache/impala/analysis/Expr.java | 15 +- .../apache/impala/analysis/HdfsCachingOp.java | 9 +- .../org/apache/impala/analysis/InPredicate.java | 1 + .../apache/impala/analysis/IsNullPredicate.java | 3 +- .../org/apache/impala/analysis/QueryStmt.java | 17 + .../org/apache/impala/analysis/SelectStmt.java | 4 +- .../apache/impala/analysis/StmtRewriter.java | 32 +- .../org/apache/impala/analysis/Subquery.java | 5 +- .../org/apache/impala/analysis/UnionStmt.java | 1 + .../impala/planner/CardinalityCheckNode.java | 91 ++++ .../impala/planner/DistributedPlanner.java | 21 + .../impala/planner/SingleNodePlanner.java | 8 + .../impala/analysis/AnalyzeStmtsTest.java | 2 +- .../impala/analysis/AnalyzeSubqueriesTest.java | 71 ++-- .../org/apache/impala/analysis/ToSqlTest.java | 16 +- .../queries/PlannerTest/nested-collections.test | 66 +++ .../queries/PlannerTest/subquery-rewrite.test | 422 +++++++++++++++++++ .../queries/QueryTest/nested-types-subplan.test | 38 ++ .../queries/QueryTest/subquery.test | 171 ++++++++ 28 files changed, 1130 insertions(+), 79 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/be/src/exec/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index 7224df8..ccec3fb 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -28,6 +28,7 @@ add_library(Exec analytic-eval-node.cc base-sequence-scanner.cc blocking-join-node.cc + cardinality-check-node.cc catalog-op-executor.cc data-sink.cc data-source-scan-node.cc http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/be/src/exec/cardinality-check-node.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/cardinality-check-node.cc b/be/src/exec/cardinality-check-node.cc new file mode 100644 index 0000000..76579dd --- /dev/null +++ b/be/src/exec/cardinality-check-node.cc @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/cardinality-check-node.h" +#include "runtime/row-batch.h" +#include "runtime/runtime-state.h" +#include "util/runtime-profile-counters.h" +#include "gen-cpp/PlanNodes_types.h" + +#include "common/names.h" + +namespace impala { + +CardinalityCheckNode::CardinalityCheckNode( + ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) + : ExecNode(pool, tnode, descs), + display_statement_(tnode.cardinality_check_node.display_statement) { +} + +Status CardinalityCheckNode::Prepare(RuntimeState* state) { + DCHECK(conjuncts_.empty()); + DCHECK_EQ(limit_, 1); + + SCOPED_TIMER(runtime_profile_->total_time_counter()); + RETURN_IF_ERROR(ExecNode::Prepare(state)); + return Status::OK(); +} + +Status CardinalityCheckNode::Open(RuntimeState* state) { + SCOPED_TIMER(runtime_profile_->total_time_counter()); + RETURN_IF_ERROR(ExecNode::Open(state)); + RETURN_IF_ERROR(child(0)->Open(state)); + row_batch_.reset( + new RowBatch(row_desc(), 1, mem_tracker())); + + // Read rows from the child, raise error if there are more rows than one + RowBatch child_batch(child(0)->row_desc(), state->batch_size(), mem_tracker()); + bool child_eos = false; + int rows_collected = 0; + do { + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(QueryMaintenance(state)); + RETURN_IF_ERROR(child(0)->GetNext(state, &child_batch, &child_eos)); + + rows_collected += child_batch.num_rows(); + if (rows_collected > 1) { + return Status(Substitute("Subquery must not return more than one row: $0", + display_statement_)); + } + if (child_batch.num_rows() != 0) child_batch.DeepCopyTo(row_batch_.get()); + child_batch.Reset(); + } while (!child_eos); + + DCHECK(rows_collected == 0 || rows_collected == 1); + + // If we are inside a subplan we can expect a call to Open()/GetNext() + // on the child again. + if (!IsInSubplan()) child(0)->Close(state); + return Status::OK(); +} + +Status CardinalityCheckNode::GetNext(RuntimeState* state, RowBatch* output_row_batch, + bool* eos) { + SCOPED_TIMER(runtime_profile_->total_time_counter()); + RETURN_IF_ERROR(ExecDebugAction(TExecNodePhase::GETNEXT, state)); + RETURN_IF_CANCELLED(state); + RETURN_IF_ERROR(QueryMaintenance(state)); + DCHECK_LE(row_batch_->num_rows(), 1); + + if (row_batch_->num_rows() == 1) { + TupleRow* src_row = row_batch_->GetRow(0); + TupleRow* dst_row = output_row_batch->GetRow(output_row_batch->AddRow()); + output_row_batch->CopyRow(src_row, dst_row); + output_row_batch->CommitLastRow(); + row_batch_->TransferResourceOwnership(output_row_batch); + num_rows_returned_ = 1; + COUNTER_SET(rows_returned_counter_, num_rows_returned_); + } + *eos = true; + row_batch_->Reset(); + return Status::OK(); +} + +Status CardinalityCheckNode::Reset(RuntimeState* state) { + row_batch_->Reset(); + return ExecNode::Reset(state); +} + +void CardinalityCheckNode::Close(RuntimeState* state) { + if (is_closed()) return; + // Need to call destructor to release resources before calling ExecNode::Close(). + row_batch_.reset(); + ExecNode::Close(state); +} + +} http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/be/src/exec/cardinality-check-node.h ---------------------------------------------------------------------- diff --git a/be/src/exec/cardinality-check-node.h b/be/src/exec/cardinality-check-node.h new file mode 100644 index 0000000..c71bd2b --- /dev/null +++ b/be/src/exec/cardinality-check-node.h @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +#ifndef IMPALA_EXEC_CARDINALITY_CHECK_NODE_H +#define IMPALA_EXEC_CARDINALITY_CHECK_NODE_H + +#include "exec/exec-node.h" +#include <boost/scoped_ptr.hpp> + +namespace impala { + +/// Node that returns an error if its child produces more than a single row. +/// If successful, this node returns a deep copy of its single input row. +/// +/// Note that this node must be a blocking node. It would be incorrect to return rows +/// before the single row constraint has been validated because downstream exec nodes +/// might produce results and incorrectly return them to the client. If the child of this +/// node produces more than one row it means the SQL query is semantically invalid, so no +/// rows must be returned to the client. +class CardinalityCheckNode : public ExecNode { + public: + CardinalityCheckNode(ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs); + + virtual Status Prepare(RuntimeState* state) override; + virtual Status Open(RuntimeState* state) override; + virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos) override; + virtual Status Reset(RuntimeState* state) override; + virtual void Close(RuntimeState* state) override; + private: + ///////////////////////////////////////// + /// BEGIN: Members that must be Reset() + + /// Row batch that contains a single row from child + boost::scoped_ptr<RowBatch> row_batch_; + + /// END: Members that must be Reset() + ///////////////////////////////////////// + + // The associated SQL statement for error reporting + std::string display_statement_; +}; + +} + +#endif http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/be/src/exec/exec-node.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/exec-node.cc b/be/src/exec/exec-node.cc index d2dd85f..ed9ab3f 100644 --- a/be/src/exec/exec-node.cc +++ b/be/src/exec/exec-node.cc @@ -30,6 +30,7 @@ #include "exprs/scalar-expr.h" #include "exprs/scalar-expr-evaluator.h" #include "exec/analytic-eval-node.h" +#include "exec/cardinality-check-node.h" #include "exec/data-source-scan-node.h" #include "exec/empty-set-node.h" #include "exec/exchange-node.h" @@ -398,6 +399,9 @@ Status ExecNode::CreateNode(ObjectPool* pool, const TPlanNode& tnode, case TPlanNodeType::UNNEST_NODE: *node = pool->Add(new UnnestNode(pool, tnode, descs)); break; + case TPlanNodeType::CARDINALITY_CHECK_NODE: + *node = pool->Add(new CardinalityCheckNode(pool, tnode, descs)); + break; default: map<int, const char*>::const_iterator i = _TPlanNodeType_VALUES_TO_NAMES.find(tnode.node_type); http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/common/thrift/PlanNodes.thrift ---------------------------------------------------------------------- diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift index c5df1cd..01698ce 100644 --- a/common/thrift/PlanNodes.thrift +++ b/common/thrift/PlanNodes.thrift @@ -46,7 +46,8 @@ enum TPlanNodeType { SINGULAR_ROW_SRC_NODE, UNNEST_NODE, SUBPLAN_NODE, - KUDU_SCAN_NODE + KUDU_SCAN_NODE, + CARDINALITY_CHECK_NODE } // phases of an execution node @@ -519,6 +520,11 @@ struct TBackendResourceProfile { 4: optional i64 max_row_buffer_size } +struct TCardinalityCheckNode { + // Associated statement of child + 1: required string display_statement +} + // This is essentially a union of all messages corresponding to subclasses // of PlanNode. struct TPlanNode { @@ -567,6 +573,8 @@ struct TPlanNode { // Resource profile for this plan node. 25: required TBackendResourceProfile resource_profile + + 26: optional TCardinalityCheckNode cardinality_check_node } // A flattened representation of a tree of PlanNodes, obtained by depth-first http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java index 444c003..e15b91f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java @@ -173,20 +173,8 @@ public class BinaryPredicate extends Predicate { fn_ = getBuiltinFunction(analyzer, opName, collectChildReturnTypes(), CompareMode.IS_NONSTRICT_SUPERTYPE_OF); if (fn_ == null) { - // Construct an appropriate error message and throw an AnalysisException. - String errMsg = "operands of type " + getChild(0).getType().toSql() + " and " + - getChild(1).getType().toSql() + " are not comparable: " + toSql(); - - // Check if any of the children is a Subquery that does not return a - // scalar. - for (Expr expr: children_) { - if (expr instanceof Subquery && !expr.getType().isScalarType()) { - errMsg = "Subquery must return a single row: " + expr.toSql(); - break; - } - } - - throw new AnalysisException(errMsg); + throw new AnalysisException("operands of type " + getChild(0).getType().toSql() + + " and " + getChild(1).getType().toSql() + " are not comparable: " + toSql()); } Preconditions.checkState(fn_.getReturnType().isBoolean()); http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java index e442d66..f20e1c7 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java @@ -776,7 +776,7 @@ public class ComputeStatsStmt extends StatementBase { return "COMPUTE STATS " + tableName_.toSql() + columnList.toString() + tblsmpl; } else { return "COMPUTE INCREMENTAL STATS " + tableName_.toSql() + - partitionSet_ == null ? "" : partitionSet_.toSql(); + (partitionSet_ == null ? "" : partitionSet_.toSql()); } } http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java index 6b90083..6e98fe5 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java @@ -72,9 +72,9 @@ public class CreateViewStmt extends CreateOrAlterViewStmtBase { sb.append("CREATE VIEW "); if (ifNotExists_) sb.append("IF NOT EXISTS "); if (tableName_.getDb() != null) sb.append(tableName_.getDb() + "."); - sb.append(tableName_.getTbl() + " ("); - sb.append(Joiner.on(", ").join(columnDefs_)); - sb.append(") AS "); + sb.append(tableName_.getTbl()); + if (columnDefs_ != null) sb.append("(" + Joiner.on(", ").join(columnDefs_) + ")"); + sb.append(" AS "); sb.append(viewDefStmt_.toSql()); return sb.toString(); } http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java b/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java index 381848c..8da020f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/ExistsPredicate.java @@ -55,6 +55,12 @@ public class ExistsPredicate extends Predicate { } @Override + protected void analyzeImpl(Analyzer analyzer) throws AnalysisException { + super.analyzeImpl(analyzer); + ((Subquery)children_.get(0)).getStatement().setIsRuntimeScalar(false); + } + + @Override protected void toThrift(TExprNode msg) { // Cannot serialize a nested predicate Preconditions.checkState(false); http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/Expr.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java index ef53476..c519ea4 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Expr.java +++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java @@ -152,6 +152,16 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl } }; + // Returns true if an Expr is a user-defined aggregate function. + public final static com.google.common.base.Predicate<Expr> IS_UDA_FN = + new com.google.common.base.Predicate<Expr>() { + @Override + public boolean apply(Expr arg) { + return isAggregatePredicate_.apply(arg) && + !((FunctionCallExpr)arg).getFnName().isBuiltin(); + } + }; + public final static com.google.common.base.Predicate<Expr> IS_TRUE_LITERAL = new com.google.common.base.Predicate<Expr>() { @Override @@ -1189,7 +1199,10 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl */ public boolean isScalarSubquery() { Preconditions.checkState(isAnalyzed_); - return this instanceof Subquery && getType().isScalarType(); + if (!(this instanceof Subquery)) return false; + Subquery subq = (Subquery) this; + SelectStmt stmt = (SelectStmt) subq.getStatement(); + return stmt.returnsSingleRow() && getType().isScalarType(); } /** http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/HdfsCachingOp.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/HdfsCachingOp.java b/fe/src/main/java/org/apache/impala/analysis/HdfsCachingOp.java index 0ee274c..4f10a1b 100644 --- a/fe/src/main/java/org/apache/impala/analysis/HdfsCachingOp.java +++ b/fe/src/main/java/org/apache/impala/analysis/HdfsCachingOp.java @@ -79,8 +79,13 @@ public class HdfsCachingOp implements ParseNode { @Override public String toSql() { - return !shouldCache() ? "UNCACHED" : "CACHED IN '" + getCachePoolName() + "' WITH " + - "REPLICATION = " + parsedReplication_.longValue(); + if (!shouldCache()) return "UNCACHED"; + StringBuilder sb = new StringBuilder(); + sb.append("CACHED IN '" + getCachePoolName() + "'"); + if (parsedReplication_ != null) { + sb.append(" WITH REPLICATION = " + parsedReplication_.longValue()); + } + return sb.toString(); } public THdfsCachingOp toThrift() { return cacheOp_; } http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/InPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/InPredicate.java b/fe/src/main/java/org/apache/impala/analysis/InPredicate.java index f840ef2..89479ad 100644 --- a/fe/src/main/java/org/apache/impala/analysis/InPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/InPredicate.java @@ -114,6 +114,7 @@ public class InPredicate extends Predicate { toSqlImpl()); } Subquery subquery = (Subquery)getChild(1); + subquery.getStatement().setIsRuntimeScalar(false); if (!subquery.returnsScalarColumn()) { throw new AnalysisException("Subquery must return a single column: " + subquery.toSql()); http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java b/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java index 5a1fe99..2629be2 100644 --- a/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java +++ b/fe/src/main/java/org/apache/impala/analysis/IsNullPredicate.java @@ -104,7 +104,8 @@ public class IsNullPredicate extends Predicate { // a null value. setChild(0, new BoolLiteral(true)); getChild(0).analyze(analyzer); - } else if (!getChild(0).contains(Expr.IS_SCALAR_SUBQUERY)) { + } else if (!getChild(0).contains(Expr.IS_SCALAR_SUBQUERY) && + !getChild(0).getSubquery().getStatement().isRuntimeScalar()) { // We only support scalar subqueries in an IS NULL predicate because // they can be rewritten into a join. // TODO: Add support for InPredicates and BinaryPredicates with http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java b/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java index 6f2747f..6ad1c8a 100644 --- a/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java @@ -87,6 +87,16 @@ public abstract class QueryStmt extends StatementBase { ///////////////////////////////////////// // END: Members that need to be reset() + // Contains the post-analysis toSql() string before rewrites. I.e. table refs are + // resolved and fully qualified, but no rewrites happened yet. This string is showed + // to the user in some cases in order to display a statement that is very similar + // to what was originally issued. + protected String origSqlString_ = null; + + // If true, we need a runtime check on this statement's result to check if it + // returns a single row. + protected boolean isRuntimeScalar_ = false; + QueryStmt(ArrayList<OrderByElement> orderByElements, LimitElement limitElement) { orderByElements_ = orderByElements; sortInfo_ = null; @@ -372,6 +382,11 @@ public abstract class QueryStmt extends StatementBase { public WithClause getWithClause() { return withClause_; } public boolean hasOrderByClause() { return orderByElements_ != null; } public boolean hasLimit() { return limitElement_.getLimitExpr() != null; } + public String getOrigSqlString() { return origSqlString_; } + public boolean isRuntimeScalar() { return isRuntimeScalar_; } + public void setIsRuntimeScalar(boolean isRuntimeScalar) { + isRuntimeScalar_ = isRuntimeScalar; + } public long getLimit() { return limitElement_.getLimit(); } public boolean hasOffset() { return limitElement_.getOffsetExpr() != null; } public long getOffset() { return limitElement_.getOffset(); } @@ -446,6 +461,8 @@ public abstract class QueryStmt extends StatementBase { sortInfo_ = (other.sortInfo_ != null) ? other.sortInfo_.clone() : null; analyzer_ = other.analyzer_; evaluateOrderBy_ = other.evaluateOrderBy_; + origSqlString_ = other.origSqlString_; + isRuntimeScalar_ = other.isRuntimeScalar_; } @Override http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java index 874001d..0d295cc 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java @@ -255,6 +255,7 @@ public class SelectStmt extends QueryStmt { // Remember the SQL string before inline-view expression substitution. sqlString_ = toSql(); + if (origSqlString_ == null) origSqlString_ = sqlString_; resolveInlineViewRefs(analyzer); // If this block's select-project-join portion returns an empty result set and the @@ -1088,8 +1089,9 @@ public class SelectStmt extends QueryStmt { * result set also depends on the data a stmt is processing. */ public boolean returnsSingleRow() { + Preconditions.checkState(isAnalyzed()); // limit 1 clause - if (limitElement_ != null && limitElement_.getLimit() == 1) return true; + if (limitElement_ != null && hasLimit() && limitElement_.getLimit() == 1) return true; // No from clause (base tables or inline views) if (fromClause_.isEmpty()) return true; // Aggregation with no group by and no DISTINCT http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java index a0eb757..6cfbd20 100644 --- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java +++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java @@ -217,7 +217,7 @@ public class StmtRewriter { } if (!(conjunct instanceof InPredicate) && !(conjunct instanceof ExistsPredicate) && !(conjunct instanceof BinaryPredicate) && - !conjunct.contains(Expr.IS_SCALAR_SUBQUERY)) { + !conjunct.getSubquery().getType().isScalarType()) { throw new AnalysisException("Non-scalar subquery is not supported in " + "expression: " + conjunct.toSql()); } @@ -457,8 +457,9 @@ public class StmtRewriter { Preconditions.checkNotNull(expr); Preconditions.checkNotNull(analyzer); boolean updateSelectList = false; - SelectStmt subqueryStmt = (SelectStmt)expr.getSubquery().getStatement(); + boolean isScalarSubquery = expr.getSubquery().isScalarSubquery(); + boolean isRuntimeScalar = subqueryStmt.isRuntimeScalar(); // Create a new inline view from the subquery stmt. The inline view will be added // to the stmt's table refs later. Explicitly set the inline view's column labels // to eliminate any chance that column aliases from the parent query could reference @@ -478,10 +479,13 @@ public class StmtRewriter { // safely remove it. subqueryStmt.limitElement_ = new LimitElement(null, null); } + // If runtime scalar, we need to prevent the propagation of predicates into the + // inline view by setting a limit on the statement. + if (isRuntimeScalar) subqueryStmt.setLimit(2); // Update the subquery's select list and/or its GROUP BY clause by adding // exprs from the extracted correlated predicates. - boolean updateGroupBy = expr.getSubquery().isScalarSubquery() + boolean updateGroupBy = isScalarSubquery || (expr instanceof ExistsPredicate && !subqueryStmt.getSelectList().isDistinct() && subqueryStmt.hasAggInfo()); @@ -610,14 +614,15 @@ public class StmtRewriter { // TODO: Remove this when independent subquery evaluation is implemented. // TODO: Requires support for non-equi joins. boolean hasGroupBy = ((SelectStmt) inlineView.getViewStmt()).hasGroupByClause(); - if (!expr.getSubquery().isScalarSubquery() || - (!(hasGroupBy && stmt.selectList_.isDistinct()) && hasGroupBy)) { + Subquery subquery = expr.getSubquery(); + if ((!isScalarSubquery && !isRuntimeScalar) || + (hasGroupBy && !stmt.selectList_.isDistinct())) { throw new AnalysisException("Unsupported predicate with subquery: " + expr.toSql()); } // TODO: Requires support for null-aware anti-join mode in nested-loop joins - if (expr.getSubquery().isScalarSubquery() && expr instanceof InPredicate + if (isScalarSubquery && expr instanceof InPredicate && ((InPredicate) expr).isNotIn()) { throw new AnalysisException("Unsupported NOT IN predicate with subquery: " + expr.toSql()); @@ -796,7 +801,12 @@ public class StmtRewriter { throw new AnalysisException("Unsupported correlated subquery with grouping " + "and/or aggregation: " + stmt.toSql()); } - + // TODO: instead of this check, implement IMPALA-6315 + if (!expr.getSubquery().isScalarSubquery() && + !(expr instanceof InPredicate || expr instanceof ExistsPredicate)) { + throw new AnalysisException( + "Unsupported correlated subquery with runtime scalar check: " + stmt.toSql()); + } // The following correlated subqueries with a limit clause are supported: // 1. EXISTS subqueries // 2. Scalar subqueries with aggregation @@ -1016,16 +1026,12 @@ public class StmtRewriter { pred.analyze(analyzer); return pred; } - // Only scalar subqueries are supported Subquery subquery = exprWithSubquery.getSubquery(); - if (!subquery.isScalarSubquery()) { - throw new AnalysisException("Unsupported predicate with a non-scalar subquery: " - + subquery.toSql()); - } + Preconditions.checkState(subquery.getType().isScalarType()); ExprSubstitutionMap smap = new ExprSubstitutionMap(); SelectListItem item = ((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0); - if (isCorrelated && !item.getExpr().contains(Expr.IS_BUILTIN_AGG_FN)) { + if (isCorrelated && item.getExpr().contains(Expr.IS_UDA_FN)) { throw new AnalysisException("UDAs are not supported in the select list of " + "correlated subqueries: " + subquery.toSql()); } http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/Subquery.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/Subquery.java b/fe/src/main/java/org/apache/impala/analysis/Subquery.java index ca15020..d5c2e68 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Subquery.java +++ b/fe/src/main/java/org/apache/impala/analysis/Subquery.java @@ -45,6 +45,7 @@ public class Subquery extends Expr { public Analyzer getAnalyzer() { return analyzer_; } public QueryStmt getStatement() { return stmt_; } + @Override public String toSqlImpl() { return "(" + stmt_.toSql() + ")"; } @@ -92,8 +93,8 @@ public class Subquery extends Expr { type_ = createStructTypeFromExprList(); } - // If the subquery returns many rows, set its type to ArrayType. - if (!((SelectStmt)stmt_).returnsSingleRow()) type_ = new ArrayType(type_); + // If the subquery can return many rows, do the cardinality check at runtime. + if (!((SelectStmt)stmt_).returnsSingleRow()) stmt_.setIsRuntimeScalar(true); Preconditions.checkNotNull(type_); } http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java b/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java index bb472a2..1e58fad 100644 --- a/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java @@ -219,6 +219,7 @@ public class UnionStmt extends QueryStmt { // Remember the SQL string before unnesting operands. toSqlString_ = toSql(); + if (origSqlString_ == null) origSqlString_ = toSqlString_; // Unnest the operands before casting the result exprs. Unnesting may add // additional entries to operands_ and the result exprs of those unnested http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/planner/CardinalityCheckNode.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/CardinalityCheckNode.java b/fe/src/main/java/org/apache/impala/planner/CardinalityCheckNode.java new file mode 100644 index 0000000..94f3dd1 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/planner/CardinalityCheckNode.java @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.planner; + +import org.apache.impala.analysis.Analyzer; +import org.apache.impala.common.ImpalaException; +import org.apache.impala.thrift.TCardinalityCheckNode; +import org.apache.impala.thrift.TExplainLevel; +import org.apache.impala.thrift.TPlanNode; +import org.apache.impala.thrift.TPlanNodeType; +import org.apache.impala.thrift.TQueryOptions; + +import com.google.common.base.Preconditions; + +/** + * Node that returns an error if its child produces more than a single row. + * If successful, this node returns a deep copy of its single input row. + * + * Note that this node must be a blocking node. It would be incorrect to return rows + * before the single row constraint has been validated because downstream exec nodes + * might produce results and incorrectly return them to the client. If the child of this + * node produces more than one row it means the SQL query is semantically invalid, so no + * rows must be returned to the client. + */ +public class CardinalityCheckNode extends PlanNode { + private final String displayStatement_; + + protected CardinalityCheckNode(PlanNodeId id, PlanNode child, String displayStmt) { + super(id, "CARDINALITY CHECK"); + Preconditions.checkState(child.getLimit() == 2); + cardinality_ = 1; + limit_ = 1; + displayStatement_ = displayStmt; + addChild(child); + computeTupleIds(); + } + + /** + * Same as PlanNode.init(), except we don't assign conjuncts. + */ + @Override + public void init(Analyzer analyzer) throws ImpalaException { + computeStats(analyzer); + createDefaultSmap(analyzer); + } + + @Override + public boolean isBlockingNode() { return true; } + + @Override + public void computeTupleIds() { + clearTupleIds(); + tblRefIds_.addAll(getChild(0).getTblRefIds()); + tupleIds_.addAll(getChild(0).getTupleIds()); + nullableTupleIds_.addAll(getChild(0).getNullableTupleIds()); + } + + @Override + protected void toThrift(TPlanNode msg) { + msg.node_type = TPlanNodeType.CARDINALITY_CHECK_NODE; + TCardinalityCheckNode cardinalityCheckNode = new TCardinalityCheckNode( + displayStatement_); + msg.setCardinality_check_node(cardinalityCheckNode); + } + + @Override + public void computeNodeResourceProfile(TQueryOptions queryOptions) { + nodeResourceProfile_ = ResourceProfile.noReservation(0); + } + + @Override + protected String getNodeExplainString(String prefix, String detailPrefix, + TExplainLevel detailLevel) { + return String.format("%s%s:%s\n", prefix, id_.toString(), displayName_); + } +} http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java index b388673..ef24f6c 100644 --- a/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/DistributedPlanner.java @@ -143,6 +143,8 @@ public class DistributedPlanner { } else if (root instanceof EmptySetNode) { result = new PlanFragment( ctx_.getNextFragmentId(), root, DataPartition.UNPARTITIONED); + } else if (root instanceof CardinalityCheckNode) { + result = createCardinalityCheckNodeFragment((CardinalityCheckNode) root, childFragments); } else { throw new InternalException("Cannot create plan fragment for this node type: " + root.getExplainString(ctx_.getQueryOptions())); @@ -728,6 +730,25 @@ public class DistributedPlanner { } /** + * Adds the CardinalityCheckNode as the new plan root to the child fragment and returns + * the child fragment. + */ + private PlanFragment createCardinalityCheckNodeFragment( + CardinalityCheckNode cardinalityCheckNode, + ArrayList<PlanFragment> childFragments) throws ImpalaException { + PlanFragment childFragment = childFragments.get(0); + // The cardinality check must execute on a single node. + if (childFragment.getOutputPartition().isPartitioned()) { + childFragment = createMergeFragment(childFragment); + } + // Set the child explicitly, an ExchangeNode might have been inserted + // (whereas cardinalityCheckNode.child[0] would point to the original child) + cardinalityCheckNode.setChild(0, childFragment.getPlanRoot()); + childFragment.setPlanRoot(cardinalityCheckNode); + return childFragment; + } + + /** * Replace node's child at index childIdx with an ExchangeNode that receives its * input from childFragment. ParentFragment contains node and the new ExchangeNode. */ http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java index 27d293d..fb669c5 100644 --- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java @@ -1098,6 +1098,14 @@ public class SingleNodePlanner { // Set output smap of rootNode *before* creating a SelectNode for proper resolution. rootNode.setOutputSmap(outputSmap); + // Add runtime cardinality check if needed + if (inlineViewRef.getViewStmt().isRuntimeScalar()) { + rootNode = new CardinalityCheckNode(ctx_.getNextNodeId(), rootNode, + inlineViewRef.getViewStmt().getOrigSqlString()); + rootNode.setOutputSmap(outputSmap); + rootNode.init(ctx_.getRootAnalyzer()); + } + // If the inline view has a LIMIT/OFFSET or unassigned conjuncts due to analytic // functions, we may have conjuncts that need to be assigned to a SELECT node on // top of the current plan root node. http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java index 10a3e9d..18b1483 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java @@ -3700,7 +3700,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest { */ @Test public void TestClone() { - testNumberOfMembers(QueryStmt.class, 9); + testNumberOfMembers(QueryStmt.class, 11); testNumberOfMembers(UnionStmt.class, 9); testNumberOfMembers(ValuesStmt.class, 0); http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java index de8632e..06e834d 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java @@ -938,26 +938,11 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { } } - // Subquery returns multiple rows - AnalysisError("select * from functional.alltypestiny where " + - "(select max(id) from functional.alltypes) = " + - "(select id from functional.alltypestiny)", - "Subquery must return a single row: " + - "(SELECT id FROM functional.alltypestiny)"); - AnalysisError("select id from functional.alltypestiny t where int_col = " + - "(select int_col from functional.alltypessmall limit 2)", - "Subquery must return a single row: " + - "(SELECT int_col FROM functional.alltypessmall LIMIT 2)"); - AnalysisError("select id from functional.alltypestiny where int_col = " + - "(select id from functional.alltypessmall)", - "Subquery must return a single row: " + - "(SELECT id FROM functional.alltypessmall)"); - // Subquery returns multiple columns AnalysisError("select id from functional.alltypestiny where int_col = " + "(select id, int_col from functional.alltypessmall)", - "Subquery must return a single row: " + - "(SELECT id, int_col FROM functional.alltypessmall)"); + "operands of type INT and STRUCT<id:INT,int_col:INT> are not " + + "comparable: int_col = (SELECT id, int_col FROM functional.alltypessmall)"); AnalysisError("select * from functional.alltypestiny where id in " + "(select * from (values(1,2)) as t)", "Subquery must return a single column: (SELECT * FROM (VALUES(1, 2)) t)"); @@ -965,9 +950,9 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { // Subquery returns multiple columns due to a group by clause AnalysisError("select id from functional.alltypestiny where int_col = " + "(select int_col, count(*) from functional.alltypessmall group by int_col)", - "Subquery must return a single row: " + - "(SELECT int_col, count(*) FROM functional.alltypessmall " + - "GROUP BY int_col)"); + "operands of type INT and STRUCT<int_col:INT,_1:BIGINT> are not " + + "comparable: int_col = (SELECT int_col, count(*) FROM " + + "functional.alltypessmall GROUP BY int_col)"); // Outer join with a table from the outer block using an explicit alias AnalysisError("select id from functional.alltypestiny t where int_col = " + @@ -1083,12 +1068,10 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { "g.int_col = t.int_col limit 1)"); // Aggregate subquery with analytic function - AnalysisError("select id, int_col, bool_col from " + + AnalyzesOk("select id, int_col, bool_col from " + "functional.alltypestiny t1 where int_col = (select min(bigint_col) " + "over (partition by bool_col) from functional.alltypessmall t2 where " + - "int_col < 10)", "Subquery must return a single row: (SELECT " + - "min(bigint_col) OVER (PARTITION BY bool_col) FROM " + - "functional.alltypessmall t2 WHERE int_col < 10)"); + "int_col < 10)"); // Aggregate subquery with analytic function + limit 1 and a relative table ref // TODO: Modify the StmtRewriter to allow this query with only relative refs. @@ -1114,10 +1097,8 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { "int_col < 10 limit 1)"); // Subquery with distinct in binary predicate - AnalysisError("select * from functional.alltypes where int_col = " + - "(select distinct int_col from functional.alltypesagg)", "Subquery " + - "must return a single row: (SELECT DISTINCT int_col FROM " + - "functional.alltypesagg)"); + AnalyzesOk("select * from functional.alltypes where int_col = " + + "(select distinct int_col from functional.alltypesagg)"); AnalyzesOk("select * from functional.alltypes where int_col = " + "(select count(distinct int_col) from functional.alltypesagg)"); // Multiple count aggregate functions in a correlated subquery's select list @@ -1179,6 +1160,29 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { "(select t.* from functional.alltypes)", "Could not resolve star expression: 't.*'"); + // Scalar subquery check is done at runtime, not during analysis + for (String cmpOp: cmpOperators) { + AnalyzesOk(String.format( + "select id from functional.alltypestiny t where int_col %s " + + "(select int_col from functional.alltypessmall)", cmpOp)); + AnalyzesOk(String.format( + "select id from functional.alltypestiny t where int_col %s " + + "(select int_col from functional.alltypessmall where id = 1)", cmpOp)); + AnalyzesOk(String.format( + "select id from functional.alltypestiny t where int_col %s " + + "1 - (select int_col from functional.alltypessmall where id = 1)", cmpOp)); + AnalyzesOk(String.format( + "select id from functional.alltypestiny t where int_col %s " + + "1 - (select int_col from functional.alltypessmall limit 10)", cmpOp)); + AnalyzesOk(String.format( + "select id from functional.alltypestiny t where int_col %s " + + "(select int_col from functional.alltypessmall) * 7", cmpOp)); + } + AnalysisError("select * from functional.alltypes t1 where id < " + + "(select id from functional.alltypes t2 where t1.int_col = t2.int_col)", + "Unsupported correlated subquery with runtime scalar check: " + + "SELECT id FROM functional.alltypes t2 WHERE t1.int_col = t2.int_col"); + // Test resolution of correlated table references inside subqueries. The testing // here is rather basic, because the analysis goes through the same codepath // as the analysis of correlated inline views, which are more thoroughly tested. @@ -1396,5 +1400,16 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { AnalyzesOk("select * from functional.alltypestiny a where " + "double_col between cast(1 as double) and cast(10 as double) and " + "exists (select 1 from functional.alltypessmall b where a.id = b.id)"); + + AnalyzesOk("select count(1) from functional.alltypes " + + "where (select int_col > 10 from functional.alltypes)"); + AnalyzesOk("select count(1) from functional.alltypes " + + "where (select string_col is null from functional.alltypes)"); + AnalyzesOk("select count(1) from functional.alltypes " + + "where (select int_col from functional.alltypes) is null"); + AnalyzesOk("select count(1) from functional.alltypes " + + "where (select int_col from functional.alltypes) is not null"); + AnalyzesOk("select 1 from functional.alltypes where " + + "coalesce(null, (select bool_col from functional.alltypes where id = 0))"); } } http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java index 760ea59..0a073eb 100644 --- a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java @@ -79,7 +79,11 @@ public class ToSqlTest extends FrontendTestBase { String actual = null; try { ParseNode node = AnalyzesOk(query, createAnalysisCtx(defaultDb)); - actual = node.toSql(); + if (node instanceof QueryStmt) { + actual = ((QueryStmt)node).getOrigSqlString(); + } else { + actual = node.toSql(); + } if (ignoreWhitespace) { // Transform whitespace to single space. actual = actual.replace('\n', ' ').replaceAll(" +", " ").trim(); @@ -886,9 +890,6 @@ public class ToSqlTest extends FrontendTestBase { /** * Tests that toSql() properly handles subqueries in the where clause. */ - // TODO Fix testToSql to print the stmt after the first analysis phase and not - // after the rewrite. - @Ignore("Prints the rewritten statement") @Test public void subqueryTest() { // Nested predicates @@ -912,13 +913,6 @@ public class ToSqlTest extends FrontendTestBase { "(select * from functional.alltypestiny)", "SELECT * FROM functional.alltypes WHERE NOT EXISTS " + "(SELECT * FROM functional.alltypestiny)"); - // Multiple nested predicates in the WHERE clause - testToSql("select * from functional.alltypes where not (id < 10 and " + - "(int_col in (select int_col from functional.alltypestiny)) and " + - "(string_col = (select max(string_col) from functional.alltypestiny)))", - "SELECT * FROM functional.alltypes WHERE NOT (id < 10 AND " + - "(int_col IN (SELECT int_col FROM functional.alltypestiny)) AND " + - "(string_col = (SELECT max(string_col) FROM functional.alltypestiny)))"); // Multiple nesting levels testToSql("select * from functional.alltypes where id in " + "(select id from functional.alltypestiny where int_col = " + http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test index 3646146..bf7f8b8 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test @@ -1987,3 +1987,69 @@ PLAN-ROOT SINK partitions=1/1 files=1 size=4.18KB runtime filters: RF000 -> t1.pos ==== +# Add run time scalar subquery check for uncorrelated subqueries +# Create CardinalityCheckNode inside a subplan +select c_custkey +from tpch_nested_parquet.customer c +where c_custkey < (select o_orderkey + from c.c_orders + where o_orderkey = 6000000) +---- PLAN +PLAN-ROOT SINK +| +01:SUBPLAN +| +|--05:NESTED LOOP JOIN [RIGHT SEMI JOIN] +| | join predicates: c_custkey < o_orderkey +| | +| |--02:SINGULAR ROW SRC +| | +| 04:CARDINALITY CHECK +| | limit: 1 +| | +| 03:UNNEST [c.c_orders] +| limit: 2 +| +00:SCAN HDFS [tpch_nested_parquet.customer c] + partitions=1/1 files=4 size=292.35MB + predicates on c_orders: o_orderkey = 6000000 +==== +# CardinalityCheckNode in subplan in a subplan +select c_custkey +from tpch_nested_parquet.customer c +where c_custkey < (select o_orderkey + from c.c_orders co + where o_orderkey = (select li.l_linenumber + from co.o_lineitems li)) +---- PLAN +PLAN-ROOT SINK +| +01:SUBPLAN +| +|--10:NESTED LOOP JOIN [RIGHT SEMI JOIN] +| | join predicates: c_custkey < o_orderkey +| | +| |--02:SINGULAR ROW SRC +| | +| 09:CARDINALITY CHECK +| | limit: 1 +| | +| 04:SUBPLAN +| | limit: 2 +| | +| |--08:NESTED LOOP JOIN [RIGHT SEMI JOIN] +| | | join predicates: li.l_linenumber = o_orderkey +| | | +| | |--05:SINGULAR ROW SRC +| | | +| | 07:CARDINALITY CHECK +| | | limit: 1 +| | | +| | 06:UNNEST [co.o_lineitems li] +| | limit: 2 +| | +| 03:UNNEST [c.c_orders co] +| +00:SCAN HDFS [tpch_nested_parquet.customer c] + partitions=1/1 files=4 size=292.35MB +==== http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test index ea22679..796ccd8 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test @@ -2338,4 +2338,426 @@ PLAN-ROOT SINK 01:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B predicates: id < 5 +======= +# Subquery in binary predicate that needs cardinality check at runtime +select bigint_col from functional.alltypes where id = + (select id + from functional.alltypes where id = 1 + ) +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: id = id +| runtime filters: RF000 <- id +| +|--02:CARDINALITY CHECK +| | limit: 1 +| | +| 01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| predicates: id = 1 +| limit: 2 +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + predicates: functional.alltypes.id = 1 + runtime filters: RF000 -> id +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +07:EXCHANGE [UNPARTITIONED] +| +03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED] +| hash predicates: id = id +| runtime filters: RF000 <- id +| +|--06:EXCHANGE [HASH(id)] +| | +| 02:CARDINALITY CHECK +| | limit: 1 +| | +| 04:EXCHANGE [UNPARTITIONED] +| | limit: 2 +| | +| 01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| predicates: id = 1 +| limit: 2 +| +05:EXCHANGE [HASH(id)] +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + predicates: functional.alltypes.id = 1 + runtime filters: RF000 -> id +==== +# Subquery in arithmetic expression that needs cardinality check at runtime +select bigint_col from functional.alltypes where id = + 3 * (select id + from functional.alltypes where id = 1 + ) +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: id = 3 * id +| runtime filters: RF000 <- 3 * id +| +|--02:CARDINALITY CHECK +| | limit: 1 +| | +| 01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| predicates: id = 1 +| limit: 2 +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> id +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +06:EXCHANGE [UNPARTITIONED] +| +03:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: id = 3 * id +| runtime filters: RF000 <- 3 * id +| +|--05:EXCHANGE [BROADCAST] +| | +| 02:CARDINALITY CHECK +| | limit: 1 +| | +| 04:EXCHANGE [UNPARTITIONED] +| | limit: 2 +| | +| 01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| predicates: id = 1 +| limit: 2 +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> id +==== +# Subquery that contains union and needs cardinality check at runtime +select * from functional.alltypes where id = + (select i from (select bigint_col as i from functional.alltypes + union + select smallint_col as i from functional.alltypes) t) +---- PLAN +PLAN-ROOT SINK +| +06:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: id = i +| runtime filters: RF000 <- i +| +|--05:CARDINALITY CHECK +| | limit: 1 +| | +| 04:AGGREGATE [FINALIZE] +| | group by: i +| | limit: 2 +| | +| 01:UNION +| | pass-through-operands: 02 +| | +| |--03:SCAN HDFS [functional.alltypes] +| | partitions=24/24 files=24 size=478.45KB +| | +| 02:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> id +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +11:EXCHANGE [UNPARTITIONED] +| +06:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: id = i +| runtime filters: RF000 <- i +| +|--10:EXCHANGE [BROADCAST] +| | +| 05:CARDINALITY CHECK +| | limit: 1 +| | +| 09:EXCHANGE [UNPARTITIONED] +| | limit: 2 +| | +| 08:AGGREGATE [FINALIZE] +| | group by: i +| | limit: 2 +| | +| 07:EXCHANGE [HASH(i)] +| | +| 04:AGGREGATE [STREAMING] +| | group by: i +| | +| 01:UNION +| | pass-through-operands: 02 +| | +| |--03:SCAN HDFS [functional.alltypes] +| | partitions=24/24 files=24 size=478.45KB +| | +| 02:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> id +==== +# Subquery that contains join and GROUP BY and needs cardinality check at runtime +select * from functional.alltypes where id = + (select max(allt.smallint_col) from functional.alltypes allt, functional.alltypesagg ata + where allt.id = ata.id and ata.month = 1 group by ata.month) +---- PLAN +PLAN-ROOT SINK +| +06:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: id = max(allt.smallint_col) +| runtime filters: RF000 <- max(allt.smallint_col) +| +|--05:CARDINALITY CHECK +| | limit: 1 +| | +| 04:AGGREGATE [FINALIZE] +| | output: max(allt.smallint_col) +| | group by: ata.month +| | limit: 2 +| | +| 03:HASH JOIN [INNER JOIN] +| | hash predicates: ata.id = allt.id +| | runtime filters: RF002 <- allt.id +| | +| |--01:SCAN HDFS [functional.alltypes allt] +| | partitions=24/24 files=24 size=478.45KB +| | +| 02:SCAN HDFS [functional.alltypesagg ata] +| partitions=11/11 files=11 size=814.73KB +| runtime filters: RF002 -> ata.id +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> id +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +13:EXCHANGE [UNPARTITIONED] +| +06:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: id = max(allt.smallint_col) +| runtime filters: RF000 <- max(allt.smallint_col) +| +|--12:EXCHANGE [BROADCAST] +| | +| 05:CARDINALITY CHECK +| | limit: 1 +| | +| 11:EXCHANGE [UNPARTITIONED] +| | limit: 2 +| | +| 10:AGGREGATE [FINALIZE] +| | output: max:merge(allt.smallint_col) +| | group by: ata.month +| | limit: 2 +| | +| 09:EXCHANGE [HASH(ata.month)] +| | +| 04:AGGREGATE [STREAMING] +| | output: max(allt.smallint_col) +| | group by: ata.month +| | +| 03:HASH JOIN [INNER JOIN, PARTITIONED] +| | hash predicates: ata.id = allt.id +| | runtime filters: RF002 <- allt.id +| | +| |--08:EXCHANGE [HASH(allt.id)] +| | | +| | 01:SCAN HDFS [functional.alltypes allt] +| | partitions=24/24 files=24 size=478.45KB +| | +| 07:EXCHANGE [HASH(ata.id)] +| | +| 02:SCAN HDFS [functional.alltypesagg ata] +| partitions=11/11 files=11 size=814.73KB +| runtime filters: RF002 -> ata.id +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + runtime filters: RF000 -> id +==== +# IS NULL predicate must not be pushed down to the scan node of the inline view. +select count(1) from functional.alltypes +where (select int_col from functional.alltypes) is null +---- PLAN +PLAN-ROOT SINK +| +05:AGGREGATE [FINALIZE] +| output: count(*) +| +04:NESTED LOOP JOIN [CROSS JOIN] +| +|--03:SELECT +| | predicates: int_col IS NULL +| | +| 02:CARDINALITY CHECK +| | limit: 1 +| | +| 01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| limit: 2 +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +09:AGGREGATE [FINALIZE] +| output: count:merge(*) +| +08:EXCHANGE [UNPARTITIONED] +| +05:AGGREGATE +| output: count(*) +| +04:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| +|--07:EXCHANGE [BROADCAST] +| | +| 03:SELECT +| | predicates: int_col IS NULL +| | +| 02:CARDINALITY CHECK +| | limit: 1 +| | +| 06:EXCHANGE [UNPARTITIONED] +| | limit: 2 +| | +| 01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| limit: 2 +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# Binary predicate with constant must not be pushed down +# to the scan node of the inline view. +select count(1) from functional.alltypes +where (select int_col from functional.alltypes) > 10 +---- PLAN +PLAN-ROOT SINK +| +05:AGGREGATE [FINALIZE] +| output: count(*) +| +04:NESTED LOOP JOIN [CROSS JOIN] +| +|--03:SELECT +| | predicates: int_col > 10 +| | +| 02:CARDINALITY CHECK +| | limit: 1 +| | +| 01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| limit: 2 +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +09:AGGREGATE [FINALIZE] +| output: count:merge(*) +| +08:EXCHANGE [UNPARTITIONED] +| +05:AGGREGATE +| output: count(*) +| +04:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| +|--07:EXCHANGE [BROADCAST] +| | +| 03:SELECT +| | predicates: int_col > 10 +| | +| 02:CARDINALITY CHECK +| | limit: 1 +| | +| 06:EXCHANGE [UNPARTITIONED] +| | limit: 2 +| | +| 01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| limit: 2 +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +==== +# Runtime scalar subquery with offset. +select count(*) from functional.alltypes +where 7 = (select id from functional.alltypestiny + order by id limit 8 offset 7) +---- PLAN +PLAN-ROOT SINK +| +06:AGGREGATE [FINALIZE] +| output: count(*) +| +05:NESTED LOOP JOIN [CROSS JOIN] +| +|--04:SELECT +| | predicates: id = 7 +| | +| 03:CARDINALITY CHECK +| | limit: 1 +| | +| 02:TOP-N [LIMIT=2 OFFSET=7] +| | order by: id ASC +| | +| 01:SCAN HDFS [functional.alltypestiny] +| partitions=4/4 files=4 size=460B +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +10:AGGREGATE [FINALIZE] +| output: count:merge(*) +| +09:EXCHANGE [UNPARTITIONED] +| +06:AGGREGATE +| output: count(*) +| +05:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| +|--08:EXCHANGE [BROADCAST] +| | +| 04:SELECT +| | predicates: id = 7 +| | +| 03:CARDINALITY CHECK +| | limit: 1 +| | +| 07:MERGING-EXCHANGE [UNPARTITIONED] +| | offset: 7 +| | order by: id ASC +| | limit: 2 +| | +| 02:TOP-N [LIMIT=9] +| | order by: id ASC +| | +| 01:SCAN HDFS [functional.alltypestiny] +| partitions=4/4 files=4 size=460B +| +00:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB ==== http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test index 2f10072..a8acfe2 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test @@ -670,3 +670,41 @@ where c_custkey < 10 and c_custkey in ---- TYPES bigint ==== +---- QUERY +# Create CardinalityCheckNode inside a subplan +select c_custkey +from customer c +where c_custkey < (select o_orderkey + from c.c_orders + where o_orderkey = 6000000) +---- RESULTS +110063 +---- TYPES +bigint +==== +---- QUERY +# Create CardinalityCheckNode inside a subplan +# o_orderkey 6000000 and 5000000 belong to different customers +select c_custkey +from customer c +where c_custkey < (select o_orderkey + from c.c_orders + where o_orderkey = 6000000 or o_orderkey = 5000000) +order by c_custkey +---- RESULTS +24325 +110063 +---- TYPES +bigint +==== +---- QUERY +# Create CardinalityCheckNode inside a subplan. +# o_orderkey 6000000 and 4285920 belong to the same customer +select c_custkey +from customer c +where c_custkey < (select o_orderkey + from c.c_orders + where o_orderkey = 6000000 or o_orderkey = 4285920) +---- CATCH +Subquery must not return more than one row: SELECT o_orderkey FROM c.c_orders WHERE o_orderkey = 6000000 OR o_orderkey = 4285920 +==== http://git-wip-us.apache.org/repos/asf/impala/blob/1e79f147/testdata/workloads/functional-query/queries/QueryTest/subquery.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test index 4aaa665..2d691ed 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test +++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test @@ -847,6 +847,177 @@ WHERE EXISTS ORDER BY id LIMIT 10 OFFSET 6) ---- RESULTS 0 +==== +---- QUERY +# Uncorrelated subquery in binary predicate that returns scalar value at runtime +SELECT id FROM functional.alltypessmall +WHERE int_col = + (SELECT int_col + FROM functional.alltypessmall + WHERE id = 1) +ORDER BY id +---- RESULTS +1 +11 +21 +26 +36 +46 +51 +61 +71 +76 +86 +96 +---- TYPES +INT +==== +---- QUERY +# Uncorrelated subquery in arithmetic expr that returns scalar value at runtime +SELECT id FROM functional.alltypessmall +WHERE int_col = + 3 * (SELECT int_col + FROM functional.alltypessmall + WHERE id = 1) +ORDER BY id +---- RESULTS +3 +13 +23 +28 +38 +48 +53 +63 +73 +78 +88 +98 +---- TYPES +INT +==== +---- QUERY +# Uncorrelated subquery in binary predicate that returns no rows. +SELECT id FROM functional.alltypessmall +WHERE int_col = + (SELECT int_col + FROM functional.alltypessmall + WHERE id = -123) +ORDER BY id +---- RESULTS +---- TYPES +INT +==== +---- QUERY +# Uncorrelated subquery in arithmetic expr that returns no rows. +SELECT id FROM functional.alltypessmall +WHERE int_col = + 3 * (SELECT int_col + FROM functional.alltypessmall + WHERE id = -123) +ORDER BY id +---- RESULTS +---- TYPES +INT +==== +---- QUERY +# Uncorrelated subquery in binary predicate that returns multiple rows +SELECT id FROM functional.alltypessmall +WHERE int_col = + (SELECT int_col + FROM functional.alltypessmall) +ORDER BY id +---- RESULTS +---- CATCH +Query aborted:Subquery must not return more than one row: +==== +---- QUERY +# Uncorrelated subquery in arithmetic expr that returns multiple rows +SELECT id FROM functional.alltypessmall +WHERE int_col = + 3 * (SELECT int_col + FROM functional.alltypessmall) +ORDER BY id +---- RESULTS +---- CATCH +Query aborted:Subquery must not return more than one row: +==== +---- QUERY +# Uncorrelated subquery in binary predicate that returns scalar value at runtime +SELECT count(id) FROM functional.alltypes +WHERE int_col = + (SELECT int_col + FROM functional.alltypessmall + WHERE id = 1) +---- RESULTS +730 +---- TYPES +BIGINT +==== +---- QUERY +# Uncorrelated subquery in arithmetic expr that returns scalar value at runtime +SELECT count(id) FROM functional.alltypes +WHERE int_col = + 3 * (SELECT int_col + FROM functional.alltypessmall + WHERE id = 1) +---- RESULTS +730 +---- TYPES +BIGINT +==== +---- QUERY +# Uncorrelated subquery in binary predicate that returns scalar value at runtime +# executed on a single node. +set num_nodes=1; +SELECT count(id) FROM functional.alltypes +WHERE int_col = + (SELECT int_col + FROM functional.alltypessmall + WHERE id = 1) +---- RESULTS +730 +---- TYPES +BIGINT +==== +---- QUERY +# Uncorrelated subquery in arithmetic expr that returns scalar value at runtime +# executed on a single node. +set num_nodes=1; +SELECT count(id) FROM functional.alltypes +WHERE int_col = + 3 * (SELECT int_col + FROM functional.alltypessmall + WHERE id = 1) +---- RESULTS +730 +---- TYPES +BIGINT +==== +---- QUERY +# Subquery that returns more than one row +SELECT a FROM (values(1 a),(2),(3)) v +WHERE a = (SELECT x FROM (values(1 x),(2),(3)) v) +---- RESULTS +---- CATCH +Query aborted:Subquery must not return more than one row: +==== +---- QUERY +# Subquery that returns more than one row +# The error message must not reveal the definition of functional.alltypes_view +SELECT id FROM functional.alltypes +WHERE id = (SELECT bigint_col FROM functional.alltypes_view) +---- RESULTS +---- CATCH +Query aborted:Subquery must not return more than one row: SELECT bigint_col FROM functional.alltypes_view +==== +---- QUERY +# Runtime scalar subquery with offset. +select count(*) from functional.alltypes +where 7 = (select id from functional.alltypestiny + order by id limit 8 offset 7) +---- RESULTS +7300 ---- TYPES BIGINT ====