This is an automated email from the ASF dual-hosted git repository. laszlog pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit c088e7204ab95bc7d60cfb135f230008be5e4d30 Author: Steve Carlin <[email protected]> AuthorDate: Fri Aug 29 09:27:02 2025 -0700 IMPALA-14422: Calcite planner: Crash on Union above Values RelNode In version 1.37, for the query: select 1 union all select count(*) ... Calcite was creating the following RelNode structure: LogicalUnion LogicalValues (1) LogicalAggregate ... Calcite changed the behavior in 1.40 to ensure that all datatypes are matching. Now it produces: LogicalUnion LogicalProject(CAST 1 as SMALLINT) LogicalValues LogicalProject(CAST 5000 as SMALLINT) LogicalAggregate The introduction of the Project between Values exposed a bug in the ImpalaValuesRel class. The Values node was not looking at its parent type in the situation where the parent was a Project and the tuple size was 1. The code in this commit also handles projects which ignore the values underneath. When the "trivial" query is found, it creates the Plan Node at the Project level rather than the Values level. One other small change is thrown into this commit. The memory allocation in the tuple descriptor was including a byte for nulls even when the column was non-null. This has been fixed. The testing for this is already included in calcite.test. This will fail on a simple "select length('hello')" query, but only after upgrading to at least Calcite 1.40. So this commit is necessary before upgrading, but no additional test is needed. Change-Id: I1edf2f0bdb9271b5b8c9716ca7d8c8f858cc108c Reviewed-on: http://gerrit.cloudera.org:8080/23731 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../impala/calcite/rel/node/ImpalaProjectRel.java | 58 ++++++++++++++++++++++ .../impala/calcite/rel/node/ImpalaValuesRel.java | 49 ++++++++---------- .../calcite/rel/util/TupleDescriptorFactory.java | 3 ++ 3 files changed, 83 insertions(+), 27 deletions(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaProjectRel.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaProjectRel.java index e01e36bab..cf995999e 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaProjectRel.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaProjectRel.java @@ -36,7 +36,9 @@ import org.apache.impala.analysis.Analyzer; import org.apache.impala.analysis.Expr; import org.apache.impala.calcite.rel.util.CreateExprVisitor; import org.apache.impala.common.ImpalaException; +import org.apache.impala.planner.PlanNodeId; +import java.util.ArrayList; import java.util.List; @@ -74,8 +76,13 @@ public class ImpalaProjectRel extends Project @Override public NodeWithExprs getPlanNode(ParentPlanRelContext context) throws ImpalaException { + if (isTrivialProject(context, getProjects())) { + return createUnionPlanNode(context, getProjects(), getRowType()); + } + // see comment in isCoercedProjectForValues method boolean isCoercedProjectForValues = isCoercedProjectForValues(context); + NodeWithExprs inputWithExprs = getChildPlanNode(context, isCoercedProjectForValues); // If this Project is a coercedProjectForValues, then this Project has been taken @@ -159,6 +166,12 @@ public class ImpalaProjectRel extends Project } List<RexNode> projects = getProjects(); + + // We are looking specifically for when all the columns in the project + // match the input columns, so return false if the sizes are different. + if (getInput().getRowType().getFieldNames().size() != projects.size()) { + return false; + } for (int i = 0; i < projects.size(); ++i) { RexNode project = projects.get(i); if (project instanceof RexInputRef) { @@ -183,6 +196,51 @@ public class ImpalaProjectRel extends Project return true; } + /** + * Returns true if this Project is trivial. The Project is trivial if: + * a) The Project has no inputRefs, which means that it doesn't matter + * what the underlying RelNode is, it will be ignored. + * b) It is a Values RelNode underneath that only returns 1 row, A multi- + * row Values node is not trivial because 2 rows would be coming out of + * the Values node, which means 2 rows would be coming out of the Project. + */ + private boolean isTrivialProject(ParentPlanRelContext context, + List<RexNode> projects) { + if (RelOptUtil.InputFinder.bits(projects, null).size() > 0) { + return false; + } + ImpalaPlanRel relInput = (ImpalaPlanRel) getInput(0); + if (!(relInput instanceof ImpalaValuesRel)) { + return false; + } + + ImpalaValuesRel values = (ImpalaValuesRel) relInput; + if (values.getTuples().size() != 1) { + return false; + } + + return true; + } + + private NodeWithExprs createUnionPlanNode(ParentPlanRelContext context, + List<RexNode> projects, RelDataType rowType) throws ImpalaException { + CreateExprVisitor visitor = new CreateExprVisitor(getCluster().getRexBuilder(), + new ArrayList<>(), context.ctx_.getRootAnalyzer()); + List<Expr> outputExprs = new ArrayList<>(); + for (RexNode rexNode : projects) { + outputExprs.add(CreateExprVisitor.getExpr(visitor, rexNode)); + } + + PlanNodeId nodeId = context.ctx_.getNextNodeId(); + List<NodeWithExprs> nodeWithExprsList = new ArrayList<>(); + nodeWithExprsList.add(new NodeWithExprs(null, outputExprs, + getRowType().getFieldNames())); + NodeWithExprs retNode = NodeCreationUtils.createUnionPlanNode(nodeId, + context.ctx_.getRootAnalyzer(), rowType, nodeWithExprsList, true); + return NodeCreationUtils.wrapInSelectNodeIfNeeded(context, retNode, + getCluster().getRexBuilder()); + } + @Override public RelNodeType relNodeType() { return RelNodeType.PROJECT; diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaValuesRel.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaValuesRel.java index 9f2640ac7..012a27a00 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaValuesRel.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/node/ImpalaValuesRel.java @@ -61,18 +61,21 @@ public class ImpalaValuesRel extends Values @Override public NodeWithExprs getPlanNode(ParentPlanRelContext context) throws ImpalaException { + + + RelDataType rowType = + context.parentRowType_ != null ? context.parentRowType_ : getRowType(); + // Value RelNode will generate a Union PlanNode to hold the values. There is // no need to create another Union node if the parent is already a Union node. if (context.parentType_ == RelNodeType.UNION && (getTuples().size() == 1)) { - return getValuesExprs(context, getTuples().get(0)); + return getValuesExprs(rowType, context.ctx_.getRootAnalyzer(), getTuples().get(0)); } - PlanNodeId nodeId = context.ctx_.getNextNodeId(); + List<NodeWithExprs> nodeWithExprsList = + getValuesExprs(rowType, context.ctx_.getRootAnalyzer()); - RelDataType rowType = - context.parentRowType_ != null ? context.parentRowType_ : getRowType(); - - List<NodeWithExprs> nodeWithExprsList = getValuesExprs(context); + PlanNodeId nodeId = context.ctx_.getNextNodeId(); NodeWithExprs retNode = NodeCreationUtils.createUnionPlanNode(nodeId, context.ctx_.getRootAnalyzer(), rowType, nodeWithExprsList, false); @@ -83,41 +86,33 @@ public class ImpalaValuesRel extends Values getCluster().getRexBuilder()); } - private List<NodeWithExprs> getValuesExprs(ParentPlanRelContext context - ) throws ImpalaException { + private List<NodeWithExprs> getValuesExprs(RelDataType rowType, + Analyzer analyzer) throws ImpalaException { List<NodeWithExprs> nodeWithExprsList = new ArrayList<>(); for (List<RexLiteral> literals : getTuples()) { - nodeWithExprsList.add(getValuesExprs(context, literals)); + nodeWithExprsList.add(getValuesExprs(rowType, analyzer, literals)); } return nodeWithExprsList; } - private NodeWithExprs getValuesExprs(ParentPlanRelContext context, + private NodeWithExprs getValuesExprs(RelDataType rowType, Analyzer analyzer, List<RexLiteral> literals) throws ImpalaException { PlanNode retNode = null; List<Expr> outputExprs = new ArrayList<>(); + Preconditions.checkState(rowType.getFieldList().size() == literals.size()); int i = 0; for (RexLiteral literal : literals) { ExprConjunctsConverter converter = new ExprConjunctsConverter(literal, - new ArrayList<>(), getCluster().getRexBuilder(), - context.ctx_.getRootAnalyzer()); - - // If the parentRowType_ is set, that means there is a passthrough "Project" - // RelNode on top of this Values RelNode. This Project will contain the type we - // need to use. This happens because Calcite treats literal strings as "char" - // and smaller integers (e.g. tinyint) as "int", - if (context.parentRowType_ != null) { - LiteralExpr literalExpr = - getLiteralExprWithType( - (LiteralExpr) converter.getImpalaConjuncts().get(0), - context.parentRowType_.getFieldList().get(i).getType(), - context.ctx_.getRootAnalyzer()); - outputExprs.add(literalExpr); - } else { - outputExprs.addAll(converter.getImpalaConjuncts()); - } + new ArrayList<>(), getCluster().getRexBuilder(), analyzer); + + LiteralExpr literalExpr = + getLiteralExprWithType( + (LiteralExpr) converter.getImpalaConjuncts().get(0), + rowType.getFieldList().get(i).getType(), + analyzer); + outputExprs.add(literalExpr); i++; } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/util/TupleDescriptorFactory.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/util/TupleDescriptorFactory.java index d029c7ef3..8f94a1324 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/util/TupleDescriptorFactory.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/rel/util/TupleDescriptorFactory.java @@ -73,6 +73,9 @@ public class TupleDescriptorFactory { slotDesc.setType(impalaType); slotDesc.setLabel(fieldLabel); slotDesc.setIsMaterialized(true); + if (!relDataTypeField.getType().isNullable()) { + slotDesc.setIsNullable(false); + } } tupleDesc.computeMemLayout(); return tupleDesc;
