HIVE-12543: Disable Hive ConstantPropagate optimizer when CBO has optimized the plan (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
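The effect shows up directly in the first two q.out hunks below, where a CASE-wrapped comparison is folded into a plain predicate, e.g. (CASE WHEN ((key = 90)) THEN (2) ELSE (4) END > 3) becomes (key <> 90). A minimal, self-contained sketch (plain Java, not part of the patch) checking that the two forms keep exactly the same rows:

    public final class CaseFoldingSketch {

      // Original predicate: CASE WHEN (key = 90) THEN 2 ELSE 4 END > 3
      static boolean original(int key) {
        int branch = (key == 90) ? 2 : 4; // the CASE expression
        return branch > 3;                // only the ELSE branch (4) exceeds 3
      }

      // Folded predicate from the updated plan: key <> 90
      static boolean folded(int key) {
        return key != 90;
      }

      public static void main(String[] args) {
        for (int key : new int[] {89, 90, 91}) {
          assert original(key) == folded(key); // run with -ea
          System.out.println(key + " -> " + folded(key));
        }
      }
    }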
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a6d9bf76 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a6d9bf76 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a6d9bf76 Branch: refs/heads/llap Commit: a6d9bf76e525988058f87948a8cfa7a683d6e218 Parents: f8f50ab Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Fri Feb 12 00:20:19 2016 +0100 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Wed Feb 17 09:36:29 2016 +0100 ---------------------------------------------------------------------- .../results/positive/hbase_ppd_key_range.q.out | 2 +- .../test/results/positive/hbase_pushdown.q.out | 2 +- .../hadoop/hive/ql/exec/OperatorUtils.java | 4 +- .../optimizer/ConstantPropagateProcFactory.java | 29 +- .../DynamicPartitionPruningOptimization.java | 40 +-- .../hadoop/hive/ql/optimizer/Optimizer.java | 23 +- ...edundantDynamicPruningConditionsRemoval.java | 229 ++++++++++++++++ .../optimizer/SortedDynPartitionOptimizer.java | 38 ++- .../hive/ql/optimizer/calcite/HiveRexUtil.java | 269 +++++++++++++++++++ .../rules/HiveReduceExpressionsRule.java | 96 ++----- .../calcite/translator/ExprNodeConverter.java | 161 ++++++----- .../calcite/translator/RexNodeConverter.java | 88 +++++- .../translator/SqlFunctionConverter.java | 4 + .../hadoop/hive/ql/parse/CalcitePlanner.java | 8 +- .../hadoop/hive/ql/plan/ExprNodeDescUtils.java | 29 +- .../annotate_stats_join_pkfk.q.out | 12 +- .../results/clientpositive/auto_join32.q.out | 2 +- .../auto_join_without_localtask.q.out | 6 +- .../bucketsortoptimize_insert_7.q.out | 2 +- .../clientpositive/cbo_rp_lineage2.q.out | 6 +- .../clientpositive/constprog_partitioner.q.out | 4 +- .../clientpositive/constprog_when_case.q.out | 2 +- .../clientpositive/correlationoptimizer13.q.out | 2 +- .../clientpositive/correlationoptimizer8.q.out | 6 +- .../clientpositive/correlationoptimizer9.q.out | 4 +- .../results/clientpositive/decimal_udf.q.out | 6 +- .../clientpositive/dynamic_rdd_cache.q.out | 12 +- .../clientpositive/filter_cond_pushdown.q.out | 10 +- .../clientpositive/filter_join_breaktask.q.out | 4 +- .../test/results/clientpositive/fold_case.q.out | 24 +- .../clientpositive/fold_eq_with_case_when.q.out | 16 +- .../test/results/clientpositive/fold_when.q.out | 24 +- .../clientpositive/index_auto_self_join.q.out | 12 +- .../clientpositive/index_auto_unused.q.out | 38 +-- .../results/clientpositive/input_part3.q.out | 2 +- ql/src/test/results/clientpositive/join34.q.out | 2 +- ql/src/test/results/clientpositive/join35.q.out | 2 +- ql/src/test/results/clientpositive/join42.q.out | 59 ++-- .../clientpositive/join_grp_diff_keys.q.out | 2 +- .../test/results/clientpositive/lineage2.q.out | 8 +- .../test/results/clientpositive/lineage3.q.out | 14 +- .../llap/dynamic_partition_pruning.q.out | 263 +++++++++--------- .../llap/dynamic_partition_pruning_2.q.out | 11 +- .../llap/hybridgrace_hashjoin_1.q.out | 21 +- .../llap/tez_dynpart_hashjoin_1.q.out | 24 +- .../clientpositive/llap/tez_self_join.q.out | 2 +- .../llap/tez_vector_dynpart_hashjoin_1.q.out | 24 +- .../vectorized_dynamic_partition_pruning.q.out | 251 +++++++++-------- .../clientpositive/mapjoin_mapjoin.q.out | 2 +- .../merge_dynamic_partition.q.out | 2 +- .../results/clientpositive/perf/query21.q.out | 2 +- .../results/clientpositive/perf/query31.q.out | 28 +- .../results/clientpositive/perf/query32.q.out | 6 +- .../results/clientpositive/perf/query34.q.out | 6 +- 
.../results/clientpositive/perf/query39.q.out | 22 +- .../results/clientpositive/perf/query45.q.out | 2 +- .../results/clientpositive/perf/query58.q.out | 6 +- .../results/clientpositive/perf/query64.q.out | 8 +- .../results/clientpositive/perf/query70.q.out | 6 +- .../results/clientpositive/perf/query71.q.out | 4 +- .../results/clientpositive/perf/query72.q.out | 6 +- .../results/clientpositive/perf/query73.q.out | 6 +- .../results/clientpositive/perf/query75.q.out | 24 +- .../results/clientpositive/perf/query85.q.out | 2 +- .../results/clientpositive/perf/query87.q.out | 6 +- .../results/clientpositive/perf/query89.q.out | 8 +- .../results/clientpositive/perf/query92.q.out | 4 +- .../results/clientpositive/perf/query94.q.out | 6 +- .../results/clientpositive/perf/query95.q.out | 6 +- .../results/clientpositive/perf/query97.q.out | 4 +- ql/src/test/results/clientpositive/ppd2.q.out | 8 +- .../test/results/clientpositive/ppd_join2.q.out | 8 +- .../test/results/clientpositive/ppd_join5.q.out | 4 +- .../clientpositive/ppd_outer_join5.q.out | 246 +++++++++++------ .../clientpositive/ppd_repeated_alias.q.out | 2 +- .../results/clientpositive/ppd_udf_case.q.out | 8 +- .../results/clientpositive/ppd_union_view.q.out | 20 +- .../clientpositive/ppr_allchildsarenull.q.out | 16 +- .../test/results/clientpositive/semijoin.q.out | 2 +- .../test/results/clientpositive/semijoin4.q.out | 4 +- .../results/clientpositive/smb_mapjoin_25.q.out | 54 ++-- .../clientpositive/spark/auto_join32.q.out | 2 +- .../spark/auto_join_without_localtask.q.out | 2 +- .../spark/bucketsortoptimize_insert_7.q.out | 4 +- .../spark/constprog_partitioner.q.out | 4 +- .../spark/dynamic_rdd_cache.q.out | 12 +- .../spark/filter_join_breaktask.q.out | 4 +- .../spark/index_auto_self_join.q.out | 12 +- .../results/clientpositive/spark/join34.q.out | 2 +- .../results/clientpositive/spark/join35.q.out | 2 +- .../clientpositive/spark/mapjoin_mapjoin.q.out | 2 +- .../clientpositive/spark/ppd_join2.q.out | 8 +- .../clientpositive/spark/ppd_join5.q.out | 4 +- .../clientpositive/spark/ppd_outer_join5.q.out | 222 ++++++++------- .../results/clientpositive/spark/semijoin.q.out | 2 +- .../clientpositive/spark/smb_mapjoin_25.q.out | 54 ++-- .../clientpositive/spark/subquery_exists.q.out | 2 +- .../clientpositive/spark/subquery_in.q.out | 6 +- .../spark/table_access_keys_stats.q.out | 2 +- .../clientpositive/spark/union_remove_19.q.out | 18 +- .../spark/vector_mapjoin_reduce.q.out | 6 +- .../spark/vectorization_short_regress.q.out | 2 +- .../clientpositive/spark/vectorized_case.q.out | 4 +- .../clientpositive/subquery_exists.q.out | 2 +- .../results/clientpositive/subquery_in.q.out | 6 +- .../results/clientpositive/subquery_notin.q.out | 2 +- .../subquery_unqualcolumnrefs.q.out | 6 +- .../table_access_keys_stats.q.out | 2 +- .../tez/dynamic_partition_pruning.q.out | 263 +++++++++--------- .../tez/dynamic_partition_pruning_2.q.out | 11 +- .../clientpositive/tez/explainuser_1.q.out | 62 +++-- .../clientpositive/tez/explainuser_4.q.out | 24 +- .../tez/filter_join_breaktask.q.out | 4 +- .../tez/hybridgrace_hashjoin_1.q.out | 21 +- .../clientpositive/tez/mapjoin_mapjoin.q.out | 2 +- .../clientpositive/tez/subquery_exists.q.out | 2 +- .../clientpositive/tez/subquery_in.q.out | 6 +- .../tez/tez_dynpart_hashjoin_1.q.out | 24 +- .../clientpositive/tez/tez_self_join.q.out | 2 +- .../tez/tez_vector_dynpart_hashjoin_1.q.out | 24 +- .../clientpositive/tez/vector_decimal_udf.q.out | 6 +- .../clientpositive/tez/vector_if_expr.q.out | 2 +- 
.../tez/vector_leftsemi_mapjoin.q.out | 12 +- .../tez/vector_mapjoin_reduce.q.out | 6 +- .../tez/vectorization_short_regress.q.out | 2 +- .../clientpositive/tez/vectorized_case.q.out | 4 +- .../vectorized_dynamic_partition_pruning.q.out | 251 +++++++++-------- .../udf_case_column_pruning.q.out | 2 +- .../clientpositive/union_remove_19.q.out | 28 +- .../clientpositive/vector_decimal_udf.q.out | 6 +- .../results/clientpositive/vector_if_expr.q.out | 2 +- .../vector_leftsemi_mapjoin.q.out | 12 +- .../clientpositive/vector_mapjoin_reduce.q.out | 6 +- .../vectorization_short_regress.q.out | 2 +- .../clientpositive/vectorized_case.q.out | 4 +- 135 files changed, 2170 insertions(+), 1451 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out b/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out index d32b02c..27446b4 100644 --- a/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out +++ b/hbase-handler/src/test/results/positive/hbase_ppd_key_range.q.out @@ -440,7 +440,7 @@ STAGE PLANS: alias: hbase_pushdown Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (CASE WHEN ((key < '90')) THEN (2) ELSE (4) END > 3) (type: boolean) + predicate: (key >= '90') (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: string), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/hbase-handler/src/test/results/positive/hbase_pushdown.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/hbase_pushdown.q.out b/hbase-handler/src/test/results/positive/hbase_pushdown.q.out index b362f57..d5661be 100644 --- a/hbase-handler/src/test/results/positive/hbase_pushdown.q.out +++ b/hbase-handler/src/test/results/positive/hbase_pushdown.q.out @@ -312,7 +312,7 @@ STAGE PLANS: alias: hbase_pushdown Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (CASE WHEN ((key = 90)) THEN (2) ELSE (4) END > 3) (type: boolean) + predicate: (key <> 90) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: key (type: int), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java index 4828d70..3d664c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java @@ -25,11 +25,11 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.NodeUtils.Function; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.mapred.OutputCollector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableMultimap; import 
com.google.common.collect.Multimap; http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index ea200db..e02d277 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -17,8 +17,8 @@ package org.apache.hadoop.hive.ql.optimizer; import java.io.Serializable; import java.util.ArrayList; -import java.util.BitSet; import java.util.Arrays; +import java.util.BitSet; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -29,8 +29,6 @@ import java.util.Set; import java.util.Stack; import org.apache.commons.lang.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; @@ -56,6 +54,7 @@ import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; @@ -72,10 +71,6 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; @@ -88,19 +83,20 @@ import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; -import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import 
org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; @@ -492,9 +488,9 @@ public final class ConstantPropagateProcFactory { return; } // If both sides are constants, there is nothing to propagate - ExprNodeColumnDesc c = getColumnExpr(lOperand); + ExprNodeColumnDesc c = ExprNodeDescUtils.getColumnExpr(lOperand); if (null == c) { - c = getColumnExpr(rOperand); + c = ExprNodeDescUtils.getColumnExpr(rOperand); } if (null == c) { // we need a column expression on other side. @@ -527,13 +523,6 @@ public final class ConstantPropagateProcFactory { } } - private static ExprNodeColumnDesc getColumnExpr(ExprNodeDesc expr) { - while (FunctionRegistry.isOpCast(expr)) { - expr = expr.getChildren().get(0); - } - return (expr instanceof ExprNodeColumnDesc) ? (ExprNodeColumnDesc)expr : null; - } - private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc> newExprs, Operator<? extends Serializable> op) throws UDFArgumentException { if (udf instanceof GenericUDFOPEqual) { @@ -623,9 +612,9 @@ public final class ConstantPropagateProcFactory { // Try to fold (key <op> 86) and (key is not null) to (key <op> 86) // where <op> can be "=", ">=", "<=", ">", "<". // Note: (key <> 86) and (key is not null) cannot be folded - ExprNodeColumnDesc colDesc = getColumnExpr(childExpr.getChildren().get(0)); + ExprNodeColumnDesc colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(0)); if (null == colDesc) { - colDesc = getColumnExpr(childExpr.getChildren().get(1)); + colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(1)); } if (colDesc != null) { compareExprs.add(colDesc); http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index 60240bd..26fcc45 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -26,8 +26,6 @@ import java.util.List; import java.util.Map; import java.util.Stack; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.FilterOperator; @@ -59,12 +57,15 @@ import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.SelectDesc; 
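// Behavior of ExprNodeDescUtils.getColumnExpr, which the ConstantPropagateProcFactory hunk
// above now delegates to (illustration only, not patch text; assumes the moved helper keeps
// the removed private method's semantics): it walks through casts and returns the underlying
// column reference, or null when the expression is not a (possibly cast) column.
//
//   getColumnExpr(CAST(key AS double))  ->  ExprNodeColumnDesc("key")
//   getColumnExpr(key)                  ->  ExprNodeColumnDesc("key")
//   getColumnExpr(key + 1)              ->  null
//
// The extractColName helper removed from DynamicPartitionPruningOptimization below is
// centralized in ExprNodeDescUtils in the same way.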
+import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This optimization looks for expressions of the kind "x IN (RS[n])". If such @@ -77,7 +78,7 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor { static final private Logger LOG = LoggerFactory.getLogger(DynamicPartitionPruningOptimization.class .getName()); - public static class DynamicPartitionPrunerProc implements NodeProcessor { + private static class DynamicPartitionPrunerProc implements NodeProcessor { /** * process simply remembers all the dynamic partition pruning expressions @@ -130,27 +131,6 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor { } } - private String extractColName(ExprNodeDesc root) { - if (root instanceof ExprNodeColumnDesc) { - return ((ExprNodeColumnDesc) root).getColumn(); - } else { - if (root.getChildren() == null) { - return null; - } - - String column = null; - for (ExprNodeDesc d: root.getChildren()) { - String candidate = extractColName(d); - if (column != null && candidate != null) { - return null; - } else if (candidate != null) { - column = candidate; - } - } - return column; - } - } - @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { @@ -191,16 +171,16 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor { // collect the dynamic pruning conditions removerContext.dynLists.clear(); - walkExprTree(desc.getPredicate(), removerContext); + collectDynamicPruningConditions(desc.getPredicate(), removerContext); for (DynamicListContext ctx : removerContext) { - String column = extractColName(ctx.parent); + String column = ExprNodeDescUtils.extractColName(ctx.parent); if (ts != null && column != null) { Table table = ts.getConf().getTableMetadata(); if (table != null && table.isPartitionKey(column)) { - String columnType = table.getPartColByName(column).getType(); + String columnType = table.getPartColByName(column).getType(); String alias = ts.getConf().getAlias(); PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts); if (LOG.isDebugEnabled()) { @@ -212,6 +192,8 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor { } } } + // If partKey is a constant, we can check whether the partitions + // have been already filtered if (plist == null || plist.getPartitions().size() != 0) { LOG.info("Dynamic partitioning: " + table.getCompleteName() + "." 
+ column); generateEventOperatorPlan(ctx, parseContext, ts, column, columnType); @@ -253,7 +235,7 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor { // collect the dynamic pruning conditions removerContext.dynLists.clear(); - walkExprTree(ts.getConf().getFilterExpr(), removerContext); + collectDynamicPruningConditions(ts.getConf().getFilterExpr(), removerContext); for (DynamicListContext ctx : removerContext) { // remove the condition by replacing it with "true" @@ -345,7 +327,7 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor { } } - private Map<Node, Object> walkExprTree(ExprNodeDesc pred, NodeProcessorCtx ctx) + private Map<Node, Object> collectDynamicPruningConditions(ExprNodeDesc pred, NodeProcessorCtx ctx) throws SemanticException { // create a walker which walks the tree in a DFS manner while maintaining http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index c06b8fc..f56cd96 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc; import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer; import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication; @@ -102,16 +103,15 @@ public class Optimizer { transformations.add(new PredicatePushDown()); } else if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) && pctx.getContext().isCboSucceeded()) { - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - transformations.add(new ConstantPropagate()); - } transformations.add(new SyntheticJoinPredicate()); transformations.add(new SimplePredicatePushDown()); + transformations.add(new RedundantDynamicPruningConditionsRemoval()); } - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - // We run constant propagation twice because after predicate pushdown, filter expressions - // are combined and may become eligible for reduction (like is not null filter). + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) && + !pctx.getContext().isCboSucceeded()) { + // We run constant propagation twice because after predicate pushdown, filter expressions + // are combined and may become eligible for reduction (like is not null filter). transformations.add(new ConstantPropagate()); } @@ -129,10 +129,13 @@ public class Optimizer { /* Add list bucketing pruner. */ transformations.add(new ListBucketingPruner()); } - if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - // PartitionPruner may create more folding opportunities, run ConstantPropagate again. 
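// Condensed restatement of the scheduling that the Optimizer.java hunk around this point
// establishes (hypothetical helpers, not patch text): with CBO-optimized plans, the
// Hive-side ConstantPropagate pass after predicate pushdown is skipped, while the pass
// after the partition pruner still runs.
static boolean runConstantPropagateAfterPPD(boolean constPropEnabled, boolean cboSucceeded) {
  return constPropEnabled && !cboSucceeded;
}
static boolean runConstantPropagateAfterPartitionPruner(boolean ppdEnabled,
    boolean constPropEnabled, boolean cboSucceeded) {
  return (ppdEnabled && constPropEnabled) || (constPropEnabled && cboSucceeded);
}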
- transformations.add(new ConstantPropagate()); - } + } + if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) + && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) || + (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) + && pctx.getContext().isCboSucceeded())) { + // PartitionPruner may create more folding opportunities, run ConstantPropagate again. + transformations.add(new ConstantPropagate()); } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY) || http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RedundantDynamicPruningConditionsRemoval.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RedundantDynamicPruningConditionsRemoval.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RedundantDynamicPruningConditionsRemoval.java new file mode 100644 index 0000000..d9ce017 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/RedundantDynamicPruningConditionsRemoval.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Lists; + + +/** + * Takes a Filter operator on top of a TableScan and removes dynamic pruning conditions + * if static partition pruning has been triggered already. + * + * This transformation is executed when CBO is on and hence we can guarantee that the filtering + * conditions on the partition columns will be immediately on top of the TableScan operator. + * + */ +public class RedundantDynamicPruningConditionsRemoval extends Transform { + + private static final Logger LOG = LoggerFactory.getLogger(RedundantDynamicPruningConditionsRemoval.class); + + + /** + * Transform the query tree. + * + * @param pctx the current parse context + */ + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" + + FilterOperator.getOperatorName() + "%"), new FilterTransformer()); + + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); + GraphWalker ogw = new DefaultGraphWalker(disp); + + List<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + return pctx; + } + + private class FilterTransformer implements NodeProcessor { + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) + throws SemanticException { + FilterOperator filter = (FilterOperator) nd; + FilterDesc desc = filter.getConf(); + + TableScanOperator ts = (TableScanOperator) stack.get(stack.size() - 2); + + // collect + CollectContext removalContext = new CollectContext(); + collect(desc.getPredicate(), removalContext); + CollectContext tsRemovalContext = new CollectContext(); + collect(ts.getConf().getFilterExpr(), tsRemovalContext); + + for (Pair<ExprNodeDesc,ExprNodeDesc> pair : removalContext.dynamicListNodes) { + ExprNodeDesc child = pair.left; + ExprNodeDesc columnDesc = child.getChildren().get(0); + assert child.getChildren().get(1) instanceof ExprNodeDynamicListDesc; + ExprNodeDesc parent = pair.right; + + String column = ExprNodeDescUtils.extractColName(columnDesc); + if (column != null) { + Table table = ts.getConf().getTableMetadata(); + + boolean generate = false; + if (table != null && table.isPartitionKey(column)) { + generate = true; + for (ExprNodeDesc filterColumnDesc : removalContext.comparatorNodes) { + if (columnDesc.isSame(filterColumnDesc)) { + generate = false; + break; + } + } + } + if (!generate) { + // We can safely remove the condition by replacing it with "true" + ExprNodeDesc constNode = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE); + if (parent == null) { + desc.setPredicate(constNode); + } else { + int i = parent.getChildren().indexOf(child); + parent.getChildren().remove(i); + parent.getChildren().add(i, constNode); + } + // We remove it from the TS too if it was pushed + for (Pair<ExprNodeDesc,ExprNodeDesc> tsPair : tsRemovalContext.dynamicListNodes) { + ExprNodeDesc tsChild = tsPair.left; + ExprNodeDesc tsParent = tsPair.right; + if (tsChild.isSame(child)) { + if (tsParent == null) { + ts.getConf().setFilterExpr(null); + } else { + int i = tsParent.getChildren().indexOf(tsChild); + if (i != -1) { + tsParent.getChildren().remove(i); + tsParent.getChildren().add(i, constNode); + } + } + break; + } + } + if (LOG.isInfoEnabled()) { + LOG.info("Dynamic pruning condition removed: " + child); + } + } + } + } + return false; + } + } + + private static void collect(ExprNodeDesc pred, CollectContext listContext) { + collect(null, pred, listContext); + } + + private static void collect(ExprNodeDesc parent, ExprNodeDesc child, CollectContext listContext) { + if (child instanceof ExprNodeGenericFuncDesc && + ((ExprNodeGenericFuncDesc)child).getGenericUDF() instanceof GenericUDFIn) { + if (child.getChildren().get(1) instanceof ExprNodeDynamicListDesc) { + listContext.dynamicListNodes.add(new Pair<ExprNodeDesc,ExprNodeDesc>(child, parent)); + } + return; + } + if (child instanceof ExprNodeGenericFuncDesc && + ((ExprNodeGenericFuncDesc)child).getGenericUDF() instanceof GenericUDFBaseCompare && + child.getChildren().size() == 2) { + ExprNodeDesc leftCol = child.getChildren().get(0); + ExprNodeDesc rightCol = child.getChildren().get(1); + ExprNodeColumnDesc leftColDesc = ExprNodeDescUtils.getColumnExpr(leftCol); + if (leftColDesc != null) { + boolean rightConstant = false; + if (rightCol instanceof ExprNodeConstantDesc) { + rightConstant = true; + } else if (rightCol instanceof ExprNodeGenericFuncDesc) { + ExprNodeDesc foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)rightCol); + rightConstant = foldedExpr != null; + } + if (rightConstant) { + listContext.comparatorNodes.add(leftColDesc); + } + } else { + ExprNodeColumnDesc rightColDesc = ExprNodeDescUtils.getColumnExpr(rightCol); + if (rightColDesc != null) { + boolean 
leftConstant = false; + if (leftCol instanceof ExprNodeConstantDesc) { + leftConstant = true; + } else if (leftCol instanceof ExprNodeGenericFuncDesc) { + ExprNodeDesc foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)leftCol); + leftConstant = foldedExpr != null; + } + if (leftConstant) { + listContext.comparatorNodes.add(rightColDesc); + } + } + } + return; + } + if (FunctionRegistry.isOpAnd(child)) { + for (ExprNodeDesc newChild : child.getChildren()) { + collect(child, newChild, listContext); + } + } + } + + private class CollectContext implements NodeProcessorCtx { + + private final List<Pair<ExprNodeDesc,ExprNodeDesc>> dynamicListNodes; + private final List<ExprNodeDesc> comparatorNodes; + + public CollectContext() { + this.dynamicListNodes = Lists.<Pair<ExprNodeDesc,ExprNodeDesc>>newArrayList(); + this.comparatorNodes = Lists.<ExprNodeDesc>newArrayList(); + } + + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 3105c07..ad12091 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -26,8 +26,6 @@ import java.util.Map; import java.util.Set; import java.util.Stack; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; @@ -58,6 +56,7 @@ import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -66,6 +65,8 @@ import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -283,7 +284,7 @@ public class SortedDynPartitionOptimizer extends Transform { } if (op.getColumnExprMap() != null) { for(String dpCol : dpCols) { - ExprNodeDesc end = op.getColumnExprMap().get(dpCol); + ExprNodeDesc end = findConstantExprOrigin(dpCol, op); if (!(end instanceof ExprNodeConstantDesc)) { return false; } @@ -294,6 +295,37 @@ public class SortedDynPartitionOptimizer extends Transform { return true; } + // Find the constant origin of a certain column if it is originated from a constant + // Otherwise, it returns the expression that originated the column + private ExprNodeDesc findConstantExprOrigin(String dpCol, Operator<? 
extends OperatorDesc> op) { + ExprNodeDesc expr = op.getColumnExprMap().get(dpCol); + ExprNodeDesc foldedExpr; + // If it is a function, we try to fold it + if (expr instanceof ExprNodeGenericFuncDesc) { + foldedExpr = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc)expr); + if (foldedExpr == null) { + foldedExpr = expr; + } + } else { + foldedExpr = expr; + } + // If it is a column reference, we will try to resolve it + if (foldedExpr instanceof ExprNodeColumnDesc) { + Operator<? extends OperatorDesc> originOp = null; + for(Operator<? extends OperatorDesc> parentOp : op.getParentOperators()) { + if (parentOp.getColumnExprMap() != null) { + originOp = parentOp; + break; + } + } + if (originOp != null) { + return findConstantExprOrigin(((ExprNodeColumnDesc)foldedExpr).getColumn(), originOp); + } + } + // Otherwise, we return the expression + return foldedExpr; + } + // Remove RS and SEL introduced by enforce bucketing/sorting config // Convert PARENT -> RS -> SEL -> FS to PARENT -> FS private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) { http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java new file mode 100644 index 0000000..3f6dd6a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.Pair; +import org.apache.calcite.util.Util; + +import com.google.common.collect.ImmutableList; + + +public class HiveRexUtil { + + /** + * Simplifies a boolean expression. 
+ * + * <p>In particular:</p> + * <ul> + * <li>{@code simplify(x = 1 AND y = 2 AND NOT x = 1)} + * returns {@code y = 2}</li> + * <li>{@code simplify(x = 1 AND FALSE)} + * returns {@code FALSE}</li> + * </ul> + */ + public static RexNode simplify(RexBuilder rexBuilder, RexNode e) { + switch (e.getKind()) { + case AND: + return simplifyAnd(rexBuilder, (RexCall) e); + case OR: + return simplifyOr(rexBuilder, (RexCall) e); + case CASE: + return simplifyCase(rexBuilder, (RexCall) e); + case IS_NULL: + return ((RexCall) e).getOperands().get(0).getType().isNullable() + ? e : rexBuilder.makeLiteral(false); + case IS_NOT_NULL: + return ((RexCall) e).getOperands().get(0).getType().isNullable() + ? e : rexBuilder.makeLiteral(true); + default: + return e; + } + } + + private static RexNode simplifyCase(RexBuilder rexBuilder, RexCall call) { + final List<RexNode> operands = call.getOperands(); + final List<RexNode> newOperands = new ArrayList<>(); + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(i); + if (RexUtil.isCasePredicate(call, i)) { + if (operand.isAlwaysTrue()) { + // Predicate is always TRUE. Make value the ELSE and quit. + newOperands.add(operands.get(i + 1)); + break; + } + if (operand.isAlwaysFalse()) { + // Predicate is always FALSE. Skip predicate and value. + ++i; + continue; + } + } + newOperands.add(operand); + } + assert newOperands.size() % 2 == 1; + switch (newOperands.size()) { + case 1: + return newOperands.get(0); + } + trueFalse: + if (call.getType().getSqlTypeName() == SqlTypeName.BOOLEAN) { + // Optimize CASE where every branch returns constant true or constant + // false: + // CASE + // WHEN p1 THEN TRUE + // WHEN p2 THEN FALSE + // WHEN p3 THEN TRUE + // ELSE FALSE + // END + final List<Pair<RexNode, RexNode>> pairs = + casePairs(rexBuilder, newOperands); + for (Ord<Pair<RexNode, RexNode>> pair : Ord.zip(pairs)) { + if (!pair.e.getValue().isAlwaysTrue() + && !pair.e.getValue().isAlwaysFalse()) { + break trueFalse; + } + } + final List<RexNode> terms = new ArrayList<>(); + final List<RexNode> notTerms = new ArrayList<>(); + for (Ord<Pair<RexNode, RexNode>> pair : Ord.zip(pairs)) { + if (pair.e.getValue().isAlwaysTrue()) { + terms.add(RexUtil.andNot(rexBuilder, pair.e.getKey(), notTerms)); + } else { + notTerms.add(pair.e.getKey()); + } + } + return RexUtil.composeDisjunction(rexBuilder, terms, false); + } + if (newOperands.equals(operands)) { + return call; + } + return call.clone(call.getType(), newOperands); + } + + /** Given "CASE WHEN p1 THEN v1 ... ELSE e END" + * returns [(p1, v1), ..., (true, e)]. 
*/ + private static List<Pair<RexNode, RexNode>> casePairs(RexBuilder rexBuilder, + List<RexNode> operands) { + final ImmutableList.Builder<Pair<RexNode, RexNode>> builder = + ImmutableList.builder(); + for (int i = 0; i < operands.size() - 1; i += 2) { + builder.add(Pair.of(operands.get(i), operands.get(i + 1))); + } + builder.add( + Pair.of((RexNode) rexBuilder.makeLiteral(true), Util.last(operands))); + return builder.build(); + } + + public static RexNode simplifyAnd(RexBuilder rexBuilder, RexCall e) { + final List<RexNode> terms = RelOptUtil.conjunctions(e); + final List<RexNode> notTerms = new ArrayList<>(); + final List<RexNode> nullOperands = new ArrayList<>(); + final List<RexNode> notNullOperands = new ArrayList<>(); + final List<RexNode> comparedOperands = new ArrayList<>(); + for (int i = 0; i < terms.size(); i++) { + final RexNode term = terms.get(i); + switch (term.getKind()) { + case NOT: + notTerms.add( + ((RexCall) term).getOperands().get(0)); + terms.remove(i); + --i; + break; + case LITERAL: + if (!RexLiteral.booleanValue(term)) { + return term; // false + } else { + terms.remove(i); + --i; + } + break; + case EQUALS: + case NOT_EQUALS: + case LESS_THAN: + case GREATER_THAN: + case LESS_THAN_OR_EQUAL: + case GREATER_THAN_OR_EQUAL: + RexCall call = (RexCall) term; + RexNode left = call.getOperands().get(0); + comparedOperands.add(left); + // if it is a cast, we include the inner reference + if (left.getKind() == SqlKind.CAST) { + RexCall leftCast = (RexCall) left; + comparedOperands.add(leftCast.getOperands().get(0)); + } + RexNode right = call.getOperands().get(1); + comparedOperands.add(right); + // if it is a cast, we include the inner reference + if (right.getKind() == SqlKind.CAST) { + RexCall rightCast = (RexCall) right; + comparedOperands.add(rightCast.getOperands().get(0)); + } + break; + case IN: + comparedOperands.add(((RexCall) term).operands.get(0)); + break; + case BETWEEN: + comparedOperands.add(((RexCall) term).operands.get(1)); + break; + case IS_NOT_NULL: + notNullOperands.add( + ((RexCall) term).getOperands().get(0)); + terms.remove(i); + --i; + break; + case IS_NULL: + nullOperands.add( + ((RexCall) term).getOperands().get(0)); + } + } + if (terms.isEmpty() && notTerms.isEmpty() && notNullOperands.isEmpty()) { + return rexBuilder.makeLiteral(true); + } + // If one column should be null and is in a comparison predicate, + // it is not satisfiable. + // Example. IS NULL(x) AND x < 5 - not satisfiable + if (!Collections.disjoint(nullOperands, comparedOperands)) { + return rexBuilder.makeLiteral(false); + } + // Remove not necessary IS NOT NULL expressions. + // + // Example. IS NOT NULL(x) AND x < 5 : x < 5 + for (RexNode operand : notNullOperands) { + if (!comparedOperands.contains(operand)) { + terms.add( + rexBuilder.makeCall( + SqlStdOperatorTable.IS_NOT_NULL, operand)); + } + } + // If one of the not-disjunctions is a disjunction that is wholly + // contained in the disjunctions list, the expression is not + // satisfiable. + // + // Example #1. x AND y AND z AND NOT (x AND y) - not satisfiable + // Example #2. x AND y AND NOT (x AND y) - not satisfiable + // Example #3. x AND y AND NOT (x AND y AND z) - may be satisfiable + for (RexNode notDisjunction : notTerms) { + final List<RexNode> terms2 = RelOptUtil.conjunctions(notDisjunction); + if (terms.containsAll(terms2)) { + return rexBuilder.makeLiteral(false); + } + } + // Add the NOT disjunctions back in. 
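// Three-valued-logic check behind the "IS NOT NULL(x) AND x < 5 : x < 5" removal above
// (plain-Java illustration, not patch text): a Filter keeps a row only when its predicate
// is TRUE, and "x < 5" already evaluates to NULL (hence not TRUE) whenever x is NULL.
static boolean keeps(Boolean predicate) {
  return Boolean.TRUE.equals(predicate);        // SQL filter semantics: only TRUE passes
}
static Boolean lessThanFive(Integer x) {
  return x == null ? null : (x < 5);            // SQL: x < 5
}
static Boolean notNullAndLessThanFive(Integer x) {
  return x == null ? Boolean.FALSE : (x < 5);   // SQL: x IS NOT NULL AND x < 5
}
// keeps(lessThanFive(x)) == keeps(notNullAndLessThanFive(x)) for x in {null, 3, 7}.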
+ for (RexNode notDisjunction : notTerms) { + terms.add( + rexBuilder.makeCall( + SqlStdOperatorTable.NOT, notDisjunction)); + } + return RexUtil.composeConjunction(rexBuilder, terms, false); + } + + /** Simplifies OR(x, x) into x, and similar. */ + public static RexNode simplifyOr(RexBuilder rexBuilder, RexCall call) { + assert call.getKind() == SqlKind.OR; + final List<RexNode> terms = RelOptUtil.disjunctions(call); + for (int i = 0; i < terms.size(); i++) { + final RexNode term = terms.get(i); + switch (term.getKind()) { + case LITERAL: + if (RexLiteral.booleanValue(term)) { + return term; // true + } else { + terms.remove(i); + --i; + } + } + } + return RexUtil.composeDisjunction(rexBuilder, terms, false); + } + + + +} http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java index d736f21..8f15ec7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsRule.java @@ -16,6 +16,15 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; + import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; @@ -25,6 +34,7 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.rules.ValuesReduceRule; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; @@ -50,22 +60,14 @@ import org.apache.calcite.util.Pair; import org.apache.calcite.util.Stacks; import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Pattern; - /** * Collection of planner rules that apply various simplifying transformations on * RexNode trees. 
Currently, there are two transformations: @@ -123,24 +125,23 @@ public abstract class HiveReduceExpressionsRule extends RelOptRule { @Override public void onMatch(RelOptRuleCall call) { final Filter filter = call.rel(0); - final List<RexNode> expList = - Lists.newArrayList(filter.getCondition()); - RexNode newConditionExp; - boolean reduced; + final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + + RexNode newConditionExp = HiveRexUtil.simplify(rexBuilder, filter.getCondition()); + final List<RexNode> expList = Lists.newArrayList(newConditionExp); + boolean reduced = false; final RelOptPredicateList predicates = RelMetadataQuery.instance().getPulledUpPredicates(filter.getInput()); if (reduceExpressions(filter, expList, predicates)) { assert expList.size() == 1; newConditionExp = expList.get(0); reduced = true; - } else { - // No reduction, but let's still test the original - // predicate to see if it was already a constant, - // in which case we don't need any runtime decision - // about filtering. - newConditionExp = filter.getCondition(); - reduced = false; } + + // Even if no reduction, let's still test the original + // predicate to see if it was already a constant, + // in which case we don't need any runtime decision + // about filtering. if (newConditionExp.isAlwaysTrue()) { call.transformTo( filter.getInput()); @@ -151,64 +152,17 @@ public abstract class HiveReduceExpressionsRule extends RelOptRule { // call.transformTo(call.builder().values(filter.getRowType()).build()); return; } - else if (reduced) { + else if (reduced + || !newConditionExp.toString().equals(filter.getCondition().toString())) { call.transformTo(call.builder(). - push(filter.getInput()).filter(expList.get(0)).build()); + push(filter.getInput()).filter(newConditionExp).build()); } else { - if (newConditionExp instanceof RexCall) { - RexCall rexCall = (RexCall) newConditionExp; - boolean reverse = - rexCall.getOperator() - == SqlStdOperatorTable.NOT; - if (reverse) { - rexCall = (RexCall) rexCall.getOperands().get(0); - } - reduceNotNullableFilter(call, filter, rexCall, reverse); - } return; } // New plan is absolutely better than old plan. call.getPlanner().setImportance(filter, 0.0); } - - private void reduceNotNullableFilter( - RelOptRuleCall call, - Filter filter, - RexCall rexCall, - boolean reverse) { - // If the expression is a IS [NOT] NULL on a non-nullable - // column, then we can either remove the filter or replace - // it with an Empty. 
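// Condensed view of the rewritten onMatch above (descriptive comments only, not patch text):
//   1. pre-simplify:  newConditionExp = HiveRexUtil.simplify(rexBuilder, filter.getCondition());
//   2. constant-fold: reduceExpressions(filter, expList, predicates) may reduce it further;
//   3. transform:     an always-true condition drops the Filter entirely; any reduction, or a
//      condition that now differs textually from the original, yields a new Filter on
//      newConditionExp.
// The reduceNotNullableFilter special case removed here is covered by HiveRexUtil.simplify,
// which folds IS [NOT] NULL over non-nullable inputs to constants.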
- boolean alwaysTrue; - switch (rexCall.getKind()) { - case IS_NULL: - case IS_UNKNOWN: - alwaysTrue = false; - break; - case IS_NOT_NULL: - alwaysTrue = true; - break; - default: - return; - } - if (reverse) { - alwaysTrue = !alwaysTrue; - } - RexNode operand = rexCall.getOperands().get(0); - if (operand instanceof RexInputRef) { - RexInputRef inputRef = (RexInputRef) operand; - if (!inputRef.getType().isNullable()) { - if (alwaysTrue) { - call.transformTo(filter.getInput()); - } else { - // TODO: support LogicalValues - // call.transformTo(call.builder().values(filter.getRowType()).build()); - return; - } - } - } - } } /** http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index b42e78f..739faa9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -41,8 +41,6 @@ import org.apache.calcite.rex.RexWindow; import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.SqlTypeUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -70,6 +68,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableSet; @@ -151,71 +151,108 @@ public class ExprNodeConverter extends RexVisitorImpl<ExprNodeDesc> { return gfDesc; } - /** - * TODO: 1. 
Handle NULL - */ @Override public ExprNodeDesc visitLiteral(RexLiteral literal) { RelDataType lType = literal.getType(); - switch (literal.getType().getSqlTypeName()) { - case BOOLEAN: - return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral - .booleanValue(literal))); - case TINYINT: - return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal - .getValue3()).byteValue())); - case SMALLINT: - return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, - Short.valueOf(((Number) literal.getValue3()).shortValue())); - case INTEGER: - return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, - Integer.valueOf(((Number) literal.getValue3()).intValue())); - case BIGINT: - return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal - .getValue3()).longValue())); - case FLOAT: - case REAL: - return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, - Float.valueOf(((Number) literal.getValue3()).floatValue())); - case DOUBLE: - return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, - Double.valueOf(((Number) literal.getValue3()).doubleValue())); - case DATE: - return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, - new Date(((Calendar)literal.getValue()).getTimeInMillis())); - case TIME: - case TIMESTAMP: { - Object value = literal.getValue3(); - if (value instanceof Long) { - value = new Timestamp((Long)value); + if (RexLiteral.value(literal) == null) { + switch (literal.getType().getSqlTypeName()) { + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, null); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, null); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, null); + case INTEGER: + return new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, null); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, null); + case FLOAT: + case REAL: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, null); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, null); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, null); + case TIME: + case TIMESTAMP: + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, null); + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, null); + case DECIMAL: + return new ExprNodeConstantDesc( + TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), lType.getScale()), null); + case VARCHAR: + case CHAR: + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null); + case INTERVAL_YEAR_MONTH: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, null); + case INTERVAL_DAY_TIME: + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, null); + case OTHER: + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, null); + } + } else { + switch (literal.getType().getSqlTypeName()) { + case BOOLEAN: + return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.valueOf(RexLiteral + .booleanValue(literal))); + case TINYINT: + return new ExprNodeConstantDesc(TypeInfoFactory.byteTypeInfo, Byte.valueOf(((Number) literal + .getValue3()).byteValue())); + case SMALLINT: + return new ExprNodeConstantDesc(TypeInfoFactory.shortTypeInfo, + Short.valueOf(((Number) literal.getValue3()).shortValue())); + case INTEGER: + return new 
ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, + Integer.valueOf(((Number) literal.getValue3()).intValue())); + case BIGINT: + return new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(((Number) literal + .getValue3()).longValue())); + case FLOAT: + case REAL: + return new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, + Float.valueOf(((Number) literal.getValue3()).floatValue())); + case DOUBLE: + return new ExprNodeConstantDesc(TypeInfoFactory.doubleTypeInfo, + Double.valueOf(((Number) literal.getValue3()).doubleValue())); + case DATE: + return new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, + new Date(((Calendar)literal.getValue()).getTimeInMillis())); + case TIME: + case TIMESTAMP: { + Object value = literal.getValue3(); + if (value instanceof Long) { + value = new Timestamp((Long)value); + } + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value); + } + case BINARY: + return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); + case DECIMAL: + return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), + lType.getScale()), HiveDecimal.create((BigDecimal)literal.getValue3())); + case VARCHAR: + case CHAR: { + return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, literal.getValue3()); + } + case INTERVAL_YEAR_MONTH: { + BigDecimal monthsBd = (BigDecimal) literal.getValue(); + return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, + new HiveIntervalYearMonth(monthsBd.intValue())); + } + case INTERVAL_DAY_TIME: { + BigDecimal millisBd = (BigDecimal) literal.getValue(); + // Calcite literal is in millis, we need to convert to seconds + BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000)); + return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, + new HiveIntervalDayTime(secsBd)); + } + case OTHER: + default: + return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); } - return new ExprNodeConstantDesc(TypeInfoFactory.timestampTypeInfo, value); - } - case BINARY: - return new ExprNodeConstantDesc(TypeInfoFactory.binaryTypeInfo, literal.getValue3()); - case DECIMAL: - return new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(lType.getPrecision(), - lType.getScale()), HiveDecimal.create((BigDecimal)literal.getValue3())); - case VARCHAR: - case CHAR: { - return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, literal.getValue3()); - } - case INTERVAL_YEAR_MONTH: { - BigDecimal monthsBd = (BigDecimal) literal.getValue(); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalYearMonthTypeInfo, - new HiveIntervalYearMonth(monthsBd.intValue())); - } - case INTERVAL_DAY_TIME: { - BigDecimal millisBd = (BigDecimal) literal.getValue(); - // Calcite literal is in millis, we need to convert to seconds - BigDecimal secsBd = millisBd.divide(BigDecimal.valueOf(1000)); - return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, - new HiveIntervalDayTime(secsBd)); - } - case OTHER: - default: - return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); } } http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java 
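// Note on the ExprNodeConverter hunk above (descriptive comments only, not patch text):
// visitLiteral now resolves the removed "TODO: 1. Handle NULL" by testing
// RexLiteral.value(literal) == null first and returning a typed null constant, e.g. a null
// DECIMAL(p, s) literal becomes
//   new ExprNodeConstantDesc(TypeInfoFactory.getDecimalTypeInfo(p, s), null)
// instead of reaching the non-null conversion paths.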
http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 122546f..9c929af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -25,7 +25,6 @@ import java.util.Calendar;
 import java.util.Date;
 import java.util.GregorianCalendar;
 import java.util.LinkedHashMap;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
@@ -40,8 +39,10 @@ import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlCollation;
 import org.apache.calcite.sql.SqlIntervalQualifier;
+import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.fun.SqlCastFunction;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.util.ConversionUtil;
@@ -67,15 +68,17 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
@@ -88,6 +91,7 @@ import com.google.common.collect.ImmutableList.Builder;
 import com.google.common.collect.ImmutableMap;
 
 public class RexNodeConverter {
+
   private static class InputCtx {
     private final RelDataType calciteInpDataType;
     private final ImmutableMap<String, Integer> hiveNameToPosMap;
@@ -157,7 +161,7 @@ public class RexNodeConverter {
     ExprNodeDesc tmpExprNode;
     RexNode tmpRN;
 
-    List<RexNode> childRexNodeLst = new LinkedList<RexNode>();
+    List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
     Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType> builder();
 
     // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
@@ -169,6 +173,7 @@ public class RexNodeConverter {
         && (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
             ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
     boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;
+    boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase;
 
     if (isNumeric) {
       tgtDT = func.getTypeInfo();
@@ -178,9 +183,14 @@ public class RexNodeConverter {
     } else if (isCompare && (func.getChildren().size() == 2)) {
       tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0)
           .getTypeInfo(), func.getChildren().get(1).getTypeInfo());
+    } else if (isWhenCase) {
+      // If it is a CASE or WHEN, we need to check that children do not contain stateful functions
+      // as they are not allowed
+      if (checkForStatefulFunctions(func.getChildren())) {
+        throw new SemanticException("Stateful expressions cannot be used inside of CASE");
+      }
     }
-
     for (ExprNodeDesc childExpr : func.getChildren()) {
       tmpExprNode = childExpr;
       if (tgtDT != null
@@ -196,8 +206,8 @@ public class RexNodeConverter {
         } else {
           throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
         }
-      }
+
       argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
       tmpRN = convert(tmpExprNode);
       childRexNodeLst.add(tmpRN);
@@ -213,6 +223,10 @@ public class RexNodeConverter {
       retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
       SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator(func.getFuncText(),
           func.getGenericUDF(), argTypeBldr.build(), retType);
+      // If it is a case operator, we need to rewrite it
+      if (calciteOp.getKind() == SqlKind.CASE) {
+        childRexNodeLst = rewriteCaseChildren(func, childRexNodeLst);
+      }
       expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
     } else {
       retType = expr.getType();
@@ -272,6 +286,70 @@ public class RexNodeConverter {
     return castExpr;
   }
 
+  /*
+   * Hive syntax allows to define CASE expressions in two ways:
+   * - CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END (translated into the
+   *   "case" function, ELSE clause is optional)
+   * - CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END (translated into the
+   *   "when" function, ELSE clause is optional)
+   * However, Calcite only has the equivalent to the "when" Hive function. Thus,
+   * we need to transform the "case" function into "when". Further, ELSE clause is
+   * not optional in Calcite.
+   *
+   * Example. Consider the following statement:
+   * CASE x + y WHEN 1 THEN 'fee' WHEN 2 THEN 'fie' END
+   * It will be transformed into:
+   * CASE WHEN =(x + y, 1) THEN 'fee' WHEN =(x + y, 2) THEN 'fie' ELSE null END
+   */
+  private List<RexNode> rewriteCaseChildren(ExprNodeGenericFuncDesc func, List<RexNode> childRexNodeLst)
+      throws SemanticException {
+    List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
+    if (FunctionRegistry.getNormalizedFunctionName(func.getFuncText()).equals("case")) {
+      RexNode firstPred = childRexNodeLst.get(0);
+      int length = childRexNodeLst.size() % 2 == 1 ?
+          childRexNodeLst.size() : childRexNodeLst.size() - 1;
+      for (int i = 1; i < length; i++) {
+        if (i % 2 == 1) {
+          // We rewrite it
+          newChildRexNodeLst.add(
+              cluster.getRexBuilder().makeCall(
+                  SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i)));
+        } else {
+          newChildRexNodeLst.add(childRexNodeLst.get(i));
+        }
+      }
+      // The else clause
+      if (length != childRexNodeLst.size()) {
+        newChildRexNodeLst.add(childRexNodeLst.get(childRexNodeLst.size()-1));
+      }
+    } else {
+      newChildRexNodeLst.addAll(childRexNodeLst);
+    }
+    // Calcite always needs the else clause to be defined explicitly
+    if (newChildRexNodeLst.size() % 2 == 0) {
+      newChildRexNodeLst.add(cluster.getRexBuilder().makeNullLiteral(
+          newChildRexNodeLst.get(newChildRexNodeLst.size()-1).getType().getSqlTypeName()));
+    }
+    return newChildRexNodeLst;
+  }
+
+  private static boolean checkForStatefulFunctions(List<ExprNodeDesc> list) {
+    for (ExprNodeDesc node : list) {
+      if (node instanceof ExprNodeGenericFuncDesc) {
+        GenericUDF nodeUDF = ((ExprNodeGenericFuncDesc) node).getGenericUDF();
+        // Stateful?
+        if (FunctionRegistry.isStateful(nodeUDF)) {
+          return true;
+        }
+        if (node.getChildren() != null && !node.getChildren().isEmpty() &&
+            checkForStatefulFunctions(node.getChildren())) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
   private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException {
     InputCtx ctxLookingFor = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index b4c6e05..0b76bff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -204,6 +204,7 @@ public class SqlFunctionConverter {
       case BETWEEN:
       case ROW:
       case IS_NOT_NULL:
+      case CASE:
         node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
         node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text));
         break;
@@ -323,10 +324,13 @@ public class SqlFunctionConverter {
           hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
       registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
       registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
+      registerDuplicateFunction("!=", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
       registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in"));
       registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between"));
       registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct"));
       registerFunction("isnotnull", SqlStdOperatorTable.IS_NOT_NULL, hToken(HiveParser.TOK_ISNOTNULL, "TOK_ISNOTNULL"));
+      registerFunction("when", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when"));
+      registerDuplicateFunction("case", SqlStdOperatorTable.CASE, hToken(HiveParser.Identifier, "when"));
     }
 
     private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
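To make the rewrite in rewriteCaseChildren easier to follow, here is a simplified, Calcite-free sketch of the same pairing logic, with plain strings standing in for the RexNode children (the class and method names are illustrative only, not part of the patch):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class CaseRewriteSketch {

      static List<String> rewriteCase(List<String> children) {
        List<String> out = new ArrayList<String>();
        String firstPred = children.get(0);
        // An even child count means the last child is an explicit ELSE branch.
        int length = children.size() % 2 == 1 ? children.size() : children.size() - 1;
        for (int i = 1; i < length; i++) {
          if (i % 2 == 1) {
            out.add("=(" + firstPred + ", " + children.get(i) + ")"); // WHEN value -> equality
          } else {
            out.add(children.get(i)); // THEN result, kept as-is
          }
        }
        if (length != children.size()) {
          out.add(children.get(children.size() - 1)); // keep the ELSE branch
        }
        if (out.size() % 2 == 0) {
          out.add("null"); // Calcite requires an explicit ELSE
        }
        return out;
      }

      public static void main(String[] args) {
        // CASE x + y WHEN 1 THEN 'fee' WHEN 2 THEN 'fie' END
        List<String> children = Arrays.asList("x + y", "1", "'fee'", "2", "'fie'");
        // Prints: [=(x + y, 1), 'fee', =(x + y, 2), 'fie', null]
        System.out.println(rewriteCase(children));
      }
    }

The SqlFunctionConverter changes above are the other half of the round trip: both Hive functions "case" and "when" now map onto Calcite's single CASE operator, and the new CASE arm in the token-to-AST switch lets that operator translate back into a TOK_FUNCTION node.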
http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e7dc08c..809affb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1130,13 +1130,13 @@ public class CalcitePlanner extends SemanticAnalyzer {
           HiveFilterJoinRule.FILTER_ON_JOIN,
           new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY,
               Aggregate.class),
           new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY),
+          HiveReduceExpressionsRule.PROJECT_INSTANCE,
+          HiveReduceExpressionsRule.FILTER_INSTANCE,
+          HiveReduceExpressionsRule.JOIN_INSTANCE,
           HiveJoinAddNotNullRule.INSTANCE_JOIN,
           HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN,
           HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN,
-          HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN,
-          HiveReduceExpressionsRule.PROJECT_INSTANCE,
-          HiveReduceExpressionsRule.FILTER_INSTANCE,
-          HiveReduceExpressionsRule.JOIN_INSTANCE);
+          HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
       perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
         "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");

http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 0223038..223718e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -28,7 +28,6 @@ import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
@@ -606,4 +605,32 @@ public class ExprNodeDescUtils {
     // constant or null expr, just return
     return source;
   }
+
+  public static String extractColName(ExprNodeDesc root) {
+    if (root instanceof ExprNodeColumnDesc) {
+      return ((ExprNodeColumnDesc) root).getColumn();
+    } else {
+      if (root.getChildren() == null) {
+        return null;
+      }
+
+      String column = null;
+      for (ExprNodeDesc d: root.getChildren()) {
+        String candidate = extractColName(d);
+        if (column != null && candidate != null) {
+          return null;
+        } else if (candidate != null) {
+          column = candidate;
+        }
+      }
+      return column;
+    }
+  }
+
+  public static ExprNodeColumnDesc getColumnExpr(ExprNodeDesc expr) {
+    while (FunctionRegistry.isOpCast(expr)) {
+      expr = expr.getChildren().get(0);
+    }
+    return (expr instanceof ExprNodeColumnDesc) ?
+        (ExprNodeColumnDesc)expr : null;
+  }
 }
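The new ExprNodeDescUtils helpers are small but easy to misread: extractColName returns a column name only when the expression tree contains exactly one column reference, and getColumnExpr peels conversion casts off before testing for a column. A toy mirror of the extractColName recursion, with a minimal Node type standing in for ExprNodeDesc (all names here are illustrative):

    import java.util.Arrays;
    import java.util.List;

    public class ExtractColNameSketch {

      static class Node {
        final String column;       // non-null for a column reference
        final List<Node> children; // non-null for a function node
        Node(String column, List<Node> children) {
          this.column = column;
          this.children = children;
        }
      }

      // Returns the column name iff the tree holds exactly one column
      // reference; a second reference (even to the same column) yields null.
      static String extractColName(Node root) {
        if (root.column != null) {
          return root.column;
        }
        if (root.children == null) {
          return null; // constant leaf
        }
        String column = null;
        for (Node child : root.children) {
          String candidate = extractColName(child);
          if (column != null && candidate != null) {
            return null;
          } else if (candidate != null) {
            column = candidate;
          }
        }
        return column;
      }

      public static void main(String[] args) {
        Node col = new Node("ss_store_sk", null);
        Node constant = new Node(null, null);
        // ss_store_sk > 10 -> one column, so it is extracted
        System.out.println(extractColName(new Node(null, Arrays.asList(col, constant))));
        // ss_store_sk = s_store_sk -> two columns, so null
        System.out.println(extractColName(new Node(null,
            Arrays.asList(col, new Node("s_store_sk", null)))));
      }
    }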
http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
index 8ef9984..ff95252 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
@@ -406,7 +406,7 @@ STAGE PLANS:
             alias: s
             Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL
             Filter Operator
-              predicate: (s_store_sk is not null and (s_company_id > 0)) (type: boolean)
+              predicate: ((s_company_id > 0) and s_store_sk is not null) (type: boolean)
               Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL
               Select Operator
                 expressions: s_store_sk (type: int)
@@ -421,7 +421,7 @@ STAGE PLANS:
             alias: ss
             Statistics: Num rows: 1000 Data size: 7668 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (ss_store_sk is not null and (ss_quantity > 10)) (type: boolean)
+              predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean)
               Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: ss_store_sk (type: int)
@@ -471,7 +471,7 @@ STAGE PLANS:
             alias: s
             Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (s_store_sk is not null and (s_floor_space > 0)) (type: boolean)
+              predicate: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean)
               Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: s_store_sk (type: int)
@@ -551,7 +551,7 @@ STAGE PLANS:
             alias: ss
             Statistics: Num rows: 1000 Data size: 7668 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (ss_store_sk is not null and (ss_quantity > 10)) (type: boolean)
+              predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean)
               Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: ss_store_sk (type: int)
@@ -788,7 +788,7 @@ STAGE PLANS:
             alias: s
             Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (s_store_sk is not null and (s_floor_space > 1000)) (type: boolean)
+              predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean)
               Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: s_store_sk (type: int)
@@ -859,7 +859,7 @@ STAGE PLANS:
             alias: ss
             Statistics: Num rows: 1000 Data size: 7668 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
-              predicate: (ss_store_sk is not null and (ss_quantity > 10)) (type: boolean)
+              predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean)
               Statistics: Num rows: 321 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: ss_store_sk (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/test/results/clientpositive/auto_join32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out
index e5249a2..2ebec97 100644
--- a/ql/src/test/results/clientpositive/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/auto_join32.q.out
@@ -412,7 +412,7 @@ STAGE PLANS:
             alias: s
             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Filter Operator
-              predicate: (name is not null and (p = 'bar')) (type: boolean)
+              predicate: ((p = 'bar') and name is not null) (type: boolean)
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Select Operator
                 expressions: name (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
index 9393aff..85df1d6 100644
--- a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out
@@ -704,7 +704,7 @@ STAGE PLANS:
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean)
+              predicate: ((UDFToDouble(key) > 100.0) and value is not null) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
@@ -889,7 +889,7 @@ STAGE PLANS:
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean)
+              predicate: ((UDFToDouble(key) > 100.0) and value is not null) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
@@ -937,7 +937,7 @@ STAGE PLANS:
             alias: a
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (value is not null and (UDFToDouble(key) > 100.0)) (type: boolean)
+              predicate: ((UDFToDouble(key) > 100.0) and value is not null) (type: boolean)
               Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/a6d9bf76/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out
index b5dfce3..fa73acf 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_7.q.out
@@ -85,7 +85,7 @@ STAGE PLANS:
             alias: a
             Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (key is not null and ((key = 0) or (key = 5))) (type: boolean)
+              predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean)
               Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
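The .q.out churn in this section follows one pattern: the conjuncts inside Filter predicates come out in a different order, for example (key is not null and ((key = 0) or (key = 5))) becomes (((key = 0) or (key = 5)) and key is not null), consistent with expression reduction now running earlier in the pre-join program (see the CalcitePlanner change above). Row-count and size estimates are unchanged, as expected for a pure reordering: AND is commutative even under SQL's three-valued logic. A quick illustrative check (the class and helper are hypothetical, with null standing in for UNKNOWN):

    import java.util.Objects;

    public class AndCommutes {

      // SQL-style three-valued AND over TRUE / FALSE / UNKNOWN (null).
      static Boolean and3(Boolean a, Boolean b) {
        if (Boolean.FALSE.equals(a) || Boolean.FALSE.equals(b)) {
          return Boolean.FALSE; // FALSE dominates, regardless of order
        }
        if (a == null || b == null) {
          return null;          // otherwise UNKNOWN taints the result
        }
        return Boolean.TRUE;
      }

      public static void main(String[] args) {
        Boolean[] vals = { Boolean.TRUE, Boolean.FALSE, null };
        for (Boolean a : vals) {
          for (Boolean b : vals) {
            if (!Objects.equals(and3(a, b), and3(b, a))) {
              throw new AssertionError("AND not commutative for " + a + ", " + b);
            }
          }
        }
        System.out.println("Swapping conjuncts cannot change filter results.");
      }
    }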