HIVE-17465 Statistics: Drill-down filters don't reduce row-counts progressively (Vineet Garg, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/20b84523 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/20b84523 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/20b84523 Branch: refs/heads/master Commit: 20b84523d7ad277bd711d4dbe081cc5d2315a9dd Parents: 527d13b Author: Vineet Garg <vg...@apache.com> Authored: Mon Sep 18 13:20:31 2017 -0700 Committer: Vineet Garg <vg...@apache.com> Committed: Mon Sep 18 13:20:31 2017 -0700 ---------------------------------------------------------------------- data/files/filterCard.txt | 101 +++++++++++++++++++ .../stats/annotation/StatsRulesProcFactory.java | 89 ++++++++-------- .../annotate_stats_deep_filters.q.out | 4 +- .../annotate_stats_join_pkfk.q.out | 48 ++++----- .../clientpositive/llap/explainuser_1.q.out | 54 +++++----- .../llap/vector_decimal_cast.q.out | 4 +- .../clientpositive/llap/vector_if_expr.q.out | 10 +- .../clientpositive/llap/vectorization_0.q.out | 12 +-- .../clientpositive/llap/vectorization_10.q.out | 6 +- .../clientpositive/llap/vectorization_17.q.out | 10 +- .../clientpositive/llap/vectorization_7.q.out | 16 +-- .../llap/vectorization_short_regress.q.out | 8 +- .../llap/vectorized_nested_mapjoin.q.out | 12 +-- .../results/clientpositive/perf/query23.q.out | 2 +- .../clientpositive/spark/vectorization_10.q.out | 6 +- .../clientpositive/spark/vectorization_12.q.out | 18 ++-- .../clientpositive/spark/vectorization_17.q.out | 10 +- .../clientpositive/vectorization_10.q.out | 6 +- .../clientpositive/vectorization_12.q.out | 18 ++-- .../clientpositive/vectorization_17.q.out | 10 +- .../clientpositive/vectorization_7.q.out | 16 +-- 21 files changed, 280 insertions(+), 180 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/data/files/filterCard.txt ---------------------------------------------------------------------- diff --git a/data/files/filterCard.txt b/data/files/filterCard.txt new file mode 100644 index 0000000..6246bfb --- /dev/null +++ b/data/files/filterCard.txt @@ -0,0 +1,101 @@ +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 458e8b3..a4f60ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -282,7 +282,7 @@ public class StatsRulesProcFactory { // evaluate filter expression and update statistics long newNumRows = evaluateExpression(parentStats, pred, aspCtx, - neededCols, fop, 0); + neededCols, fop, parentStats.getNumRows()); Statistics st = parentStats.clone(); if (satisfyPrecondition(parentStats)) { @@ -320,13 +320,13 @@ public class StatsRulesProcFactory { protected long evaluateExpression(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List<String> neededCols, - Operator<?> op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { + Operator<?> op, long currNumRows) throws CloneNotSupportedException, SemanticException { long newNumRows = 0; Statistics andStats = null; - if (stats.getNumRows() <= 1 || stats.getDataSize() <= 0) { + if (currNumRows <= 1 || stats.getDataSize() <= 0) { if (LOG.isDebugEnabled()) { - LOG.debug("Estimating row count for " + pred + " Original num rows: " + stats.getNumRows() + + LOG.debug("Estimating row count for " + pred + " Original num rows: " + currNumRows + " Original data size: " + stats.getDataSize() + " New num rows: 1"); } return 1; @@ -342,41 +342,40 @@ public class StatsRulesProcFactory { aspCtx.setAndExprStats(andStats); // evaluate children + long evaluatedRowCount = currNumRows; for (ExprNodeDesc child : genFunc.getChildren()) { - newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child, + evaluatedRowCount = evaluateChildExpr(aspCtx.getAndExprStats(), child, aspCtx, neededCols, op, evaluatedRowCount); - if (satisfyPrecondition(aspCtx.getAndExprStats())) { - updateStats(aspCtx.getAndExprStats(), newNumRows, true, op); - } else { - updateStats(aspCtx.getAndExprStats(), newNumRows, false, op); - } + } + newNumRows = evaluatedRowCount; + if (satisfyPrecondition(aspCtx.getAndExprStats())) { + updateStats(aspCtx.getAndExprStats(), newNumRows, true, op); + } else { + updateStats(aspCtx.getAndExprStats(), newNumRows, false, op); } } else if (udf instanceof GenericUDFOPOr) { // for OR condition independently compute and update stats. for (ExprNodeDesc child : genFunc.getChildren()) { - // early exit if OR evaluation yields more rows than input rows - if (evaluatedRowCount >= stats.getNumRows()) { - evaluatedRowCount = stats.getNumRows(); - } else { newNumRows = StatsUtils.safeAdd( - evaluateChildExpr(stats, child, aspCtx, neededCols, op, evaluatedRowCount), + evaluateChildExpr(stats, child, aspCtx, neededCols, op, currNumRows), newNumRows); - evaluatedRowCount = newNumRows; - } + } + if(newNumRows > currNumRows) { + newNumRows = currNumRows; } } else if (udf instanceof GenericUDFIn) { // for IN clause - newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, op); + newNumRows = evaluateInExpr(stats, pred, currNumRows, aspCtx, neededCols, op); } else if (udf instanceof GenericUDFBetween) { // for BETWEEN clause - newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, op); + newNumRows = evaluateBetweenExpr(stats, pred, currNumRows, aspCtx, neededCols, op); } else if (udf instanceof GenericUDFOPNot) { - newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, op); + newNumRows = evaluateNotExpr(stats, pred, currNumRows, aspCtx, neededCols, op); } else if (udf instanceof GenericUDFOPNotNull) { - return evaluateNotNullExpr(stats, genFunc); + return evaluateNotNullExpr(stats, genFunc, currNumRows); } else { // single predicate condition - newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op, evaluatedRowCount); + newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op,currNumRows); } } else if (pred instanceof ExprNodeColumnDesc) { @@ -415,10 +414,10 @@ public class StatsRulesProcFactory { return newNumRows; } - private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, + private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator<?> op) throws SemanticException { - long numRows = stats.getNumRows(); + long numRows = currNumRows; ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; @@ -505,7 +504,7 @@ public class StatsRulesProcFactory { return Math.round( (double) numRows * factor * inFactor); } - private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, + private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator<?> op) throws SemanticException, CloneNotSupportedException { final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; final boolean invert = Boolean.TRUE.equals( @@ -517,7 +516,7 @@ public class StatsRulesProcFactory { // Short circuit and return the current number of rows if this is a // synthetic predicate with dynamic values if (leftExpression instanceof ExprNodeDynamicValueDesc) { - return stats.getNumRows(); + return currNumRows; } // We transform the BETWEEN clause to AND clause (with NOT on top in invert is true). @@ -534,14 +533,14 @@ public class StatsRulesProcFactory { new GenericUDFOPNot(), Lists.newArrayList(newExpression)); } - return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, 0); + return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, currNumRows); } - private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, + private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator<?> op) throws CloneNotSupportedException, SemanticException { - long numRows = stats.getNumRows(); + long numRows = currNumRows; // if the evaluate yields true then pass all rows else pass 0 rows if (pred instanceof ExprNodeGenericFuncDesc) { @@ -553,7 +552,7 @@ public class StatsRulesProcFactory { long newNumRows = 0; for (ExprNodeDesc child : genFunc.getChildren()) { newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols, - op, 0); + op, numRows); } return numRows - newNumRows; } else if (leaf instanceof ExprNodeConstantDesc) { @@ -585,9 +584,9 @@ public class StatsRulesProcFactory { return numRows / 2; } - private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred) { + private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred, long currNumRows) { - long numRows = stats.getNumRows(); + long numRows = currNumRows; if (pred instanceof ExprNodeGenericFuncDesc) { @@ -609,9 +608,9 @@ public class StatsRulesProcFactory { return numRows / 2; } - private long evaluateNotNullExpr(Statistics parentStats, ExprNodeGenericFuncDesc pred) { + private long evaluateNotNullExpr(Statistics parentStats, ExprNodeGenericFuncDesc pred, long currNumRows) { long noOfNulls = getMaxNulls(parentStats, pred); - long parentCardinality = parentStats.getNumRows(); + long parentCardinality = currNumRows; long newPredCardinality = parentCardinality; if (parentCardinality > noOfNulls) { @@ -661,8 +660,8 @@ public class StatsRulesProcFactory { return maxNoNulls; } - private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFunc) { - long numRows = stats.getNumRows(); + private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFunc, long currNumRows) { + long numRows = currNumRows; GenericUDF udf = genFunc.getGenericUDF(); ExprNodeColumnDesc columnDesc; @@ -838,9 +837,9 @@ public class StatsRulesProcFactory { private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx, List<String> neededCols, - Operator<?> op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { + Operator<?> op, long currNumRows) throws CloneNotSupportedException, SemanticException { - long numRows = stats.getNumRows(); + long numRows = currNumRows; if (child instanceof ExprNodeGenericFuncDesc) { @@ -917,15 +916,15 @@ public class StatsRulesProcFactory { || udf instanceof GenericUDFOPEqualOrLessThan || udf instanceof GenericUDFOPGreaterThan || udf instanceof GenericUDFOPLessThan) { - return evaluateComparator(stats, genFunc); + return evaluateComparator(stats, genFunc, numRows); } else if (udf instanceof GenericUDFOPNotNull) { - return evaluateNotNullExpr(stats, genFunc); + return evaluateNotNullExpr(stats, genFunc, numRows); } else if (udf instanceof GenericUDFOPNull) { - return evaluateColEqualsNullExpr(stats, genFunc); + return evaluateColEqualsNullExpr(stats, genFunc, numRows); } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr || udf instanceof GenericUDFIn || udf instanceof GenericUDFBetween || udf instanceof GenericUDFOPNot) { - return evaluateExpression(stats, genFunc, aspCtx, neededCols, op, evaluatedRowCount); + return evaluateExpression(stats, genFunc, aspCtx, neededCols, op, numRows); } else if (udf instanceof GenericUDFInBloomFilter) { if (genFunc.getChildren().get(1) instanceof ExprNodeDynamicValueDesc) { // Synthetic predicates from semijoin opt should not affect stats. @@ -936,7 +935,7 @@ public class StatsRulesProcFactory { if (Boolean.FALSE.equals(((ExprNodeConstantDesc) child).getValue())) { return 0; } else { - return stats.getNumRows(); + return numRows; } } @@ -1584,7 +1583,7 @@ public class StatsRulesProcFactory { // evaluate filter expression and update statistics try { newNumRows = evaluateExpression(stats, pred, - aspCtx, jop.getSchema().getColumnNames(), jop, 0); + aspCtx, jop.getSchema().getColumnNames(), jop, stats.getNumRows()); } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } @@ -1673,7 +1672,7 @@ public class StatsRulesProcFactory { // evaluate filter expression and update statistics try { newNumRows = evaluateExpression(wcStats, pred, - aspCtx, jop.getSchema().getColumnNames(), jop, 0); + aspCtx, jop.getSchema().getColumnNames(), jop, wcStats.getNumRows()); } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out index 6e2975e..da1d03c 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -121,9 +121,9 @@ STAGE PLANS: Statistics: Num rows: 2098 Data size: 16744 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53))) (type: boolean) - Statistics: Num rows: 160 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 160 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index e04c1c6..d88819a 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -577,16 +577,16 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -595,10 +595,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -707,16 +707,16 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -725,10 +725,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1015,16 +1015,16 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE @@ -1065,14 +1065,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1102,16 +1102,16 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 7664 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ss_store_sk is not null and ss_addr_sk is not null) (type: boolean) - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_addr_sk (type: int), ss_store_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) TableScan alias: s @@ -1136,7 +1136,7 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7132 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1152,7 +1152,7 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7132 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) TableScan alias: ca @@ -1177,14 +1177,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/explainuser_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 48c02a2..03ebe37 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -388,25 +388,25 @@ Stage-0 <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=2 width=101) + Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=5 width=93) + Filter Operator [FIL_39] (rows=4 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_17] (rows=2 width=89) + Select Operator [SEL_17] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=2 width=93) + Group By Operator [GBY_16] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=2 width=93) + Group By Operator [GBY_14] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=5 width=93) + Filter Operator [FIL_40] (rows=4 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_11] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1088,25 +1088,25 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=40 width=101) + Select Operator [SEL_13] (rows=48 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_24] (rows=40 width=101) + Merge Join Operator [MERGEJOIN_24] (rows=48 width=101) Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"],residual filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or (_col6 >= 0))} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_2] (rows=8 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_21] (rows=9 width=93) + Filter Operator [FIL_21] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) + Select Operator [SEL_5] (rows=8 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=9 width=93) + Filter Operator [FIL_22] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1135,25 +1135,25 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=40 width=101) + Select Operator [SEL_13] (rows=48 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_24] (rows=40 width=101) + Merge Join Operator [MERGEJOIN_24] (rows=48 width=101) Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"],residual filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or (_col6 >= 0))} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_2] (rows=8 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_21] (rows=9 width=93) + Filter Operator [FIL_21] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) + Select Operator [SEL_5] (rows=8 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=9 width=93) + Filter Operator [FIL_22] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1416,16 +1416,16 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Select Operator [SEL_11] (rows=9 width=4) + Select Operator [SEL_11] (rows=8 width=4) Output:["_col0"] - Merge Join Operator [MERGEJOIN_17] (rows=9 width=4) + Merge Join Operator [MERGEJOIN_17] (rows=8 width=4) Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_2] (rows=8 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=9 width=93) + Filter Operator [FIL_15] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1463,20 +1463,20 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_2] (rows=8 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_25] (rows=9 width=93) + Filter Operator [FIL_25] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=4 width=85) + Group By Operator [GBY_10] (rows=3 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=9 width=85) + Select Operator [SEL_5] (rows=8 width=85) Output:["_col0"] - Filter Operator [FIL_26] (rows=9 width=93) + Filter Operator [FIL_26] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out index c45210e..8fc23c4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out @@ -29,7 +29,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 5) -> boolean, SelectColumnIsNotNull(col 2) -> boolean, SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 8) -> boolean) -> boolean predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 5112 Data size: 265564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2945 Data size: 152996 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -38,7 +38,7 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 2, 10, 8, 12, 13, 14, 15] selectExpressions: CastDoubleToDecimal(col 5) -> 12:decimal(20,10), CastLongToDecimal(col 2) -> 13:decimal(23,14), CastLongToDecimal(col 10) -> 14:decimal(5,2), CastTimestampToDecimal(col 8) -> 15:decimal(15,0) - Statistics: Num rows: 5112 Data size: 2410700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2945 Data size: 1388804 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out b/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out index 32d1001..b1e0b14 100644 --- a/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out @@ -34,7 +34,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsTrue(col 10) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: (cboolean1 and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 4587 Data size: 13704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 9052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 @@ -43,7 +43,7 @@ STAGE PLANS: native: true projectedOutputColumns: [10, 12] selectExpressions: IfExprStringScalarStringScalar(col 10, val first, val second) -> 12:String - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 566572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -51,7 +51,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 566572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -80,13 +80,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 566572 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 566572 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_0.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index db76eaa..2307843 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -30611,17 +30611,17 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30841,17 +30841,17 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out index f06c2db..1a0e846 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out @@ -73,7 +73,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 5461 Data size: 1107444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9557 Data size: 1937820 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -82,13 +82,13 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25] selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double - Statistics: Num rows: 5461 Data size: 1082056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9557 Data size: 1893568 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5461 Data size: 1082056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9557 Data size: 1893568 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out index 1c07962..a66ea36 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out @@ -76,7 +76,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 4778 Data size: 640688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 549274 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -85,7 +85,7 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17] selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double - Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -95,7 +95,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [6, 2, 8, 5, 14, 15, 13, 16, 18, 19, 21, 17] - Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -136,13 +136,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13] - Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out index 6c32ccf..9e13ea6 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out @@ -82,7 +82,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 1342196 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -91,7 +91,7 @@ STAGE PLANS: native: true projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -101,7 +101,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [] - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -142,7 +142,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -331,7 +331,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 1342196 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -340,7 +340,7 @@ STAGE PLANS: native: true projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -348,7 +348,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -377,7 +377,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 66764cf..fe4a30f 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -622,7 +622,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11346 Data size: 2856120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double) outputColumnNames: ctinyint, csmallint, cint, cbigint, cdouble @@ -630,7 +630,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 5] - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11346 Data size: 2856120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) Group By Vectorization: @@ -3206,7 +3206,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7153 Data size: 1514550 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cboolean1 (type: boolean) outputColumnNames: ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cboolean1 @@ -3214,7 +3214,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 10] - Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7153 Data size: 1514550 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) Group By Vectorization: http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index 2a95065..eb6eabe 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -40,11 +40,11 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 661228 Data size: 7913928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 585044 Data size: 7002120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: smallint), _col3 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 661228 Data size: 7913928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 585044 Data size: 7002120 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -54,7 +54,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 1452263 Data size: 11604232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1284939 Data size: 10267240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) mode: hash @@ -81,16 +81,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ctinyint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 6848 Data size: 81820 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6059 Data size: 72396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6848 Data size: 81820 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6059 Data size: 72396 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 6848 Data size: 81820 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6059 Data size: 72396 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: smallint), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/perf/query23.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/query23.q.out index dde707d..5925869 100644 --- a/ql/src/test/results/clientpositive/perf/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt http://git-wip-us.apache.org/repos/asf/hive/blob/20b84523/ql/src/test/results/clientpositive/spark/vectorization_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out index 1f95357..ed0319d 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out @@ -73,7 +73,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -82,13 +82,13 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25] selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat