HIVE-20432: Rewrite BETWEEN to IN for integer types for stats estimation (Vineet Garg, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e74eb354 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e74eb354 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e74eb354 Branch: refs/heads/master Commit: e74eb354f5066e595e64e06cf6fe168e48e9ae77 Parents: 530e040 Author: Vineet Garg <[email protected]> Authored: Fri Sep 7 10:22:05 2018 -0700 Committer: Vineet Garg <[email protected]> Committed: Fri Sep 7 10:22:05 2018 -0700 ---------------------------------------------------------------------- packaging/src/main/assembly/src.xml | 1 + .../stats/annotation/StatsRulesProcFactory.java | 78 ++++- .../hadoop/hive/ql/plan/ExprNodeDescUtils.java | 17 +- ql/src/test/results/clientpositive/join46.q.out | 16 +- .../llap/orc_predicate_pushdown.q.out | 80 ++--- .../llap/parquet_predicate_pushdown.q.out | 80 ++--- .../clientpositive/llap/subquery_select.q.out | 4 +- .../test/results/clientpositive/mapjoin46.q.out | 12 +- .../clientpositive/perf/spark/query10.q.out | 304 +++++++++-------- .../clientpositive/perf/spark/query22.q.out | 116 +++---- .../clientpositive/perf/spark/query25.q.out | 204 ++++++------ .../clientpositive/perf/spark/query28.q.out | 72 ++--- .../clientpositive/perf/spark/query29.q.out | 96 +++--- .../clientpositive/perf/spark/query34.q.out | 12 +- .../clientpositive/perf/spark/query38.q.out | 315 +++++++++--------- .../clientpositive/perf/spark/query51.q.out | 280 ++++++++-------- .../clientpositive/perf/spark/query59.q.out | 12 +- .../clientpositive/perf/spark/query65.q.out | 274 ++++++++-------- .../clientpositive/perf/spark/query67.q.out | 6 +- .../clientpositive/perf/spark/query68.q.out | 6 +- .../clientpositive/perf/spark/query69.q.out | 306 +++++++++--------- .../clientpositive/perf/spark/query70.q.out | 12 +- .../clientpositive/perf/spark/query73.q.out | 12 +- .../clientpositive/perf/spark/query79.q.out | 4 +- .../clientpositive/perf/spark/query86.q.out | 102 +++--- 
.../clientpositive/perf/spark/query87.q.out | 275 +++++++--------- .../clientpositive/perf/spark/query9.q.out | 60 ++-- .../clientpositive/perf/spark/query90.q.out | 322 ++++++++++--------- .../clientpositive/perf/spark/query97.q.out | 194 ++++++----- .../clientpositive/perf/spark/query99.q.out | 174 +++++----- .../clientpositive/perf/tez/query10.q.out | 10 +- .../clientpositive/perf/tez/query14.q.out | 30 +- .../clientpositive/perf/tez/query22.q.out | 4 +- .../clientpositive/perf/tez/query25.q.out | 10 +- .../clientpositive/perf/tez/query28.q.out | 48 +-- .../clientpositive/perf/tez/query29.q.out | 4 +- .../clientpositive/perf/tez/query34.q.out | 8 +- .../clientpositive/perf/tez/query38.q.out | 10 +- .../clientpositive/perf/tez/query51.q.out | 8 +- .../clientpositive/perf/tez/query59.q.out | 8 +- .../clientpositive/perf/tez/query65.q.out | 8 +- .../clientpositive/perf/tez/query67.q.out | 6 +- .../clientpositive/perf/tez/query68.q.out | 6 +- .../clientpositive/perf/tez/query69.q.out | 10 +- .../clientpositive/perf/tez/query70.q.out | 8 +- .../clientpositive/perf/tez/query73.q.out | 8 +- .../clientpositive/perf/tez/query79.q.out | 6 +- .../clientpositive/perf/tez/query86.q.out | 6 +- .../clientpositive/perf/tez/query87.q.out | 10 +- .../clientpositive/perf/tez/query9.q.out | 60 ++-- .../clientpositive/perf/tez/query90.q.out | 12 +- .../clientpositive/perf/tez/query97.q.out | 8 +- .../clientpositive/perf/tez/query99.q.out | 6 +- .../results/clientpositive/smb_mapjoin_46.q.out | 8 +- .../results/clientpositive/smb_mapjoin_47.q.out | 20 +- .../spark_dynamic_partition_pruning_3.q.out | 10 +- .../clientpositive/spark/subquery_select.q.out | 6 +- 57 files changed, 1898 insertions(+), 1886 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/packaging/src/main/assembly/src.xml ---------------------------------------------------------------------- diff --git 
a/packaging/src/main/assembly/src.xml b/packaging/src/main/assembly/src.xml index 6b6d9db..021b23e 100644 --- a/packaging/src/main/assembly/src.xml +++ b/packaging/src/main/assembly/src.xml @@ -70,6 +70,7 @@ <include>dev-support/**/*</include> <include>druid-handler/**/*</include> <include>jdbc-handler/**/*</include> + <include>kafka-handler/**/*</include> <include>find-bugs/**/*</include> <include>hbase-handler/**/*</include> <include>hcatalog/**/*</include> http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 9cd6812..3788ef9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -113,6 +113,7 @@ import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import scala.math.Numeric; public class StatsRulesProcFactory { @@ -687,6 +688,69 @@ public class StatsRulesProcFactory { } } + private ExprNodeDesc rewriteBetweenToIn(final ExprNodeDesc comparisonExpression, final ExprNodeDesc leftExpression, + final ExprNodeDesc rightExpression, boolean invert) { + // difference in BETWEEN values could be millions, since for each value a new ExprNodeConstantDesc is created + // we should limit the rewrite to avoid taking too much memory + final int REWRITE_THRESHOLD = 100; + + boolean shouldRewrite = false; + long startVal = 0, endVal = 0; + + if (ExprNodeDescUtils.isIntegerType(comparisonExpression) + && leftExpression instanceof ExprNodeConstantDesc + && rightExpression 
instanceof ExprNodeConstantDesc) { + Object leftValue = ((ExprNodeConstantDesc) leftExpression).getValue(); + Object rightValue = ((ExprNodeConstantDesc) rightExpression).getValue(); + + startVal = ((Number)leftValue).longValue(); + endVal = ((Number)rightValue).longValue(); + + // BETWEEN could be (10,0) + if(startVal > endVal) { + Long tmpVal = startVal; + startVal = endVal; + endVal = tmpVal; + } + + if ((endVal - startVal) <= REWRITE_THRESHOLD) { + shouldRewrite = true; + } + } + + if (shouldRewrite) { + + List<ExprNodeDesc> constantExprs = new ArrayList<>(); + constantExprs.add(comparisonExpression); + //generate list of contiguous integers + for (long i = startVal; i <= endVal; i++) { + ExprNodeConstantDesc constExpr = new ExprNodeConstantDesc(comparisonExpression.getTypeInfo(), i); + constantExprs.add(constExpr); + } + ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFIn(), constantExprs); + return newExpression; + } else { + // We transform the BETWEEN clause to AND clause (with NOT on top in invert is true). 
+ // This is more straightforward, as the evaluateExpression method will deal with + // generating the final row count relying on the basic comparator evaluation methods + final ExprNodeDesc leftComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), + Lists.newArrayList(comparisonExpression, leftExpression)); + final ExprNodeDesc rightComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPEqualOrLessThan(), + Lists.newArrayList(comparisonExpression, rightExpression)); + ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPAnd(), + Lists.newArrayList(leftComparator, rightComparator)); + if (invert) { + newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPNot(), Lists.newArrayList(newExpression)); + } + return newExpression; + } + } + private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator<?> op) throws SemanticException { final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; @@ -702,19 +766,7 @@ public class StatsRulesProcFactory { return currNumRows; } - // We transform the BETWEEN clause to AND clause (with NOT on top in invert is true). 
- // This is more straightforward, as the evaluateExpression method will deal with - // generating the final row count relying on the basic comparator evaluation methods - final ExprNodeDesc leftComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - new GenericUDFOPEqualOrGreaterThan(), Lists.newArrayList(comparisonExpression, leftExpression)); - final ExprNodeDesc rightComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - new GenericUDFOPEqualOrLessThan(), Lists.newArrayList(comparisonExpression, rightExpression)); - ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - new GenericUDFOPAnd(), Lists.newArrayList(leftComparator, rightComparator)); - if (invert) { - newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - new GenericUDFOPNot(), Lists.newArrayList(newExpression)); - } + ExprNodeDesc newExpression = rewriteBetweenToIn(comparisonExpression, leftExpression, rightExpression, invert); return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, currNumRows); } http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 5275561..66c1025 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -938,7 +938,7 @@ public class ExprNodeDescUtils { return true; } - // Given an expression this method figures out of the type for the expression belongs to string group + // Given an expression this method figures out if the type for the expression belongs to string group // e.g. 
(String, Char, Varchar etc) public static boolean isStringType(ExprNodeDesc expr) { TypeInfo typeInfo = expr.getTypeInfo(); @@ -951,4 +951,19 @@ public class ExprNodeDescUtils { } return false; } + // Given an expression this method figures out if the type for the expression is integer + // i.e. INT, SHORT, TINYINT (BYTE) or LONG + public static boolean isIntegerType(ExprNodeDesc expr) { + TypeInfo typeInfo = expr.getTypeInfo(); + if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) { + PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + if(primitiveCategory == PrimitiveCategory.INT + || primitiveCategory == PrimitiveCategory.SHORT + || primitiveCategory == PrimitiveCategory.BYTE + || primitiveCategory == PrimitiveCategory.LONG){ + return true; + } + } + return false; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/join46.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join46.q.out b/ql/src/test/results/clientpositive/join46.q.out index c192194..dbaddcc 100644 --- a/ql/src/test/results/clientpositive/join46.q.out +++ b/ql/src/test/results/clientpositive/join46.q.out @@ -170,16 +170,16 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + 
Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator @@ -269,14 +269,14 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator @@ -289,10 +289,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index af7b0ba..3379268 100644 --- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -592,15 +592,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((not (t) IN (-1, -2, -3)) and (s like 'bob%') and s is not null and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -609,10 +609,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -658,15 +658,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((not (t) IN (-1, -2, -3)) and (s like 'bob%') and s is not null and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -675,10 +675,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -789,15 +789,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: 
COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10.0D) and (not (s like '%car%')) and (s like '%son') and (t > 0Y) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -808,13 +808,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -868,15 +868,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10.0D) and (not (s like '%car%')) and (s like '%son') and (t > 0Y) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -887,13 +887,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 
Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1013,15 +1013,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101Y) and (t > 0Y) and (t > 10Y) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1032,14 +1032,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1048,13 +1048,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1113,15 +1113,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101Y) and (t > 0Y) and (t > 10Y) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1132,14 +1132,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1148,13 +1148,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index 84c5b2f..aca1115 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -530,15 +530,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((not (t) IN (-1, -2, -3)) and (s like 'bob%') and s is not null and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP 
IO: all inputs (cache only) Reducer 2 @@ -547,10 +547,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -596,15 +596,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((not (t) IN (-1, -2, -3)) and (s like 'bob%') and s is not null and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs (cache only) Reducer 2 @@ -613,10 +613,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 56 Data size: 5656 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 56 Data size: 5656 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -727,15 +727,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10.0D) and (not (s like '%car%')) and (s like '%son') and (t > 0Y) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -746,13 +746,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -806,15 +806,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10.0D) and (not (s like '%car%')) and (s like '%son') and (t > 0Y) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -825,13 +825,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: 
_col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -996,15 +996,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101Y) and (t > 0Y) and (t > 10Y) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1015,14 +1015,14 @@ STAGE PLANS: Select Operator 
expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1031,13 +1031,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1096,15 +1096,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: 
COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101Y) and (t > 0Y) and (t > 10Y) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1115,14 +1115,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 
(type: smallint), _col2 (type: double) Reducer 3 @@ -1131,13 +1131,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/llap/subquery_select.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 83a94f8..e035219 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -3790,9 +3790,9 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_size BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 
100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/mapjoin46.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/mapjoin46.q.out b/ql/src/test/results/clientpositive/mapjoin46.q.out index a4f067f..febb6c7 100644 --- a/ql/src/test/results/clientpositive/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/mapjoin46.q.out @@ -166,11 +166,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator filter predicates: 0 {_col0 BETWEEN 100 AND 102} @@ -273,11 +273,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator filter predicates: 0 {_col0 BETWEEN 100 AND 102} @@ -306,10 +306,10 @@ STAGE PLANS: 0 1 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out index b7faa9a..e85519d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out @@ -115,96 +115,21 @@ select limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data 
size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 13 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 16 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (GROUP, 169) - Reducer 15 <- 
Map 14 (GROUP, 336) + Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) + Reducer 14 <- Reducer 13 (GROUP, 169) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 306), Map 19 (PARTITION-LEVEL SORT, 306) + Reducer 18 <- Reducer 17 (GROUP, 336) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855) Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) Reducer 5 <- Reducer 4 (GROUP, 1009) Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### @@ -232,6 +157,25 @@ STAGE PLANS: Map 11 Map Operator Tree: TableScan + alias: date_dim + filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 12 + Map Operator Tree: + TableScan alias: web_sales filterExpr: (ws_bill_customer_sk is not null and 
ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE @@ -242,30 +186,33 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 13 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 15 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 14 + Map 16 Map Operator Tree: TableScan alias: catalog_sales @@ -278,29 +225,32 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_ship_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 16 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 19 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE 
+ Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 7 Map Operator Tree: TableScan @@ -353,34 +303,58 @@ STAGE PLANS: expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 10 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 12 + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 
0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -398,7 +372,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 15 + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query22.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query22.q.out b/ql/src/test/results/clientpositive/perf/spark/query22.q.out index d0970bb..2079262 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query22.q.out @@ -50,27 +50,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 6 + Map 7 Map Operator Tree: TableScan alias: warehouse @@ -94,9 +74,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 
(PARTITION-LEVEL SORT, 11), Map 7 (PARTITION-LEVEL SORT, 11) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 11), Reducer 2 (PARTITION-LEVEL SORT, 11) + Reducer 4 <- Reducer 3 (GROUP, 31) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -112,36 +93,33 @@ STAGE PLANS: expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 5 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 6 - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 7 + Map 6 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 
1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: item @@ -162,6 +140,34 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + input vertices: + 1 Map 7 + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -183,7 +189,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: bigint) Statistics: Num rows: 250121525 Data size: 3951879695 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint), _col6 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -202,7 +208,7 @@ STAGE PLANS: sort order: +++++ Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator
