HIVE-15029: Add logic to estimate stats for BETWEEN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2653db3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2653db3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2653db3 Branch: refs/heads/hive-14535 Commit: e2653db377ab7fff34563d348364fd0c92f359c6 Parents: 749e831 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Fri Oct 21 12:30:06 2016 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Tue Oct 25 07:13:09 2016 -0400 ---------------------------------------------------------------------- .../stats/annotation/StatsRulesProcFactory.java | 34 +++++- .../clientpositive/llap/explainuser_4.q.out | 78 ++++++------ .../llap/orc_predicate_pushdown.q.out | 20 +-- .../llap/parquet_predicate_pushdown.q.out | 44 +++---- .../llap/tez_dynpart_hashjoin_1.q.out | 120 +++++++++--------- .../llap/tez_vector_dynpart_hashjoin_1.q.out | 122 +++++++++---------- .../llap/vector_between_columns.q.out | 6 +- .../clientpositive/llap/vector_between_in.q.out | 34 +++--- .../results/clientpositive/perf/query12.q.out | 2 +- .../results/clientpositive/perf/query13.q.out | 24 ++-- .../results/clientpositive/perf/query20.q.out | 4 +- .../results/clientpositive/perf/query21.q.out | 8 +- .../results/clientpositive/perf/query22.q.out | 4 +- .../results/clientpositive/perf/query25.q.out | 8 +- .../results/clientpositive/perf/query28.q.out | 36 +++--- .../results/clientpositive/perf/query29.q.out | 4 +- .../results/clientpositive/perf/query32.q.out | 8 +- .../results/clientpositive/perf/query34.q.out | 12 +- .../results/clientpositive/perf/query40.q.out | 8 +- .../results/clientpositive/perf/query48.q.out | 18 +-- .../results/clientpositive/perf/query51.q.out | 8 +- .../results/clientpositive/perf/query54.q.out | 4 +- .../results/clientpositive/perf/query58.q.out | 12 +- .../results/clientpositive/perf/query64.q.out | 8 +- .../results/clientpositive/perf/query65.q.out | 8 +- 
.../results/clientpositive/perf/query66.q.out | 8 +- .../results/clientpositive/perf/query67.q.out | 4 +- .../results/clientpositive/perf/query68.q.out | 4 +- .../results/clientpositive/perf/query70.q.out | 8 +- .../results/clientpositive/perf/query73.q.out | 12 +- .../results/clientpositive/perf/query79.q.out | 4 +- .../results/clientpositive/perf/query80.q.out | 12 +- .../results/clientpositive/perf/query82.q.out | 14 +-- .../results/clientpositive/perf/query85.q.out | 42 +++---- .../results/clientpositive/perf/query87.q.out | 12 +- .../results/clientpositive/perf/query90.q.out | 16 +-- .../results/clientpositive/perf/query94.q.out | 4 +- .../results/clientpositive/perf/query95.q.out | 4 +- .../results/clientpositive/perf/query97.q.out | 8 +- .../results/clientpositive/perf/query98.q.out | 4 +- .../spark/vector_between_in.q.out | 34 +++--- .../clientpositive/tez/explainanalyze_4.q.out | 80 ++++++------ .../results/clientpositive/udf_between.q.out | 8 +- .../clientpositive/vector_between_columns.q.out | 6 +- 44 files changed, 475 insertions(+), 443 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index ab07fb6..aa1e509 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; @@ -89,6 +90,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -354,6 +356,9 @@ public class StatsRulesProcFactory { } else if (udf instanceof GenericUDFIn) { // for IN clause newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop); + } else if (udf instanceof GenericUDFBetween) { + // for BETWEEN clause + newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, fop); } else if (udf instanceof GenericUDFOPNot) { newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop); } else if (udf instanceof GenericUDFOPNotNull) { @@ -480,6 +485,32 @@ public class StatsRulesProcFactory { return Math.round( (double)numRows * factor * inFactor); } + private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, + List<String> neededCols, FilterOperator fop) throws SemanticException, CloneNotSupportedException { + final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; + final boolean invert = Boolean.TRUE.equals( + ((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // boolean invert (not) + final ExprNodeDesc comparisonExpression = fd.getChildren().get(1); // expression + final ExprNodeDesc leftExpression = fd.getChildren().get(2); // left expression + final ExprNodeDesc rightExpression = fd.getChildren().get(3); // right expression + + // We transform the BETWEEN clause to AND 
clause (with NOT on top if invert is true). + // This is more straightforward, as the evaluateExpression method will deal with + // generating the final row count relying on the basic comparator evaluation methods + final ExprNodeDesc leftComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), Lists.newArrayList(comparisonExpression, leftExpression)); + final ExprNodeDesc rightComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPEqualOrLessThan(), Lists.newArrayList(comparisonExpression, rightExpression)); + ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPAnd(), Lists.newArrayList(leftComparator, rightComparator)); + if (invert) { + newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPNot(), Lists.newArrayList(newExpression)); + } + + return evaluateExpression(stats, newExpression, aspCtx, neededCols, fop, 0); + } + private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop) throws CloneNotSupportedException, SemanticException { @@ -866,7 +897,8 @@ public class StatsRulesProcFactory { } else if (udf instanceof GenericUDFOPNull) { return evaluateColEqualsNullExpr(stats, genFunc); } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr - || udf instanceof GenericUDFIn || udf instanceof GenericUDFOPNot) { + || udf instanceof GenericUDFIn || udf instanceof GenericUDFBetween + || udf instanceof GenericUDFOPNot) { return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount); } } http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/explainuser_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out 
b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out index 0978ddd..e83d6d8 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out @@ -28,27 +28,27 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_12] - Select Operator [SEL_11] (rows=9759 width=620) + Select Operator [SEL_11] (rows=2166 width=620) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_10] - Merge Join Operator [MERGEJOIN_17] (rows=9759 width=620) + Merge Join Operator [MERGEJOIN_17] (rows=2166 width=620) Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col2 - Select Operator [SEL_2] (rows=6144 width=251) + Select Operator [SEL_2] (rows=1365 width=251) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_15] (rows=6144 width=251) + Filter Operator [FIL_15] (rows=1365 width=251) predicate:cint BETWEEN 1000000 AND 3000000 TableScan [TS_0] (rows=12288 width=251) default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col2 - Select Operator [SEL_5] (rows=4586 width=251) + Select Operator [SEL_5] (rows=1019 width=251) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_16] (rows=4586 width=251) + Filter Operator 
[FIL_16] (rows=1019 width=251) predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null) TableScan [TS_3] (rows=12288 width=251) default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"] @@ -113,23 +113,23 @@ Stage-0 SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_19] (rows=9759 width=8) + Merge Join Operator [MERGEJOIN_19] (rows=2166 width=8) Conds:RS_6._col0=RS_7._col0(Inner) <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=6144 width=2) + Select Operator [SEL_2] (rows=1365 width=2) Output:["_col0"] - Filter Operator [FIL_17] (rows=6144 width=2) + Filter Operator [FIL_17] (rows=1365 width=2) predicate:cint BETWEEN 1000000 AND 3000000 TableScan [TS_0] (rows=12288 width=2) default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["cint"] <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0 - Select Operator [SEL_5] (rows=4586 width=8) + Select Operator [SEL_5] (rows=1019 width=8) Output:["_col0"] - Filter Operator [FIL_18] (rows=4586 width=8) + Filter Operator [FIL_18] (rows=1019 width=8) predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null) TableScan [TS_3] (rows=12288 width=8) default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["cint","cbigint"] @@ -182,34 +182,34 @@ Stage-0 Stage-1 Reducer 4 llap File Output Operator [FS_16] - Select Operator [SEL_15] (rows=2765 width=12) + Select Operator [SEL_15] (rows=615 width=12) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_14] - Group By Operator [GBY_12] (rows=2765 width=12) + Group By Operator [GBY_12] (rows=615 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=2765 
width=12) + Group By Operator [GBY_10] (rows=615 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Merge Join Operator [MERGEJOIN_21] (rows=9759 width=4) + Merge Join Operator [MERGEJOIN_21] (rows=2166 width=4) Conds:RS_6._col1=RS_7._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col1 - Select Operator [SEL_2] (rows=6144 width=5) + Select Operator [SEL_2] (rows=1365 width=5) Output:["_col0","_col1"] - Filter Operator [FIL_19] (rows=6144 width=5) + Filter Operator [FIL_19] (rows=1365 width=5) predicate:cint BETWEEN 1000000 AND 3000000 TableScan [TS_0] (rows=12288 width=5) default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["csmallint","cint"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0 - Select Operator [SEL_5] (rows=4586 width=8) + Select Operator [SEL_5] (rows=1019 width=8) Output:["_col0"] - Filter Operator [FIL_20] (rows=4586 width=8) + Filter Operator [FIL_20] (rows=1019 width=8) predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null) TableScan [TS_3] (rows=12288 width=8) default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["cint","cbigint"] @@ -269,27 +269,27 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_12] - Select Operator [SEL_11] (rows=6758 width=215) + Select Operator [SEL_11] (rows=1501 width=215) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_10] - Map Join Operator [MAPJOIN_17] (rows=6758 width=215) + Map Join Operator [MAPJOIN_17] (rows=1501 width=215) 
Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col2 - Select Operator [SEL_5] (rows=6144 width=215) + Select Operator [SEL_5] (rows=1365 width=215) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_16] (rows=6144 width=215) + Filter Operator [FIL_16] (rows=1365 width=215) predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null) TableScan [TS_3] (rows=12288 width=215) default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_6] PartitionCols:_col2 - Select Operator [SEL_2] (rows=6144 width=215) + Select Operator [SEL_2] (rows=1365 width=215) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_15] (rows=6144 width=215) + Filter Operator [FIL_15] (rows=1365 width=215) predicate:cint BETWEEN 1000000 AND 3000000 TableScan [TS_0] (rows=12288 width=215) default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"] @@ -354,23 +354,23 @@ Stage-0 SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_19] (rows=6758 width=215) + Map Join Operator [MAPJOIN_19] (rows=1501 width=215) Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true <-Map 4 [CUSTOM_SIMPLE_EDGE] llap 
PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 - Select Operator [SEL_5] (rows=6144 width=215) + Select Operator [SEL_5] (rows=1365 width=215) Output:["_col0"] - Filter Operator [FIL_18] (rows=6144 width=215) + Filter Operator [FIL_18] (rows=1365 width=215) predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null) TableScan [TS_3] (rows=12288 width=215) default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["cint","cbigint"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=6144 width=215) + Select Operator [SEL_2] (rows=1365 width=215) Output:["_col0"] - Filter Operator [FIL_17] (rows=6144 width=215) + Filter Operator [FIL_17] (rows=1365 width=215) predicate:cint BETWEEN 1000000 AND 3000000 TableScan [TS_0] (rows=12288 width=215) default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["cint"] @@ -423,34 +423,34 @@ Stage-0 Stage-1 Reducer 4 llap File Output Operator [FS_16] - Select Operator [SEL_15] (rows=3379 width=215) + Select Operator [SEL_15] (rows=750 width=215) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_14] - Group By Operator [GBY_12] (rows=3379 width=215) + Group By Operator [GBY_12] (rows=750 width=215) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_11] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=6758 width=215) + Group By Operator [GBY_10] (rows=1501 width=215) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Map Join Operator [MAPJOIN_21] (rows=6758 width=215) + Map Join Operator [MAPJOIN_21] (rows=1501 width=215) Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"] <-Map 5 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 - Select Operator [SEL_5] (rows=6144 width=215) + Select Operator [SEL_5] (rows=1365 width=215) Output:["_col0"] - Filter Operator [FIL_20] (rows=6144 width=215) + 
Filter Operator [FIL_20] (rows=1365 width=215) predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null) TableScan [TS_3] (rows=12288 width=215) default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["cint","cbigint"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_6] PartitionCols:_col1 - Select Operator [SEL_2] (rows=6144 width=215) + Select Operator [SEL_2] (rows=1365 width=215) Output:["_col0","_col1"] - Filter Operator [FIL_19] (rows=6144 width=215) + Filter Operator [FIL_19] (rows=1365 width=215) predicate:cint BETWEEN 1000000 AND 3000000 TableScan [TS_0] (rows=12288 width=215) default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["csmallint","cint"] @@ -475,8 +475,8 @@ order by c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### +-13036 1 -8915 1 -3799 1 10782 1 --13036 1 NULL 6 http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index db0baee..48a86cf 100644 --- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -608,15 +608,15 @@ STAGE PLANS: Statistics: Num rows: 232 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -625,10 +625,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -674,15 +674,15 @@ STAGE PLANS: Statistics: Num rows: 232 Data size: 24150 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: 
COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -691,10 +691,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index 6541772..3254fb4 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -544,15 +544,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE 
Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -561,10 +561,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -610,15 +610,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs 
Reducer 2 @@ -627,10 +627,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -741,15 +741,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -760,13 +760,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: 
string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 - Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -820,15 +820,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -839,13 +839,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), 
VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 3 - Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out index 5c8db64..25c6f15 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out @@ -36,16 +36,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean) - Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), 
cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: llap LLAP IO: all inputs @@ -56,16 +56,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean) - Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE 
Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: llap LLAP IO: all inputs @@ -79,11 +79,11 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + - Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) Reducer 3 Execution mode: llap @@ -91,10 +91,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 
(type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -168,16 +168,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean) - Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -187,16 +187,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean) - Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -208,7 +208,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 9759 Data size: 78072 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2166 Data size: 17328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -296,16 +296,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 73396 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean) - Statistics: Num rows: 6144 Data 
size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), cint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6144 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint) Execution mode: llap LLAP IO: all inputs @@ -316,16 +316,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean) - Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -338,18 +338,18 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 9759 Data size: 39036 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 2166 Data size: 8664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: smallint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -359,11 +359,11 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint) Reducer 4 Execution mode: llap @@ -371,10 +371,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -449,16 +449,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: llap LLAP IO: all inputs @@ -469,16 +469,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: llap LLAP IO: all inputs @@ -494,12 +494,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 4 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Reduce Output Operator key expressions: _col2 (type: int) sort order: + - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean) Reducer 3 Execution mode: llap @@ -507,10 +507,10 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE 
Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -584,16 +584,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 4 @@ -603,16 +603,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -626,7 +626,7 @@ STAGE PLANS: 1 KEY.reducesinkkey0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -715,16 +715,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), cint (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint) Execution mode: llap LLAP IO: all inputs @@ -735,16 +735,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int) outputColumnNames: _col0 - Statistics: Num 
rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -759,19 +759,19 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 5 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Group By Operator aggregations: count() keys: _col0 (type: smallint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -781,11 +781,11 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column 
stats: NONE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -793,10 +793,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
