Repository: hive Updated Branches: refs/heads/master 833a7d158 -> 3fec161da
HIVE-13287: Add logic to estimate stats for IN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3fec161d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3fec161d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3fec161d Branch: refs/heads/master Commit: 3fec161dad40860d493dff203f9da3925226bb8e Parents: 833a7d1 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Wed Mar 23 21:00:41 2016 +0000 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Fri Apr 15 12:01:26 2016 +0100 ---------------------------------------------------------------------- .../stats/annotation/StatsRulesProcFactory.java | 107 ++++++++++++++++++- .../clientpositive/filter_cond_pushdown.q.out | 6 +- .../groupby_multi_single_reducer3.q.out | 8 +- .../llap/dynamic_partition_pruning_2.q.out | 30 +++--- ql/src/test/results/clientpositive/pcs.q.out | 6 +- .../results/clientpositive/perf/query17.q.out | 8 +- .../results/clientpositive/perf/query29.q.out | 8 +- .../results/clientpositive/perf/query46.q.out | 10 +- .../results/clientpositive/perf/query89.q.out | 4 +- .../results/clientpositive/pointlookup.q.out | 12 +-- .../results/clientpositive/pointlookup2.q.out | 16 +-- .../results/clientpositive/pointlookup3.q.out | 8 +- .../spark/groupby_multi_single_reducer3.q.out | 8 +- .../tez/dynamic_partition_pruning_2.q.out | 30 +++--- 14 files changed, 180 insertions(+), 81 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index c4fc5ca..320dc10 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import java.util.Stack; import org.apache.hadoop.hive.conf.HiveConf; @@ -53,6 +54,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; @@ -64,6 +66,7 @@ import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; @@ -76,19 +79,24 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; public class StatsRulesProcFactory { private static final Logger LOG = LoggerFactory.getLogger(StatsRulesProcFactory.class.getName()); private static final boolean isDebugEnabled = LOG.isDebugEnabled(); + /** * Collect basic statistics like number of rows, data size and column level statistics from the * table. Also sets the state of the available statistics. Basic and column statistics can have @@ -299,7 +307,7 @@ public class StatsRulesProcFactory { private long evaluateExpression(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List<String> neededCols, - FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException { + FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { long newNumRows = 0; Statistics andStats = null; @@ -338,6 +346,9 @@ public class StatsRulesProcFactory { evaluatedRowCount = newNumRows; } } + } else if (udf instanceof GenericUDFIn) { + // for IN clause + newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop); } else if (udf instanceof GenericUDFOPNot) { newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop); } else if (udf instanceof GenericUDFOPNotNull) { @@ -375,9 +386,97 @@ public class StatsRulesProcFactory { return newNumRows; } + private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, + List<String> neededCols, FilterOperator fop) throws SemanticException { + + long numRows = stats.getNumRows(); + + ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; + + // 1. It is an IN operator, check if it uses STRUCT + List<ExprNodeDesc> children = fd.getChildren(); + List<ExprNodeDesc> columns = Lists.newArrayList(); + List<ColStatistics> columnStats = Lists.newArrayList(); + List<Set<ExprNodeDescEqualityWrapper>> values = Lists.newArrayList(); + ExprNodeDesc columnsChild = children.get(0); + boolean multiColumn; + if (columnsChild instanceof ExprNodeGenericFuncDesc && + ((ExprNodeGenericFuncDesc) columnsChild).getGenericUDF() instanceof GenericUDFStruct) { + for (int j = 0; j < columnsChild.getChildren().size(); j++) { + ExprNodeDesc columnChild = columnsChild.getChildren().get(j); + // If column is not column reference , we bail out + if (!(columnChild instanceof ExprNodeColumnDesc)) { + // Default + return numRows / 2; + } + columns.add(columnChild); + final String columnName = ((ExprNodeColumnDesc)columnChild).getColumn(); + // if column name is not contained in needed column list then it + // is a partition column. We do not need to evaluate partition columns + // in filter expression since it will be taken care by partition pruner + if (neededCols != null && !neededCols.contains(columnName)) { + // Default + return numRows / 2; + } + columnStats.add(stats.getColumnStatisticsFromColName(columnName)); + values.add(Sets.<ExprNodeDescEqualityWrapper>newHashSet()); + } + multiColumn = true; + } else { + // If column is not column reference , we bail out + if (!(columnsChild instanceof ExprNodeColumnDesc)) { + // Default + return numRows / 2; + } + columns.add(columnsChild); + final String columnName = ((ExprNodeColumnDesc)columnsChild).getColumn(); + // if column name is not contained in needed column list then it + // is a partition column. We do not need to evaluate partition columns + // in filter expression since it will be taken care by partition pruner + if (neededCols != null && !neededCols.contains(columnName)) { + // Default + return numRows / 2; + } + columnStats.add(stats.getColumnStatisticsFromColName(columnName)); + values.add(Sets.<ExprNodeDescEqualityWrapper>newHashSet()); + multiColumn = false; + } + + // 2. Extract columns and values + for (int i = 1; i < children.size(); i++) { + ExprNodeDesc child = children.get(i); + // If value is not a constant, we bail out + if (!(child instanceof ExprNodeConstantDesc)) { + // Default + return numRows / 2; + } + if (multiColumn) { + ExprNodeConstantDesc constantChild = (ExprNodeConstantDesc) child; + List<?> items = (List<?>) constantChild.getWritableObjectInspector().getWritableConstantValue(); + List<TypeInfo> structTypes = ((StructTypeInfo) constantChild.getTypeInfo()).getAllStructFieldTypeInfos(); + for (int j = 0; j < structTypes.size(); j++) { + ExprNodeConstantDesc constant = new ExprNodeConstantDesc(structTypes.get(j), items.get(j)); + values.get(j).add(new ExprNodeDescEqualityWrapper(constant)); + } + } else { + values.get(0).add(new ExprNodeDescEqualityWrapper(child)); + } + } + + // 3. Calculate IN selectivity + float factor = 1; + for (int i = 0; i < columnStats.size(); i++) { + long dvs = columnStats.get(i) == null ? 0 : columnStats.get(i).getCountDistint(); + // ( num of distinct vals for col / num of rows ) * num of distinct vals for col in IN clause + float columnFactor = dvs == 0 ? 0.5f : ((float)dvs / numRows) * values.get(i).size(); + factor *= columnFactor; + } + return Math.round( (double)numRows * factor); + } + private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop) - throws CloneNotSupportedException { + throws CloneNotSupportedException, SemanticException { long numRows = stats.getNumRows(); @@ -676,7 +775,7 @@ public class StatsRulesProcFactory { private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx, List<String> neededCols, - FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException { + FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { long numRows = stats.getNumRows(); @@ -761,7 +860,7 @@ public class StatsRulesProcFactory { } else if (udf instanceof GenericUDFOPNull) { return evaluateColEqualsNullExpr(stats, genFunc); } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr - || udf instanceof GenericUDFOPNot) { + || udf instanceof GenericUDFIn || udf instanceof GenericUDFOPNot) { return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount); } } http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index f48a5a4..132b590 100644 --- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -442,14 +442,14 @@ STAGE PLANS: Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col1) IN ('2008-04-08', '2008-04-09') and (_col3 = '2008-04-10')) or (_col3 = '2008-04-08')) (type: boolean) - Statistics: Num rows: 343 Data size: 3643 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 343 Data size: 3643 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 343 Data size: 3643 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index 5362390..c5488de 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: KEY._col0 (type: string) @@ -93,7 +93,7 @@ STAGE PLANS: name: default.e1 Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: KEY._col0 (type: string) @@ -404,7 +404,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: KEY._col0 (type: string) @@ -425,7 +425,7 @@ STAGE PLANS: name: default.e1 Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: KEY._col0 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out index 6f93b6a..db3b85d 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out @@ -208,31 +208,31 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: dim_shops_id (int) Target Input: agg Partition key expr: dim_shops_id - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Execution mode: llap LLAP IO: no inputs @@ -382,16 +382,16 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -757,31 +757,31 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: dim_shops_id (int) Target Input: agg Partition key expr: dim_shops_id - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Execution mode: llap LLAP IO: no inputs http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pcs.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out index a1382f1..d6d2431 100644 --- a/ql/src/test/results/clientpositive/pcs.q.out +++ b/ql/src/test/results/clientpositive/pcs.q.out @@ -921,17 +921,17 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (struct(_col2,_col0,_col8)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) (type: boolean) - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query17.q.out b/ql/src/test/results/clientpositive/perf/query17.q.out index f98ed99..1b5a640 100644 --- a/ql/src/test/results/clientpositive/perf/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/query17.q.out @@ -71,9 +71,9 @@ Stage-0 <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 - Select Operator [SEL_17] (rows=36524 width=1119) + Select Operator [SEL_17] (rows=36525 width=1119) Output:["_col0"] - Filter Operator [FIL_95] (rows=36524 width=1119) + Filter Operator [FIL_95] (rows=36525 width=1119) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) TableScan [TS_15] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] @@ -85,9 +85,9 @@ Stage-0 <-Map 14 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Select Operator [SEL_14] (rows=36524 width=1119) + Select Operator [SEL_14] (rows=36525 width=1119) Output:["_col0"] - Filter Operator [FIL_94] (rows=36524 width=1119) + Filter Operator [FIL_94] (rows=36525 width=1119) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query29.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query29.q.out b/ql/src/test/results/clientpositive/perf/query29.q.out index 0f4116a..39aca92 100644 --- a/ql/src/test/results/clientpositive/perf/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/query29.q.out @@ -52,7 +52,7 @@ Stage-0 <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_102] (rows=44193 width=1119) + Merge Join Operator [MERGEJOIN_102] (rows=44194 width=1119) Conds:RS_39._col3=RS_40._col0(Inner),Output:["_col1","_col5","_col10","_col14","_col24","_col25"] <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_40] @@ -66,14 +66,14 @@ Stage-0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_101] (rows=40176 width=1119) + Merge Join Operator [MERGEJOIN_101] (rows=40177 width=1119) Conds:RS_36._col11=RS_37._col0(Inner),Output:["_col1","_col3","_col5","_col10","_col14"] <-Map 15 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 - Select Operator [SEL_17] (rows=36524 width=1119) + Select Operator [SEL_17] (rows=36525 width=1119) Output:["_col0"] - Filter Operator [FIL_94] (rows=36524 width=1119) + Filter Operator [FIL_94] (rows=36525 width=1119) predicate:((d_year) IN (2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_15] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query46.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query46.q.out b/ql/src/test/results/clientpositive/perf/query46.q.out index 2bd87aa..11804c9 100644 --- a/ql/src/test/results/clientpositive/perf/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/query46.q.out @@ -83,7 +83,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_87] (rows=24305 width=1119) + Merge Join Operator [MERGEJOIN_87] (rows=24306 width=1119) Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_22] @@ -97,7 +97,7 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_86] (rows=22096 width=1119) + Merge Join Operator [MERGEJOIN_86] (rows=22097 width=1119) Conds:RS_18._col4=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_19] @@ -111,7 +111,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_85] (rows=20088 width=1119) + Merge Join Operator [MERGEJOIN_85] (rows=20089 width=1119) Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_15] @@ -125,9 +125,9 @@ Stage-0 <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_5] (rows=18262 width=1119) + Select Operator [SEL_5] (rows=18263 width=1119) Output:["_col0"] - Filter Operator [FIL_79] (rows=18262 width=1119) + Filter Operator [FIL_79] (rows=18263 width=1119) predicate:((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"] http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query89.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query89.q.out b/ql/src/test/results/clientpositive/perf/query89.q.out index 75f7385..de91d9b 100644 --- a/ql/src/test/results/clientpositive/perf/query89.q.out +++ b/ql/src/test/results/clientpositive/perf/query89.q.out @@ -117,9 +117,9 @@ Stage-0 <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_8] (rows=36524 width=1119) + Select Operator [SEL_8] (rows=36525 width=1119) Output:["_col0","_col2"] - Filter Operator [FIL_49] (rows=36524 width=1119) + Filter Operator [FIL_49] (rows=36525 width=1119) predicate:((d_year) IN (2000) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out index 460cc74..78dd7bc 100644 --- a/ql/src/test/results/clientpositive/pointlookup.q.out +++ b/ql/src/test/results/clientpositive/pointlookup.q.out @@ -111,14 +111,14 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -177,14 +177,14 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index 869e4cd..6fc6e7f 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -1169,7 +1169,7 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1197,7 +1197,7 @@ STAGE PLANS: key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) auto parallelism: false @@ -1231,13 +1231,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1590,7 +1590,7 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (struct(_col0,_col3)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1618,7 +1618,7 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string) auto parallelism: false @@ -1652,13 +1652,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out index e98ba76..2b25b39 100644 --- a/ql/src/test/results/clientpositive/pointlookup3.q.out +++ b/ql/src/test/results/clientpositive/pointlookup3.q.out @@ -1337,7 +1337,7 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1365,7 +1365,7 @@ STAGE PLANS: key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string) auto parallelism: false @@ -1399,13 +1399,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out index 7bb3ff2..982d719 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out @@ -78,7 +78,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: KEY._col0 (type: string) @@ -99,7 +99,7 @@ STAGE PLANS: name: default.e1 Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: KEY._col0 (type: string) @@ -422,7 +422,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: KEY._col0 (type: string) @@ -443,7 +443,7 @@ STAGE PLANS: name: default.e1 Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: KEY._col0 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out index e129795..71b7ee3 100644 --- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out +++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out @@ -206,31 +206,31 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: dim_shops_id (int) Target Input: agg Partition key expr: dim_shops_id - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: @@ -374,16 +374,16 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -735,31 +735,31 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), label (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target column: dim_shops_id (int) Target Input: agg Partition key expr: dim_shops_id - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE Target Vertex: Map 1 Reducer 2 Reduce Operator Tree:
