Repository: hive Updated Branches: refs/heads/master b82d38aa3 -> 2e226d22f
HIVE-17399: Semijoin: Do not remove semijoin branch if it feeds to TS->DPP_EVENT (Deepak Jaiswal, reviewed by Gopal V) Signed-off-by: Gopal V <gop...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2e226d22 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2e226d22 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2e226d22 Branch: refs/heads/master Commit: 2e226d22facea6ea362dc90ce37ae59aeb1666a0 Parents: b82d38a Author: Deepak Jaiswal <djais...@hortonworks.com> Authored: Fri Sep 1 10:51:52 2017 -0700 Committer: Gopal V <gop...@apache.org> Committed: Fri Sep 1 10:52:44 2017 -0700 ---------------------------------------------------------------------- .../hive/ql/parse/SemiJoinBranchInfo.java | 18 + .../hadoop/hive/ql/parse/TezCompiler.java | 105 +++++- .../clientpositive/dynamic_semijoin_reduction.q | 10 + .../llap/dynamic_semijoin_reduction.q.out | 365 +++++++++++++++++++ 4 files changed, 490 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/2e226d22/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java index 5d7b9e5..c960b05 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinBranchInfo.java @@ -24,15 +24,21 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; public class SemiJoinBranchInfo { private TableScanOperator ts; private boolean isHint; + // Default value is true, however, if an optimization deems this edge + // important, it should set this to false. This does not guarantee that + // the edge will stay, however, it increases the chances. + private boolean shouldRemove; public SemiJoinBranchInfo(TableScanOperator ts) { this.ts = ts; isHint = false; + shouldRemove = true; } public SemiJoinBranchInfo(TableScanOperator ts, boolean isHint) { this.ts = ts; this.isHint = isHint; + shouldRemove = !isHint; // If hint is true, shouldRemove is redundant anyway } public TableScanOperator getTsOp() { @@ -42,4 +48,16 @@ public class SemiJoinBranchInfo { public boolean getIsHint() { return isHint; } + + public boolean getShouldRemove() { + return shouldRemove; + } + + public void setShouldRemove(boolean shouldRemove) { + // The state only changes from true->false + // Once set to false, it may not change back to true + if (this.shouldRemove) { + this.shouldRemove = shouldRemove; + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/2e226d22/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 5921594..15836ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -165,8 +165,14 @@ public class TezCompiler extends TaskCompiler { runRemoveDynamicPruningOptimization(procCtx, inputs, outputs); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size"); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + markSemiJoinForDPP(procCtx); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); + // Removing semijoin optimization when it may not be beneficial + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); removeSemijoinOptimizationByBenefit(procCtx); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits"); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); // Remove any parallel edge between semijoin and mapjoin. @@ -955,12 +961,14 @@ public class TezCompiler extends TaskCompiler { if (ts.getStatistics() != null) { long numRows = ts.getStatistics().getNumRows(); if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) { - if (LOG.isDebugEnabled()) { - LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin " - + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + if (sjInfo.getShouldRemove()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); } - GenTezUtils.removeBranch(rs); - GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); } } return null; @@ -1055,7 +1063,7 @@ public class TezCompiler extends TaskCompiler { parallelEdges = true; - if (sjInfo.getIsHint()) { + if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) { // Created by hint, skip it continue; } @@ -1295,8 +1303,8 @@ public class TezCompiler extends TaskCompiler { HiveConf.ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD); for (ReduceSinkOperator rs : map.keySet()) { SemiJoinBranchInfo sjInfo = map.get(rs); - if (sjInfo.getIsHint()) { - // Semijoin created using hint, skip it + if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) { + // Semijoin created using hint or marked useful, skip it continue; } // rs is semijoin optimization branch, which should look like <Parent>-SEL-GB1-RS1-GB2-RS2 @@ -1342,4 +1350,85 @@ public class TezCompiler extends TaskCompiler { GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, ts); } } + + private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) + throws SemanticException { + if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { + // Not needed without semi-join reduction + return; + } + + // Stores the Tablescan operators processed to avoid redoing them. + Map<TableScanOperator, TableScanOperator> tsOps = new HashMap<>(); + Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); + + for (ReduceSinkOperator rs : map.keySet()) { + SemiJoinBranchInfo sjInfo = map.get(rs); + TableScanOperator ts = sjInfo.getTsOp(); + TableScanOperator tsInMap = tsOps.putIfAbsent(ts, ts); + if (tsInMap != null) { + // Already processed, skip + continue; + } + + if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) { + continue; + } + + // A TS can have multiple branches due to DPP Or Semijoin Opt. + // Use DFS to traverse all the branches until RS or DPP is hit. + Deque<Operator<?>> deque = new LinkedList<>(); + deque.add(ts); + while (!deque.isEmpty()) { + Operator<?> op = deque.pollLast(); + if (op instanceof AppMasterEventOperator && + ((AppMasterEventOperator) op).getConf() instanceof DynamicPruningEventDesc) { + // DPP. Now look up nDVs on both sides to see the selectivity. + // <Parent Ops>-SEL-GB1-RS1-GB2-RS2 + SelectOperator selOp = null; + try { + selOp = (SelectOperator) + (rs.getParentOperators().get(0) + .getParentOperators().get(0) + .getParentOperators().get(0) + .getParentOperators().get(0)); + } catch (NullPointerException e) { + LOG.warn("markSemiJoinForDPP : Null pointer exception caught while accessing semijoin operators"); + assert false; + return; + } + try { + // If stats are not available, just assume its a useful edge + Statistics stats = selOp.getStatistics(); + ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr( + selOp.getConf().getColList().get(0)); + long nDVs = stats.getColumnStatisticsFromColName( + colExpr.getColumn()).getCountDistint(); + if (nDVs > 0) { + // Lookup nDVs on TS side. + RuntimeValuesInfo rti = procCtx.parseContext + .getRsToRuntimeValuesInfoMap().get(rs); + ExprNodeDesc tsExpr = rti.getTsColExpr(); + FilterOperator fil = (FilterOperator) (ts.getChildOperators().get(0)); + Statistics filStats = fil.getStatistics(); + ExprNodeColumnDesc tsColExpr = ExprNodeDescUtils.getColumnExpr(tsExpr); + long nDVsOfTS = filStats.getColumnStatisticsFromColName( + tsColExpr.getColumn()).getCountDistint(); + if (nDVsOfTS >= nDVs) { + sjInfo.setShouldRemove(false); + } + } + } catch (NullPointerException e) { + sjInfo.setShouldRemove(false); + } + break; + } + if (op instanceof ReduceSinkOperator) { + // Done with this branch + continue; + } + deque.addAll(op.getChildOperators()); + } + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/2e226d22/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q index a36e981..24933db 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -113,6 +113,16 @@ set hive.tez.dynamic.semijoin.reduction=true; EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring); select count(*) from srcpart_date join srcpart_small on (srcpart_date.ds = srcpart_small.ds) join alltypesorc_int40 on (srcpart_date.value = alltypesorc_int40.cstring); +-- HIVE-17399 +create table srcpart_small10 as select * from srcpart_small limit 10; +analyze table srcpart_small10 compute statistics for columns; +set hive.tez.dynamic.semijoin.reduction=false; +EXPLAIN select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds; +select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds; +set hive.tez.dynamic.semijoin.reduction=true; +EXPLAIN select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds; +select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds; + -- With unions explain select * from alltypesorc_int join (select srcpart_date.key as key from srcpart_date http://git-wip-us.apache.org/repos/asf/hive/blob/2e226d22/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 61dcf3b..24fbc61 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -3046,6 +3046,371 @@ POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### 0 +PREHOOK: query: create table srcpart_small10 as select * from srcpart_small limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_small10 +POSTHOOK: query: create table srcpart_small10 as select * from srcpart_small limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_small10 +POSTHOOK: Lineage: srcpart_small10.ds SIMPLE [(srcpart_small)srcpart_small.FieldSchema(name:ds, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_small10.key1 SIMPLE [(srcpart_small)srcpart_small.FieldSchema(name:key1, type:string, comment:null), ] +POSTHOOK: Lineage: srcpart_small10.value1 SIMPLE [(srcpart_small)srcpart_small.FieldSchema(name:value1, type:string, comment:null), ] +PREHOOK: query: analyze table srcpart_small10 compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_small10 +PREHOOK: Output: default@srcpart_small10 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_small10 compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_small10 +POSTHOOK: Output: default@srcpart_small10 +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart_date + Partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL + Target Vertex: Map 3 + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: srcpart_small10 + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 0 Map 1 + Statistics: Num rows: 10000 Data size: 80000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small10 +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small10 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +10000 +PREHOOK: query: EXPLAIN select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE) + Map 4 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_10_srcpart_small10_key1_min) AND DynamicValue(RS_10_srcpart_small10_key1_max) and in_bloom_filter(key1, DynamicValue(RS_10_srcpart_small10_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_10_srcpart_small10_key1_min) AND DynamicValue(RS_10_srcpart_small10_key1_max) and in_bloom_filter(key1, DynamicValue(RS_10_srcpart_small10_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 5420 Basic stats: COMPLETE Column stats: PARTIAL + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 2 + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart_date + Partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL + Target Vertex: Map 4 + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: srcpart_small10 + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + input vertices: + 0 Map 1 + Statistics: Num rows: 10000 Data size: 80000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small10 +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_small10, srcpart_small, srcpart_date where srcpart_small.key1 = srcpart_small10.key1 and srcpart_date.ds = srcpart_small.ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small10 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +10000 PREHOOK: query: explain select * from alltypesorc_int join (select srcpart_date.key as key from srcpart_date union all