This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 1b4defaa038 HIVE-28549: Limit the maximum number of operators merged
by SharedWorkOptimizer (Shohei Okumiya, reviewed by Denys Kuzmenko, Seonggon
Namgung)
1b4defaa038 is described below
commit 1b4defaa03818a404e73fd50cc76ec1b24d0b776
Author: Shohei Okumiya <[email protected]>
AuthorDate: Tue Apr 1 16:53:29 2025 +0900
HIVE-28549: Limit the maximum number of operators merged by
SharedWorkOptimizer (Shohei Okumiya, reviewed by Denys Kuzmenko, Seonggon
Namgung)
Closes #5492
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +
.../hive/ql/optimizer/SharedWorkOptimizer.java | 43 +-
.../sharedwork_map_side_aggregations.q | 24 +
.../llap/sharedwork_map_side_aggregations.q.out | 495 +++++++++++++++++++++
4 files changed, 557 insertions(+), 7 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 6638e00829e..0b8b72ea704 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2748,6 +2748,8 @@ public static enum ConfVars {
"Analyzes and merges equiv downstream operators after a successful
shared work optimization step."),
HIVE_SHARED_WORK_PARALLEL_EDGE_SUPPORT("hive.optimize.shared.work.parallel.edge.support",
true,
"Lets the shared work optimizer to create parallel edges in case they
are for semijoins or mapjoins."),
+ HIVE_SHARED_WORK_MAX_SIBLINGS("hive.optimize.shared.work.max.siblings", -1,
+ "The maximum number of operators merged in a single iteration. -1
means infinite"),
HIVE_REMOVE_SQ_COUNT_CHECK("hive.optimize.remove.sq_count_check", true,
"Whether to remove an extra join with sq_count_check for scalar
subqueries "
+ "with constant group by keys."),
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index b2580af3e8e..67dcfebcb74 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.optimizer;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ListMultimap;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
@@ -36,10 +38,12 @@
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
@@ -145,7 +149,7 @@ public ParseContext transform(ParseContext pctx) throws
SemanticException {
}
// Map of dbName.TblName -> TSOperator
- ArrayListMultimap<String, TableScanOperator> tableNameToOps =
splitTableScanOpsByTable(pctx);
+ ListMultimap<String, TableScanOperator> tableNameToOps =
splitTableScanOpsByTable(pctx);
// Check whether all tables in the plan are unique
boolean tablesReferencedOnlyOnce =
@@ -171,10 +175,10 @@ public ParseContext transform(ParseContext pctx) throws
SemanticException {
// Gather information about the DPP table scans and store it in the cache
gatherDPPTableScanOps(pctx, optimizerCache);
- for (Entry<String, Long> tablePair : sortedTables) {
- String tableName = tablePair.getKey();
- List<TableScanOperator> scans = tableNameToOps.get(tableName);
-
+ final int batchSize = HiveConf.getIntVar(pctx.getConf(),
ConfVars.HIVE_SHARED_WORK_MAX_SIBLINGS);
+ Preconditions.checkArgument(batchSize == -1 || batchSize > 0, "%s must be
-1 or greater than 0",
+ ConfVars.HIVE_SHARED_WORK_MAX_SIBLINGS.varname);
+ for (List<TableScanOperator> scans : groupTableScanOperators(sortedTables,
tableNameToOps, batchSize)) {
// Execute shared work optimization
runSharedWorkOptimization(pctx, optimizerCache, scans,
Mode.SubtreeMerge);
@@ -251,6 +255,31 @@ public ParseContext transform(ParseContext pctx) throws
SemanticException {
return pctx;
}
/**
 * Groups the TableScanOperators of the plan into the batches that a single
 * shared-work optimization pass will operate on.
 *
 * When {@code batchSize} is -1 (no limit), each table contributes exactly one
 * batch containing all of its scans. Otherwise every table's scan list is
 * split into consecutive sub-lists of at most {@code batchSize} operators,
 * bounding how many sibling operators can be merged in a single iteration
 * (HIVE-28549).
 *
 * @param sortedTables   table entries (dbName.tblName -> size) in the order the
 *                       optimizer should process them
 * @param tableNameToOps map from dbName.tblName to the TS operators scanning that table
 * @param batchSize      maximum number of scans per batch; -1 means unlimited
 *                       (callers validate this via Preconditions before invoking)
 * @return the scan batches, preserving the order of {@code sortedTables} and,
 *         within a table, the order of its scans
 */
private static List<List<TableScanOperator>> groupTableScanOperators(List<Entry<String, Long>> sortedTables,
    ListMultimap<String, TableScanOperator> tableNameToOps, int batchSize) {
  if (batchSize == -1) {
    // Unlimited: one batch per table with all of its scans.
    return sortedTables.stream().map(entry -> tableNameToOps.get(entry.getKey())).collect(Collectors.toList());
  }

  final List<List<TableScanOperator>> batches = new ArrayList<>();
  for (Entry<String, Long> tablePair : sortedTables) {
    final String tableName = tablePair.getKey();
    final List<TableScanOperator> scans = tableNameToOps.get(tableName);

    final int limit = scans.size();
    int from = 0;
    // Slice [from, to) windows of at most batchSize scans; `to` is clamped to
    // `limit`, so `from` lands exactly on `limit` and the loop terminates
    // (including the empty-list case, where it never runs).
    while (from != limit) {
      final int to = Math.min(limit, from + batchSize);
      // We have to copy the list because it is mutated later
      final List<TableScanOperator> subList = new ArrayList<>(scans.subList(from, to));
      batches.add(subList);
      from = to;
    }
  }

  return batches;
}
+
private boolean runSharedWorkOptimization(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, List<TableScanOperator> scans,
Mode mode) throws SemanticException {
boolean ret = false;
@@ -1111,9 +1140,9 @@ private int cmpDataSize(TableScanOperator o1,
TableScanOperator o2) {
}
}
- private static ArrayListMultimap<String, TableScanOperator>
splitTableScanOpsByTable(
+ private static ListMultimap<String, TableScanOperator>
splitTableScanOpsByTable(
ParseContext pctx) {
- ArrayListMultimap<String, TableScanOperator> tableNameToOps =
ArrayListMultimap.create();
+ ListMultimap<String, TableScanOperator> tableNameToOps =
ArrayListMultimap.create();
// Sort by operator ID so we get deterministic results
TSComparator comparator = new TSComparator();
Queue<TableScanOperator> sortedTopOps = new PriorityQueue<>(comparator);
diff --git
a/ql/src/test/queries/clientpositive/sharedwork_map_side_aggregations.q
b/ql/src/test/queries/clientpositive/sharedwork_map_side_aggregations.q
new file mode 100644
index 00000000000..fb1057fdc87
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sharedwork_map_side_aggregations.q
@@ -0,0 +1,24 @@
+--! qt:dataset:src
+
+set hive.optimize.shared.work.max.siblings=3;
+
+EXPLAIN
+SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num), SUM(t4.num),
SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key;
+
+SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num), SUM(t4.num),
SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key;
diff --git
a/ql/src/test/results/clientpositive/llap/sharedwork_map_side_aggregations.q.out
b/ql/src/test/results/clientpositive/llap/sharedwork_map_side_aggregations.q.out
new file mode 100644
index 00000000000..bbe864529e2
--- /dev/null
+++
b/ql/src/test/results/clientpositive/llap/sharedwork_map_side_aggregations.q.out
@@ -0,0 +1,495 @@
+PREHOOK: query: EXPLAIN
+SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num), SUM(t4.num),
SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num), SUM(t4.num),
SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 10 <- Map 1 (SIMPLE_EDGE)
+ Reducer 11 <- Map 1 (SIMPLE_EDGE)
+ Reducer 13 <- Map 12 (SIMPLE_EDGE)
+ Reducer 16 <- Map 15 (SIMPLE_EDGE)
+ Reducer 17 <- Map 15 (SIMPLE_EDGE)
+ Reducer 18 <- Map 15 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 16 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((key like '%0%') or ((key like '%3%') and (key
like '%0%') and key is not null) or ((key like '%2%') and (key like '%0%') and
key is not null)) (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key like '%0%') (type: boolean)
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic
stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key like '%3%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: ((key like '%2%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: (((key like '%6%') and (key like '%0%') and key
is not null) or ((key like '%1%') and (key like '%0%') and key is not null))
(type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key like '%6%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: ((key like '%1%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: (((key like '%7%') and (key like '%0%') and key
is not null) or ((key like '%4%') and (key like '%0%') and key is not null) or
((key like '%5%') and (key like '%0%') and key is not null)) (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key like '%7%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: ((key like '%4%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: ((key like '%5%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 10
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 11
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 13
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 16
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 17
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 18
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Dummy Store
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 375 Data size: 33633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 33633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4
+ Statistics: Num rows: 375 Data size: 36633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 36633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4, _col6
+ Statistics: Num rows: 375 Data size: 39633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 39633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type:
bigint), _col6 (type: bigint)
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4, _col6, _col8
+ Statistics: Num rows: 375 Data size: 42633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 42633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type:
bigint), _col6 (type: bigint), _col8 (type: bigint)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4, _col6, _col8, _col10
+ Statistics: Num rows: 375 Data size: 45633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 45633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type:
bigint), _col6 (type: bigint), _col8 (type: bigint), _col10 (type: bigint)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4, _col6, _col8, _col10,
_col12
+ Statistics: Num rows: 375 Data size: 48633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 48633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type:
bigint), _col6 (type: bigint), _col8 (type: bigint), _col10 (type: bigint),
_col12 (type: bigint)
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col4, _col6, _col8, _col10, _col12,
_col14
+ Statistics: Num rows: 375 Data size: 19008 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), sum(_col2), sum(_col4), sum(_col6),
sum(_col8), sum(_col10), sum(_col12), sum(_col14)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 64 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint),
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1),
sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5),
sum(VALUE._col6), sum(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num),
SUM(t4.num), SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num),
SUM(t4.num), SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+49 22 28 26 31 5 9 10