This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 1b4defaa038 HIVE-28549: Limit the maximum number of operators merged
by SharedWorkOptimizer (Shohei Okumiya, reviewed by Denys Kuzmenko, Seonggon
Namgung)
1b4defaa038 is described below
commit 1b4defaa03818a404e73fd50cc76ec1b24d0b776
Author: Shohei Okumiya <[email protected]>
AuthorDate: Tue Apr 1 16:53:29 2025 +0900
HIVE-28549: Limit the maximum number of operators merged by
SharedWorkOptimizer (Shohei Okumiya, reviewed by Denys Kuzmenko, Seonggon
Namgung)
Closes #5492
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +
.../hive/ql/optimizer/SharedWorkOptimizer.java | 43 +-
.../sharedwork_map_side_aggregations.q | 24 +
.../llap/sharedwork_map_side_aggregations.q.out | 495 +++++++++++++++++++++
4 files changed, 557 insertions(+), 7 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 6638e00829e..0b8b72ea704 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2748,6 +2748,8 @@ public static enum ConfVars {
"Analyzes and merges equiv downstream operators after a successful
shared work optimization step."),
HIVE_SHARED_WORK_PARALLEL_EDGE_SUPPORT("hive.optimize.shared.work.parallel.edge.support",
true,
"Lets the shared work optimizer to create parallel edges in case they
are for semijoins or mapjoins."),
+ HIVE_SHARED_WORK_MAX_SIBLINGS("hive.optimize.shared.work.max.siblings", -1,
+ "The maximum number of operators merged in a single iteration. -1
means infinite"),
HIVE_REMOVE_SQ_COUNT_CHECK("hive.optimize.remove.sq_count_check", true,
"Whether to remove an extra join with sq_count_check for scalar
subqueries "
+ "with constant group by keys."),
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index b2580af3e8e..67dcfebcb74 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hive.ql.optimizer;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ListMultimap;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
@@ -36,10 +38,12 @@
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
@@ -145,7 +149,7 @@ public ParseContext transform(ParseContext pctx) throws
SemanticException {
}
// Map of dbName.TblName -> TSOperator
- ArrayListMultimap<String, TableScanOperator> tableNameToOps =
splitTableScanOpsByTable(pctx);
+ ListMultimap<String, TableScanOperator> tableNameToOps =
splitTableScanOpsByTable(pctx);
// Check whether all tables in the plan are unique
boolean tablesReferencedOnlyOnce =
@@ -171,10 +175,10 @@ public ParseContext transform(ParseContext pctx) throws
SemanticException {
// Gather information about the DPP table scans and store it in the cache
gatherDPPTableScanOps(pctx, optimizerCache);
- for (Entry<String, Long> tablePair : sortedTables) {
- String tableName = tablePair.getKey();
- List<TableScanOperator> scans = tableNameToOps.get(tableName);
-
+ final int batchSize = HiveConf.getIntVar(pctx.getConf(),
ConfVars.HIVE_SHARED_WORK_MAX_SIBLINGS);
+ Preconditions.checkArgument(batchSize == -1 || batchSize > 0, "%s must be
-1 or greater than 0",
+ ConfVars.HIVE_SHARED_WORK_MAX_SIBLINGS.varname);
+ for (List<TableScanOperator> scans : groupTableScanOperators(sortedTables,
tableNameToOps, batchSize)) {
// Execute shared work optimization
runSharedWorkOptimization(pctx, optimizerCache, scans,
Mode.SubtreeMerge);
@@ -251,6 +255,31 @@ public ParseContext transform(ParseContext pctx) throws
SemanticException {
return pctx;
}
/**
 * Groups the TableScanOperators of the plan into the batches that a single
 * shared-work optimization pass will operate on.
 *
 * When {@code batchSize} is -1 (no limit), each table contributes exactly one
 * batch containing all of its scans. Otherwise every table's scan list is
 * split into consecutive sub-lists of at most {@code batchSize} operators,
 * bounding how many sibling operators can be merged in a single iteration
 * (HIVE-28549).
 *
 * @param sortedTables   table entries (dbName.tblName -> size) in the order the
 *                       optimizer should process them
 * @param tableNameToOps map from dbName.tblName to the TS operators scanning that table
 * @param batchSize      maximum number of scans per batch; -1 means unlimited
 *                       (callers validate this via Preconditions before invoking)
 * @return the scan batches, preserving the order of {@code sortedTables} and,
 *         within a table, the order of its scans
 */
private static List<List<TableScanOperator>> groupTableScanOperators(List<Entry<String, Long>> sortedTables,
    ListMultimap<String, TableScanOperator> tableNameToOps, int batchSize) {
  if (batchSize == -1) {
    // Unlimited: one batch per table with all of its scans.
    return sortedTables.stream().map(entry -> tableNameToOps.get(entry.getKey())).collect(Collectors.toList());
  }

  final List<List<TableScanOperator>> batches = new ArrayList<>();
  for (Entry<String, Long> tablePair : sortedTables) {
    final String tableName = tablePair.getKey();
    final List<TableScanOperator> scans = tableNameToOps.get(tableName);

    final int limit = scans.size();
    int from = 0;
    // Slice [from, to) windows of at most batchSize scans; `to` is clamped to
    // `limit`, so `from` lands exactly on `limit` and the loop terminates
    // (including the empty-list case, where it never runs).
    while (from != limit) {
      final int to = Math.min(limit, from + batchSize);
      // We have to copy the list because it is mutated later
      final List<TableScanOperator> subList = new ArrayList<>(scans.subList(from, to));
      batches.add(subList);
      from = to;
    }
  }

  return batches;
}
+
private boolean runSharedWorkOptimization(ParseContext pctx,
SharedWorkOptimizerCache optimizerCache, List<TableScanOperator> scans,
Mode mode) throws SemanticException {
boolean ret = false;
@@ -1111,9 +1140,9 @@ private int cmpDataSize(TableScanOperator o1,
TableScanOperator o2) {
}
}
- private static ArrayListMultimap<String, TableScanOperator>
splitTableScanOpsByTable(
+ private static ListMultimap<String, TableScanOperator>
splitTableScanOpsByTable(
ParseContext pctx) {
- ArrayListMultimap<String, TableScanOperator> tableNameToOps =
ArrayListMultimap.create();
+ ListMultimap<String, TableScanOperator> tableNameToOps =
ArrayListMultimap.create();
// Sort by operator ID so we get deterministic results
TSComparator comparator = new TSComparator();
Queue<TableScanOperator> sortedTopOps = new PriorityQueue<>(comparator);
diff --git
a/ql/src/test/queries/clientpositive/sharedwork_map_side_aggregations.q
b/ql/src/test/queries/clientpositive/sharedwork_map_side_aggregations.q
new file mode 100644
index 00000000000..fb1057fdc87
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/sharedwork_map_side_aggregations.q
@@ -0,0 +1,24 @@
+--! qt:dataset:src
+
+set hive.optimize.shared.work.max.siblings=3;
+
+EXPLAIN
+SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num), SUM(t4.num),
SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key;
+
+SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num), SUM(t4.num),
SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key;
diff --git
a/ql/src/test/results/clientpositive/llap/sharedwork_map_side_aggregations.q.out
b/ql/src/test/results/clientpositive/llap/sharedwork_map_side_aggregations.q.out
new file mode 100644
index 00000000000..bbe864529e2
--- /dev/null
+++
b/ql/src/test/results/clientpositive/llap/sharedwork_map_side_aggregations.q.out
@@ -0,0 +1,495 @@
+PREHOOK: query: EXPLAIN
+SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num), SUM(t4.num),
SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num), SUM(t4.num),
SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 10 <- Map 1 (SIMPLE_EDGE)
+ Reducer 11 <- Map 1 (SIMPLE_EDGE)
+ Reducer 13 <- Map 12 (SIMPLE_EDGE)
+ Reducer 16 <- Map 15 (SIMPLE_EDGE)
+ Reducer 17 <- Map 15 (SIMPLE_EDGE)
+ Reducer 18 <- Map 15 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+ Reducer 8 <- Reducer 16 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((key like '%0%') or ((key like '%3%') and (key
like '%0%') and key is not null) or ((key like '%2%') and (key like '%0%') and
key is not null)) (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key like '%0%') (type: boolean)
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic
stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key like '%3%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: ((key like '%2%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 12
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: (((key like '%6%') and (key like '%0%') and key
is not null) or ((key like '%1%') and (key like '%0%') and key is not null))
(type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key like '%6%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: ((key like '%1%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 15
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: (((key like '%7%') and (key like '%0%') and key
is not null) or ((key like '%4%') and (key like '%0%') and key is not null) or
((key like '%5%') and (key like '%0%') and key is not null)) (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key like '%7%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: ((key like '%4%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: ((key like '%5%') and (key like '%0%') and key
is not null) (type: boolean)
+ Statistics: Num rows: 125 Data size: 10875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 10
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 11
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 13
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 16
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 17
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 18
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 125 Data size: 11875 Basic stats:
COMPLETE Column stats: COMPLETE
+ Dummy Store
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 375 Data size: 33633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 33633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4
+ Statistics: Num rows: 375 Data size: 36633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 36633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4, _col6
+ Statistics: Num rows: 375 Data size: 39633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 39633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type:
bigint), _col6 (type: bigint)
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4, _col6, _col8
+ Statistics: Num rows: 375 Data size: 42633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 42633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type:
bigint), _col6 (type: bigint), _col8 (type: bigint)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4, _col6, _col8, _col10
+ Statistics: Num rows: 375 Data size: 45633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 45633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type:
bigint), _col6 (type: bigint), _col8 (type: bigint), _col10 (type: bigint)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2, _col4, _col6, _col8, _col10,
_col12
+ Statistics: Num rows: 375 Data size: 48633 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 375 Data size: 48633 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col4 (type:
bigint), _col6 (type: bigint), _col8 (type: bigint), _col10 (type: bigint),
_col12 (type: bigint)
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col2, _col4, _col6, _col8, _col10, _col12,
_col14
+ Statistics: Num rows: 375 Data size: 19008 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(), sum(_col2), sum(_col4), sum(_col6),
sum(_col8), sum(_col10), sum(_col12), sum(_col14)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 64 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint),
_col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), sum(VALUE._col1),
sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5),
sum(VALUE._col6), sum(VALUE._col7)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num),
SUM(t4.num), SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*), SUM(t1.num), SUM(t2.num), SUM(t3.num),
SUM(t4.num), SUM(t5.num), SUM(t6.num), SUM(t7.num)
+FROM (SELECT key, count(*) AS num FROM src WHERE key LIKE '%0%' GROUP BY key)
t0
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%1%'
GROUP BY key) t1 ON t0.key = t1.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%2%'
GROUP BY key) t2 ON t0.key = t2.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%3%'
GROUP BY key) t3 ON t0.key = t3.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%4%'
GROUP BY key) t4 ON t0.key = t4.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%5%'
GROUP BY key) t5 ON t0.key = t5.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%6%'
GROUP BY key) t6 ON t0.key = t6.key
+LEFT OUTER JOIN (SELECT key, count(*) AS num FROM src WHERE key LIKE '%7%'
GROUP BY key) t7 ON t0.key = t7.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+49 22 28 26 31 5 9 10