This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d6d0ed1409 HIVE-27788: Exception when join has 2 Group By operators in the same branch in the same reducer (Krisztian Kasa, reviewed by Stamatis Zampetakis)
4d6d0ed1409 is described below

commit 4d6d0ed140901fbcd1b3a37699b838e2ba6be689
Author: Krisztian Kasa <[email protected]>
AuthorDate: Wed Nov 22 05:03:41 2023 +0100

    HIVE-27788: Exception when join has 2 Group By operators in the same branch in the same reducer (Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
 .../apache/hadoop/hive/ql/exec/OperatorUtils.java  |  41 +++++
 .../hive/ql/optimizer/ConvertJoinMapJoin.java      |  17 ++
 .../hadoop/hive/ql/exec/TestOperatorUtils.java     | 168 ++++++++++++++++++
 .../clientpositive/auto_sortmerge_join_17.q        |  20 +++
 .../llap/auto_sortmerge_join_17.q.out              | 195 +++++++++++++++++++++
 .../clientpositive/llap/subquery_in_having.q.out   | 182 ++++++++++---------
 6 files changed, 541 insertions(+), 82 deletions(-)
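
For context, a simplified and illustrative sketch of the reducer shape that triggers
the exception, reconstructed from the subquery_in_having.q.out change further below:
before this fix, a single reducer could hold two Group By operators on the same branch
together with the merge join itself, roughly:

    RS -> GBY (mergepartial) -> SEL -> GBY (final) -> Dummy Store
    RS -> GBY (mergepartial) -> SEL -> Merge Join Operator -> File Output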

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
index 3924164166b..eb10e9aa6cd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
@@ -176,6 +176,47 @@ public class OperatorUtils {
     return found;
   }
 
+  /**
+   * Check whether there are more operators of the given type in the specified operator tree branch
+   * than the given limit before a ReduceSinkOperator is reached.
+   * The method traverses the parent operators of the specified root operator in a depth-first manner.
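+   * <p>Example: for a branch RS-GBY-FIL, starting the traversal at the filter and counting
+   * {@code GroupByOperator}s, a limit of 0 is exceeded (the method returns {@code true}),
+   * while a limit of 1 is not ({@code false}).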
+   * @param start root of the operator tree to check
+   * @param opClazz type of operator to track
+   * @param limit maximum allowed number of operators in a branch of the tree
+   * @return true if the limit is exceeded, false otherwise
+   */
+  public static boolean hasMoreOperatorsThan(
+      Operator<?> start, Class<?> opClazz, int limit) {
+    return hasMoreOperatorsThan(start, opClazz, limit, new HashSet<>());
+  }
+
+  private static boolean hasMoreOperatorsThan(
+      Operator<?> start, Class<?> opClazz, int limit, Set<Operator<?>> visited) {
+    if (!visited.add(start)) {
+      return limit < 0;
+    }
+
+    if (limit < 0) {
+      return false;
+    }
+
+    if (start instanceof ReduceSinkOperator) {
+      return false;
+    }
+
+    if (opClazz.isInstance(start)) {
+      limit--;
+    }
+
+    if (start.getParentOperators() != null) {
+      for (Operator<?> parent : start.getParentOperators()) {
+        if (hasMoreOperatorsThan(parent, opClazz, limit, visited)) {
+          return true;
+        }
+      }
+    }
+    return limit < 0;
+  }
 
  public static void setChildrenCollector(List<Operator<? extends OperatorDesc>> childOperators, OutputCollector out) {
     if (childOperators == null) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 57299f95699..98a80ab603e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -83,6 +83,8 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.math.DoubleMath;
 
+import static org.apache.hadoop.hive.ql.exec.OperatorUtils.hasMoreOperatorsThan;
+
 /**
  * ConvertJoinMapJoin is an optimization that replaces a common join
  * (aka shuffle join) with a map join (aka broadcast or fragment replicate
@@ -755,6 +757,21 @@ public class ConvertJoinMapJoin implements SemanticNodeProcessor {
         LOG.debug("External table {} found in join and also could not provide 
statistics - disabling SMB join.", sb);
         return false;
       }
+      // A GBY operator buffers one record. Buffered records are processed when ReduceRecordSource
+      // flushes the operator tree once the end of the record stream is reached. If the tree has
+      // more than one GBY operator, CommonMergeJoinOperator cannot process all buffered records.
+      // HIVE-27788
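+      // Illustrative example: in the old subquery_in_having plan (removed further below), one
+      // reducer held an RS-GBY-SEL-GBY branch together with the merge join and hit this case.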
+      if (parentOp.getParentOperators() != null) {
+        // The parent operator is an RS and hasMoreOperatorsThan traverses until the next RS, so we start from the grandparent
+        for (Operator<?> grandParent : parentOp.getParentOperators()) {
+          if (hasMoreOperatorsThan(grandParent, GroupByOperator.class, 1)) {
+            LOG.info("We cannot convert to SMB join " +
+                "because one of the join branches has more than one Group by 
operators in the same reducer.");
+            return false;
+          }
+        }
+      }
+
      // each side better have 0 or more RS. if either side is unbalanced, cannot convert.
      // This is a workaround for now. Right fix would be to refactor code in the
      // MapRecordProcessor and ReduceRecordProcessor with respect to the sources.
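
A minimal sketch of exercising the new utility directly, mirroring the unit tests added
below (assumes the Hive ql test classpath and the OperatorFactory wiring used in
TestOperatorUtils; illustrative only):

    CompilationOpContext context = new CompilationOpContext();
    Operator<?> gby = OperatorFactory.get(context, GroupByDesc.class);
    Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
    // the traversal walks parent operators and stops at the ReduceSinkOperator boundary
    gby.setParentOperators(java.util.Collections.singletonList(rs));
    // one GBY before the RS boundary: a limit of 0 is exceeded, a limit of 1 is not
    OperatorUtils.hasMoreOperatorsThan(gby, GroupByOperator.class, 0); // true
    OperatorUtils.hasMoreOperatorsThan(gby, GroupByOperator.class, 1); // false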
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperatorUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperatorUtils.java
new file mode 100644
index 00000000000..6eec95a23ad
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperatorUtils.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.junit.jupiter.api.Test;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.singletonList;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestOperatorUtils {
+  @Test
+  void testHasMoreGBYsReturnsFalseWhenLimitIs0() {
+    // RS-SEL-LIM-FIL
+    CompilationOpContext context = new CompilationOpContext();
+    Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+    Operator<?> limit = OperatorFactory.get(context, LimitDesc.class);
+    filter.setParentOperators(singletonList(limit));
+    Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+    limit.setParentOperators(singletonList(select));
+    Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+    select.setParentOperators(singletonList(rs));
+
+    assertFalse(OperatorUtils.hasMoreOperatorsThan(filter, GroupByOperator.class, 0));
+  }
+
+  @Test
+  void testHasMoreGBYsReturnsFalseWhenNoGBYInBranchAndLimitIsMoreThan0() {
+    // RS-SEL-LIM-FIL
+    CompilationOpContext context = new CompilationOpContext();
+    Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+    Operator<?> limit = OperatorFactory.get(context, LimitDesc.class);
+    filter.setParentOperators(singletonList(limit));
+    Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+    limit.setParentOperators(singletonList(select));
+    Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+    select.setParentOperators(singletonList(rs));
+
+    assertFalse(OperatorUtils.hasMoreOperatorsThan(filter, GroupByOperator.class, 1));
+  }
+
+  @Test
+  void testHasMoreGBYsReturnsFalseWhenNumberOfGBYIsLessThanLimit() {
+    // RS-GBY-SEL-LIM-FIL
+    CompilationOpContext context = new CompilationOpContext();
+    Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+    Operator<?> limit = OperatorFactory.get(context, LimitDesc.class);
+    filter.setParentOperators(singletonList(limit));
+    Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+    limit.setParentOperators(singletonList(select));
+    Operator<?> gby = OperatorFactory.get(context, GroupByDesc.class);
+    select.setParentOperators(singletonList(gby));
+    Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+    gby.setParentOperators(singletonList(rs));
+
+    assertFalse(OperatorUtils.hasMoreOperatorsThan(filter, GroupByOperator.class, 1));
+  }
+
+  @Test
+  void testHasMoreGBYsReturnsTrueWhenNumberOfGBYIsEqualsWithLimit() {
+    // RS-GBY-FIL-SEL-GBY
+    CompilationOpContext context = new CompilationOpContext();
+    Operator<?> gby1 = OperatorFactory.get(context, GroupByDesc.class);
+    Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+    gby1.setParentOperators(singletonList(select));
+    Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+    select.setParentOperators(singletonList(filter));
+    Operator<?> gby2 = OperatorFactory.get(context, GroupByDesc.class);
+    filter.setParentOperators(singletonList(gby2));
+    Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+    gby2.setParentOperators(singletonList(rs));
+
+    assertTrue(OperatorUtils.hasMoreOperatorsThan(gby1, GroupByOperator.class, 1));
+  }
+
+  @Test
+  void testHasMoreGBYsReturnsFalseWhenNumberOfGBYIsEqualsWithLimitButHasAnRSInTheMiddle() {
+    // TS-GBY-RS-SEL-GBY
+    CompilationOpContext context = new CompilationOpContext();
+    Operator<?> gby1 = OperatorFactory.get(context, GroupByDesc.class);
+    Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+    gby1.setParentOperators(singletonList(select));
+    Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+    select.setParentOperators(singletonList(rs));
+    Operator<?> gby2 = OperatorFactory.get(context, GroupByDesc.class);
+    rs.setParentOperators(singletonList(gby2));
+    Operator<?> ts = OperatorFactory.get(context, TableScanDesc.class);
+    gby2.setParentOperators(singletonList(ts));
+
+    assertFalse(OperatorUtils.hasMoreOperatorsThan(gby1, GroupByOperator.class, 1));
+  }
+
+  @Test
+  void testHasMoreGBYsReturnsTrueWhenBranchHasJoinAndNumberOfGBYIsEqualsWithLimit() {
+    // RS-GBY-FIL--JOIN-GBY
+    //     RS-SEL-/
+    CompilationOpContext context = new CompilationOpContext();
+    Operator<?> gby1 = OperatorFactory.get(context, GroupByDesc.class);
+    Operator<?> join = OperatorFactory.get(context, CommonMergeJoinDesc.class);
+    gby1.setParentOperators(singletonList(join));
+
+    // Join branch #1 has the second GBY
+    Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+    Operator<?> gby2 = OperatorFactory.get(context, GroupByDesc.class);
+    filter.setParentOperators(singletonList(gby2));
+    Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+    gby2.setParentOperators(singletonList(rs));
+
+    // Join branch #2
+    Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+    Operator<?> rs2 = OperatorFactory.get(context, ReduceSinkDesc.class);
+    select.setParentOperators(singletonList(rs2));
+
+    join.setParentOperators(asList(filter, select));
+
+    assertTrue(OperatorUtils.hasMoreOperatorsThan(gby1, GroupByOperator.class, 1));
+  }
+
+  @Test
+  void testHasMoreGBYsReturnsFalseWhenBranchHasJoinAndBothJoinBranchesHasLessGBYThanLimit() {
+    // RS-GBY-SEL--JOIN
+    // RS-GBY-FIL-/
+    CompilationOpContext context = new CompilationOpContext();
+    Operator<?> join = OperatorFactory.get(context, CommonMergeJoinDesc.class);
+
+    // Join branch #1
+    Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+    Operator<?> gby1 = OperatorFactory.get(context, GroupByDesc.class);
+    filter.setParentOperators(singletonList(gby1));
+    Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+    gby1.setParentOperators(singletonList(rs));
+
+    // Join branch #2
+    Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+    Operator<?> gby2 = OperatorFactory.get(context, GroupByDesc.class);
+    select.setParentOperators(singletonList(gby2));
+    Operator<?> rs2 = OperatorFactory.get(context, ReduceSinkDesc.class);
+    gby2.setParentOperators(singletonList(rs2));
+
+    join.setParentOperators(asList(filter, select));
+
+    assertFalse(OperatorUtils.hasMoreOperatorsThan(join, GroupByOperator.class, 1));
+  }
+}
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_17.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_17.q
new file mode 100644
index 00000000000..0d6a5deadb0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_17.q
@@ -0,0 +1,20 @@
+CREATE TABLE tbl1_n5(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert into tbl1_n5(key, value)
+values
+(0, 'val_0'),
+(2, 'val_2'),
+(9, 'val_9');
+
+explain
+SELECT t1.key from
+(SELECT  key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1;
+
+SELECT t1.key from
+(SELECT  key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_17.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_17.q.out
new file mode 100644
index 00000000000..80527eb6240
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_17.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: CREATE TABLE tbl1_n5(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1_n5
+POSTHOOK: query: CREATE TABLE tbl1_n5(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1_n5
+PREHOOK: query: insert into tbl1_n5(key, value)
+values
+(0, 'val_0'),
+(2, 'val_2'),
+(9, 'val_9')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl1_n5
+POSTHOOK: query: insert into tbl1_n5(key, value)
+values
+(0, 'val_0'),
+(2, 'val_2'),
+(9, 'val_9')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl1_n5
+POSTHOOK: Lineage: tbl1_n5.key SCRIPT []
+POSTHOOK: Lineage: tbl1_n5.value SCRIPT []
+PREHOOK: query: explain
+SELECT t1.key from
+(SELECT  key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1_n5
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+SELECT t1.key from
+(SELECT  key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1_n5
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl1_n5
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: key (type: int), value (type: string)
+                      null sort order: aa
+                      sort order: +-
+                      Map-reduce partition columns: key (type: int)
+                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: int), value (type: string)
+                      minReductionHashAggr: 0.4
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: string)
+                        null sort order: zz
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+                        Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col0: int, _col1: string
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 DESC NULLS FIRST
+                        partition by: _col0
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: row_number_window_0
+                              name: row_number
+                              window function: GenericUDAFRowNumberEvaluator
+                              window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+                              isPivotResult: true
+                  Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (row_number_window_0 = 1) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    keys: _col0 (type: int)
+                    minReductionHashAggr: 0.4
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT t1.key from
+(SELECT  key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1_n5
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.key from
+(SELECT  key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1_n5
+#### A masked pattern was here ####
+0
+2
+9
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index cd5989f5cd9..bf88614de22 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -282,7 +282,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -308,17 +310,6 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: part_subq
-                  filterExpr: p_mfgr is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: p_mfgr is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: max(p_size), min(p_size)
                       keys: p_mfgr (type: string)
@@ -336,6 +327,45 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    null sort order: z
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: double)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), min(VALUE._col1)
@@ -352,37 +382,16 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       keys: _col0 (type: string)
-                      mode: final
+                      minReductionHashAggr: 0.99
+                      mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                      Dummy Store
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0), count(VALUE._col1)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                  Merge Join Operator
-                    condition map:
-                         Left Semi Join 0 to 1
-                    keys:
-                      0 _col0 (type: string)
-                      1 _col0 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -425,7 +434,9 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -451,17 +462,6 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint), _col2 (type: bigint)
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: part_subq
-                  filterExpr: p_mfgr is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: p_mfgr is not null (type: boolean)
-                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: max(p_size), min(p_size)
                       keys: p_mfgr (type: string)
@@ -479,6 +479,45 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), count(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    null sort order: z
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: double)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), min(VALUE._col1)
@@ -495,37 +534,16 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       keys: _col0 (type: string)
-                      mode: final
+                      minReductionHashAggr: 0.99
+                      mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                      Dummy Store
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: sum(VALUE._col0), count(VALUE._col1)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                  Merge Join Operator
-                    condition map:
-                         Left Semi Join 0 to 1
-                    keys:
-                      0 _col0 (type: string)
-                      1 _col0 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        null sort order: z
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator

