This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 4d6d0ed1409 HIVE-27788: Exception when join has 2 Group By operators in the same branch in the same reducer (Krisztian Kasa, reviewed by Stamatis Zampetakis)
4d6d0ed1409 is described below
commit 4d6d0ed140901fbcd1b3a37699b838e2ba6be689
Author: Krisztian Kasa <[email protected]>
AuthorDate: Wed Nov 22 05:03:41 2023 +0100
HIVE-27788: Exception when join has 2 Group By operators in the same branch in the same reducer (Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
.../apache/hadoop/hive/ql/exec/OperatorUtils.java | 41 +++++
.../hive/ql/optimizer/ConvertJoinMapJoin.java | 17 ++
.../hadoop/hive/ql/exec/TestOperatorUtils.java | 168 ++++++++++++++++++
.../clientpositive/auto_sortmerge_join_17.q | 20 +++
.../llap/auto_sortmerge_join_17.q.out | 195 +++++++++++++++++++++
.../clientpositive/llap/subquery_in_having.q.out | 182 ++++++++++---------
6 files changed, 541 insertions(+), 82 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
index 3924164166b..eb10e9aa6cd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
@@ -176,6 +176,47 @@ public class OperatorUtils {
return found;
}
+ /**
+ * Check whether the specified operator tree branch contains more operators of the tracked type than the given limit
+ * before a ReduceSinkOperator is reached.
+ * The method traverses the parent operators of the specified root operator in a depth-first manner.
+ * @param start root of the operator tree to check
+ * @param opClazz type of operator to track
+ * @param limit maximum allowed number of operators in a branch of the tree
+ * @return true if the limit is exceeded, false otherwise
+ */
+ public static boolean hasMoreOperatorsThan(
+ Operator<?> start, Class<?> opClazz, int limit) {
+ return hasMoreOperatorsThan(start, opClazz, limit, new HashSet<>());
+ }
+
+ private static boolean hasMoreOperatorsThan(
+ Operator<?> start, Class<?> opClazz, int limit, Set<Operator<?>> visited) {
+ if (!visited.add(start)) {
+ return limit < 0;
+ }
+
+ if (limit < 0) {
+ return false;
+ }
+
+ if (start instanceof ReduceSinkOperator) {
+ return false;
+ }
+
+ if (opClazz.isInstance(start)) {
+ limit--;
+ }
+
+ if (start.getParentOperators() != null) {
+ for (Operator<?> parent : start.getParentOperators()) {
+ if (hasMoreOperatorsThan(parent, opClazz, limit, visited)) {
+ return true;
+ }
+ }
+ }
+ return limit < 0;
+ }
public static void setChildrenCollector(List<Operator<? extends OperatorDesc>> childOperators, OutputCollector out) {
if (childOperators == null) {
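For orientation, here is a minimal, self-contained sketch of the traversal added above, with a hypothetical Node type standing in for Hive's Operator<?> and the string "RS" marking the ReduceSinkOperator boundary. It illustrates the budget-decrementing depth-first walk; it is not Hive code.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical stand-in for Hive's Operator<?>, for illustration only.
final class Node {
    final String type;                        // e.g. "GBY", "SEL", "RS"
    final List<Node> parents = new ArrayList<>();
    Node(String type) { this.type = type; }
}

public class BranchCountSketch {
    // True when some branch upstream of 'start' contains more than 'limit'
    // nodes of the tracked type before an "RS" boundary is reached.
    static boolean hasMoreThan(Node start, String tracked, int limit, Set<Node> visited) {
        if (!visited.add(start)) {
            return limit < 0;                 // revisited node: report current state
        }
        if (limit < 0) {
            return false;                     // the frame that overspent reports it
        }
        if ("RS".equals(start.type)) {
            return false;                     // stop at the reducer boundary
        }
        if (tracked.equals(start.type)) {
            limit--;                          // consume one unit of the budget
        }
        for (Node parent : start.parents) {
            if (hasMoreThan(parent, tracked, limit, visited)) {
                return true;
            }
        }
        return limit < 0;
    }

    public static void main(String[] args) {
        // Build RS <- GBY <- SEL <- GBY, i.e. two GBYs in the same reducer.
        Node rs = new Node("RS");
        Node gby2 = new Node("GBY");
        gby2.parents.add(rs);
        Node sel = new Node("SEL");
        sel.parents.add(gby2);
        Node gby1 = new Node("GBY");
        gby1.parents.add(sel);
        // Limit 1 is exceeded: prints true.
        System.out.println(hasMoreThan(gby1, "GBY", 1, new HashSet<>()));
    }
}

Note how a frame that overspends the budget reports the overflow itself (the final return limit < 0), which is why the early return false for a negative limit in a child frame does not lose the result.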
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 57299f95699..98a80ab603e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -83,6 +83,8 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.math.DoubleMath;
+import static org.apache.hadoop.hive.ql.exec.OperatorUtils.hasMoreOperatorsThan;
+
/**
* ConvertJoinMapJoin is an optimization that replaces a common join
* (aka shuffle join) with a map join (aka broadcast or fragment replicate
@@ -755,6 +757,21 @@ public class ConvertJoinMapJoin implements SemanticNodeProcessor {
LOG.debug("External table {} found in join and also could not provide statistics - disabling SMB join.", sb);
return false;
}
+ // GBY operators buffer one record each. These are processed when ReduceRecordSource flushes the operator tree
+ // once the end of the record stream is reached. If the tree has more than one GBY operator,
+ // CommonMergeJoinOperator cannot process all buffered records.
+ // HIVE-27788
+ if (parentOp.getParentOperators() != null) {
+ // The parent operator is an RS and hasMoreOperatorsThan traverses until the next RS, so we start from the grandparent
+ for (Operator<?> grandParent : parentOp.getParentOperators()) {
+ if (hasMoreOperatorsThan(grandParent, GroupByOperator.class, 1)) {
+ LOG.info("We cannot convert to SMB join " +
+ "because one of the join branches has more than one Group By operator in the same reducer.");
+ return false;
+ }
+ }
+ }
+
// each side better have 0 or more RS. if either side is unbalanced, cannot convert.
// This is a workaround for now. Right fix would be to refactor code in the
// MapRecordProcessor and ReduceRecordProcessor with respect to the sources.
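The buffering behavior the new comment describes can be illustrated with a deliberately simplified toy (plain Java, invented names, not Hive code): a stage standing in for a reduce-side GBY absorbs each record it sees and forwards its held record only when flushed at end of stream. A merge join that must match rows while both sides are still streaming cannot rely on rows that materialize only during the flush cascade, and each additional GBY in the branch pushes delivery one hop further into that cascade.

import java.util.ArrayList;
import java.util.List;

// Deliberately simplified toy (not Hive code): shows how a buffering stage
// hides records from downstream operators until it is flushed.
public class BufferedStageToy {

    interface Receiver { void process(String rec); }

    // Stands in for a reduce-side GroupByOperator: holds the current record
    // and forwards it only on flush().
    static final class BufferingStage implements Receiver {
        private final Receiver child;
        private String held;
        BufferingStage(Receiver child) { this.child = child; }
        @Override public void process(String rec) { held = rec; }
        void flush() {
            if (held != null) { child.process(held); held = null; }
        }
    }

    // Stands in for the join side that observes the record stream.
    static final class Sink implements Receiver {
        final List<String> seen = new ArrayList<>();
        @Override public void process(String rec) { seen.add(rec); }
    }

    public static void main(String[] args) {
        Sink join = new Sink();
        BufferingStage gby2 = new BufferingStage(join);   // second GBY in the branch
        BufferingStage gby1 = new BufferingStage(gby2);   // first GBY in the branch

        gby1.process("row");
        System.out.println("seen while streaming: " + join.seen);    // []

        gby1.flush();   // record moves only one hop, into the second stage's buffer
        System.out.println("seen after first flush: " + join.seen);  // still []

        gby2.flush();   // a second flush hop is needed before the join sees anything
        System.out.println("seen after second flush: " + join.seen); // [row]
    }
}

Real flushing in Hive cascades through Operator.close() and is more involved; the toy only makes the point that two stacked buffering stages need two flush hops, which is exactly the shape the new guard rejects.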
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperatorUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperatorUtils.java
new file mode 100644
index 00000000000..6eec95a23ad
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperatorUtils.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.junit.jupiter.api.Test;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.singletonList;
+import static org.junit.jupiter.api.Assertions.*;
+
+class TestOperatorUtils {
+ @Test
+ void testHasMoreGBYsReturnsFalseWhenLimitIs0() {
+ // RS-SEL-LIM-FIL
+ CompilationOpContext context = new CompilationOpContext();
+ Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+ Operator<?> limit = OperatorFactory.get(context, LimitDesc.class);
+ filter.setParentOperators(singletonList(limit));
+ Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+ limit.setParentOperators(singletonList(select));
+ Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+ select.setParentOperators(singletonList(rs));
+
+ assertFalse(OperatorUtils.hasMoreOperatorsThan(filter, GroupByOperator.class, 0));
+ }
+
+ @Test
+ void testHasMoreGBYsReturnsFalseWhenNoGBYInBranchAndLimitIsMoreThan0() {
+ // RS-SEL-LIM-FIL
+ CompilationOpContext context = new CompilationOpContext();
+ Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+ Operator<?> limit = OperatorFactory.get(context, LimitDesc.class);
+ filter.setParentOperators(singletonList(limit));
+ Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+ limit.setParentOperators(singletonList(select));
+ Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+ select.setParentOperators(singletonList(rs));
+
+ assertFalse(OperatorUtils.hasMoreOperatorsThan(filter, GroupByOperator.class, 1));
+ }
+
+ @Test
+ void testHasMoreGBYsReturnsFalseWhenNumberOfGBYIsLessThanLimit() {
+ // RS-GBY-SEL-LIM-FIL
+ CompilationOpContext context = new CompilationOpContext();
+ Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+ Operator<?> limit = OperatorFactory.get(context, LimitDesc.class);
+ filter.setParentOperators(singletonList(limit));
+ Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+ limit.setParentOperators(singletonList(select));
+ Operator<?> gby = OperatorFactory.get(context, GroupByDesc.class);
+ select.setParentOperators(singletonList(gby));
+ Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+ gby.setParentOperators(singletonList(rs));
+
+ assertFalse(OperatorUtils.hasMoreOperatorsThan(filter, GroupByOperator.class, 1));
+ }
+
+ @Test
+ void testHasMoreGBYsReturnsTrueWhenNumberOfGBYIsEqualsWithLimit() {
+ // RS-GBY-FIL-SEL-GBY
+ CompilationOpContext context = new CompilationOpContext();
+ Operator<?> gby1 = OperatorFactory.get(context, GroupByDesc.class);
+ Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+ gby1.setParentOperators(singletonList(select));
+ Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+ select.setParentOperators(singletonList(filter));
+ Operator<?> gby2 = OperatorFactory.get(context, GroupByDesc.class);
+ filter.setParentOperators(singletonList(gby2));
+ Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+ gby2.setParentOperators(singletonList(rs));
+
+ assertTrue(OperatorUtils.hasMoreOperatorsThan(gby1, GroupByOperator.class, 1));
+ }
+
+ @Test
+ void testHasMoreGBYsReturnsFalseWhenNumberOfGBYIsEqualsWithLimitButHasAnRSInTheMiddle() {
+ // TS-GBY-RS-SEL-GBY
+ CompilationOpContext context = new CompilationOpContext();
+ Operator<?> gby1 = OperatorFactory.get(context, GroupByDesc.class);
+ Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+ gby1.setParentOperators(singletonList(select));
+ Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+ select.setParentOperators(singletonList(rs));
+ Operator<?> gby2 = OperatorFactory.get(context, GroupByDesc.class);
+ rs.setParentOperators(singletonList(gby2));
+ Operator<?> ts = OperatorFactory.get(context, TableScanDesc.class);
+ gby2.setParentOperators(singletonList(ts));
+
+ assertFalse(OperatorUtils.hasMoreOperatorsThan(gby1, GroupByOperator.class, 1));
+ }
+
+ @Test
+ void testHasMoreGBYsReturnsTrueWhenBranchHasJoinAndNumberOfGBYIsEqualsWithLimit() {
+ // RS-GBY-FIL--JOIN-GBY
+ // RS-SEL-/
+ CompilationOpContext context = new CompilationOpContext();
+ Operator<?> gby1 = OperatorFactory.get(context, GroupByDesc.class);
+ Operator<?> join = OperatorFactory.get(context, CommonMergeJoinDesc.class);
+ gby1.setParentOperators(singletonList(join));
+
+ // Join branch #1 has the second GBY
+ Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+ Operator<?> gby2 = OperatorFactory.get(context, GroupByDesc.class);
+ filter.setParentOperators(singletonList(gby2));
+ Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+ gby2.setParentOperators(singletonList(rs));
+
+ // Join branch #2
+ Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+ Operator<?> rs2 = OperatorFactory.get(context, ReduceSinkDesc.class);
+ select.setParentOperators(singletonList(rs2));
+
+ join.setParentOperators(asList(filter, select));
+
+ assertTrue(OperatorUtils.hasMoreOperatorsThan(gby1, GroupByOperator.class, 1));
+ }
+
+ @Test
+ void testHasMoreGBYsReturnsFalseWhenBranchHasJoinAndBothJoinBranchesHasLessGBYThanLimit() {
+ // RS-GBY-SEL--JOIN
+ // RS-GBY-FIL-/
+ CompilationOpContext context = new CompilationOpContext();
+ Operator<?> join = OperatorFactory.get(context, CommonMergeJoinDesc.class);
+
+ // Join branch #1
+ Operator<?> filter = OperatorFactory.get(context, FilterDesc.class);
+ Operator<?> gby1 = OperatorFactory.get(context, GroupByDesc.class);
+ filter.setParentOperators(singletonList(gby1));
+ Operator<?> rs = OperatorFactory.get(context, ReduceSinkDesc.class);
+ gby1.setParentOperators(singletonList(rs));
+
+ // Join branch #2
+ Operator<?> select = OperatorFactory.get(context, SelectDesc.class);
+ Operator<?> gby2 = OperatorFactory.get(context, GroupByDesc.class);
+ select.setParentOperators(singletonList(gby2));
+ Operator<?> rs2 = OperatorFactory.get(context, ReduceSinkDesc.class);
+ gby2.setParentOperators(singletonList(rs2));
+
+ join.setParentOperators(asList(filter, select));
+
+ assertFalse(OperatorUtils.hasMoreOperatorsThan(join, GroupByOperator.class, 1));
+ }
+}
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_17.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_17.q
new file mode 100644
index 00000000000..0d6a5deadb0
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_17.q
@@ -0,0 +1,20 @@
+CREATE TABLE tbl1_n5(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+insert into tbl1_n5(key, value)
+values
+(0, 'val_0'),
+(2, 'val_2'),
+(9, 'val_9');
+
+explain
+SELECT t1.key from
+(SELECT key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1;
+
+SELECT t1.key from
+(SELECT key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_17.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_17.q.out
new file mode 100644
index 00000000000..80527eb6240
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_17.q.out
@@ -0,0 +1,195 @@
+PREHOOK: query: CREATE TABLE tbl1_n5(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1_n5
+POSTHOOK: query: CREATE TABLE tbl1_n5(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1_n5
+PREHOOK: query: insert into tbl1_n5(key, value)
+values
+(0, 'val_0'),
+(2, 'val_2'),
+(9, 'val_9')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl1_n5
+POSTHOOK: query: insert into tbl1_n5(key, value)
+values
+(0, 'val_0'),
+(2, 'val_2'),
+(9, 'val_9')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl1_n5
+POSTHOOK: Lineage: tbl1_n5.key SCRIPT []
+POSTHOOK: Lineage: tbl1_n5.value SCRIPT []
+PREHOOK: query: explain
+SELECT t1.key from
+(SELECT key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1_n5
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+SELECT t1.key from
+(SELECT key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1_n5
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl1_n5
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: key (type: int), value (type: string)
+ null sort order: aa
+ sort order: +-
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: int), value (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 DESC NULLS FIRST
+ partition by: _col0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: row_number_window_0
+ name: row_number
+ window function: GenericUDAFRowNumberEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (row_number_window_0 = 1) (type: boolean)
+ Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT t1.key from
+(SELECT key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1_n5
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.key from
+(SELECT key , row_number() over(partition by key order by value desc) as rk from tbl1_n5) t1
+join
+( SELECT key,count(distinct value) as cp_count from tbl1_n5 group by key) t2
+on t1.key = t2.key where rk = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1_n5
+#### A masked pattern was here ####
+0
+2
+9
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index cd5989f5cd9..bf88614de22 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -282,7 +282,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -308,17 +310,6 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: part_subq
- filterExpr: p_mfgr is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(p_size), min(p_size)
keys: p_mfgr (type: string)
@@ -336,6 +327,45 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1)
@@ -352,37 +382,16 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
- mode: final
+ minReductionHashAggr: 0.99
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Dummy Store
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0), count(VALUE._col1)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator
@@ -425,7 +434,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -451,17 +462,6 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map 3
- Map Operator Tree:
- TableScan
- alias: part_subq
- filterExpr: p_mfgr is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: p_mfgr is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(p_size), min(p_size)
keys: p_mfgr (type: string)
@@ -479,6 +479,45 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1)
@@ -495,37 +534,16 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: string)
- mode: final
+ minReductionHashAggr: 0.99
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Dummy Store
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0), count(VALUE._col1)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Left Semi Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Stage: Stage-0
Fetch Operator