This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f265cc25905 HIVE-27876 Incorrect query results on tables with
ClusterBy & SortBy (Ramesh Kumar Thangarajan, reviewed by Krisztian Kasa,
Attila Turoczy)
f265cc25905 is described below
commit f265cc25905d0bdbdc65a16720e33fb21ee79da9
Author: Ramesh Kumar <[email protected]>
AuthorDate: Wed Dec 20 01:02:36 2023 -0800
HIVE-27876 Incorrect query results on tables with ClusterBy & SortBy
(Ramesh Kumar Thangarajan, reviewed by Krisztian Kasa, Attila Turoczy)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 6 +-
.../queries/clientpositive/groupby_sort_2_23.q | 10 +
.../results/clientpositive/cbo_rp_auto_join1.q.out | 457 ++++++++++++++-------
.../llap/auto_sortmerge_join_10.q.out | 295 ++++++++-----
.../clientpositive/llap/bucket_groupby.q.out | 89 +++-
.../clientpositive/llap/groupby_sort_2_23.q.out | 180 ++++++++
6 files changed, 779 insertions(+), 258 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 714df4c22a9..1fa63ae3821 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2019,10 +2019,10 @@ public class HiveConf extends Configuration {
HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true,
"Whether to optimize multi group by query to generate single M/R job
plan. If the multi group by query has \n" +
"common group by keys, it will be optimized to generate single M/R
job."),
- HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true,
+ HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false,
"If the bucketing/sorting properties of the table exactly match the
grouping key, whether to perform \n" +
- "the group by in the mapper by using BucketizedHiveInputFormat. The
only downside to this\n" +
- "is that it limits the number of mappers to the number of files."),
+ "the group by in the mapper by using BucketizedHiveInputFormat. This
can only work if the number of files to be\n" +
+ "processed is exactly 1. The downside to this is that it limits the
number of mappers to the number of files."),
HIVE_DEFAULT_NULLS_LAST("hive.default.nulls.last", true,
"Whether to set NULLS LAST as the default null ordering for ASC order
and " +
"NULLS FIRST for DESC order."),
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_2_23.q b/ql/src/test/queries/clientpositive/groupby_sort_2_23.q
new file mode 100644
index 00000000000..b241bee6855
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_sort_2_23.q
@@ -0,0 +1,10 @@
+set hive.mapred.mode=nonstrict;
+set hive.map.aggr=true;
+set hive.explain.user=false;
+
+create table test_bucket(age int, name string, dept string) clustered by (age,
name) sorted by (age asc, name asc) into 2 buckets stored as ORC;
+insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');
+insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');
+
+explain vectorization detail select age, name, count(*) from test_bucket group
by age, name having count(*) > 1;
+select age, name, count(*) from test_bucket group by age, name having
count(*) > 1;
diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
index 8f3788d40fa..5bdf0edc2b4 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
@@ -92,8 +92,10 @@ POSTHOOK: Input: default@tbl2_n12
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-2 depends on stages: Stage-1, Stage-4
+ Stage-3 depends on stages: Stage-2
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -112,49 +114,53 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Group By Operator
aggregations: count()
+ bucketGroup: true
keys: key (type: int)
- mode: final
+ minReductionHashAggr: 0.99
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint)
- outputColumnNames: key, $f1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: key (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: $f1 (type: bigint)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: key, $f1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column
stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
- alias: subq1:b
- filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: key
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- keys: key (type: int)
- mode: final
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint)
- outputColumnNames: key, $f1
- Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: key (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: $f1 (type: bigint)
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: $f1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: $f1 (type: bigint)
Reduce Operator Tree:
Join Operator
condition map:
@@ -181,7 +187,7 @@ STAGE PLANS:
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-2
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
@@ -205,6 +211,50 @@ STAGE PLANS:
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: subq1:b
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: key
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ bucketGroup: true
+ keys: key (type: int)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: key, $f1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column
stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
Stage: Stage-0
Fetch Operator
limit: -1
@@ -255,7 +305,8 @@ POSTHOOK: Input: default@tbl2_n12
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -273,20 +324,42 @@ STAGE PLANS:
outputColumnNames: key
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Group By Operator
+ bucketGroup: true
keys: key (type: int)
- mode: final
+ minReductionHashAggr: 0.99
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: key
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: key (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 6 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: key
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column
stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE
Column stats: COMPLETE
TableScan
alias: subq2:subq1:b
filterExpr: key is not null (type: boolean)
@@ -325,7 +398,7 @@ STAGE PLANS:
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-2
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
@@ -421,16 +494,20 @@ POSTHOOK: Input: default@tbl1_n13
POSTHOOK: Input: default@tbl2_n12
#### A masked pattern was here ####
STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3, Stage-5
Stage-3 is a root stage
- Stage-1 depends on stages: Stage-3
- Stage-0 depends on stages: Stage-1
+ Stage-4 is a root stage
+ Stage-5 depends on stages: Stage-4, Stage-6
+ Stage-6 is a root stage
+ Stage-0 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-3
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
- alias: src1:subq1:a
+ alias: src2:subq2:a
filterExpr: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
@@ -442,23 +519,89 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Group By Operator
aggregations: count()
+ bucketGroup: true
keys: key (type: int)
- mode: final
+ minReductionHashAggr: 0.99
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint)
- outputColumnNames: key, $f1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: key (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: $f1 (type: bigint)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: key, $f1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column
stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
TableScan
- alias: src1:subq1:b
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: $f1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: $f1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: $f1 (type: bigint), $f10 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ 2 key (type: int)
+ outputColumnNames: key, $f1, $f10, $f11, $f100
+ Statistics: Num rows: 6 Data size: 216 Basic stats: COMPLETE Column
stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), ($f11 * $f100) (type: bigint), ($f1
* $f10) (type: bigint)
+ outputColumnNames: key, cnt1, cnt11
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src2:subq2:b
filterExpr: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
@@ -470,30 +613,27 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Group By Operator
aggregations: count()
+ bucketGroup: true
keys: key (type: int)
- mode: final
+ minReductionHashAggr: 0.99
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint)
- outputColumnNames: key, $f1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: key (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: $f1 (type: bigint)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: key, $f1, $f10
- Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column
stats: COMPLETE
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: key, $f1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column
stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -501,11 +641,11 @@ STAGE PLANS:
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-1
+ Stage: Stage-4
Map Reduce
Map Operator Tree:
TableScan
- alias: src2:subq2:a
+ alias: src1:subq1:a
filterExpr: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
@@ -517,23 +657,74 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Group By Operator
aggregations: count()
+ bucketGroup: true
keys: key (type: int)
- mode: final
+ minReductionHashAggr: 0.99
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint)
- outputColumnNames: key, $f1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: key (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: $f1 (type: bigint)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: key, $f1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column
stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
TableScan
- alias: src2:subq2:b
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: $f1 (type: bigint)
+ TableScan
+ Reduce Output Operator
+ key expressions: key (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: key (type: int)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: $f1 (type: bigint)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 key (type: int)
+ 1 key (type: int)
+ outputColumnNames: key, $f1, $f10
+ Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column
stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1:subq1:b
filterExpr: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Filter Operator
@@ -545,51 +736,33 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
Group By Operator
aggregations: count()
+ bucketGroup: true
keys: key (type: int)
- mode: final
+ minReductionHashAggr: 0.99
+ mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint)
- outputColumnNames: key, $f1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: key (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 6 Data size: 72 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: $f1 (type: bigint)
- TableScan
- Reduce Output Operator
- key expressions: key (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE
Column stats: COMPLETE
- value expressions: $f1 (type: bigint), $f10 (type: bigint)
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- keys:
- 0 key (type: int)
- 1 key (type: int)
- 2 key (type: int)
- outputColumnNames: key, $f1, $f10, $f11, $f100
- Statistics: Num rows: 6 Data size: 216 Basic stats: COMPLETE Column
stats: COMPLETE
- Select Operator
- expressions: key (type: int), ($f11 * $f100) (type: bigint), ($f1
* $f10) (type: bigint)
- outputColumnNames: key, cnt1, cnt11
- Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE
Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE
Column stats: COMPLETE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: key, $f1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column
stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
index 7b93c1d6a54..35538b91174 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
@@ -372,7 +372,9 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -384,11 +386,22 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
+ Group By Operator
+ bucketGroup: true
+ keys: key (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Dummy Store
+ Statistics: Num rows: 5 Data size: 20 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 20 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 5
Map Operator Tree:
TableScan
alias: a
@@ -397,31 +410,54 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: key (type: int)
- mode: final
+ Select Operator
+ expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 20 Basic stats:
COMPLETE Column stats: COMPLETE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- Statistics: Num rows: 7 Data size: 56 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- minReductionHashAggr: 0.85714287
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Execution mode: llap
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.85714287
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -492,6 +528,8 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -503,11 +541,22 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: int)
+ Group By Operator
+ bucketGroup: true
+ keys: key (type: int)
+ minReductionHashAggr: 0.4
+ mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Dummy Store
+ Statistics: Num rows: 5 Data size: 20 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 20 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 5
Map Operator Tree:
TableScan
alias: a
@@ -516,30 +565,53 @@ STAGE PLANS:
Filter Operator
predicate: (key < 6) (type: boolean)
Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: key (type: int)
- mode: final
+ Select Operator
+ expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 20 Basic stats:
COMPLETE Column stats: COMPLETE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col1 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: z
- sort order: +
- Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
- Execution mode: llap
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Statistics: Num rows: 7 Data size: 28 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -717,10 +789,29 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 40 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10 Data size: 40 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
Map Operator Tree:
TableScan
alias: t2
@@ -730,51 +821,50 @@ STAGE PLANS:
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 40 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
+ bucketGroup: true
keys: key (type: int)
- mode: final
+ minReductionHashAggr: 0.4
+ mode: hash
outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: true (type: boolean), _col0 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 48 Basic stats:
COMPLETE Column stats: COMPLETE
- Dummy Store
- Map Operator Tree:
- TableScan
- alias: t1
- Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6 Data size: 24 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ predicate: _col1 is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
- expressions: key (type: int)
+ expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 40 Basic stats:
COMPLETE Column stats: COMPLETE
- Merge Join Operator
- condition map:
- Left Outer Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col1 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 80 Basic stats:
COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col1 is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col0)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(_col0)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic
stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Execution mode: llap
- Reducer 2
+ value expressions: _col0 (type: bigint)
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -789,6 +879,25 @@ STAGE PLANS:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: true (type: boolean), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Statistics: Num rows: 6 Data size: 48 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: boolean)
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
index e0a6a183faa..f85c3bbf22c 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
@@ -1512,30 +1512,79 @@ POSTHOOK: Input: default@clustergroupby
POSTHOOK: Input: default@clustergroupby@ds=102
#### A masked pattern was here ####
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: clustergroupby
+ filterExpr: (ds = '102') (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: ++
+ keys: key (type: string), value (type: string)
+ null sort order: zz
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ top n: 10
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ bucketGroup: true
+ keys: key (type: string), value (type: string)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316 Data size: 58776 Basic
stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string),
_col1 (type: string)
+ Statistics: Num rows: 316 Data size: 58776 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 316 Data size: 58776 Basic stats:
COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 1860 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 950 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 950 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
- TableScan
- alias: clustergroupby
- filterExpr: (ds = '102') (type: boolean)
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
- Group By Operator
- aggregations: count()
- keys: key (type: string), value (type: string)
- mode: final
- outputColumnNames: _col0, _col1, _col2
- Limit
- Number of rows: 10
- Select Operator
- expressions: _col0 (type: string), _col2 (type: bigint)
- outputColumnNames: _col0, _col1
- ListSink
+ ListSink
PREHOOK: query: select key, count(1) from clustergroupby where ds='102'
group by key, value limit 10
PREHOOK: type: QUERY
@@ -1547,7 +1596,6 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby
POSTHOOK: Input: default@clustergroupby@ds=102
#### A masked pattern was here ####
-0 3
10 1
100 2
103 2
@@ -1555,8 +1603,9 @@ POSTHOOK: Input: default@clustergroupby@ds=102
105 1
11 1
111 1
-113 2
114 1
+0 3
+113 2
PREHOOK: query: drop table clustergroupby
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@clustergroupby
diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_2_23.q.out
b/ql/src/test/results/clientpositive/llap/groupby_sort_2_23.q.out
new file mode 100644
index 00000000000..780cb6ccd27
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/groupby_sort_2_23.q.out
@@ -0,0 +1,180 @@
+PREHOOK: query: create table test_bucket(age int, name string, dept string)
clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as
ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_bucket
+POSTHOOK: query: create table test_bucket(age int, name string, dept string)
clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as
ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_bucket
+PREHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2,
'user2' , 'dept2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_bucket
+POSTHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2,
'user2' , 'dept2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_bucket
+POSTHOOK: Lineage: test_bucket.age SCRIPT []
+POSTHOOK: Lineage: test_bucket.dept SCRIPT []
+POSTHOOK: Lineage: test_bucket.name SCRIPT []
+PREHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2,
'user2' , 'dept2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_bucket
+POSTHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2,
'user2' , 'dept2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_bucket
+POSTHOOK: Lineage: test_bucket.age SCRIPT []
+POSTHOOK: Lineage: test_bucket.dept SCRIPT []
+POSTHOOK: Lineage: test_bucket.name SCRIPT []
+PREHOOK: query: explain vectorization detail select age, name, count(*) from
test_bucket group by age, name having count(*) > 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bucket
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail select age, name, count(*) from
test_bucket group by age, name having count(*) > 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_bucket
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test_bucket
+ Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:age:int, 1:name:string,
2:dept:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>,
4:ROW__IS__DELETED:boolean]
+ Select Operator
+ expressions: age (type: int), name (type: string)
+ outputColumnNames: age, name
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 4 Data size: 372 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ bucketGroup: true
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:int, col 1:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: age (type: int), name (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 202 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type:
string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumns: 0:int, 1:string
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 2:bigint
+ Statistics: Num rows: 2 Data size: 202 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1]
+ dataColumns: age:int, name:string, dept:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez] IS true
+ reduceColumnNullOrder: zz
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY._col0:int, KEY._col1:string,
VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 2:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int, col 1:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE
Column stats: COMPLETE
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColGreaterLongScalar(col
2:bigint, val 1)
+ predicate: (_col2 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 101 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select age, name, count(*) from test_bucket group by age,
name having count(*) > 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bucket
+#### A masked pattern was here ####
+POSTHOOK: query: select age, name, count(*) from test_bucket group by age,
name having count(*) > 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_bucket
+#### A masked pattern was here ####
+1 user1 2
+2 user2 2