This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 9cba262 HIVE-24579: Incorrect Result For Groupby With Limit (Krisztian Kasa, reviewed by Zoltan Haindrich) 9cba262 is described below commit 9cba2626e1b0376dc272ab4ed36a4c72eb6d8f57 Author: Krisztian Kasa <kasakri...@gmail.com> AuthorDate: Tue Sep 28 12:39:59 2021 +0200 HIVE-24579: Incorrect Result For Groupby With Limit (Krisztian Kasa, reviewed by Zoltan Haindrich) --- .../calcite/rules/HiveAggregateSortLimitRule.java | 117 +++++++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 3 +- .../queries/clientpositive/topnkey_gby_limit.q | 21 ++ .../clientpositive/llap/bucket_groupby.q.out | 32 +- .../results/clientpositive/llap/filter_union.q.out | 14 + .../clientpositive/llap/groupby1_limit.q.out | 47 +-- .../clientpositive/llap/limit_pushdown.q.out | 101 ++---- .../clientpositive/llap/limit_pushdown4.q.out | 21 +- .../llap/offset_limit_ppd_optimizer.q.out | 98 ++---- .../clientpositive/llap/topnkey_gby_limit.q.out | 379 +++++++++++++++++++++ 10 files changed, 671 insertions(+), 162 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateSortLimitRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateSortLimitRule.java new file mode 100644 index 0000000..01c6c8d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateSortLimitRule.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.tools.RelBuilder; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +/** + * Rule that adds sorting to GROUP BY col0 LIMIT n in the presence of aggregate functions. + * Ex.: SELECT id, count(1) FROM t_table GROUP BY id LIMIT 2 + * + * The above query has a physical plan like Reducer 2 <- Map 1 (SIMPLE_EDGE). + * Both the map and reduce stages may have multiple Mapper and Reducer instances to enable parallel processing of data. + * Aggregate function results are calculated in two steps: + * 1) First, the mappers calculate a partial result from the rows processed by each instance. + * This result is then filtered by the Top N optimization in the mappers.
+ * 2) In the second step the reducers aggregate the partial results coming from the mappers. However, some of the partial + * results have already been filtered out by the Top N optimization. + * Each reducer generates an output file, and in the last stage the Fetch Operator chooses one of them to be the result of the + * query. In these result files only the first n rows have correct aggregation results: the ones whose key value + * falls within the top n keys. + * + * In order to get correct aggregation results, this rule adds sorting to the HiveSortLimit above the HiveAggregate, + * which enables Hive to sort-merge the results of the reducers and take the first n rows of the merged result. + * + * from: + * HiveSortLimit(fetch=[2]) + * HiveAggregate(group=[{0}], agg#0=[count()]) + * + * to: + * HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[2]) + * HiveAggregate(group=[{0}], agg#0=[count()]) + */ +public class HiveAggregateSortLimitRule extends RelOptRule { + + private static HiveAggregateSortLimitRule instance = null; + + public static final HiveAggregateSortLimitRule getInstance(HiveConf hiveConf) { + if (instance == null) { + RelFieldCollation.NullDirection defaultAscNullDirection; + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_DEFAULT_NULLS_LAST)) { + defaultAscNullDirection = RelFieldCollation.NullDirection.LAST; + } else { + defaultAscNullDirection = RelFieldCollation.NullDirection.FIRST; + } + instance = new HiveAggregateSortLimitRule(defaultAscNullDirection); + } + + return instance; + } + + private final RelFieldCollation.NullDirection defaultAscNullDirection; + + + private HiveAggregateSortLimitRule(RelFieldCollation.NullDirection defaultAscNullDirection) { + super(operand(HiveSortLimit.class, operand(HiveAggregate.class, any())), + HiveRelFactories.HIVE_BUILDER, "HiveAggregateSortRule"); + this.defaultAscNullDirection = defaultAscNullDirection; + } + + @Override + public void onMatch(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + final HiveAggregate aggregate = call.rel(1); + final RelBuilder relBuilder = call.builder(); + + if (sortLimit.getFetchExpr() == null && sortLimit.getOffsetExpr() == null) { + // No limit, offset -> No Top N -> all rows are forwarded from all RS to Reducers + return; + } + + if (aggregate.getAggCallList().isEmpty()) { + // No aggregate functions, any Group By key can be in the final result + return; + } + + if (!sortLimit.getSortExps().isEmpty()) { + // Sort keys already present + return; + } + + ImmutableList.Builder<RelFieldCollation> newSortKeyBuilder = ImmutableList.builder(); + for (int i : aggregate.getGroupSet()) { + RelFieldCollation fieldCollation = + new RelFieldCollation(i, RelFieldCollation.Direction.ASCENDING, defaultAscNullDirection); + newSortKeyBuilder.add(fieldCollation); + } + + HiveRelCollation newCollation = new HiveRelCollation(newSortKeyBuilder.build()); + HiveSortLimit newSortLimit = sortLimit.copy(sortLimit.getTraitSet(), + aggregate, newCollation, sortLimit.offset, sortLimit.fetch); + + call.transformTo(newSortLimit); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 472b3d7..7cecfc8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -162,6 +162,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext; import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateSortLimitRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinSwapConstraintsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializationRelMetadataProvider; @@ -2214,7 +2215,7 @@ public class CalcitePlanner extends SemanticAnalyzer { ProjectRemoveRule.Config.DEFAULT.toRule(), HiveUnionMergeRule.INSTANCE, new HiveUnionSimpleSelectsToInlineTableRule(dummyTableScan), HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE, - HiveJoinCommuteRule.INSTANCE); + HiveJoinCommuteRule.INSTANCE, HiveAggregateSortLimitRule.getInstance(conf)); // 2. Run aggregate-join transpose (cost based) // If it failed because of missing stats, we continue with diff --git a/ql/src/test/queries/clientpositive/topnkey_gby_limit.q b/ql/src/test/queries/clientpositive/topnkey_gby_limit.q new file mode 100644 index 0000000..9c79d75 --- /dev/null +++ b/ql/src/test/queries/clientpositive/topnkey_gby_limit.q @@ -0,0 +1,21 @@ +SET hive.optimize.topnkey=false; + +set tez.grouping.max-size=50; +set tez.grouping.min-size=25; + +create table test(id string); + +insert into test(id) values +(4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (2), (1), (5); + +explain cbo +select id, count(1) from test group by id limit 2; +explain extended +select id, count(1) from test group by id limit 2; +select id, count(1) from test group by id limit 2; + +SET hive.optimize.topnkey=true; + +explain extended +select id, count(1) from test group by id limit 2; +select id, count(1) from test group by id limit 2; diff --git a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index 0e96b92..7e08fca 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -318,6 +318,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -361,6 +362,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE @@ -388,9 +402,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### -3 416 1 10 2 74 +3 416 PREHOOK: query: explain select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10 PREHOOK: type: QUERY @@ -413,6 
+427,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -456,6 +471,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 3792 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE @@ -483,9 +511,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### -3 416 1 10 2 74 +3 416 PREHOOK: query: explain select key, count(1) from clustergroupby where ds='101' group by key,'a' order by key,'a' limit 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/filter_union.q.out b/ql/src/test/results/clientpositive/llap/filter_union.q.out index 9d0df50..c6eee0e 100644 --- a/ql/src/test/results/clientpositive/llap/filter_union.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_union.q.out @@ -495,6 +495,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -537,6 +538,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 0 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out b/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out index 27d0dd1..4e7e749 100644 --- a/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out @@ -28,7 +28,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -71,33 +71,34 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 5 Data size: 475 Basic stats: 
COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: double) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: double) - outputColumnNames: _col0, _col1 + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1_n125 + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1_n125 Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index e739d5d..7ce1a04 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -1040,10 +1040,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1075,6 +1072,13 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: + keys: key (type: string) @@ -1102,7 +1106,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1114,77 +1117,47 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Number of rows: 3 + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column 
stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 + Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE + Dummy Store Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 4 - Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col0 (type: string), _col1 (type: bigint) - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Limit - Number of rows: 3 - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 4 + Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 380 Basic 
stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown4.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown4.q.out index 70f8fcd..6047293 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown4.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown4.q.out @@ -349,16 +349,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Limit + Number of rows: 5 Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index f6d4562..d48ef82 100644 --- a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -1468,10 +1468,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1495,6 +1492,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: + keys: key (type: string) @@ -1514,7 +1518,6 @@ STAGE PLANS: Execution mode: llap LLAP IO: all inputs Reducer 2 - Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count() @@ -1527,41 +1530,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Limit Number of rows: 20 - Offset of rows: 10 + Offset of rows: 20 Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: 
string) - Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Offset of rows: 3 - Statistics: Num rows: 5 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Dummy Store Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1570,37 +1544,35 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Offset of rows: 20 - Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 2.0E-5 - value expressions: _col0 (type: string), _col1 (type: bigint) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Limit - Number of rows: 20 - Offset of rows: 20 - Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) + expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) + Limit + Number of rows: 20 + Offset of rows: 10 Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Offset of rows: 3 + Statistics: Num rows: 5 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/topnkey_gby_limit.q.out b/ql/src/test/results/clientpositive/llap/topnkey_gby_limit.q.out new file mode 100644 index 0000000..c4e12e1 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/llap/topnkey_gby_limit.q.out @@ -0,0 +1,379 @@ +PREHOOK: query: create table test(id string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +POSTHOOK: query: create table test(id string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: insert into test(id) values +(4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (2), (1), (5) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test +POSTHOOK: query: insert into test(id) values +(4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (2), (1), (5) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test +POSTHOOK: Lineage: test.id SCRIPT [] +PREHOOK: query: explain cbo +select id, count(1) from test group by id limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select id, count(1) from test group by id limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[2]) + HiveProject(id=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveTableScan(table=[[default, test]], table:alias=[test]) + +PREHOOK: query: explain extended +select id, count(1) from test group by id limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: explain extended +select id, count(1) from test group by id limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT `id`, COUNT(*) AS `$f1` +FROM `default`.`test` +GROUP BY `id` +ORDER BY `id` +LIMIT 2 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 22 Data size: 1870 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Select Operator + expressions: id (type: string) + outputColumnNames: id + Statistics: Num rows: 22 Data size: 1870 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: id (type: string) + minReductionHashAggr: 0.77272725 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + key expressions: _col0 (type: string) + null sort order: z + numBuckets: -1 + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 2 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + 
bucketing_version 2 + column.name.delimiter , + columns id + columns.types string +#### A masked pattern was here #### + name default.test + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns id + columns.comments + columns.types string +#### A masked pattern was here #### + name default.test + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test + name: default.test + Truncated Path -> Alias: + /test [test] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + key expressions: _col0 (type: string) + null sort order: z + numBuckets: -1 + sort order: + + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + TopN: 2 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + auto parallelism: false + Reducer 3 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +PREHOOK: query: select id, count(1) from test group by id limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: select id, count(1) from test group by id limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +1 1 +2 1 +PREHOOK: query: explain extended +select id, count(1) from test group by id limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: explain extended +select id, count(1) from test group by id limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT `id`, 
COUNT(*) AS `$f1` +FROM `default`.`test` +GROUP BY `id` +ORDER BY `id` +LIMIT 2 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 22 Data size: 1870 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Top N Key Operator + sort order: + + keys: id (type: string) + null sort order: z + Statistics: Num rows: 22 Data size: 1870 Basic stats: COMPLETE Column stats: COMPLETE + top n: 2 + Select Operator + expressions: id (type: string) + outputColumnNames: id + Statistics: Num rows: 22 Data size: 1870 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: id (type: string) + minReductionHashAggr: 0.77272725 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + key expressions: _col0 (type: string) + null sort order: z + numBuckets: -1 + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns id + columns.types string +#### A masked pattern was here #### + name default.test + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns id + columns.comments + columns.types string +#### A masked pattern was here #### + name default.test + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test + name: default.test + Truncated Path -> Alias: + /test [test] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + key expressions: _col0 (type: string) + null sort order: z + numBuckets: -1 + sort order: + + Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: false + Reducer 3 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0,_col1 + columns.types string:bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 2 + Processor Tree: + ListSink + +PREHOOK: query: select id, count(1) from test group by id limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: select id, count(1) from test group by id limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +1 1 +2 1
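
The essence of the new rule is that it attaches a collation over the GROUP BY keys to the HiveSortLimit that is already present, so the final stage can merge-sort the per-reducer outputs before applying the limit; the first n rows of the merged stream are then complete aggregates rather than partial ones. Below is a minimal, self-contained sketch (not part of the commit) of how such a collation can be built with plain Calcite types. The class name, the single group-key index 0, and the NULLS LAST direction are illustrative assumptions; the rule itself derives them from HiveAggregate.getGroupSet() and hive.default.nulls.last, and wraps the result in a HiveRelCollation before copying the HiveSortLimit.

import java.util.ArrayList;
import java.util.List;

import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.util.ImmutableBitSet;

public class GroupByLimitCollationSketch {
  public static void main(String[] args) {
    // Hypothetical group set: the aggregate groups on column 0 only,
    // as in "SELECT id, count(1) FROM test GROUP BY id LIMIT 2".
    ImmutableBitSet groupSet = ImmutableBitSet.of(0);

    // One ascending field collation per GROUP BY key; NULLS LAST mirrors
    // the hive.default.nulls.last=true case handled by the rule.
    List<RelFieldCollation> sortKeys = new ArrayList<>();
    for (int i : groupSet) {
      sortKeys.add(new RelFieldCollation(i,
          RelFieldCollation.Direction.ASCENDING,
          RelFieldCollation.NullDirection.LAST));
    }

    // The rule wraps field collations like these in a HiveRelCollation and copies
    // the HiveSortLimit with it, which is what turns "fetch=[2]" into
    // "sort0=[$0], dir0=[ASC], fetch=[2]" in the explain cbo output above.
    RelCollation collation = RelCollations.of(sortKeys);
    System.out.println(collation);
  }
}

Sorting only on the group keys is sufficient here: it aligns the final LIMIT with the Top N key filtering performed in the mappers and reducers, which is why the first n rows of the merged output carry complete aggregate values.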