This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 9cba262 HIVE-24579: Incorrect Result For Groupby With Limit
(Krisztian Kasa, reviewed by Zoltan Haindrich)
9cba262 is described below
commit 9cba2626e1b0376dc272ab4ed36a4c72eb6d8f57
Author: Krisztian Kasa <[email protected]>
AuthorDate: Tue Sep 28 12:39:59 2021 +0200
HIVE-24579: Incorrect Result For Groupby With Limit (Krisztian Kasa,
reviewed by Zoltan Haindrich)
---
.../calcite/rules/HiveAggregateSortLimitRule.java | 117 +++++++
.../hadoop/hive/ql/parse/CalcitePlanner.java | 3 +-
.../queries/clientpositive/topnkey_gby_limit.q | 21 ++
.../clientpositive/llap/bucket_groupby.q.out | 32 +-
.../results/clientpositive/llap/filter_union.q.out | 14 +
.../clientpositive/llap/groupby1_limit.q.out | 47 +--
.../clientpositive/llap/limit_pushdown.q.out | 101 ++----
.../clientpositive/llap/limit_pushdown4.q.out | 21 +-
.../llap/offset_limit_ppd_optimizer.q.out | 98 ++----
.../clientpositive/llap/topnkey_gby_limit.q.out | 379 +++++++++++++++++++++
10 files changed, 671 insertions(+), 162 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateSortLimitRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateSortLimitRule.java
new file mode 100644
index 0000000..01c6c8d
--- /dev/null
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateSortLimitRule.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+
+/**
+ * Rule that adds sorting to GROUP BY col0 LIMIT n in presence of aggregate
functions.
+ * Ex.: SELECT id, count(1) FROM t_table GROUP BY id LIMIT 2
+ *
+ * Above query has a physical plan like Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ * Both mapper and reducer edges may have multiple Mapper and Reducer
instances to enable parallel process of data.
+ * Aggregate function results are calculated in two steps:
+ * 1) first mappers calculate a partial result from the rows processed by each
instance.
+ * The result is going to be filtered by Top N optimization in the mappers.
+ * 2) In the second step reducers aggregate the partial results coming from
the mappers. However, some of partial
+ * results are filtered out by Top N optimization.
+ * Each reducer generates an output file and in the last stage Fetch Operator
choose one af the to be the result of the
+ * query. In these result files only the first n row has correct aggregation
results the ones which has a key value
+ * falls in the top n key.
+ *
+ * In order to get correct aggregation results this rule adds sorting to the
HiveSortLimit above the HiveAggregate
+ * which enables hive to sort merge the results of the reducers and take the
first n rows of the merged result.
+ *
+ * from:
+ * HiveSortLimit(fetch=[2])
+ * HiveAggregate(group=[{0}], agg#0=[count()])
+ *
+ * to:
+ * HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[2])
+ * HiveAggregate(group=[{0}], agg#0=[count()])
+ */
+public class HiveAggregateSortLimitRule extends RelOptRule {
+
+ private static HiveAggregateSortLimitRule instance = null;
+
+ public static final HiveAggregateSortLimitRule getInstance(HiveConf
hiveConf) {
+ if (instance == null) {
+ RelFieldCollation.NullDirection defaultAscNullDirection;
+ if (HiveConf.getBoolVar(hiveConf,
HiveConf.ConfVars.HIVE_DEFAULT_NULLS_LAST)) {
+ defaultAscNullDirection = RelFieldCollation.NullDirection.LAST;
+ } else {
+ defaultAscNullDirection = RelFieldCollation.NullDirection.FIRST;
+ }
+ instance = new HiveAggregateSortLimitRule(defaultAscNullDirection);
+ }
+
+ return instance;
+ }
+
+ private final RelFieldCollation.NullDirection defaultAscNullDirection;
+
+
+ private HiveAggregateSortLimitRule(RelFieldCollation.NullDirection
defaultAscNullDirection) {
+ super(operand(HiveSortLimit.class, operand(HiveAggregate.class, any())),
+ HiveRelFactories.HIVE_BUILDER, "HiveAggregateSortRule");
+ this.defaultAscNullDirection = defaultAscNullDirection;
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final HiveSortLimit sortLimit = call.rel(0);
+ final HiveAggregate aggregate = call.rel(1);
+ final RelBuilder relBuilder = call.builder();
+
+ if (sortLimit.getFetchExpr() == null && sortLimit.getOffsetExpr() == null)
{
+ // No limit, offset -> No Top N -> all rows are forwarded from all RS to
Reducers
+ return;
+ }
+
+ if (aggregate.getAggCallList().isEmpty()) {
+ // No aggregate functions, any Group By key can be in the final result
+ return;
+ }
+
+ if (!sortLimit.getSortExps().isEmpty()) {
+ // Sort keys already present
+ return;
+ }
+
+ ImmutableList.Builder<RelFieldCollation> newSortKeyBuilder =
ImmutableList.builder();
+ for (int i : aggregate.getGroupSet()) {
+ RelFieldCollation fieldCollation =
+ new RelFieldCollation(i, RelFieldCollation.Direction.ASCENDING,
defaultAscNullDirection);
+ newSortKeyBuilder.add(fieldCollation);
+ }
+
+ HiveRelCollation newCollation = new
HiveRelCollation(newSortKeyBuilder.build());
+ HiveSortLimit newSortLimit = sortLimit.copy(sortLimit.getTraitSet(),
+ aggregate, newCollation, sortLimit.offset, sortLimit.fetch);
+
+ call.transformTo(newSortLimit);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 472b3d7..7cecfc8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -162,6 +162,7 @@ import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveTezModelRelMetadataProvider;
+import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateSortLimitRule;
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinSwapConstraintsRule;
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinProjectTransposeRule;
import
org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializationRelMetadataProvider;
@@ -2214,7 +2215,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
ProjectRemoveRule.Config.DEFAULT.toRule(),
HiveUnionMergeRule.INSTANCE,
new HiveUnionSimpleSelectsToInlineTableRule(dummyTableScan),
HiveAggregateProjectMergeRule.INSTANCE,
HiveProjectMergeRule.INSTANCE_NO_FORCE,
- HiveJoinCommuteRule.INSTANCE);
+ HiveJoinCommuteRule.INSTANCE,
HiveAggregateSortLimitRule.getInstance(conf));
// 2. Run aggregate-join transpose (cost based)
// If it failed because of missing stats, we continue with
diff --git a/ql/src/test/queries/clientpositive/topnkey_gby_limit.q
b/ql/src/test/queries/clientpositive/topnkey_gby_limit.q
new file mode 100644
index 0000000..9c79d75
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/topnkey_gby_limit.q
@@ -0,0 +1,21 @@
+SET hive.optimize.topnkey=false;
+
+set tez.grouping.max-size=50;
+set tez.grouping.min-size=25;
+
+create table test(id string);
+
+insert into test(id) values
+(4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4),
(3), (4), (3), (4), (2), (1), (5);
+
+explain cbo
+select id, count(1) from test group by id limit 2;
+explain extended
+select id, count(1) from test group by id limit 2;
+select id, count(1) from test group by id limit 2;
+
+SET hive.optimize.topnkey=true;
+
+explain extended
+select id, count(1) from test group by id limit 2;
+select id, count(1) from test group by id limit 2;
diff --git a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
index 0e96b92..7e08fca 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
@@ -318,6 +318,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -361,6 +362,19 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 316 Data size: 3792 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Statistics: Num rows: 316 Data size: 3792 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0
(type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316 Data size: 3792 Basic stats:
COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 120 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -388,9 +402,9 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby
POSTHOOK: Input: default@clustergroupby@ds=101
#### A masked pattern was here ####
-3 416
1 10
2 74
+3 416
PREHOOK: query: explain
select abs(length(key)), count(1) from clustergroupby where ds='101' group
by abs(length(key)) limit 10
PREHOOK: type: QUERY
@@ -413,6 +427,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -456,6 +471,19 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 316 Data size: 3792 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Statistics: Num rows: 316 Data size: 3792 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0
(type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316 Data size: 3792 Basic stats:
COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 120 Basic stats:
COMPLETE Column stats: COMPLETE
@@ -483,9 +511,9 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby
POSTHOOK: Input: default@clustergroupby@ds=101
#### A masked pattern was here ####
-3 416
1 10
2 74
+3 416
PREHOOK: query: explain
select key, count(1) from clustergroupby where ds='101' group by key,'a'
order by key,'a' limit 10
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/filter_union.q.out
b/ql/src/test/results/clientpositive/llap/filter_union.q.out
index 9d0df50..c6eee0e 100644
--- a/ql/src/test/results/clientpositive/llap/filter_union.q.out
+++ b/ql/src/test/results/clientpositive/llap/filter_union.q.out
@@ -495,6 +495,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -537,6 +538,19 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0
(type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
Limit
Number of rows: 0
Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE
Column stats: COMPLETE
diff --git a/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out
b/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out
index 27d0dd1..4e7e749 100644
--- a/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby1_limit.q.out
@@ -28,7 +28,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -71,33 +71,34 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 5
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE
Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 5 Data size: 475 Basic stats:
COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col0 (type: string), _col1 (type:
double)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double)
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
- Limit
- Number of rows: 5
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE
Column stats: COMPLETE
- Select Operator
- expressions: UDFToInteger(VALUE._col0) (type: int),
VALUE._col1 (type: double)
- outputColumnNames: _col0, _col1
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0
(type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE
Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), _col1 (type:
double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 60 Basic stats:
COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1_n125
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 60 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1_n125
Stage: Stage-2
Dependency Collection
diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
index e739d5d..7ce1a04 100644
--- a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
@@ -1040,10 +1040,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
- Reducer 4 <- Map 1 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1075,6 +1072,13 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 316 Data size: 30020 Basic
stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: +
keys: key (type: string)
@@ -1102,7 +1106,6 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
- Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -1114,77 +1117,47 @@ STAGE PLANS:
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
Limit
- Number of rows: 2
- Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ Number of rows: 3
+ Statistics: Num rows: 3 Data size: 285 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
+ Statistics: Num rows: 3 Data size: 285 Basic stats:
COMPLETE Column stats: COMPLETE
+ Dummy Store
Execution mode: llap
Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE
Column stats: COMPLETE
- Limit
- Number of rows: 4
- Statistics: Num rows: 2 Data size: 380 Basic stats: COMPLETE
Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 2 Data size: 380 Basic stats:
COMPLETE Column stats: COMPLETE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
- Execution mode: vectorized, llap
- Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 3
- Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE
Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 3 Data size: 285 Basic stats:
COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.3
- value expressions: _col0 (type: string), _col1 (type:
bigint)
- Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Limit
- Number of rows: 3
- Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type:
bigint)
+ expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE
Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 3 Data size: 285 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 3 Data size: 285 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ Limit
+ Number of rows: 2
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 190 Basic stats:
COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 380 Basic stats:
COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 4
+ Statistics: Num rows: 2 Data size: 380 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 380 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown4.q.out
b/ql/src/test/results/clientpositive/llap/limit_pushdown4.q.out
index 70f8fcd..6047293 100644
--- a/ql/src/test/results/clientpositive/llap/limit_pushdown4.q.out
+++ b/ql/src/test/results/clientpositive/llap/limit_pushdown4.q.out
@@ -349,16 +349,19 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 5
- Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE
Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Limit
+ Number of rows: 5
Statistics: Num rows: 5 Data size: 475 Basic stats:
COMPLETE Column stats: COMPLETE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 475 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git
a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
index f6d4562..d48ef82 100644
--- a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
+++ b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
@@ -1468,10 +1468,7 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
- Reducer 4 <- Map 1 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1495,6 +1492,13 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 500 Data size: 43500 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: +
keys: key (type: string)
@@ -1514,7 +1518,6 @@ STAGE PLANS:
Execution mode: llap
LLAP IO: all inputs
Reducer 2
- Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: count()
@@ -1527,41 +1530,12 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Limit
Number of rows: 20
- Offset of rows: 10
+ Offset of rows: 20
Statistics: Num rows: 20 Data size: 1900 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 20 Data size: 1900 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 20 Data size: 1900 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 20 Data size: 3800 Basic stats: COMPLETE
Column stats: COMPLETE
- Limit
- Number of rows: 5
- Offset of rows: 3
- Statistics: Num rows: 5 Data size: 950 Basic stats: COMPLETE
Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 5 Data size: 950 Basic stats:
COMPLETE Column stats: COMPLETE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Dummy Store
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1570,37 +1544,35 @@ STAGE PLANS:
mode: complete
outputColumnNames: _col0, _col1
Statistics: Num rows: 316 Data size: 30020 Basic stats:
COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 20
- Offset of rows: 20
- Statistics: Num rows: 20 Data size: 1900 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 20 Data size: 1900 Basic stats:
COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 2.0E-5
- value expressions: _col0 (type: string), _col1 (type:
bigint)
- Reducer 5
- Execution mode: llap
- Reduce Operator Tree:
- Limit
- Number of rows: 20
- Offset of rows: 20
- Statistics: Num rows: 20 Data size: 1900 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type:
bigint)
+ expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 1900 Basic stats:
COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: _col0 is not null (type: boolean)
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
Statistics: Num rows: 20 Data size: 1900 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Filter Operator
+ predicate: _col0 is not null (type: boolean)
Statistics: Num rows: 20 Data size: 1900 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 20 Data size: 3800 Basic stats:
COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Offset of rows: 3
+ Statistics: Num rows: 5 Data size: 950 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 950 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/topnkey_gby_limit.q.out
b/ql/src/test/results/clientpositive/llap/topnkey_gby_limit.q.out
new file mode 100644
index 0000000..c4e12e1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/topnkey_gby_limit.q.out
@@ -0,0 +1,379 @@
+PREHOOK: query: create table test(id string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: create table test(id string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: insert into test(id) values
+(4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4),
(3), (4), (3), (4), (2), (1), (5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test
+POSTHOOK: query: insert into test(id) values
+(4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4), (3), (4),
(3), (4), (3), (4), (2), (1), (5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.id SCRIPT []
+PREHOOK: query: explain cbo
+select id, count(1) from test group by id limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select id, count(1) from test group by id limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[2])
+ HiveProject(id=[$0], $f1=[$1])
+ HiveAggregate(group=[{0}], agg#0=[count()])
+ HiveTableScan(table=[[default, test]], table:alias=[test])
+
+PREHOOK: query: explain extended
+select id, count(1) from test group by id limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select id, count(1) from test group by id limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `id`, COUNT(*) AS `$f1`
+FROM `default`.`test`
+GROUP BY `id`
+ORDER BY `id`
+LIMIT 2
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test
+ Statistics: Num rows: 22 Data size: 1870 Basic stats:
COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ expressions: id (type: string)
+ outputColumnNames: id
+ Statistics: Num rows: 22 Data size: 1870 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: id (type: string)
+ minReductionHashAggr: 0.77272725
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 465 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 465 Basic stats:
COMPLETE Column stats: COMPLETE
+ tag: -1
+ TopN: 2
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: test
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns id
+ columns.types string
+#### A masked pattern was here ####
+ name default.test
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns id
+ columns.comments
+ columns.types string
+#### A masked pattern was here ####
+ name default.test
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test
+ name: default.test
+ Truncated Path -> Alias:
+ /test [test]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE
Column stats: COMPLETE
+ tag: -1
+ TopN: 2
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0
(type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE
Column stats: COMPLETE
+ Limit
+ Number of rows: 2
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 186 Basic stats:
COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1
+ columns.types string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels
true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 2
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select id, count(1) from test group by id limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: select id, count(1) from test group by id limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+1 1
+2 1
+PREHOOK: query: explain extended
+select id, count(1) from test group by id limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: explain extended
+select id, count(1) from test group by id limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+OPTIMIZED SQL: SELECT `id`, COUNT(*) AS `$f1`
+FROM `default`.`test`
+GROUP BY `id`
+ORDER BY `id`
+LIMIT 2
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test
+ Statistics: Num rows: 22 Data size: 1870 Basic stats:
COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Top N Key Operator
+ sort order: +
+ keys: id (type: string)
+ null sort order: z
+ Statistics: Num rows: 22 Data size: 1870 Basic stats:
COMPLETE Column stats: COMPLETE
+ top n: 2
+ Select Operator
+ expressions: id (type: string)
+ outputColumnNames: id
+ Statistics: Num rows: 22 Data size: 1870 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: id (type: string)
+ minReductionHashAggr: 0.77272725
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 465 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 465 Basic stats:
COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: test
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns id
+ columns.types string
+#### A masked pattern was here ####
+ name default.test
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns id
+ columns.comments
+ columns.types string
+#### A masked pattern was here ####
+ name default.test
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test
+ name: default.test
+ Truncated Path -> Alias:
+ /test [test]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string)
+ null sort order: z
+ numBuckets: -1
+ sort order: +
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE
Column stats: COMPLETE
+ tag: -1
+ value expressions: _col1 (type: bigint)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0
(type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE
Column stats: COMPLETE
+ Limit
+ Number of rows: 2
+ Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 2 Data size: 186 Basic stats:
COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1
+ columns.types string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels
true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 2
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select id, count(1) from test group by id limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: select id, count(1) from test group by id limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+1 1
+2 1