This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new b7528c5a95e HIVE-28878: Query with window function and redundant GROUP
BY key fails during compilation (Krisztian Kasa, reviewed by Simhadri
Govindappa, Dayakar M)
b7528c5a95e is described below
commit b7528c5a95e89797eca02c3005acd600a84aae46
Author: Krisztian Kasa <[email protected]>
AuthorDate: Wed Apr 9 13:40:31 2025 +0200
HIVE-28878: Query with window function and redundant GROUP BY key fails
during compilation (Krisztian Kasa, reviewed by Simhadri Govindappa, Dayakar M)
---
.../hadoop/hive/ql/parse/CalcitePlanner.java | 1 +
.../clientpositive/cbo_windowing_aggregate.q | 17 +++
.../llap/cbo_windowing_aggregate.q.out | 151 +++++++++++++++++++++
3 files changed, 169 insertions(+)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 89f35e7da40..e40b0761eb9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -4180,6 +4180,7 @@ private RelNode genSelectForWindowing(QB qb, RelNode
srcRel, HashSet<ColumnInfo>
// 3. Construct new Row Resolver with everything from below.
RowResolver out_rwsch = new RowResolver();
+ out_rwsch.setExprResolver(inputRR.getIsExprResolver());
if (!RowResolver.add(out_rwsch, inputRR)) {
LOG.warn(ERROR_MESSAGE_DUPLICATES_DETECTED);
}
diff --git a/ql/src/test/queries/clientpositive/cbo_windowing_aggregate.q
b/ql/src/test/queries/clientpositive/cbo_windowing_aggregate.q
new file mode 100644
index 00000000000..4913fe285d8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_windowing_aggregate.q
@@ -0,0 +1,17 @@
+create table t1 (a int);
+
+explain cbo
+select
+ a * 2 as mul1,
+ a * 2 as mul2,
+ row_number() over (order by a * 2)
+from t1
+group by a * 2, a * 2;
+
+explain
+select
+ a * 2 as mul1,
+ a * 2 as mul2,
+ row_number() over (order by a * 2)
+from t1
+group by a * 2, a * 2;
diff --git
a/ql/src/test/results/clientpositive/llap/cbo_windowing_aggregate.q.out
b/ql/src/test/results/clientpositive/llap/cbo_windowing_aggregate.q.out
new file mode 100644
index 00000000000..484a32416bc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_windowing_aggregate.q.out
@@ -0,0 +1,151 @@
+PREHOOK: query: create table t1 (a int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: explain cbo
+select
+ a * 2 as mul1,
+ a * 2 as mul2,
+ row_number() over (order by a * 2)
+from t1
+group by a * 2, a * 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select
+ a * 2 as mul1,
+ a * 2 as mul2,
+ row_number() over (order by a * 2)
+from t1
+group by a * 2, a * 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(mul1=[$0], mul2=[$0], _o__c2=[row_number() OVER (PARTITION BY 0
ORDER BY $1 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED
FOLLOWING)])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveAggregate(group=[{0, 1}])
+ HiveProject($f0=[*($0, 2)], $f1=[*($0, 2)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: explain
+select
+ a * 2 as mul1,
+ a * 2 as mul2,
+ row_number() over (order by a * 2)
+from t1
+group by a * 2, a * 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select
+ a * 2 as mul1,
+ a * 2 as mul2,
+ row_number() over (order by a * 2)
+from t1
+group by a * 2, a * 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: (a * 2) (type: int), (a * 2) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: int)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: 0 (type: int), _col1 (type: int)
+ null sort order: az
+ sort order: ++
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), KEY.reducesinkkey1
(type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: int, _col1: int
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS LAST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: row_number_window_0
+ name: row_number
+ window function: GenericUDAFRowNumberEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ isPivotResult: true
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col0 (type: int),
row_number_window_0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+