This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 23d148592c8 HIVE-25589: SQL: Implement HAVING/QUALIFY predicates for ROW_NUMBER()=1 (Krisztian Kasa, reviewed by Zoltan Haindrich)
23d148592c8 is described below
commit 23d148592c8154928286fcd8785e2ff71df75bbd
Author: Krisztian Kasa <[email protected]>
AuthorDate: Thu May 26 13:22:46 2022 +0200
HIVE-25589: SQL: Implement HAVING/QUALIFY predicates for ROW_NUMBER()=1 (Krisztian Kasa, reviewed by Zoltan Haindrich)
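For reference, QUALIFY filters rows on the result of window functions after they are computed, much as HAVING filters on aggregates. A minimal sketch of the syntax enabled by this commit, taken from the qualify.q test added below (it assumes a table t1(a int, b string, c int) and hive.cbo.enable=true, since QUALIFY requires CBO):

    -- keep only the first row of each partition of b, ordered by c
    select a, b, c, row_number() over (partition by b order by c) as row_num
      from t1
    qualify row_num = 1;

With CBO disabled the same statement fails with the new error 10433 (CBO_IS_REQUIRED), as exercised by the qualify_no_cbo.q negative tests.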
---
.../java/org/apache/hadoop/hive/ql/ErrorMsg.java | 2 +
.../apache/hadoop/hive/ql/parse/HiveLexerParent.g | 1 +
.../org/apache/hadoop/hive/ql/parse/HiveParser.g | 10 +-
.../hadoop/hive/ql/parse/IdentifiersParser.g | 7 +
.../hadoop/hive/ql/parse/CalcitePlanner.java | 79 +++++++----
.../apache/hadoop/hive/ql/parse/QBParseInfo.java | 14 ++
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 10 ++
.../test/queries/clientnegative/qualify_no_cbo.q | 7 +
.../test/queries/clientnegative/qualify_no_cbo_2.q | 8 ++
ql/src/test/queries/clientpositive/qualify.q | 36 +++++
.../test/queries/clientpositive/qualify_distinct.q | 42 ++++++
ql/src/test/queries/clientpositive/qualify_gby.q | 20 +++
.../results/clientnegative/qualify_no_cbo.q.out | 9 ++
.../results/clientnegative/qualify_no_cbo_2.q.out | 9 ++
.../test/results/clientpositive/llap/qualify.q.out | 128 ++++++++++++++++++
.../clientpositive/llap/qualify_distinct.q.out | 146 +++++++++++++++++++++
.../results/clientpositive/llap/qualify_gby.q.out | 69 ++++++++++
17 files changed, 571 insertions(+), 26 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 277e6475570..903b949772a 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -478,6 +478,8 @@ public enum ErrorMsg {
INVALID_METADATA_TABLE_NAME(10430, "Invalid metadata table name {0}.", true),
METADATA_TABLE_NOT_SUPPORTED(10431, "Metadata tables are not supported for table {0}.", true),
COMPACTION_REFUSED(10432, "Compaction request for {0}.{1}{2} is refused, details: {3}.", true),
+ CBO_IS_REQUIRED(10433,
+ "The following functionality requires CBO (" + HiveConf.ConfVars.HIVE_CBO_ENABLED.varname + "): {0}", true),
//========================== 20000 range starts here ========================//
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexerParent.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexerParent.g
index 92412a91ce0..4bc7bb20780 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexerParent.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveLexerParent.g
@@ -41,6 +41,7 @@ KW_ORDER : 'ORDER';
KW_GROUP : 'GROUP';
KW_BY : 'BY';
KW_HAVING : 'HAVING';
+KW_QUALIFY : 'QUALIFY';
KW_WHERE : 'WHERE';
KW_FROM : 'FROM';
KW_AS : 'AS';
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 538d0a71276..9a2c4853602 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -86,6 +86,7 @@ TOK_CUBE_GROUPBY;
TOK_GROUPING_SETS;
TOK_GROUPING_SETS_EXPRESSION;
TOK_HAVING;
+TOK_QUALIFY;
TOK_ORDERBY;
TOK_NULLS_FIRST;
TOK_NULLS_LAST;
@@ -2599,8 +2600,9 @@ atomSelectStatement
g=groupByClause?
h=havingClause?
win=window_clause?
+ q=qualifyClause?
-> ^(TOK_QUERY $f? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
- $s $w? $g? $h? $win?))
+ $s $w? $g? $h? $win? $q?))
|
LPAREN! selectStatement RPAREN!
|
@@ -2717,13 +2719,14 @@ body
groupByClause?
havingClause?
window_clause?
+ qualifyClause?
orderByClause?
clusterByClause?
distributeByClause?
sortByClause?
limitClause? -> ^(TOK_INSERT insertClause
selectClause lateralView? whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
- distributeByClause? sortByClause? window_clause? limitClause?)
+ distributeByClause? sortByClause? window_clause? qualifyClause? limitClause?)
|
selectClause
lateralView?
@@ -2731,13 +2734,14 @@ body
groupByClause?
havingClause?
window_clause?
+ qualifyClause?
orderByClause?
clusterByClause?
distributeByClause?
sortByClause?
limitClause? -> ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
selectClause lateralView? whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
- distributeByClause? sortByClause? window_clause? limitClause?)
+ distributeByClause? sortByClause? window_clause? qualifyClause? limitClause?)
;
insertClause
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 4d807a7ef66..e40d2668244 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -146,6 +146,13 @@ havingClause
KW_HAVING havingCondition -> ^(TOK_HAVING havingCondition)
;
+qualifyClause
+@init { gParent.pushMsg("qualify clause", state); }
+@after { gParent.popMsg(state); }
+ :
+ KW_QUALIFY expression -> ^(TOK_QUALIFY expression)
+ ;
+
havingCondition
@init { gParent.pushMsg("having condition", state); }
@after { gParent.popMsg(state); }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e522fc9cea7..47fd2048781 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -4779,34 +4779,27 @@ public class CalcitePlanner extends SemanticAnalyzer {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(obAST, error));
}
}
- List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
- new Function<RelDataTypeField, RexNode>() {
- @Override
- public RexNode apply(RelDataTypeField input) {
- return new RexInputRef(input.getIndex(), input.getType());
- }
- });
- originalRR = outputRR.duplicate();
- for (int i = 0; i < inputRR.getColumnInfos().size(); i++) {
- ColumnInfo colInfo = new ColumnInfo(inputRR.getColumnInfos().get(i));
- String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos()
- .size() + i);
- colInfo.setInternalName(internalName);
- // if there is any confict, then we do not generate it in the new select
- // otherwise, we add it into the calciteColLst and generate the new select
- if (!outputRR.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName,
- colInfo)) {
- LOG.trace("Column already present in RR. skipping.");
- } else {
- columnList.add(originalInputRefs.get(i));
- }
- }
+ originalRR = appendInputColumns(srcRel, columnList, outputRR, inputRR);
outputRel = genSelectRelNode(columnList, outputRR, srcRel);
// outputRel is the generated augmented select with extra unselected
// columns, and originalRR is the original generated select
return new Pair<RelNode, RowResolver>(outputRel, originalRR);
} else {
- outputRel = genSelectRelNode(columnList, outputRR, srcRel);
+ if (qbp.getQualifyExprForClause(dest) != null) {
+ int originalColumnListSize = columnList.size();
+ originalRR = appendInputColumns(srcRel, columnList, outputRR, inputRR);
+ RelNode combinedProject = genSelectRelNode(columnList, outputRR, srcRel);
+ RelNode qualifyRel = genQualifyLogicalPlan(qb, combinedProject);
+ List<RexNode> topProjectColumnList = new ArrayList<>(originalColumnListSize);
+ for (int i = 0; i < originalColumnListSize; ++i) {
+ topProjectColumnList.add(qualifyRel.getCluster().getRexBuilder().makeInputRef(
+ qualifyRel.getRowType().getFieldList().get(i).getType(), i));
+ }
+ outputRel = genSelectRelNode(topProjectColumnList, originalRR, qualifyRel);
+ outputRR = originalRR;
+ } else {
+ outputRel = genSelectRelNode(columnList, outputRR, srcRel);
+ }
}
}
// 9. Handle select distinct as GBY if there exist windowing functions
@@ -4829,6 +4822,33 @@ public class CalcitePlanner extends SemanticAnalyzer {
return new Pair<>(outputRel, outputRR);
}
+ private RowResolver appendInputColumns(RelNode srcRel, List<RexNode> columnList, RowResolver outputRR, RowResolver inputRR) throws SemanticException {
+ RowResolver originalRR;
+ List<RexNode> originalInputRefs = Lists.transform(srcRel.getRowType().getFieldList(),
+ new Function<RelDataTypeField, RexNode>() {
+ @Override
+ public RexNode apply(RelDataTypeField input) {
+ return new RexInputRef(input.getIndex(), input.getType());
+ }
+ });
+ originalRR = outputRR.duplicate();
+ for (int i = 0; i < inputRR.getColumnInfos().size(); i++) {
+ ColumnInfo colInfo = new ColumnInfo(inputRR.getColumnInfos().get(i));
+ String internalName = SemanticAnalyzer.getColumnInternalName(outputRR.getColumnInfos()
+ .size() + i);
+ colInfo.setInternalName(internalName);
+ // if there is any confict, then we do not generate it in the new select
+ // otherwise, we add it into the calciteColLst and generate the new select
+ if (!outputRR.putWithCheck(colInfo.getTabAlias(), colInfo.getAlias(), internalName,
+ colInfo)) {
+ LOG.trace("Column already present in RR. skipping.");
+ } else {
+ columnList.add(originalInputRefs.get(i));
+ }
+ }
+ return originalRR;
+ }
+
Integer genRexNodeRegex(String colRegex, String tabAlias, ASTNode sel,
List<RexNode> exprList, Set<ColumnInfo> excludeCols, RowResolver input,
RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases,
@@ -5183,6 +5203,19 @@ public class CalcitePlanner extends SemanticAnalyzer {
return gbFilter;
}
+ private RelNode genQualifyLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
+ QBParseInfo qbp = getQBParseInfo(qb);
+ String destClauseName = qbp.getClauseNames().iterator().next();
+ ASTNode qualifyClause = qbp.getQualifyExprForClause(destClauseName);
+
+ if (qualifyClause == null) {
+ throw new SemanticException("Missing expression: qualify.");
+ }
+
+ ASTNode targetNode = (ASTNode) qualifyClause.getChild(0);
+ return genFilterRelNode(qb, targetNode, srcRel, null, null, true);
+ }
+
/*
* Bail if having clause uses Select Expression aliases for Aggregation
* expressions. We could do what Hive does. But this is non standard
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index 23bb068332c..3e5a5f18853 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -74,6 +74,7 @@ public class QBParseInfo {
private final Set<String> destCubes;
private final Set<String> destGroupingSets;
private final Map<String, ASTNode> destToHaving;
+ private final Map<String, ASTNode> destToQualify;
private final Map<String, Boolean> destToOpType;
// insertIntoTables/insertOverwriteTables map a table's fullName to its ast;
private final Map<String, ASTNode> insertIntoTables;
@@ -141,6 +142,7 @@ public class QBParseInfo {
destToWhereExpr = new HashMap<String, ASTNode>();
destToGroupby = new HashMap<String, ASTNode>();
destToHaving = new HashMap<String, ASTNode>();
+ destToQualify = new HashMap<>();
destToClusterby = new HashMap<String, ASTNode>();
destToDistributeby = new HashMap<String, ASTNode>();
destToSortby = new HashMap<String, ASTNode>();
@@ -647,6 +649,18 @@ public class QBParseInfo {
return destToLateralView;
}
+ public void setQualifyExprForClause(String dest, ASTNode ast) {
+ destToQualify.put(dest, ast);
+ }
+
+ public ASTNode getQualifyExprForClause(String dest) {
+ return destToQualify.get(dest);
+ }
+
+ public boolean hasQualifyClause() {
+ return !destToQualify.isEmpty();
+ }
+
protected static enum ClauseType {
CLUSTER_BY_CLAUSE,
DISTRIBUTE_BY_CLAUSE,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1fb7b8ad91c..5cfcb71c882 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1839,6 +1839,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
doPhase1GetDistinctFuncExprs(qbp.getAggregationExprsForClause(ctx_1.dest)));
break;
+ case HiveParser.TOK_QUALIFY:
+ qbp.setQualifyExprForClause(ctx_1.dest, ast);
+ qbp.addAggregationExprsForClause(ctx_1.dest,
+ doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
+ break;
+
case HiveParser.KW_WINDOW:
if (!qb.hasWindowingSpec(ctx_1.dest) ) {
throw new SemanticException(generateErrorMessage(ast,
@@ -11785,6 +11791,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
private Operator genPlan(QB qb, boolean skipAmbiguityCheck)
throws SemanticException {
+ if (!ctx.isCboSucceeded() && qb.getParseInfo().hasQualifyClause()) {
+ throw new SemanticException(ErrorMsg.CBO_IS_REQUIRED.getErrorCodedMsg("Qualify clause"));
+ }
+
// First generate all the opInfos for the elements in the from clause
// Must be deterministic order map - see HIVE-8707
Map<String, Operator> aliasToOpInfo = new LinkedHashMap<String, Operator>();
diff --git a/ql/src/test/queries/clientnegative/qualify_no_cbo.q b/ql/src/test/queries/clientnegative/qualify_no_cbo.q
new file mode 100644
index 00000000000..5af66423030
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/qualify_no_cbo.q
@@ -0,0 +1,7 @@
+set hive.cbo.enable=false;
+
+create table t1 (a int, b string, c int);
+
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1;
diff --git a/ql/src/test/queries/clientnegative/qualify_no_cbo_2.q b/ql/src/test/queries/clientnegative/qualify_no_cbo_2.q
new file mode 100644
index 00000000000..a95a727c479
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/qualify_no_cbo_2.q
@@ -0,0 +1,8 @@
+create table t1 (a int, b string, c int);
+
+-- distribute by is not supported by cbo so this statement should fall back to non cbo path and fail
+-- since qualify requires CBO
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+distribute by a;
diff --git a/ql/src/test/queries/clientpositive/qualify.q b/ql/src/test/queries/clientpositive/qualify.q
new file mode 100644
index 00000000000..5d0882baa6c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/qualify.q
@@ -0,0 +1,36 @@
+create table t1 (a int, b string, c int);
+insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2);
+
+-- window function is explicitly defined in qualify clause and not included in select clause
+explain cbo
+select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1;
+
+select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1;
+
+-- window function is referenced by its alias in qualify clause and should be selected too
+explain cbo
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1;
+
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1;
+
+-- mixed form of the previous two
+explain cbo
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1;
+
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1;
diff --git a/ql/src/test/queries/clientpositive/qualify_distinct.q b/ql/src/test/queries/clientpositive/qualify_distinct.q
new file mode 100644
index 00000000000..d4d1692b884
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/qualify_distinct.q
@@ -0,0 +1,42 @@
+create table t1 (a int, b string, c int);
+insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2);
+
+-- window function is explicitly defined in qualify clause and not included in select clause
+
+explain cbo
+select distinct a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1;
+
+select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1;
+
+select distinct a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1;
+
+-- window function is referenced by its alias in qualify clause and should be selected too
+explain cbo
+select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1;
+
+select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1;
+
+-- mixed form of the previous two
+explain cbo
+select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1;
+
+select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1;
diff --git a/ql/src/test/queries/clientpositive/qualify_gby.q b/ql/src/test/queries/clientpositive/qualify_gby.q
new file mode 100644
index 00000000000..72c71a10158
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/qualify_gby.q
@@ -0,0 +1,20 @@
+create table t1 (a int, b string, c int);
+insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2);
+
+-- window function is explicitly defined in qualify clause and not included in select clause
+
+explain cbo
+select a, b, c, count(*)
+ from t1
+group by a, b, c
+qualify row_number() over (partition by b order by c) = 1;
+
+select a, b, c, count(*)
+ from t1
+group by a, b, c
+qualify row_number() over (partition by b order by c) = 1;
diff --git a/ql/src/test/results/clientnegative/qualify_no_cbo.q.out b/ql/src/test/results/clientnegative/qualify_no_cbo.q.out
new file mode 100644
index 00000000000..97f2f2d17e1
--- /dev/null
+++ b/ql/src/test/results/clientnegative/qualify_no_cbo.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: create table t1 (a int, b string, c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int, b string, c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+FAILED: SemanticException [Error 10433]: The following functionality requires CBO (hive.cbo.enable): Qualify clause
diff --git a/ql/src/test/results/clientnegative/qualify_no_cbo_2.q.out b/ql/src/test/results/clientnegative/qualify_no_cbo_2.q.out
new file mode 100644
index 00000000000..97f2f2d17e1
--- /dev/null
+++ b/ql/src/test/results/clientnegative/qualify_no_cbo_2.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: create table t1 (a int, b string, c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int, b string, c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+FAILED: SemanticException [Error 10433]: The following functionality requires CBO (hive.cbo.enable): Qualify clause
diff --git a/ql/src/test/results/clientpositive/llap/qualify.q.out b/ql/src/test/results/clientpositive/llap/qualify.q.out
new file mode 100644
index 00000000000..af133326b24
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/qualify.q.out
@@ -0,0 +1,128 @@
+PREHOOK: query: create table t1 (a int, b string, c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int, b string, c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: explain cbo
+select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], b=[$1], c=[$2])
+ HiveFilter(condition=[=($3, 1)])
+ HiveProject(a=[$0], b=[$1], c=[$2], row_number_window_0=[row_number() OVER (PARTITION BY $1 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 A 1
+3 B 1
+PREHOOK: query: explain cbo
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], b=[$1], c=[$2], row_num=[CAST(1):INTEGER])
+ HiveFilter(condition=[=($3, 1)])
+ HiveProject(a=[$0], b=[$1], c=[$2], row_number_window_0=[row_number() OVER (PARTITION BY $1 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 A 1 1
+3 B 1 1
+PREHOOK: query: explain cbo
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], b=[$1], c=[$2], row_num=[CAST(1):INTEGER])
+ HiveFilter(condition=[AND(=($3, 1), =($4, 1))])
+ HiveProject(a=[$0], b=[$1], c=[$2], row_number_window_0=[row_number() OVER (PARTITION BY $1 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)], row_number_window_1=[row_number() OVER (PARTITION BY 0 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 A 1 1
diff --git a/ql/src/test/results/clientpositive/llap/qualify_distinct.q.out b/ql/src/test/results/clientpositive/llap/qualify_distinct.q.out
new file mode 100644
index 00000000000..ece005ca56f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/qualify_distinct.q.out
@@ -0,0 +1,146 @@
+PREHOOK: query: create table t1 (a int, b string, c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int, b string, c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: explain cbo
+select distinct a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{0, 1, 2}])
+ HiveFilter(condition=[=($3, 1)])
+ HiveProject(a=[$0], b=[$1], c=[$2], row_number_window_0=[row_number() OVER (PARTITION BY $1 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 A 1
+3 B 1
+PREHOOK: query: select distinct a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct a, b, c
+ from t1
+qualify row_number() over (partition by b order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 A 1
+3 B 1
+PREHOOK: query: explain cbo
+select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], b=[$1], c=[$2], row_num=[CAST(1):INTEGER])
+ HiveAggregate(group=[{0, 1, 2}])
+ HiveFilter(condition=[=($3, 1)])
+ HiveProject(a=[$0], b=[$1], c=[$2], row_number_window_0=[row_number() OVER (PARTITION BY $1 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 A 1 1
+3 B 1 1
+PREHOOK: query: explain cbo
+select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], b=[$1], c=[$2], row_num=[CAST(1):INTEGER])
+ HiveAggregate(group=[{0, 1, 2}])
+ HiveFilter(condition=[AND(=($3, 1), =($4, 1))])
+ HiveProject(a=[$0], b=[$1], c=[$2], row_number_window_0=[row_number() OVER (PARTITION BY $1 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)], row_number_window_1=[row_number() OVER (PARTITION BY 0 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct a, b, c, row_number() over (partition by b order by c) as row_num
+ from t1
+qualify row_num = 1 and row_number() over (order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 A 1 1
diff --git a/ql/src/test/results/clientpositive/llap/qualify_gby.q.out b/ql/src/test/results/clientpositive/llap/qualify_gby.q.out
new file mode 100644
index 00000000000..d8f9f99f0e5
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/qualify_gby.q.out
@@ -0,0 +1,69 @@
+PREHOOK: query: create table t1 (a int, b string, c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int, b string, c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 (a, b, c) values
+ (1, 'A', 1),
+ (1, 'A', 1),
+ (2, 'A', 2),
+ (3, 'B', 1),
+ (4, 'B', 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+POSTHOOK: Lineage: t1.b SCRIPT []
+POSTHOOK: Lineage: t1.c SCRIPT []
+PREHOOK: query: explain cbo
+select a, b, c, count(*)
+ from t1
+group by a, b, c
+qualify row_number() over (partition by b order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select a, b, c, count(*)
+ from t1
+group by a, b, c
+qualify row_number() over (partition by b order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a=[$0], b=[$1], c=[$2], _o__c3=[$3])
+ HiveFilter(condition=[=($4, 1)])
+ HiveProject((tok_table_or_col a)=[$0], (tok_table_or_col b)=[$1], (tok_table_or_col c)=[$2], (tok_functionstar count)=[$3], row_number_window_0=[row_number() OVER (PARTITION BY $1 ORDER BY $2 NULLS LAST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)])
+ HiveProject(a=[$0], b=[$1], c=[$2], $f3=[$3])
+ HiveAggregate(group=[{0, 1, 2}], agg#0=[count()])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: select a, b, c, count(*)
+ from t1
+group by a, b, c
+qualify row_number() over (partition by b order by c) = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b, c, count(*)
+ from t1
+group by a, b, c
+qualify row_number() over (partition by b order by c) = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 A 1 2
+3 B 1 1