This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d186aba1923 HIVE-28280: SemanticException when querying VIEW with DISTINCT clause (#6103)
d186aba1923 is described below
commit d186aba1923477949d4cede508566f4c3fb5cceb
Author: Soumyakanti Das <[email protected]>
AuthorDate: Fri Nov 7 00:13:28 2025 -0800
HIVE-28280: SemanticException when querying VIEW with DISTINCT clause (#6103)
---
.../calcite/rules/HiveRelFieldTrimmer.java | 56 ++++++-------
.../optimizer/calcite/rules/RelFieldTrimmer.java | 9 +++
.../hadoop/hive/ql/parse/CalcitePlanner.java | 23 +++---
.../view_top_relnode_not_project_authorization.q | 12 +++
...iew_top_relnode_not_project_authorization.q.out | 91 ++++++++++++++++++++++
5 files changed, 151 insertions(+), 40 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 08e98a467b2..3d507f4ebf9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -28,7 +28,6 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.calcite.adapter.druid.DruidQuery;
-import org.apache.calcite.linq4j.Ord;
import org.apache.calcite.plan.RelOptTable;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelCollation;
@@ -102,8 +101,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
private static final ThreadLocal<ColumnAccessInfo> COLUMN_ACCESS_INFO =
new ThreadLocal<>();
- private static final ThreadLocal<Map<HiveProject, Table>> VIEW_PROJECT_TO_TABLE_SCHEMA =
- new ThreadLocal<>();
+ private static final ThreadLocal<Map<RelNode, Table>> REL_TO_TABLE = new ThreadLocal<>();
protected HiveRelFieldTrimmer(boolean fetchStats) {
@@ -155,17 +153,18 @@ public RelNode trim(RelBuilder relBuilder, RelNode root) {
}
public RelNode trim(RelBuilder relBuilder, RelNode root,
- ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
+ ColumnAccessInfo columnAccessInfo,
+ Map<RelNode, Table> relNodeToTable) {
try {
// Set local thread variables
COLUMN_ACCESS_INFO.set(columnAccessInfo);
- VIEW_PROJECT_TO_TABLE_SCHEMA.set(viewToTableSchema);
+ REL_TO_TABLE.set(relNodeToTable);
// Execute pruning
return super.trim(relBuilder, root);
} finally {
// Always remove the local thread variables to avoid leaks
COLUMN_ACCESS_INFO.remove();
- VIEW_PROJECT_TO_TABLE_SCHEMA.remove();
+ REL_TO_TABLE.remove();
}
}
@@ -203,6 +202,30 @@ protected RexNode handle(RexFieldAccess fieldAccess) {
return dispatchTrimFields(input, fieldsUsedBuilder.build(), extraFields);
}
+ @Override
+ protected void preTrim(RelNode rel, ImmutableBitSet fieldsUsed) {
+ setColumnAccessInfoForViews(rel, fieldsUsed);
+ }
+
+ protected void setColumnAccessInfoForViews(RelNode rel, ImmutableBitSet fieldsUsed) {
+ final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
+ final Map<RelNode, Table> relToTable = REL_TO_TABLE.get();
+
+ // HiveTableScans are handled separately in HiveTableScan's trimFields method.
+ if (!(rel instanceof HiveTableScan) &&
+ columnAccessInfo != null &&
+ relToTable != null &&
+ relToTable.containsKey(rel)) {
+ Table table = relToTable.get(rel);
+ String tableName = table.getCompleteName();
+ List<FieldSchema> tableAllCols = table.getAllCols();
+
+ for (int i : fieldsUsed) {
+ columnAccessInfo.add(tableName, tableAllCols.get(i).getName());
+ }
+ }
+ }
+
/**
* Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
* {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
@@ -726,27 +749,6 @@ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Se
return result(relBuilder.build(), mapping);
}
- /**
- * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
- * {@link org.apache.calcite.rel.logical.LogicalProject}.
- */
- public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
- Set<RelDataTypeField> extraFields) {
- // set columnAccessInfo for ViewColumnAuthorization
- final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
- final Map<HiveProject, Table> viewProjectToTableSchema = VIEW_PROJECT_TO_TABLE_SCHEMA.get();
- if (columnAccessInfo != null && viewProjectToTableSchema != null
- && viewProjectToTableSchema.containsKey(project)) {
- for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
- if (fieldsUsed.get(ord.i)) {
- Table tab = viewProjectToTableSchema.get(project);
- columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName());
- }
- }
- }
- return super.trimFields(project, fieldsUsed, extraFields);
- }
-
public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java
index 88f99eb1bc0..27e6ca03bf4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java
@@ -282,6 +282,7 @@ protected final TrimResult dispatchTrimFields(
RelNode rel,
ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
+ preTrim(rel, fieldsUsed);
final TrimResult trimResult =
trimFieldsDispatcher.invoke(rel, fieldsUsed, extraFields);
final RelNode newRel = trimResult.left;
@@ -1239,6 +1240,14 @@ public TrimResult trimFields(
return result(newTableAccessRel, mapping);
}
+ /**
+ * Run this method before trimming columns from a relational expression.
+ *
+ * @param rel RelNode
+ * @param fieldsUsed Fields used
+ */
+ protected void preTrim(RelNode rel, ImmutableBitSet fieldsUsed) {}
+
//~ Inner Classes ----------------------------------------------------------
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 19c780250eb..defe96db9f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1577,7 +1577,7 @@ public class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> {
private final Map<String, PrunedPartitionList> partitionCache;
private final Map<String, ColumnStatsList> colStatsCache;
private final ColumnAccessInfo columnAccessInfo;
- private Map<HiveProject, Table> viewProjectToTableSchema;
+ private final Map<RelNode, Table> relToTable;
private final QB rootQB;
// correlated vars across subqueries within same query needs to have different ID
@@ -1604,6 +1604,7 @@ protected CalcitePlannerAction(
this.rootQB = rootQB;
this.colStatsCache = ctx.getOpContext().getColStatsCache();
this.columnAccessInfo = columnAccessInfo;
+ this.relToTable = new HashMap<>();
}
@Override
@@ -1662,8 +1663,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
// We need to get the ColumnAccessInfo and viewToTableSchema for views.
if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_SCANCOLS) || !skipAuthorization()) {
HiveRelFieldTrimmer.get()
- .trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null), calcitePlan, this.columnAccessInfo,
- this.viewProjectToTableSchema);
+ .trim(
+ HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
+ calcitePlan,
+ this.columnAccessInfo,
+ this.relToTable
+ );
}
perfLogger.perfLogEnd(this.getClass().getName(),
PerfLogger.MV_REWRITE_FIELD_TRIMMER);
@@ -4917,15 +4922,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
aliasToRel.put(subqAlias, relNode);
if (qb.getViewToTabSchema().containsKey(subqAlias)) {
- if (relNode instanceof HiveProject) {
- if (this.viewProjectToTableSchema == null) {
- this.viewProjectToTableSchema = new LinkedHashMap<>();
- }
- viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias));
- } else {
- throw new SemanticException("View " + subqAlias + " is corresponding to "
- + relNode.toString() + ", rather than a HiveProject.");
- }
+ relToTable.put(relNode, qb.getViewToTabSchema().get(subqAlias));
}
}
@@ -5046,7 +5043,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
setQB(qb);
return srcRel;
}
-
+
private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
RelNode gbFilter = null;
QBParseInfo qbp = getQBParseInfo(qb);
diff --git a/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q b/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q
new file mode 100644
index 00000000000..a3a20a684b4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q
@@ -0,0 +1,12 @@
+set hive.security.authorization.enabled=true;
+create table t1 (username string, id int);
+
+create view vw_t0 as select distinct username from t1 group by username;
+explain cbo select * from vw_t0;
+
+create view vw_t1 as select distinct username from t1 order by username desc limit 5;
+explain cbo select * from vw_t1;
+
+create view vw_t2 as
+select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id;
+explain cbo select * from vw_t2;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out
new file mode 100644
index 00000000000..5c4589c21e2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out
@@ -0,0 +1,91 @@
+PREHOOK: query: create table t1 (username string, id int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (username string, id int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create view vw_t0 as select distinct username from t1 group by username
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vw_t0
+POSTHOOK: query: create view vw_t0 as select distinct username from t1 group by username
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vw_t0
+POSTHOOK: Lineage: vw_t0.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
+PREHOOK: query: explain cbo select * from vw_t0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@vw_t0
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select * from vw_t0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@vw_t0
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{0}])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: create view vw_t1 as select distinct username from t1 order by username desc limit 5
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vw_t1
+POSTHOOK: query: create view vw_t1 as select distinct username from t1 order by username desc limit 5
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vw_t1
+POSTHOOK: Lineage: vw_t1.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
+PREHOOK: query: explain cbo select * from vw_t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@vw_t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select * from vw_t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@vw_t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[DESC], fetch=[5])
+ HiveProject(username=[$0])
+ HiveAggregate(group=[{0}])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: create view vw_t2 as
+select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vw_t2
+POSTHOOK: query: create view vw_t2 as
+select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vw_t2
+POSTHOOK: Lineage: vw_t2.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
+PREHOOK: query: explain cbo select * from vw_t2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@vw_t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select * from vw_t2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@vw_t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveFilter(condition=[>($0, _UTF-16LE'a')])
+ HiveProject(username=[$0])
+ HiveSortLimit(fetch=[1])
+ HiveProject(username=[$0])
+ HiveFilter(condition=[>($1, 10)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+