This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d186aba1923 HIVE-28280: SemanticException when querying VIEW with DISTINCT clause (#6103)
d186aba1923 is described below

commit d186aba1923477949d4cede508566f4c3fb5cceb
Author: Soumyakanti Das <[email protected]>
AuthorDate: Fri Nov 7 00:13:28 2025 -0800

    HIVE-28280: SemanticException when querying VIEW with DISTINCT clause (#6103)
---
 .../calcite/rules/HiveRelFieldTrimmer.java         | 56 ++++++-------
 .../optimizer/calcite/rules/RelFieldTrimmer.java   |  9 +++
 .../hadoop/hive/ql/parse/CalcitePlanner.java       | 23 +++---
 .../view_top_relnode_not_project_authorization.q   | 12 +++
 ...iew_top_relnode_not_project_authorization.q.out | 91 ++++++++++++++++++++++
 5 files changed, 151 insertions(+), 40 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 08e98a467b2..3d507f4ebf9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -28,7 +28,6 @@
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 import org.apache.calcite.adapter.druid.DruidQuery;
-import org.apache.calcite.linq4j.Ord;
 import org.apache.calcite.plan.RelOptTable;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelCollation;
@@ -102,8 +101,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
 
   private static final ThreadLocal<ColumnAccessInfo> COLUMN_ACCESS_INFO =
       new ThreadLocal<>();
-  private static final ThreadLocal<Map<HiveProject, Table>> VIEW_PROJECT_TO_TABLE_SCHEMA =
-      new ThreadLocal<>();
+  private static final ThreadLocal<Map<RelNode, Table>> REL_TO_TABLE = new ThreadLocal<>();
 
 
   protected HiveRelFieldTrimmer(boolean fetchStats) {
@@ -155,17 +153,18 @@ public RelNode trim(RelBuilder relBuilder, RelNode root) {
   }
 
   public RelNode trim(RelBuilder relBuilder, RelNode root,
-      ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
+                      ColumnAccessInfo columnAccessInfo,
+                      Map<RelNode, Table> relNodeToTable) {
     try {
       // Set local thread variables
       COLUMN_ACCESS_INFO.set(columnAccessInfo);
-      VIEW_PROJECT_TO_TABLE_SCHEMA.set(viewToTableSchema);
+      REL_TO_TABLE.set(relNodeToTable);
       // Execute pruning
       return super.trim(relBuilder, root);
     } finally {
       // Always remove the local thread variables to avoid leaks
       COLUMN_ACCESS_INFO.remove();
-      VIEW_PROJECT_TO_TABLE_SCHEMA.remove();
+      REL_TO_TABLE.remove();
     }
   }
 
@@ -203,6 +202,30 @@ protected RexNode handle(RexFieldAccess fieldAccess) {
     return dispatchTrimFields(input, fieldsUsedBuilder.build(), extraFields);
   }
 
+  @Override
+  protected void preTrim(RelNode rel, ImmutableBitSet fieldsUsed) {
+    setColumnAccessInfoForViews(rel, fieldsUsed);
+  }
+
+  protected void setColumnAccessInfoForViews(RelNode rel, ImmutableBitSet fieldsUsed) {
+    final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
+    final Map<RelNode, Table> relToTable = REL_TO_TABLE.get();
+
+    // HiveTableScans are handled separately in HiveTableScan's trimFields method.
+    if (!(rel instanceof HiveTableScan) &&
+        columnAccessInfo != null &&
+        relToTable != null &&
+        relToTable.containsKey(rel)) {
+      Table table = relToTable.get(rel);
+      String tableName = table.getCompleteName();
+      List<FieldSchema> tableAllCols = table.getAllCols();
+      
+      for (int i : fieldsUsed) {
+        columnAccessInfo.add(tableName, tableAllCols.get(i).getName());
+      }
+    }
+  }
+
   /**
    * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
    * {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
@@ -726,27 +749,6 @@ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Se
     return result(relBuilder.build(), mapping);
   }
 
-  /**
-   * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
-   * {@link org.apache.calcite.rel.logical.LogicalProject}.
-   */
-  public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
-      Set<RelDataTypeField> extraFields) {
-    // set columnAccessInfo for ViewColumnAuthorization
-    final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
-    final Map<HiveProject, Table> viewProjectToTableSchema = VIEW_PROJECT_TO_TABLE_SCHEMA.get();
-    if (columnAccessInfo != null && viewProjectToTableSchema != null
-        && viewProjectToTableSchema.containsKey(project)) {
-      for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
-        if (fieldsUsed.get(ord.i)) {
-          Table tab = viewProjectToTableSchema.get(project);
-          columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName());
-        }
-      }
-    }
-    return super.trimFields(project, fieldsUsed, extraFields);
-  }
-
   public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed,
       Set<RelDataTypeField> extraFields) {
     final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java
index 88f99eb1bc0..27e6ca03bf4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/RelFieldTrimmer.java
@@ -282,6 +282,7 @@ protected final TrimResult dispatchTrimFields(
       RelNode rel,
       ImmutableBitSet fieldsUsed,
       Set<RelDataTypeField> extraFields) {
+    preTrim(rel, fieldsUsed);
     final TrimResult trimResult =
         trimFieldsDispatcher.invoke(rel, fieldsUsed, extraFields);
     final RelNode newRel = trimResult.left;
@@ -1239,6 +1240,14 @@ public TrimResult trimFields(
     return result(newTableAccessRel, mapping);
   }
 
+  /**
+   * Run this method before trimming columns from a relational expression.
+   * 
+   * @param rel RelNode
+   * @param fieldsUsed Fields used
+   */
+  protected void preTrim(RelNode rel, ImmutableBitSet fieldsUsed) {}
+
   //~ Inner Classes ----------------------------------------------------------
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 19c780250eb..defe96db9f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1577,7 +1577,7 @@ public class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> {
     private final Map<String, PrunedPartitionList>        partitionCache;
     private final Map<String, ColumnStatsList>            colStatsCache;
     private final ColumnAccessInfo columnAccessInfo;
-    private Map<HiveProject, Table> viewProjectToTableSchema;
+    private final Map<RelNode, Table> relToTable;
     private final QB rootQB;
 
     // correlated vars across subqueries within same query needs to have different ID
@@ -1604,6 +1604,7 @@ protected CalcitePlannerAction(
       this.rootQB = rootQB;
       this.colStatsCache = ctx.getOpContext().getColStatsCache();
       this.columnAccessInfo = columnAccessInfo;
+      this.relToTable = new HashMap<>();
     }
 
     @Override
@@ -1662,8 +1663,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       // We need to get the ColumnAccessInfo and viewToTableSchema for views.
       if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_SCANCOLS) || !skipAuthorization()) {
         HiveRelFieldTrimmer.get()
-            .trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null), calcitePlan, this.columnAccessInfo,
-                this.viewProjectToTableSchema);
+            .trim(
+                HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
+                calcitePlan,
+                this.columnAccessInfo,
+                this.relToTable
+            );
       }
       perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.MV_REWRITE_FIELD_TRIMMER);
 
@@ -4917,15 +4922,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
 
         aliasToRel.put(subqAlias, relNode);
         if (qb.getViewToTabSchema().containsKey(subqAlias)) {
-          if (relNode instanceof HiveProject) {
-            if (this.viewProjectToTableSchema == null) {
-              this.viewProjectToTableSchema = new LinkedHashMap<>();
-            }
-            viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias));
-          } else {
-            throw new SemanticException("View " + subqAlias + " is corresponding to "
-                + relNode.toString() + ", rather than a HiveProject.");
-          }
+          relToTable.put(relNode, qb.getViewToTabSchema().get(subqAlias));
         }
       }
 
@@ -5046,7 +5043,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
       setQB(qb);
       return srcRel;
     }
-
+    
     private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
       RelNode gbFilter = null;
       QBParseInfo qbp = getQBParseInfo(qb);
diff --git a/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q b/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q
new file mode 100644
index 00000000000..a3a20a684b4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/view_top_relnode_not_project_authorization.q
@@ -0,0 +1,12 @@
+set hive.security.authorization.enabled=true;
+create table t1 (username string, id int);
+
+create view vw_t0 as select distinct username from t1 group by username;
+explain cbo select * from vw_t0;
+
+create view vw_t1 as select distinct username from t1 order by username desc limit 5;
+explain cbo select * from vw_t1;
+
+create view vw_t2 as 
+select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id;
+explain cbo select * from vw_t2;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out
new file mode 100644
index 00000000000..5c4589c21e2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/view_top_relnode_not_project_authorization.q.out
@@ -0,0 +1,91 @@
+PREHOOK: query: create table t1 (username string, id int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (username string, id int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create view vw_t0 as select distinct username from t1 group by username
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vw_t0
+POSTHOOK: query: create view vw_t0 as select distinct username from t1 group by username
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vw_t0
+POSTHOOK: Lineage: vw_t0.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
+PREHOOK: query: explain cbo select * from vw_t0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@vw_t0
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select * from vw_t0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@vw_t0
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{0}])
+  HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: create view vw_t1 as select distinct username from t1 order by username desc limit 5
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vw_t1
+POSTHOOK: query: create view vw_t1 as select distinct username from t1 order by username desc limit 5
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vw_t1
+POSTHOOK: Lineage: vw_t1.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
+PREHOOK: query: explain cbo select * from vw_t1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@vw_t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select * from vw_t1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@vw_t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[DESC], fetch=[5])
+  HiveProject(username=[$0])
+    HiveAggregate(group=[{0}])
+      HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: create view vw_t2 as 
+select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@vw_t2
+POSTHOOK: query: create view vw_t2 as 
+select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@vw_t2
+POSTHOOK: Lineage: vw_t2.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
+PREHOOK: query: explain cbo select * from vw_t2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@vw_t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select * from vw_t2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@vw_t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveFilter(condition=[>($0, _UTF-16LE'a')])
+  HiveProject(username=[$0])
+    HiveSortLimit(fetch=[1])
+      HiveProject(username=[$0])
+        HiveFilter(condition=[>($1, 10)])
+          HiveTableScan(table=[[default, t1]], table:alias=[t1])
+

Reply via email to