This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 3c2291ef5 [KYUUBI #5677][AUTHZ] Typeof expression miss column
information
3c2291ef5 is described below
commit 3c2291ef5c90001252a592c9cbed3c99a3707bba
Author: Angerszhuuuu <[email protected]>
AuthorDate: Wed Nov 15 12:37:43 2023 +0800
[KYUUBI #5677][AUTHZ] Typeof expression miss column information
### _Why are the changes needed?_
To close #5677
The `typeof` expression misses column information; in this PR we add a placeholder
to support this.
For sql
```
SELECT typeof(id), typeof(age) FROM default.t1 LIMIT 1
```
Without this PR, after the optimizer runs, the `typeof` passed to PrivilegeBuild is just an
attribute, missing the original child expression info.
```
GlobalLimit 21
+- LocalLimit 21
+- Project [int AS typeof(id)#27, int AS typeof(age)#28]
+- Relation default.t1[id#18,age#19,part#20] parquet
```
When handling this plan's project list, it is only an attribute — the expression is missing
because the expression result is a constant value — so we can't extract the
`TypeOf`'s child expression.

This is caused by the `TypeOf` expression being foldable:
<img width="778" alt="image"
src="https://github.com/apache/kyuubi/assets/46485123/770a36fc-235f-4f26-bca7-a5058e120919">
It will be converted to a constant value by the Spark optimizer, which causes us to
miss the child expression, so we can't extract columns via `collectLeaves`.
In this PR we wrap the `TypeOf` in a non-foldable expression placeholder,
so we can obtain the expression containing the table's columns during `mergeProjection`.
After this pr, the plan is like below
```
GlobalLimit 21
+- LocalLimit 21
+- Project [typeofplaceholder(id#21) AS typeof(id)#30,
typeofplaceholder(day#23) AS typeof(day)#31]
+- HiveTableRelation [`default`.`table1`,
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#21,
scope#22, day#23], Partition Cols: []]
```
### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run
test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests)
locally before making a pull request
### _Was this patch authored or co-authored using generative AI tooling?_
No
Closes #5678 from AngersZhuuuu/KYUUBO-5677.
Closes #5677
273cd61bf [Angerszhuuuu] Update TypeOfPlaceHolder.scala
8b4383db5 [Angerszhuuuu] done
af5dae541 [Angerszhuuuu] Update RangerSparkExtensionSuite.scala
37edcdf6d [Angerszhuuuu] fix for spark-3.1
df15599a3 [Angerszhuuuu] update
a7dd3c453 [Angerszhuuuu] Update RangerSparkExtension.scala
6b76dac0d [Angerszhuuuu] [KYUUBI #5677][AUTHZ] Typeof expression miss
column information
f8a94c72d [Angerszhuuuu] [KYUUBI #5677][AUTHZ] Typeof expression miss
column information
Authored-by: Angerszhuuuu <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../spark/authz/ranger/RangerSparkExtension.scala | 5 ++-
.../RuleEliminateTypeOf.scala} | 18 ++++++---
.../expression/RuleApplyTypeOfMarker.scala} | 15 ++++---
.../authz/rule/expression/TypeOfPlaceHolder.scala | 41 +++++++++++++++++++
.../spark/authz/util/WithInternalChildren.scala | 5 +++
.../authz/ranger/RangerSparkExtensionSuite.scala | 46 +++++++++++++++++++++-
6 files changed, 116 insertions(+), 14 deletions(-)
diff --git
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala
index 01645ff97..f6d439003 100644
---
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala
+++
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala
@@ -19,9 +19,10 @@ package org.apache.kyuubi.plugin.spark.authz.ranger
import org.apache.spark.sql.SparkSessionExtensions
-import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker,
RuleEliminatePermanentViewMarker}
+import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker,
RuleEliminatePermanentViewMarker, RuleEliminateTypeOf}
import
org.apache.kyuubi.plugin.spark.authz.rule.config.AuthzConfigurationChecker
import
org.apache.kyuubi.plugin.spark.authz.rule.datamasking.{RuleApplyDataMaskingStage0,
RuleApplyDataMaskingStage1}
+import
org.apache.kyuubi.plugin.spark.authz.rule.expression.RuleApplyTypeOfMarker
import
org.apache.kyuubi.plugin.spark.authz.rule.permanentview.RuleApplyPermanentViewMarker
import
org.apache.kyuubi.plugin.spark.authz.rule.rowfilter.{FilterDataSourceV2Strategy,
RuleApplyRowFilter, RuleReplaceShowObjectCommands}
@@ -46,12 +47,14 @@ class RangerSparkExtension extends (SparkSessionExtensions
=> Unit) {
v1.injectCheckRule(AuthzConfigurationChecker)
v1.injectResolutionRule(_ => new RuleReplaceShowObjectCommands())
v1.injectResolutionRule(_ => new RuleApplyPermanentViewMarker())
+ v1.injectResolutionRule(_ => new RuleApplyTypeOfMarker())
v1.injectResolutionRule(RuleApplyRowFilter)
v1.injectResolutionRule(RuleApplyDataMaskingStage0)
v1.injectResolutionRule(RuleApplyDataMaskingStage1)
v1.injectOptimizerRule(_ => new RuleEliminateMarker())
v1.injectOptimizerRule(new RuleAuthorization(_))
v1.injectOptimizerRule(_ => new RuleEliminatePermanentViewMarker())
+ v1.injectOptimizerRule(_ => new RuleEliminateTypeOf())
v1.injectPlannerStrategy(new FilterDataSourceV2Strategy(_))
}
}
diff --git
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateTypeOf.scala
similarity index 64%
copy from
extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
copy to
extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateTypeOf.scala
index bbce1dff8..d474383f6 100644
---
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
+++
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateTypeOf.scala
@@ -15,14 +15,20 @@
* limitations under the License.
*/
-package org.apache.kyuubi.plugin.spark.authz.util
+package org.apache.kyuubi.plugin.spark.authz.rule
+import org.apache.spark.sql.catalyst.expressions.TypeOf
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
-trait WithInternalChildren {
- def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]):
LogicalPlan
-}
+import org.apache.kyuubi.plugin.spark.authz.rule.expression.TypeOfPlaceHolder
-trait WithInternalChild {
- def withNewChildInternal(newChild: LogicalPlan): LogicalPlan
+class RuleEliminateTypeOf extends Rule[LogicalPlan] {
+ override def apply(plan: LogicalPlan): LogicalPlan = {
+ plan.transformUp { case p =>
+ p.transformExpressionsUp {
+ case toph: TypeOfPlaceHolder => TypeOf(toph.expr)
+ }
+ }
+ }
}
diff --git
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/RuleApplyTypeOfMarker.scala
similarity index 69%
copy from
extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
copy to
extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/RuleApplyTypeOfMarker.scala
index bbce1dff8..411977624 100644
---
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
+++
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/RuleApplyTypeOfMarker.scala
@@ -15,14 +15,17 @@
* limitations under the License.
*/
-package org.apache.kyuubi.plugin.spark.authz.util
+package org.apache.kyuubi.plugin.spark.authz.rule.expression
+import org.apache.spark.sql.catalyst.expressions.TypeOf
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
-trait WithInternalChildren {
- def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]):
LogicalPlan
-}
+class RuleApplyTypeOfMarker extends Rule[LogicalPlan] {
-trait WithInternalChild {
- def withNewChildInternal(newChild: LogicalPlan): LogicalPlan
+ override def apply(plan: LogicalPlan): LogicalPlan = {
+ plan transformAllExpressions {
+ case typeof: TypeOf => TypeOfPlaceHolder(typeof.child)
+ }
+ }
}
diff --git
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/TypeOfPlaceHolder.scala
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/TypeOfPlaceHolder.scala
new file mode 100644
index 000000000..ebc9cecf5
--- /dev/null
+++
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/TypeOfPlaceHolder.scala
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.plugin.spark.authz.rule.expression
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext,
ExprCode}
+import org.apache.spark.sql.types.{DataType, StringType}
+
+import org.apache.kyuubi.plugin.spark.authz.util.WithInternalExpressionChild
+
+case class TypeOfPlaceHolder(expr: Expression) extends UnaryExpression
+ with WithInternalExpressionChild {
+ override def dataType: DataType = StringType
+
+ // Avoid fold constant expression by Spark Optimizer
+ override def foldable: Boolean = false
+
+ override def child: Expression = expr
+
+ override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+ defineCodeGen(ctx, ev, _ =>
s"""UTF8String.fromString(${child.dataType.catalogString})""")
+ }
+
+ override def withNewChildInternal(newChild: Expression): Expression =
+ copy(expr = newChild)
+}
diff --git
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
index bbce1dff8..582b34abe 100644
---
a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
+++
b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala
@@ -17,6 +17,7 @@
package org.apache.kyuubi.plugin.spark.authz.util
+import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
trait WithInternalChildren {
@@ -26,3 +27,7 @@ trait WithInternalChildren {
trait WithInternalChild {
def withNewChildInternal(newChild: LogicalPlan): LogicalPlan
}
+
+trait WithInternalExpressionChild {
+ def withNewChildInternal(newChild: Expression): Expression
+}
diff --git
a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala
b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala
index 571e206f4..d3a10cf9a 100644
---
a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala
+++
b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala
@@ -23,7 +23,7 @@ import java.nio.file.Path
import scala.util.Try
import org.apache.hadoop.security.UserGroupInformation
-import org.apache.spark.sql.SparkSessionExtensions
+import org.apache.spark.sql.{Row, SparkSessionExtensions}
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
import org.apache.spark.sql.catalyst.plans.logical.Statistics
@@ -1296,4 +1296,48 @@ class HiveCatalogRangerSparkExtensionSuite extends
RangerSparkExtensionSuite {
}
}
}
+
+ test("[KYUUBI #5677][AUTHZ] Typeof expression miss column information") {
+ val db1 = defaultDb
+ val table1 = "table1"
+ withSingleCallEnabled {
+ withCleanTmpResources(Seq((s"$db1.$table1", "table"))) {
+ doAs(
+ admin,
+ sql(
+ s"""
+ |CREATE TABLE IF NOT EXISTS $db1.$table1(
+ |id int,
+ |scope int,
+ |day string)
+ |""".stripMargin))
+ doAs(admin, sql(s"INSERT INTO $db1.$table1 SELECT 1, 2, 'TONY'"))
+ interceptContains[AccessControlException](
+ doAs(
+ someone,
+ sql(s"SELECT typeof(id), typeof(typeof(day)) FROM
$db1.$table1").collect()))(
+ s"does not have [select] privilege on
[$db1/$table1/id,$db1/$table1/day]")
+ interceptContains[AccessControlException](
+ doAs(
+ someone,
+ sql(
+ s"""
+ |SELECT
+ |typeof(cast(id as string)),
+ |typeof(substring(day, 1, 3))
+ |FROM $db1.$table1""".stripMargin).collect()))(
+ s"does not have [select] privilege on
[$db1/$table1/id,$db1/$table1/day]")
+ checkAnswer(
+ admin,
+ s"""
+ |SELECT
+ |typeof(id),
+ |typeof(typeof(day)),
+ |typeof(cast(id as string)),
+ |typeof(substring(day, 1, 3))
+ |FROM $db1.$table1""".stripMargin,
+ Seq(Row("int", "string", "string", "string")))
+ }
+ }
+ }
}