This is an automated email from the ASF dual-hosted git repository.
liuxun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git
The following commit(s) were added to refs/heads/master by this push:
new 81dca71  SUBMARINE-531. Fix data masking for nested fields with functions
81dca71 is described below
commit 81dca71416f8605f765cdb250a6c776f16fdf59e
Author: Kent Yao <[email protected]>
AuthorDate: Mon Jun 29 21:46:10 2020 +0800
SUBMARINE-531. Fix data masking for nested fields with functions
### What is this PR for?
This PR fixes #313, where data masking works incorrectly with function-nested fields.
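A minimal sketch of the failure mode, mirroring the test case added below (it assumes the suite's MASK_SHOW_LAST_4 policy on default.rangertbl5, so 'value' should render as xxx_277):

    // A bare reference to the masked column was rewritten correctly, but a
    // reference nested inside a function previously escaped masking:
    val df = spark.sql(
      """select value,                -- masked:   xxx_277
        |       substr(value, 0, 18)  -- expected: xxx_277
        |from default.rangertbl5
        |where value = 'val_277'""".stripMargin)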
### What type of PR is it?
Bug Fix
### Todos
* [ ] - Task
### What is the Jira issue?
* Open an issue on Jira https://issues.apache.org/jira/browse/SUBMARINE-531
### How should this be tested?
Added more unit tests in DataMaskingSQLTest.
### Screenshots (if appropriate)
### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: Kent Yao <[email protected]>
Closes #332 from yaooqinn/SUBMARINE-531 and squashes the following commits:
c8cb492 [Kent Yao] Merge branch 'master' into SUBMARINE-531
577fce2 [Kent Yao] SUBMARINE-531. Fix data masking for nested fields with functions
fffed1e [Kent Yao] SUBMARINE-489. DCL Framework: SHOW CURRENT ROLES syntax
---
.../optimizer/SubmarineDataMaskingExtension.scala | 30 +++++++++++++++++-----
.../spark/security/DataMaskingSQLTest.scala | 22 ++++++++++++++++
2 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/submarine-security/spark-security/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SubmarineDataMaskingExtension.scala b/submarine-security/spark-security/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SubmarineDataMaskingExtension.scala
index 5dad02a..0132385 100644
--- a/submarine-security/spark-security/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SubmarineDataMaskingExtension.scala
+++ b/submarine-security/spark-security/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SubmarineDataMaskingExtension.scala
@@ -129,7 +129,8 @@ case class SubmarineDataMaskingExtension(spark: SparkSession) extends Rule[Logic
private def collectTransformers(
plan: LogicalPlan,
table: CatalogTable,
- aliases: mutable.Map[Alias, ExprId]): Map[ExprId, NamedExpression] = {
+ aliases: mutable.Map[Alias, ExprId],
+ outputs: Seq[NamedExpression]): Map[ExprId, NamedExpression] = {
try {
val maskEnableResults = plan.output.map { expr =>
expr -> getAccessResult(table.identifier, expr)
@@ -140,6 +141,18 @@ case class SubmarineDataMaskingExtension(spark: SparkSession) extends Rule[Logic
}.filter(_._2 != null).toMap
val aliasedMaskers = new mutable.HashMap[ExprId, Alias]()
+
+ for (output <- outputs) {
+ val newOutput = output transformUp {
+ case ar: AttributeReference => formedMaskers.getOrElse(ar.exprId, ar)
+ }
+
+ if (!output.equals(newOutput)) {
+ val newAlias = Alias(newOutput, output.name)()
+ aliasedMaskers.put(output.exprId, newAlias)
+ }
+ }
+
for ((alias, id) <- aliases if formedMaskers.contains(id)) {
val originalAlias = formedMaskers(id)
val newChild = originalAlias.child mapChildren {
@@ -171,10 +184,10 @@ case class SubmarineDataMaskingExtension(spark: SparkSession) extends Rule[Logic
val aliases = new mutable.HashMap[Alias, ExprId]()
plan.transformAllExpressions {
case a: Alias =>
- a.child match {
+ a.child transformUp {
case ne: NamedExpression =>
- aliases.put(a, ne.exprId)
- case _ =>
+ aliases.getOrElseUpdate(a, ne.exprId)
+ ne
}
a
}
@@ -184,11 +197,16 @@ case class SubmarineDataMaskingExtension(spark: SparkSession) extends Rule[Logic
private def collectAllTransformers(
plan: LogicalPlan,
aliases: mutable.Map[Alias, ExprId]): Map[ExprId, NamedExpression] = {
+ val outputs = plan match {
+ case p: Project => p.projectList
+ case o => o.output
+ }
+
plan.collectLeaves().flatMap {
case h: HiveTableRelation =>
- collectTransformers(h, h.tableMeta, aliases)
+ collectTransformers(h, h.tableMeta, aliases, outputs)
case l: LogicalRelation if l.catalogTable.isDefined =>
- collectTransformers(l, l.catalogTable.get, aliases)
+ collectTransformers(l, l.catalogTable.get, aliases, outputs)
case _ => Seq.empty
}.toMap
}
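The hunk above is the heart of the fix: each output expression is traversed bottom-up, AttributeReferences that have a registered masking expression are substituted in place (even when nested inside function calls such as substr), and the rewritten tree is re-aliased under its original name. A minimal standalone sketch of that traversal, assuming a maskers map like the formedMaskers built earlier (maskOutputs itself is illustrative, not part of the patch):

    import org.apache.spark.sql.catalyst.expressions._

    // Illustrative helper: substitute masking expressions into output
    // expressions, reaching column references nested inside function calls.
    def maskOutputs(
        outputs: Seq[NamedExpression],
        maskers: Map[ExprId, NamedExpression]): Seq[NamedExpression] =
      outputs.map { out =>
        // Bottom-up rewrite, so substr(value, ...) is reached via 'value'.
        val rewritten = out.transformUp {
          case ar: AttributeReference => maskers.getOrElse(ar.exprId, ar)
        }
        // Re-alias under the original name to keep the output schema stable;
        // the actual patch also records the alias under the original exprId.
        if (rewritten.fastEquals(out)) out
        else Alias(rewritten, out.name)()
      }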
diff --git a/submarine-security/spark-security/src/test/scala/org/apache/submarine/spark/security/DataMaskingSQLTest.scala b/submarine-security/spark-security/src/test/scala/org/apache/submarine/spark/security/DataMaskingSQLTest.scala
index f228845..b33bd5a 100644
--- a/submarine-security/spark-security/src/test/scala/org/apache/submarine/spark/security/DataMaskingSQLTest.scala
+++ b/submarine-security/spark-security/src/test/scala/org/apache/submarine/spark/security/DataMaskingSQLTest.scala
@@ -188,6 +188,28 @@ case class DataMaskingSQLTest() extends FunSuite with BeforeAndAfterAll {
}
}
+ test("MASK_SHOW_LAST_4 and functions") {
+ val statement =
+ s"""
+ |select
+ | key,
+ | value,
+ | substr(value, 0, 18),
+ | substr(value, 0, 18) as v1,
+ | substr(cast(value as string), 0, 18) as v2
+ | from default.rangertbl5 where value = 'val_277'""".stripMargin
+ withUser("bob") {
+ val df = sql(statement)
+ println(df.queryExecution.optimizedPlan)
+ val row = df.take(1)(0)
+ assert(row.getString(1) === "xxx_277", "value shows last 4 characters")
+ assert(row.getString(2) === "xxx_277", "value shows last 4 characters")
+ assert(row.getString(3) === "xxx_277", "value shows last 4 characters")
+ assert(row.getString(4) === "xxx_277", "value shows last 4 characters")
+
+ }
+ }
+
test("NO MASKING") {
val statement = "select * from default.rangertbl6 where value = 'val_277'"
withUser("bob") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]