bowenliang123 commented on code in PR #4643:
URL: https://github.com/apache/kyuubi/pull/4643#discussion_r1154029838


##########
extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala:
##########
@@ -90,7 +90,7 @@ object SparkRangerAdminPlugin extends 
RangerBasePlugin("spark", "sparkSql")
 
   private def regexp_replace(expr: String, hasLen: Boolean = false): String = {
     val pos = if (hasLen) ", 5" else ""
-    val upper = s"regexp_replace($expr, '[A-Z]', 'X'$pos)"
+    val upper = s"regexp_replace($expr, '[A-Z\u4e00-\u9fa5]', 'X'$pos)"

Review Comment:
   Is there a reference for the starting and ending Unicode code points of the Chinese character range?



##########
extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala:
##########
@@ -90,7 +90,7 @@ object SparkRangerAdminPlugin extends 
RangerBasePlugin("spark", "sparkSql")
 
   private def regexp_replace(expr: String, hasLen: Boolean = false): String = {
     val pos = if (hasLen) ", 5" else ""
-    val upper = s"regexp_replace($expr, '[A-Z]', 'X'$pos)"

Review Comment:
   Chinese characters should not be part of the uppercase character class. Use a
   separate replacement for them.



##########
extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala:
##########
@@ -90,7 +90,7 @@ object SparkRangerAdminPlugin extends 
RangerBasePlugin("spark", "sparkSql")
 
   private def regexp_replace(expr: String, hasLen: Boolean = false): String = {

Review Comment:
   Use separate unit test cases to test this `regexp_replace` method.



##########
extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala:
##########
@@ -388,6 +389,19 @@ abstract class RangerSparkExtensionSuite extends 
AnyFunSuite
           assert(sql(s"SELECT value1 FROM $db.${table}2").collect() ===
             Seq(Row(DigestUtils.md5Hex("1"))))
         })
+
+      doAs(
+        "admin",
+        sql(
+          s"INSERT INTO $db.$table SELECT 10, 1, 'hello', " +
+            s"'\u6d4b\u8bd5\u4e2d\u6587\u524d\u56db\u4e2aabcABC', " +

Review Comment:
   Add comments explaining what these Unicode escapes represent and why they are
   used in the tests. The source code is UTF-8 encoded, so it is fine to use
   Chinese characters directly.



##########
extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala:
##########
@@ -342,6 +342,7 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite
         sql(
           s"INSERT INTO $db.$table SELECT 1, 1, 'hello', 'world', " +
             s"timestamp'2018-11-17 12:34:56', 'World'"))
+

Review Comment:
   Remove the unnecessary blank line.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to