This is an automated email from the ASF dual-hosted git repository.

ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 1fee068c9 [KYUUBI #2591] Redact secret information from ProcBuilder log
1fee068c9 is described below

commit 1fee068c99802b68f6fb1f7572050b054a9fef01
Author: sychen <[email protected]>
AuthorDate: Tue May 10 18:12:31 2022 +0800

    [KYUUBI #2591] Redact secret information from ProcBuilder log
    
    ### _Why are the changes needed?_
    Now the user can see the command to start the engine, which may have some 
sensitive information.
    Introduce a configuration item to support replacing sensitive information.
    
    For example, if you use the `kyuubi.ha.zookeeper.auth.digest` 
configuration, you can configure `kyuubi.server.redaction.regex`  
`(?i)zookeeper.auth.digest`
    
    close #2591
    
    ### _How was this patch tested?_
    - [x] Add some test cases that check the changes thoroughly including 
negative and positive cases if possible
    
    - [x] Add screenshots for manual tests if appropriate
    
    - [x] [Run 
test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests)
 locally before make a pull request
    
    Closes #2592 from cxzl25/KYUUBI-2591.
    
    Closes #2591
    
    67567a26 [sychen] update doc
    eb1ec9a1 [sychen] redact kv
    fbcba2dd [sychen] Merge branch 'master' into KYUUBI-2591
    346e21b1 [sychen] kyuubi.server.redaction.regex
    
    Authored-by: sychen <[email protected]>
    Signed-off-by: ulysses-you <[email protected]>
---
 docs/deployment/settings.md                        |  1 +
 .../src/main/scala/org/apache/kyuubi/Utils.scala   | 50 ++++++++++++++++++++++
 .../org/apache/kyuubi/config/ConfigBuilder.scala   | 14 ++++++
 .../org/apache/kyuubi/config/KyuubiConf.scala      |  9 ++++
 .../test/scala/org/apache/kyuubi/UtilsSuite.scala  | 50 ++++++++++++++++++++++
 .../scala/org/apache/kyuubi/engine/EngineRef.scala |  5 ++-
 .../org/apache/kyuubi/engine/ProcBuilder.scala     |  2 +-
 .../kyuubi/engine/hive/HiveProcessBuilder.scala    |  2 +-
 .../kyuubi/engine/spark/SparkProcessBuilder.scala  |  2 +-
 .../kyuubi/engine/trino/TrinoProcessBuilder.scala  |  2 +-
 10 files changed, 131 insertions(+), 6 deletions(-)

diff --git a/docs/deployment/settings.md b/docs/deployment/settings.md
index 31533e681..89aff1849 100644
--- a/docs/deployment/settings.md
+++ b/docs/deployment/settings.md
@@ -350,6 +350,7 @@ Key | Default | Meaning | Type | Since
 <code>kyuubi.server.limit.connections.per.user</code>|<div style='width: 
65pt;word-wrap: break-word;white-space: normal'>&lt;undefined&gt;</div>|<div 
style='width: 170pt;word-wrap: break-word;white-space: normal'>Maximum kyuubi 
server connections per user. Any user exceeding this limit will not be allowed 
to connect.</div>|<div style='width: 30pt'>int</div>|<div style='width: 
20pt'>1.6.0</div>
 <code>kyuubi.server.limit.connections.per.user.ipaddress</code>|<div 
style='width: 65pt;word-wrap: break-word;white-space: 
normal'>&lt;undefined&gt;</div>|<div style='width: 170pt;word-wrap: 
break-word;white-space: normal'>Maximum kyuubi server connections per 
user:ipaddress combination. Any user-ipaddress exceeding this limit will not be 
allowed to connect.</div>|<div style='width: 30pt'>int</div>|<div style='width: 
20pt'>1.6.0</div>
 <code>kyuubi.server.name</code>|<div style='width: 65pt;word-wrap: 
break-word;white-space: normal'>&lt;undefined&gt;</div>|<div style='width: 
170pt;word-wrap: break-word;white-space: normal'>The name of Kyuubi 
Server.</div>|<div style='width: 30pt'>string</div>|<div style='width: 
20pt'>1.5.0</div>
+<code>kyuubi.server.redaction.regex</code>|<div style='width: 65pt;word-wrap: 
break-word;white-space: normal'>&lt;undefined&gt;</div>|<div style='width: 
170pt;word-wrap: break-word;white-space: normal'>Regex to decide which Kyuubi 
contain sensitive information. When this regex matches a property key or value, 
the value is redacted from the various logs.</div>|<div style='width: 
30pt'></div>|<div style='width: 20pt'>1.6.0</div>
 
 
 ### Session
diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala 
b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala
index e97cece85..1789d4fe9 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala
@@ -25,6 +25,7 @@ import java.util.{Properties, TimeZone, UUID}
 
 import scala.collection.JavaConverters._
 import scala.util.control.NonFatal
+import scala.util.matching.Regex
 
 import org.apache.commons.lang3.SystemUtils
 import org.apache.commons.lang3.time.DateFormatUtils
@@ -288,4 +289,53 @@ object Utils extends Logging {
       }
     }
   }
+
+  val REDACTION_REPLACEMENT_TEXT = "*********(redacted)"
+
+  private val PATTERN_FOR_KEY_VALUE_ARG = "(.+?)=(.+)".r
+
+  def redactCommandLineArgs(conf: KyuubiConf, commands: Array[String]): 
Array[String] = {
+    val redactionPattern = conf.get(SERVER_SECRET_REDACTION_PATTERN)
+    var nextKV = false
+    commands.map {
+      case PATTERN_FOR_KEY_VALUE_ARG(key, value) if nextKV =>
+        val (_, newValue) = redact(redactionPattern, Seq((key, value))).head
+        nextKV = false
+        s"$key=$newValue"
+
+      case cmd if cmd == "--conf" =>
+        nextKV = true
+        cmd
+
+      case cmd =>
+        cmd
+    }
+  }
+
+  /**
+   * Redact the sensitive values in the given map. If a map key matches the 
redaction pattern then
+   * its value is replaced with a dummy text.
+   */
+  def redact[K, V](regex: Option[Regex], kvs: Seq[(K, V)]): Seq[(K, V)] = {
+    regex match {
+      case None => kvs
+      case Some(r) => redact(r, kvs)
+    }
+  }
+
+  private def redact[K, V](redactionPattern: Regex, kvs: Seq[(K, V)]): Seq[(K, 
V)] = {
+    kvs.map {
+      case (key: String, value: String) =>
+        redactionPattern.findFirstIn(key)
+          .orElse(redactionPattern.findFirstIn(value))
+          .map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
+          .getOrElse((key, value))
+      case (key, value: String) =>
+        redactionPattern.findFirstIn(value)
+          .map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
+          .getOrElse((key, value))
+      case (key, value) =>
+        (key, value)
+    }.asInstanceOf[Seq[(K, V)]]
+  }
 }
diff --git 
a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/ConfigBuilder.scala 
b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/ConfigBuilder.scala
index 8863aabc3..d834de44a 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/ConfigBuilder.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/ConfigBuilder.scala
@@ -18,8 +18,10 @@
 package org.apache.kyuubi.config
 
 import java.time.Duration
+import java.util.regex.PatternSyntaxException
 
 import scala.util.{Failure, Success, Try}
+import scala.util.matching.Regex
 
 private[kyuubi] case class ConfigBuilder(key: String) {
 
@@ -119,6 +121,18 @@ private[kyuubi] case class ConfigBuilder(key: String) {
     _onCreate.foreach(_(entry))
     entry
   }
+
+  def regexConf: TypedConfigBuilder[Regex] = {
+    def regexFromString(str: String, key: String): Regex = {
+      try str.r
+      catch {
+        case e: PatternSyntaxException =>
+          throw new IllegalArgumentException(s"$key should be a regex, but was 
$str", e)
+      }
+    }
+
+    new TypedConfigBuilder(this, regexFromString(_, this.key), _.toString)
+  }
 }
 
 private[kyuubi] case class TypedConfigBuilder[T](
diff --git 
a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala 
b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
index 49c6a649f..1aea159db 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
@@ -23,6 +23,7 @@ import java.util.concurrent.ConcurrentHashMap
 import java.util.regex.Pattern
 
 import scala.collection.JavaConverters._
+import scala.util.matching.Regex
 
 import org.apache.kyuubi.{Logging, Utils}
 import org.apache.kyuubi.config.KyuubiConf._
@@ -1490,4 +1491,12 @@ object KyuubiConf {
       .version("1.6.0")
       .booleanConf
       .createWithDefault(false)
+
+  val SERVER_SECRET_REDACTION_PATTERN: OptionalConfigEntry[Regex] =
+    buildConf("kyuubi.server.redaction.regex")
+      .doc("Regex to decide which Kyuubi contain sensitive information. When 
this regex matches " +
+        "a property key or value, the value is redacted from the various 
logs.")
+      .version("1.6.0")
+      .regexConf
+      .createOptional
 }
diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala 
b/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala
index 3b50b3066..4c667447c 100644
--- a/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala
+++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala
@@ -23,9 +23,12 @@ import java.nio.file.{Files, Paths}
 import java.security.PrivilegedExceptionAction
 import java.util.Properties
 
+import scala.collection.mutable.ArrayBuffer
+
 import org.apache.hadoop.security.UserGroupInformation
 
 import org.apache.kyuubi.config.KyuubiConf
+import org.apache.kyuubi.config.KyuubiConf.SERVER_SECRET_REDACTION_PATTERN
 
 class UtilsSuite extends KyuubiFunSuite {
 
@@ -160,4 +163,51 @@ class UtilsSuite extends KyuubiFunSuite {
     val exception2 = 
intercept[IllegalArgumentException](Utils.fromCommandLineArgs(args2, conf))
     assert(exception2.getMessage.contains("Illegal argument: a"))
   }
+
+  test("redact sensitive information in command line args") {
+    val conf = new KyuubiConf()
+    conf.set(SERVER_SECRET_REDACTION_PATTERN, "(?i)secret|password".r)
+
+    val buffer = new ArrayBuffer[String]()
+    buffer += "main"
+    buffer += "--conf"
+    buffer += "kyuubi.my.password=sensitive_value"
+    buffer += "--conf"
+    buffer += "kyuubi.regular.property1=regular_value"
+    buffer += "--conf"
+    buffer += "kyuubi.my.secret=sensitive_value"
+    buffer += "--conf"
+    buffer += "kyuubi.regular.property2=regular_value"
+
+    val commands = buffer.toArray
+
+    // Redact sensitive information
+    val redactedCmdArgs = Utils.redactCommandLineArgs(conf, commands)
+
+    val expectBuffer = new ArrayBuffer[String]()
+    expectBuffer += "main"
+    expectBuffer += "--conf"
+    expectBuffer += "kyuubi.my.password=" + Utils.REDACTION_REPLACEMENT_TEXT
+    expectBuffer += "--conf"
+    expectBuffer += "kyuubi.regular.property1=regular_value"
+    expectBuffer += "--conf"
+    expectBuffer += "kyuubi.my.secret=" + Utils.REDACTION_REPLACEMENT_TEXT
+    expectBuffer += "--conf"
+    expectBuffer += "kyuubi.regular.property2=regular_value"
+
+    assert(expectBuffer.toArray === redactedCmdArgs)
+  }
+
+  test("redact sensitive information") {
+    val secretKeys = Some("my.password".r)
+    assert(Utils.redact(secretKeys, Seq(("kyuubi.my.password", "12345"))) ===
+      Seq(("kyuubi.my.password", Utils.REDACTION_REPLACEMENT_TEXT)))
+    assert(Utils.redact(secretKeys, Seq(("anything", 
"kyuubi.my.password=12345"))) ===
+      Seq(("anything", Utils.REDACTION_REPLACEMENT_TEXT)))
+    assert(Utils.redact(secretKeys, Seq((999, "kyuubi.my.password=12345"))) ===
+      Seq((999, Utils.REDACTION_REPLACEMENT_TEXT)))
+    // Do not redact when value type is not string
+    assert(Utils.redact(secretKeys, Seq(("my.password", 12345))) ===
+      Seq(("my.password", 12345)))
+  }
 }
diff --git 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
index 5db3b63d7..14235fc53 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
@@ -185,7 +185,8 @@ private[kyuubi] class EngineRef(
 
     MetricsSystem.tracing(_.incCount(ENGINE_TOTAL))
     try {
-      info(s"Launching engine:\n$builder")
+      val redactedCmd = builder.toString
+      info(s"Launching engine:\n$redactedCmd")
       val process = builder.start
       var exitValue: Option[Int] = None
       while (engineRef.isEmpty) {
@@ -205,7 +206,7 @@ private[kyuubi] class EngineRef(
           process.destroyForcibly()
           MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, 
appUser)))
           throw KyuubiSQLException(
-            s"Timeout($timeout ms) to launched $engineType engine with 
$builder. $killMessage",
+            s"Timeout($timeout ms) to launched $engineType engine with 
$redactedCmd. $killMessage",
             builder.getError)
         }
         engineRef = discoveryClient.getEngineByRefId(engineSpace, engineRefId)
diff --git 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
index 53dd7fe0f..d4f6fd618 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
@@ -270,7 +270,7 @@ trait ProcBuilder {
     if (commands == null) {
       super.toString()
     } else {
-      commands.map {
+      Utils.redactCommandLineArgs(conf, commands).map {
         case arg if arg.startsWith("--") => s"\\\n\t$arg"
         case arg => arg
       }.mkString(" ")
diff --git 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
index 8c490174e..34d077646 100644
--- 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
+++ 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
@@ -105,7 +105,7 @@ class HiveProcessBuilder(
     buffer.toArray
   }
 
-  override def toString: String = commands.mkString("\n")
+  override def toString: String = Utils.redactCommandLineArgs(conf, 
commands).mkString("\n")
 
   override def shortName: String = "hive"
 }
diff --git 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
index 0dc9b09d5..7b9e81da6 100644
--- 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
+++ 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
@@ -55,7 +55,7 @@ class SparkProcessBuilder(
 
     var allConf = conf.getAll
 
-    // if enable sasl kerberos authentication for zookeeper, need to upload 
the server ketab file
+    // if enable sasl kerberos authentication for zookeeper, need to upload 
the server keytab file
     if 
(AuthTypes.withName(conf.get(HighAvailabilityConf.HA_ZK_ENGINE_AUTH_TYPE))
         == AuthTypes.KERBEROS) {
       allConf = allConf ++ zkAuthKeytabFileConf(allConf)
diff --git 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilder.scala
 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilder.scala
index de3cbc5ca..191e24ca2 100644
--- 
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilder.scala
+++ 
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilder.scala
@@ -98,5 +98,5 @@ class TrinoProcessBuilder(
 
   override def shortName: String = "trino"
 
-  override def toString: String = commands.mkString("\n")
+  override def toString: String = Utils.redactCommandLineArgs(conf, 
commands).mkString("\n")
 }

Reply via email to