This is an automated email from the ASF dual-hosted git repository.

ethanfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new f04ebccd4 [CELEBORN-1368] Log celeborn config for debugging purposes
f04ebccd4 is described below

commit f04ebccd4d34561335d7b54be189264e58d3f7e7
Author: Aravind Patnam <[email protected]>
AuthorDate: Mon Apr 8 15:11:35 2024 +0800

    [CELEBORN-1368] Log celeborn config for debugging purposes
    
    ### What changes were proposed in this pull request?
    Log celeborn config for debugging purposes.
    
    ### Why are the changes needed?
    Help with debugging
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    tested the patch internally.
    
    Closes #2442 from akpatnam25/CELEBORN-1368.
    
    Authored-by: Aravind Patnam <[email protected]>
    Signed-off-by: mingji <[email protected]>
---
 .../org/apache/celeborn/common/CelebornConf.scala  | 22 ++++++++++++
 .../org/apache/celeborn/common/util/Utils.scala    | 41 ++++++++++++++++++++++
 docs/configuration/master.md                       |  2 ++
 docs/configuration/worker.md                       |  2 ++
 .../celeborn/service/deploy/master/Master.scala    |  4 +++
 .../celeborn/server/common/HttpService.scala       |  6 ++--
 .../celeborn/service/deploy/worker/Worker.scala    |  5 +++
 7 files changed, 80 insertions(+), 2 deletions(-)

diff --git 
a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala 
b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
index d8b482877..6ab67fb89 100644
--- a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
@@ -1281,6 +1281,10 @@ class CelebornConf(loadDefaults: Boolean) extends 
Cloneable with Logging with Se
   //                     Rack Resolver                   //
   // //////////////////////////////////////////////////////
   def rackResolverRefreshInterval = get(RACKRESOLVER_REFRESH_INTERVAL)
+
+  def logCelebornConfEnabled = get(LOG_CELEBORN_CONF_ENABLED)
+
+  def secretRedactionPattern = get(SECRET_REDACTION_PATTERN)
 }
 
 object CelebornConf extends Logging {
@@ -4924,4 +4928,22 @@ object CelebornConf extends Logging {
         s"Invalid maxEncryptedBlockSize, must be a position number upto 
${Int.MaxValue}")
       .createWithDefaultString("64k")
 
+  val SECRET_REDACTION_PATTERN =
+    buildConf("celeborn.redaction.regex")
+      .categories("master", "worker")
+      .doc("Regex to decide which Celeborn configuration properties and 
environment variables in " +
+        "master and worker environments contain sensitive information. When 
this regex matches " +
+        "a property key or value, the value is redacted from the logging.")
+      .version("0.5.0")
+      .regexConf
+      .createWithDefault("(?i)secret|password|token|access[.]key".r)
+
+  val LOG_CELEBORN_CONF_ENABLED: ConfigEntry[Boolean] =
+    buildConf("celeborn.logConf.enabled")
+      .categories("master", "worker")
+      .version("0.5.0")
+      .doc("When `true`, log the CelebornConf for debugging purposes.")
+      .booleanConf
+      .createWithDefault(false)
+
 }
diff --git a/common/src/main/scala/org/apache/celeborn/common/util/Utils.scala 
b/common/src/main/scala/org/apache/celeborn/common/util/Utils.scala
index bab5b5d8b..07e9fe30b 100644
--- a/common/src/main/scala/org/apache/celeborn/common/util/Utils.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/util/Utils.scala
@@ -34,6 +34,7 @@ import scala.io.Source
 import scala.reflect.ClassTag
 import scala.util.{Random => ScalaRandom, Try}
 import scala.util.control.{ControlThrowable, NonFatal}
+import scala.util.matching.Regex
 
 import com.google.protobuf.{ByteString, GeneratedMessageV3}
 import io.netty.channel.unix.Errors.NativeIoException
@@ -1101,4 +1102,44 @@ object Utils extends Logging {
     val host = components.dropRight(portsNum).mkString(":")
     Array(host) ++ portsArr
   }
+
+  private val REDACTION_REPLACEMENT_TEXT = "*********(redacted)"
+
+  /**
+   * Redact the sensitive values in the given map. If a map key matches the 
redaction pattern then
+   * its value is replaced with a dummy text.
+   */
+  def redact(conf: CelebornConf, kvs: Seq[(String, String)]): Seq[(String, 
String)] = {
+    val redactionPattern = conf.secretRedactionPattern
+    redact(redactionPattern, kvs)
+  }
+
+  private def redact[K, V](redactionPattern: Regex, kvs: Seq[(K, V)]): Seq[(K, 
V)] = {
+    // If the sensitive information regex matches with either the key or the 
value, redact the value
+    // While the original intent was to only redact the value if the key 
matched with the regex,
+    // we've found that especially in verbose mode, the value of the property 
may contain sensitive
+    // information like so:
+    //
+    // celeborn.dynamicConfig.store.db.hikari.password=secret_password ...
+    //
+    // And, in such cases, simply searching for the sensitive information 
regex in the key name is
+    // not sufficient. The values themselves have to be searched as well and 
redacted if matched.
+    // This does mean we may be accounting more false positives - for example, 
if the value of an
+    // arbitrary property contained the term 'password', we may redact the 
value from the UI and
+    // logs. In order to work around it, user would have to make the 
celeborn.redaction.regex property
+    // more specific.
+    kvs.map {
+      case (key: String, value: String) =>
+        redactionPattern.findFirstIn(key)
+          .orElse(redactionPattern.findFirstIn(value))
+          .map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
+          .getOrElse((key, value))
+      case (key, value: String) =>
+        redactionPattern.findFirstIn(value)
+          .map { _ => (key, REDACTION_REPLACEMENT_TEXT) }
+          .getOrElse((key, value))
+      case (key, value) =>
+        (key, value)
+    }.asInstanceOf[Seq[(K, V)]]
+  }
 }
diff --git a/docs/configuration/master.md b/docs/configuration/master.md
index e2217868b..582f3f8b3 100644
--- a/docs/configuration/master.md
+++ b/docs/configuration/master.md
@@ -33,6 +33,7 @@ license: |
 | celeborn.dynamicConfig.store.db.hikari.username |  | false | The username of 
db store backend. | 0.5.0 |  | 
 | celeborn.dynamicConfig.store.fs.path | &lt;undefined&gt; | false | The path 
of dynamic config file for fs store backend. The file format should be yaml. 
The default path is `${CELEBORN_CONF_DIR}/dynamicConfig.yaml`. | 0.5.0 |  | 
 | celeborn.internal.port.enabled | false | false | Whether to create a 
internal port on Masters/Workers for inter-Masters/Workers communication. This 
is beneficial when SASL authentication is enforced for all interactions between 
clients and Celeborn Services, but the services can exchange messages without 
being subject to SASL authentication. | 0.5.0 |  | 
+| celeborn.logConf.enabled | false | false | When `true`, log the CelebornConf 
for debugging purposes. | 0.5.0 |  | 
 | celeborn.master.estimatedPartitionSize.initialSize | 64mb | false | Initial 
partition size for estimation, it will change according to runtime stats. | 
0.3.0 | celeborn.shuffle.initialEstimatedPartitionSize | 
 | celeborn.master.estimatedPartitionSize.maxSize | &lt;undefined&gt; | false | 
Max partition size for estimation. Default value should be 
celeborn.worker.shuffle.partitionSplit.max * 2. | 0.4.1 |  | 
 | celeborn.master.estimatedPartitionSize.minSize | 8mb | false | Ignore 
partition size smaller than this configuration of partition size for 
estimation. | 0.3.0 | celeborn.shuffle.minPartitionSizeToEstimate | 
@@ -60,6 +61,7 @@ license: |
 | celeborn.master.userResourceConsumption.update.interval | 30s | false | Time 
length for a window about compute user resource consumption. | 0.3.0 |  | 
 | celeborn.master.workerUnavailableInfo.expireTimeout | 1800s | false | Worker 
unavailable info would be cleared when the retention period is expired | 0.3.1 
|  | 
 | celeborn.quota.enabled | true | false | When Master side sets to true, the 
master will enable to check the quota via QuotaManager. When Client side sets 
to true, LifecycleManager will request Master side to check whether the current 
user has enough quota before registration of shuffle. Fallback to the default 
shuffle service of Spark when Master side checks that there is no enough quota 
for current user. | 0.2.0 |  | 
+| celeborn.redaction.regex | (?i)secret|password|token|access[.]key | false | 
Regex to decide which Celeborn configuration properties and environment 
variables in master and worker environments contain sensitive information. When 
this regex matches a property key or value, the value is redacted from the 
logging. | 0.5.0 |  | 
 | celeborn.storage.availableTypes | HDD | false | Enabled storages. Available 
options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. 
| 0.3.0 | celeborn.storage.activeTypes | 
 | celeborn.storage.hdfs.dir | &lt;undefined&gt; | false | HDFS base directory 
for Celeborn to store shuffle data. | 0.2.0 |  | 
 | celeborn.storage.hdfs.kerberos.keytab | &lt;undefined&gt; | false | Kerberos 
keytab file path for HDFS storage connection. | 0.3.2 |  | 
diff --git a/docs/configuration/worker.md b/docs/configuration/worker.md
index 675ec7064..63bd69b7a 100644
--- a/docs/configuration/worker.md
+++ b/docs/configuration/worker.md
@@ -33,9 +33,11 @@ license: |
 | celeborn.dynamicConfig.store.db.hikari.username |  | false | The username of 
db store backend. | 0.5.0 |  | 
 | celeborn.dynamicConfig.store.fs.path | &lt;undefined&gt; | false | The path 
of dynamic config file for fs store backend. The file format should be yaml. 
The default path is `${CELEBORN_CONF_DIR}/dynamicConfig.yaml`. | 0.5.0 |  | 
 | celeborn.internal.port.enabled | false | false | Whether to create a 
internal port on Masters/Workers for inter-Masters/Workers communication. This 
is beneficial when SASL authentication is enforced for all interactions between 
clients and Celeborn Services, but the services can exchange messages without 
being subject to SASL authentication. | 0.5.0 |  | 
+| celeborn.logConf.enabled | false | false | When `true`, log the CelebornConf 
for debugging purposes. | 0.5.0 |  | 
 | celeborn.master.endpoints | &lt;localhost&gt;:9097 | false | Endpoints of 
master nodes for celeborn client to connect, allowed pattern is: 
`<host1>:<port1>[,<host2>:<port2>]*`, e.g. `clb1:9097,clb2:9098,clb3:9099`. If 
the port is omitted, 9097 will be used. | 0.2.0 |  | 
 | celeborn.master.estimatedPartitionSize.minSize | 8mb | false | Ignore 
partition size smaller than this configuration of partition size for 
estimation. | 0.3.0 | celeborn.shuffle.minPartitionSizeToEstimate | 
 | celeborn.master.internal.endpoints | &lt;localhost&gt;:8097 | false | 
Endpoints of master nodes just for celeborn workers to connect, allowed pattern 
is: `<host1>:<port1>[,<host2>:<port2>]*`, e.g. `clb1:8097,clb2:8097,clb3:8097`. 
If the port is omitted, 8097 will be used. | 0.5.0 |  | 
+| celeborn.redaction.regex | (?i)secret|password|token|access[.]key | false | 
Regex to decide which Celeborn configuration properties and environment 
variables in master and worker environments contain sensitive information. When 
this regex matches a property key or value, the value is redacted from the 
logging. | 0.5.0 |  | 
 | celeborn.shuffle.chunk.size | 8m | false | Max chunk size of reducer's 
merged shuffle data. For example, if a reducer's shuffle data is 128M and the 
data will need 16 fetch chunk requests to fetch. | 0.2.0 |  | 
 | celeborn.storage.availableTypes | HDD | false | Enabled storages. Available 
options: MEMORY,HDD,SSD,HDFS. Note: HDD and SSD would be treated as identical. 
| 0.3.0 | celeborn.storage.activeTypes | 
 | celeborn.storage.hdfs.dir | &lt;undefined&gt; | false | HDFS base directory 
for Celeborn to store shuffle data. | 0.2.0 |  | 
diff --git 
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala 
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
index 736cc7e45..835ba96ab 100644
--- 
a/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
+++ 
b/master/src/main/scala/org/apache/celeborn/service/deploy/master/Master.scala
@@ -82,6 +82,10 @@ private[celeborn] class Master(
   // Send ApplicationMeta to workers
   private var sendApplicationMetaExecutor: ExecutorService = _
 
+  if (conf.logCelebornConfEnabled) {
+    logInfo(getConf)
+  }
+
   override val rpcEnv: RpcEnv =
     if (!authEnabled) {
       RpcEnv.create(
diff --git 
a/service/src/main/scala/org/apache/celeborn/server/common/HttpService.scala 
b/service/src/main/scala/org/apache/celeborn/server/common/HttpService.scala
index 18e016187..540b3eadd 100644
--- a/service/src/main/scala/org/apache/celeborn/server/common/HttpService.scala
+++ b/service/src/main/scala/org/apache/celeborn/server/common/HttpService.scala
@@ -23,6 +23,7 @@ import scala.collection.JavaConverters._
 
 import org.apache.celeborn.common.CelebornConf
 import org.apache.celeborn.common.internal.Logging
+import org.apache.celeborn.common.util.Utils
 import org.apache.celeborn.server.common.http.HttpServer
 import org.apache.celeborn.server.common.http.api.ApiRootResource
 import org.apache.celeborn.server.common.service.config.ConfigLevel
@@ -35,8 +36,9 @@ abstract class HttpService extends Service with Logging {
     val sb = new StringBuilder
     sb.append("=========================== Configuration 
============================\n")
     if (conf.getAll.nonEmpty) {
-      val maxKeyLength = conf.getAll.toMap.keys.map(_.length).max
-      conf.getAll.sortBy(_._1).foreach { case (key, value) =>
+      val redactedConf = Utils.redact(conf, conf.getAll)
+      val maxKeyLength = redactedConf.toMap.keys.map(_.length).max
+      redactedConf.sortBy(_._1).foreach { case (key, value) =>
         sb.append(config(key, value, maxKeyLength))
       }
     }
diff --git 
a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala 
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
index 0da4b5513..e1d4bf812 100644
--- 
a/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
+++ 
b/worker/src/main/scala/org/apache/celeborn/service/deploy/worker/Worker.scala
@@ -86,6 +86,11 @@ private[celeborn] class Worker(
   val workerStatusManager = new WorkerStatusManager(conf)
   private val authEnabled = conf.authEnabled
   private val secretRegistry = new 
WorkerSecretRegistryImpl(conf.workerApplicationRegistryCacheSize)
+
+  if (conf.logCelebornConfEnabled) {
+    logInfo(getConf)
+  }
+
   val rpcEnv: RpcEnv =
     if (!authEnabled) {
       RpcEnv.create(

Reply via email to