This is an automated email from the ASF dual-hosted git repository.

dongjoon-hyun pushed a commit to branch branch-4.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.2 by this push:
     new 9f90cd4008f0 [SPARK-57098][4.2][UI] Worker UI JSON endpoint redaction
9f90cd4008f0 is described below

commit 9f90cd4008f0e6cf6e19c48b2eab0d4cd658bdd1
Author: Peter Toth <[email protected]>
AuthorDate: Wed May 27 13:44:12 2026 -0700

    [SPARK-57098][4.2][UI] Worker UI JSON endpoint redaction
    
    ### What changes were proposed in this pull request?
    
    The standalone Worker UI serves `GET /json/`, which returns 
`JsonProtocol.writeWorkerState(...)` and includes each executor's 
`ApplicationDescription.command` rendered as `Command.toString`. Since 
`Command` is a case class, that string contains the full `environment` map and 
`javaOpts` sequence.
    
    This PR redacts `Command.environment` via `Utils.redact(conf, ...)` and 
`Command.javaOpts` via `Utils.redactCommandLineArgs(conf, ...)` before calling 
`toString`, reusing the same redaction APIs already applied to the launch 
command in `ExecutorRunner` logging (`ExecutorRunner.scala:162-164`). The 
`command` field stays a string rendered from a redacted `Command.copy(...)`, so 
the JSON schema is unchanged.
    
    `ExecutorRunner.conf` is promoted to `val` so
    `JsonProtocol.writeExecutorRunner` can plumb the worker's `SparkConf` into 
`writeApplicationDescription` for `spark.redaction.regex` lookups.
    
    ### Why are the changes needed?
    
    `environment` and `javaOpts` routinely carry secrets: JDBC passwords, AWS 
credentials, SSL keystore passwords, Hadoop credential store passwords, 
`spark.executorEnv.*` values, etc. `ExecutorRunner` already redacts the same 
content when writing the launch command to logs, but `JsonProtocol` emits it 
unredacted over the Worker UI HTTP endpoint. The Worker UI listens on port 8081 
with no authentication by default, so any caller with network access to the 
worker can read the secrets with  [...]
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. The `command` field returned by `GET /json/` on the standalone Worker 
UI now has secret-bearing values in `environment` and `javaOpts` replaced with 
`*********(redacted)` when they match `spark.redaction.regex` (the default 
pattern matches keys like `secret`, `password`, `token`, etc.). The JSON schema 
is unchanged -- `command` remains a single string in `Command.toString` format 
-- so existing tooling that parses this endpoint continues to work; only the 
sensitive values that we [...]
    
    ### How was this patch tested?
    
    - Added `SPARK-57098: secrets in executor command are redacted in worker 
JSON endpoint` to `JsonProtocolSuite`, covering both environment-variable and 
`-D` java-opt secret carriers, asserting that sensitive values are scrubbed 
while non-sensitive values (`JAVA_HOME`, `-Xmx2g`) pass through.
    - `build/sbt 'core/testOnly org.apache.spark.deploy.JsonProtocolSuite'` -- 
11/11 tests pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Opus 4.7
    
    Closes #56147 from peter-toth/SPARK-57098-worker-ui-json-redaction-4.2.
    
    Authored-by: Peter Toth <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../org/apache/spark/deploy/JsonProtocol.scala     | 16 +++++++---
 .../spark/deploy/worker/ExecutorRunner.scala       |  2 +-
 .../apache/spark/deploy/JsonProtocolSuite.scala    | 37 ++++++++++++++++++++--
 3 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala 
b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
index 04302c77a398..2a3fd0d004e1 100644
--- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala
@@ -20,10 +20,12 @@ package org.apache.spark.deploy
 import org.json4s.JsonAST._
 import org.json4s.JsonDSL._
 
+import org.apache.spark.SparkConf
 import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, 
WorkerStateResponse}
 import org.apache.spark.deploy.master._
 import org.apache.spark.deploy.worker.ExecutorRunner
 import org.apache.spark.resource.{ResourceInformation, ResourceRequirement}
+import org.apache.spark.util.Utils
 
 private[deploy] object JsonProtocol {
 
@@ -123,10 +125,16 @@ private[deploy] object JsonProtocol {
    *         `memoryperexecutor` minimal memory in MB required to each executor
    *         `resourcesperexecutor` minimal resources required to each executor
    *         `user` name of the user who submitted the application
-   *         `command` the command string used to submit the application
+   *         `command` the command string used to submit the application, with 
secret-bearing
+   *         fields (`environment`, `javaOpts`) redacted using 
`spark.redaction.regex`
    * For compatibility also returns the deprecated `memoryperslave` & 
`resourcesperslave` fields.
    */
-  def writeApplicationDescription(obj: ApplicationDescription): JObject = {
+  def writeApplicationDescription(obj: ApplicationDescription, conf: 
SparkConf): JObject = {
+    val redactedEnvironment = Utils.redact(conf, 
obj.command.environment.toSeq).toMap
+    val redactedJavaOpts = Utils.redactCommandLineArgs(conf, 
obj.command.javaOpts)
+    val redactedCommand = obj.command.copy(
+      environment = redactedEnvironment,
+      javaOpts = redactedJavaOpts)
     ("name" -> obj.name) ~
     ("cores" -> obj.maxCores.getOrElse(0)) ~
     ("memoryperexecutor" -> obj.memoryPerExecutorMB) ~
@@ -134,7 +142,7 @@ private[deploy] object JsonProtocol {
     ("memoryperslave" -> obj.memoryPerExecutorMB) ~
     ("resourcesperslave" -> 
obj.resourceReqsPerExecutor.toList.map(writeResourceRequirement)) ~
     ("user" -> obj.user) ~
-    ("command" -> obj.command.toString)
+    ("command" -> redactedCommand.toString)
   }
 
   /**
@@ -154,7 +162,7 @@ private[deploy] object JsonProtocol {
     ("memory" -> obj.memory) ~
     ("resources" -> writeResourcesInfo(obj.resources)) ~
     ("appid" -> obj.appId) ~
-    ("appdesc" -> writeApplicationDescription(obj.appDesc))
+    ("appdesc" -> writeApplicationDescription(obj.appDesc, obj.conf))
   }
 
   /**
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala 
b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
index 8f0b684a93e8..5bfb486b0e60 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
@@ -54,7 +54,7 @@ private[deploy] class ExecutorRunner(
     val sparkHome: File,
     val executorDir: File,
     val workerUrl: String,
-    conf: SparkConf,
+    val conf: SparkConf,
     val appLocalDirs: Seq[String],
     @volatile var state: ExecutorState.Value,
     val rpId: Int,
diff --git 
a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala 
b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala
index 518a8c8b3d05..6d2c663a2588 100644
--- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala
@@ -23,10 +23,11 @@ import com.fasterxml.jackson.core.JsonParseException
 import org.json4s._
 import org.json4s.jackson.JsonMethods
 
-import org.apache.spark.{JsonTestUtils, SparkFunSuite}
+import org.apache.spark.{JsonTestUtils, SparkConf, SparkFunSuite}
 import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, 
WorkerStateResponse}
 import org.apache.spark.deploy.master.{ApplicationInfo, RecoveryState, 
WorkerInfo}
 import org.apache.spark.deploy.worker.ExecutorRunner
+import org.apache.spark.util.Utils
 
 class JsonProtocolSuite extends SparkFunSuite with JsonTestUtils {
 
@@ -45,7 +46,7 @@ class JsonProtocolSuite extends SparkFunSuite with 
JsonTestUtils {
   }
 
   test("writeApplicationDescription") {
-    val output = JsonProtocol.writeApplicationDescription(createAppDesc())
+    val output = JsonProtocol.writeApplicationDescription(createAppDesc(), new 
SparkConf())
     assertValidJson(output)
     assertValidDataInJson(output, 
JsonMethods.parse(JsonConstants.appDescJsonStr))
   }
@@ -105,6 +106,38 @@ class JsonProtocolSuite extends SparkFunSuite with 
JsonTestUtils {
     assertValidDataInJson(output, 
JsonMethods.parse(JsonConstants.workerStateJsonStr))
   }
 
+  test("SPARK-57098: secrets in executor command are redacted in worker JSON 
endpoint") {
+    val conf = new SparkConf()
+    val secretEnv = Map(
+      "HADOOP_CREDSTORE_PASSWORD" -> "topsecret",
+      "JAVA_HOME" -> "/usr/lib/jvm/default",
+      "AWS_SECRET_ACCESS_KEY" -> "aws-secret-value")
+    val secretJavaOpts = Seq(
+      "-Dspark.ssl.keyStorePassword=ssl-secret",
+      "-Dspark.executorEnv.PASSWORD=env-secret",
+      "-Xmx2g")
+    val cmd = new Command(
+      "mainClass", List("arg1"), secretEnv, Seq(), Seq(), secretJavaOpts)
+    val appDesc = new ApplicationDescription(
+      "name", Some(4), cmd, "appUiUrl", defaultResourceProfile)
+
+    val output = JsonProtocol.writeApplicationDescription(appDesc, conf)
+    val commandStr = (output \ "command") match {
+      case JString(s) => s
+      case other => fail(s"Expected JString for 'command', got: $other")
+    }
+
+    // Sensitive values are scrubbed.
+    assert(!commandStr.contains("topsecret"))
+    assert(!commandStr.contains("ssl-secret"))
+    assert(!commandStr.contains("env-secret"))
+    assert(!commandStr.contains("aws-secret-value"))
+    assert(commandStr.contains(Utils.REDACTION_REPLACEMENT_TEXT))
+    // Non-sensitive values pass through.
+    assert(commandStr.contains("/usr/lib/jvm/default"))
+    assert(commandStr.contains("-Xmx2g"))
+  }
+
   test("SPARK-46883: writeClusterUtilization") {
     val workers = Array(createWorkerInfo(), createWorkerInfo())
     val activeApps = Array(createAppInfo())


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to