This is an automated email from the ASF dual-hosted git repository.

yangzy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 5f7bb925e [GLUTEN-6368] Redact sensitive configs when calling `gluten::printConfig` (#6793)
5f7bb925e is described below

commit 5f7bb925e3f19f1cd13855b207842ec7cb239e4d
Author: Arnav Balyan <[email protected]>
AuthorDate: Mon Aug 19 11:38:51 2024 +0530

    [GLUTEN-6368] Redact sensitive configs when calling `gluten::printConfig` (#6793)
---
 cpp/core/config/GlutenConfig.cc                    | 27 +++++++++++++++++++---
 cpp/core/config/GlutenConfig.h                     |  3 +++
 .../scala/org/apache/gluten/GlutenConfig.scala     |  7 ++++--
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/cpp/core/config/GlutenConfig.cc b/cpp/core/config/GlutenConfig.cc
index fa04ecfa4..bc6ad1cbe 100644
--- a/cpp/core/config/GlutenConfig.cc
+++ b/cpp/core/config/GlutenConfig.cc
@@ -15,13 +15,26 @@
  * limitations under the License.
  */
 
+#include <boost/regex.hpp>
 #include <jni.h>
-
+#include <optional>
 #include "compute/ProtobufUtils.h"
 #include "config.pb.h"
 #include "jni/JniError.h"
 
+namespace {
+
+std::optional<boost::regex> getRedactionRegex(const std::unordered_map<std::string, std::string>& conf) {
+  auto it = conf.find(gluten::kSparkRedactionRegex);
+  if (it != conf.end()) {
+    return boost::regex(it->second);
+  }
+  return std::nullopt;
+}
+} // namespace
+
 namespace gluten {
+
 std::unordered_map<std::string, std::string>
 parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength) {
   std::unordered_map<std::string, std::string> sparkConfs;
@@ -37,9 +50,17 @@ parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength)
 std::string printConfig(const std::unordered_map<std::string, std::string>& conf) {
   std::ostringstream oss;
   oss << std::endl;
-  for (auto& [k, v] : conf) {
-    oss << " [" << k << ", " << v << "]\n";
+
+  auto redactionRegex = getRedactionRegex(conf);
+
+  for (const auto& [k, v] : conf) {
+    if (redactionRegex && boost::regex_match(k, *redactionRegex)) {
+      oss << " [" << k << ", " << kSparkRedactionString << "]\n";
+    } else {
+      oss << " [" << k << ", " << v << "]\n";
+    }
   }
   return oss.str();
 }
+
 } // namespace gluten
diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index 057d85930..31318ff0a 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -66,6 +66,9 @@ const std::string kShuffleCompressionCodecBackend = "spark.gluten.sql.columnar.s
 const std::string kQatBackendName = "qat";
 const std::string kIaaBackendName = "iaa";
 
+const std::string kSparkRedactionRegex = "spark.redaction.regex";
+const std::string kSparkRedactionString = "*********(redacted)";
+
 std::unordered_map<std::string, std::string>
 parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength);
 
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index 88491f6bf..fa78060da 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -538,6 +538,7 @@ object GlutenConfig {
   val GLUTEN_ONHEAP_SIZE_KEY = "spark.executor.memory"
   val GLUTEN_OFFHEAP_SIZE_KEY = "spark.memory.offHeap.size"
   val GLUTEN_OFFHEAP_ENABLED = "spark.memory.offHeap.enabled"
+  val SPARK_REDACTION_REGEX = "spark.redaction.regex"
 
   // For Soft Affinity Scheduling
   // Enable Soft Affinity Scheduling, default value is false
@@ -677,7 +678,8 @@ object GlutenConfig {
       // gcs config
       SPARK_GCS_STORAGE_ROOT_URL,
       SPARK_GCS_AUTH_TYPE,
-      SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE
+      SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE,
+      SPARK_REDACTION_REGEX
     )
     nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)
 
@@ -764,7 +766,8 @@ object GlutenConfig {
       GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
       GLUTEN_OFFHEAP_ENABLED,
       SESSION_LOCAL_TIMEZONE.key,
-      DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key
+      DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key,
+      SPARK_REDACTION_REGEX
     )
     nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to