This is an automated email from the ASF dual-hosted git repository.
yangzy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 5f7bb925e [GLUTEN-6368] Redact sensitive configs when calling
`gluten::printConfig` (#6793)
5f7bb925e is described below
commit 5f7bb925e3f19f1cd13855b207842ec7cb239e4d
Author: Arnav Balyan <[email protected]>
AuthorDate: Mon Aug 19 11:38:51 2024 +0530
[GLUTEN-6368] Redact sensitive configs when calling `gluten::printConfig`
(#6793)
---
cpp/core/config/GlutenConfig.cc | 27 +++++++++++++++++++---
cpp/core/config/GlutenConfig.h | 3 +++
.../scala/org/apache/gluten/GlutenConfig.scala | 7 ++++--
3 files changed, 32 insertions(+), 5 deletions(-)
diff --git a/cpp/core/config/GlutenConfig.cc b/cpp/core/config/GlutenConfig.cc
index fa04ecfa4..bc6ad1cbe 100644
--- a/cpp/core/config/GlutenConfig.cc
+++ b/cpp/core/config/GlutenConfig.cc
@@ -15,13 +15,26 @@
* limitations under the License.
*/
+#include <boost/regex.hpp>
#include <jni.h>
-
+#include <optional>
#include "compute/ProtobufUtils.h"
#include "config.pb.h"
#include "jni/JniError.h"
+namespace {
+
+std::optional<boost::regex> getRedactionRegex(const std::unordered_map<std::string, std::string>& conf) {
+ auto it = conf.find(gluten::kSparkRedactionRegex);
+ if (it != conf.end()) {
+ return boost::regex(it->second);
+ }
+ return std::nullopt;
+}
+} // namespace
+
namespace gluten {
+
std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength) {
std::unordered_map<std::string, std::string> sparkConfs;
@@ -37,9 +50,17 @@ parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength)
std::string printConfig(const std::unordered_map<std::string, std::string>& conf) {
std::ostringstream oss;
oss << std::endl;
- for (auto& [k, v] : conf) {
- oss << " [" << k << ", " << v << "]\n";
+
+ auto redactionRegex = getRedactionRegex(conf);
+
+ for (const auto& [k, v] : conf) {
+ if (redactionRegex && boost::regex_match(k, *redactionRegex)) {
+ oss << " [" << k << ", " << kSparkRedactionString << "]\n";
+ } else {
+ oss << " [" << k << ", " << v << "]\n";
+ }
}
return oss.str();
}
+
} // namespace gluten
diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index 057d85930..31318ff0a 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -66,6 +66,9 @@ const std::string kShuffleCompressionCodecBackend = "spark.gluten.sql.columnar.s
const std::string kQatBackendName = "qat";
const std::string kIaaBackendName = "iaa";
+const std::string kSparkRedactionRegex = "spark.redaction.regex";
+const std::string kSparkRedactionString = "*********(redacted)";
+
std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength);
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index 88491f6bf..fa78060da 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -538,6 +538,7 @@ object GlutenConfig {
val GLUTEN_ONHEAP_SIZE_KEY = "spark.executor.memory"
val GLUTEN_OFFHEAP_SIZE_KEY = "spark.memory.offHeap.size"
val GLUTEN_OFFHEAP_ENABLED = "spark.memory.offHeap.enabled"
+ val SPARK_REDACTION_REGEX = "spark.redaction.regex"
// For Soft Affinity Scheduling
// Enable Soft Affinity Scheduling, default value is false
@@ -677,7 +678,8 @@ object GlutenConfig {
// gcs config
SPARK_GCS_STORAGE_ROOT_URL,
SPARK_GCS_AUTH_TYPE,
- SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE
+ SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE,
+ SPARK_REDACTION_REGEX
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)
@@ -764,7 +766,8 @@ object GlutenConfig {
GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
GLUTEN_OFFHEAP_ENABLED,
SESSION_LOCAL_TIMEZONE.key,
- DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key
+ DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key,
+ SPARK_REDACTION_REGEX
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]