(incubator-gluten) branch main updated: Add a RichSparkConf to simplify interoperations gluten config entries (#9914)

yao Wed, 11 Jun 2025 03:43:40 -0700

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new bba3640fff Add a RichSparkConf to simplify interoperations gluten config entries (#9914)
bba3640fff is described below

commit bba3640fff39e8d9a0c15c0be6ec3a899d0ad6ae
Author: Kent Yao <[email protected]>
AuthorDate: Wed Jun 11 18:43:09 2025 +0800

    Add a RichSparkConf to simplify interoperations gluten config entries (#9914)
    
    * Revert "Revert "Add RichSparkConf to simplify the interoperations with gluten config entries (#9876)" (#9905)"
    
    This reverts commit 9ac7bf57bb3b4f55e30c988de237818997f1a25c.
    
    * Add a RichSparkConf to simplify interoperations gluten config entries
    
    * Add a RichSparkConf to simplify interoperations gluten config entries
    
    * Add a RichSparkConf to simplify interoperations gluten config entries
    
    * Add a RichSparkConf to simplify interoperations gluten config entries
    
    * Add a RichSparkConf to simplify interoperations gluten config entries
    
    * fix test
    
    * Addr comments
---
 .../backendsapi/clickhouse/CHListenerApi.scala     | 15 +++---
 .../backendsapi/clickhouse/CHTransformerApi.scala  |  9 ++--
 .../backendsapi/velox/VeloxListenerApi.scala       | 29 +++++------
 .../apache/gluten/utils/SharedLibraryLoader.scala  |  9 ++--
 .../scala/org/apache/gluten/GlutenPlugin.scala     | 56 +++++++--------------
 .../spark/sql/internal/SparkConfigUtil.scala       | 57 +++++++++++++++++++++-
 .../gluten/config/SparkConfigUtilSuite.scala}      | 19 ++++++--
 .../org/apache/gluten/expression/UDFMappings.scala |  7 +--
 8 files changed, 119 insertions(+), 82 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
index e22f1d75f4..5cddfb6e8a 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHListenerApi.scala
@@ -30,10 +30,10 @@ import org.apache.spark.{SPARK_VERSION, SparkConf, 
SparkContext}
 import org.apache.spark.api.plugin.PluginContext
 import org.apache.spark.internal.Logging
 import org.apache.spark.listener.CHGlutenSQLAppStatusListener
-import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.rpc.{GlutenDriverEndpoint, GlutenExecutorEndpoint}
 import org.apache.spark.sql.execution.datasources.GlutenWriterColumnarRules
 import org.apache.spark.sql.execution.datasources.v1._
+import org.apache.spark.sql.internal.SparkConfigUtil._
 import org.apache.spark.sql.utils.ExpressionUtil
 import org.apache.spark.util.{SparkDirectoryUtil, SparkShutdownManagerUtil}
 
@@ -49,7 +49,7 @@ class CHListenerApi extends ListenerApi with Logging {
     initialize(pc.conf, isDriver = true)
 
     val expressionExtensionTransformer = 
ExpressionUtil.extendedExpressionTransformer(
-      pc.conf.get(GlutenConfig.EXTENDED_EXPRESSION_TRAN_CONF.key, "")
+      pc.conf.get(GlutenConfig.EXTENDED_EXPRESSION_TRAN_CONF)
     )
     if (expressionExtensionTransformer != null) {
       
ExpressionExtensionTrait.registerExpressionExtension(expressionExtensionTransformer)
@@ -75,8 +75,7 @@ class CHListenerApi extends ListenerApi with Logging {
     CHBatchType.ensureRegistered()
     CHCarrierRowType.ensureRegistered()
     SparkDirectoryUtil.init(conf)
-    val libPath =
-      conf.get(GlutenConfig.GLUTEN_LIB_PATH.key, 
GlutenConfig.GLUTEN_LIB_PATH.defaultValueString)
+    val libPath = conf.get(GlutenConfig.GLUTEN_LIB_PATH)
     if (StringUtils.isBlank(libPath)) {
       throw new IllegalArgumentException(
         "Please set spark.gluten.sql.columnar.libpath to enable clickhouse 
backend")
@@ -102,14 +101,14 @@ class CHListenerApi extends ListenerApi with Logging {
     }
 
     // add memory limit for external sort
-    if (conf.getLong(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, -1) < 
0) {
-      if (conf.getBoolean("spark.memory.offHeap.enabled", defaultValue = 
false)) {
-        val memSize = 
JavaUtils.byteStringAsBytes(conf.get("spark.memory.offHeap.size"))
+    if (conf.get(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT) <= 0) {
+      if (conf.getBoolean(GlutenConfig.SPARK_OFFHEAP_ENABLED, defaultValue = 
false)) {
+        val memSize = conf.getSizeAsBytes(GlutenConfig.SPARK_OFFHEAP_SIZE_KEY, 
0)
         if (memSize > 0L) {
           val cores = conf.getInt("spark.executor.cores", 1).toLong
           val sortMemLimit = ((memSize / cores) * 0.8).toLong
           logDebug(s"max memory for sorting: $sortMemLimit")
-          conf.set(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT.key, 
sortMemLimit.toString)
+          conf.set(RuntimeSettings.MAX_BYTES_BEFORE_EXTERNAL_SORT, 
sortMemLimit)
         }
       }
     }
diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala
index 906d6d9ef7..e0d4b9f83e 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala
@@ -17,6 +17,7 @@
 package org.apache.gluten.backendsapi.clickhouse
 
 import org.apache.gluten.backendsapi.TransformerApi
+import org.apache.gluten.config.GlutenConfig
 import org.apache.gluten.execution.{CHHashAggregateExecTransformer, 
WriteFilesExecTransformer}
 import org.apache.gluten.expression.ConverterUtils
 import org.apache.gluten.substrait.SubstraitContext
@@ -37,6 +38,7 @@ import 
org.apache.spark.sql.execution.datasources.orc.OrcFileFormat
 import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 import org.apache.spark.sql.execution.datasources.v1.Write
 import 
org.apache.spark.sql.execution.datasources.v2.clickhouse.source.DeltaMergeTreeFileFormat
+import org.apache.spark.sql.internal.SparkConfigUtil
 import org.apache.spark.sql.sources.DataSourceRegister
 import org.apache.spark.sql.types._
 import org.apache.spark.util.collection.BitSet
@@ -96,11 +98,10 @@ class CHTransformerApi extends TransformerApi with Logging {
       backendPrefix: String): Unit = {
 
     require(backendPrefix == CHConfig.CONF_PREFIX)
-    if (nativeConfMap.getOrDefault("spark.memory.offHeap.enabled", 
"false").toBoolean) {
-      val offHeapSize =
-        
nativeConfMap.getOrDefault("spark.gluten.memory.offHeap.size.in.bytes", 
"0").toLong
+    if (nativeConfMap.getOrDefault(GlutenConfig.SPARK_OFFHEAP_ENABLED, 
"false").toBoolean) {
+      val offHeapSize: Long =
+        SparkConfigUtil.get(nativeConfMap, 
GlutenConfig.COLUMNAR_OFFHEAP_SIZE_IN_BYTES)
       if (offHeapSize > 0) {
-
         // Only set default max_bytes_before_external_group_by for CH when it 
is not set explicitly.
         val groupBySpillKey = 
CHConfig.runtimeSettings("max_bytes_before_external_group_by")
         if (!nativeConfMap.containsKey(groupBySpillKey)) {
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
index b9e24e5b9c..44bfd0aab3 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala
@@ -43,6 +43,7 @@ import 
org.apache.spark.sql.execution.datasources.GlutenWriterColumnarRules
 import 
org.apache.spark.sql.execution.datasources.velox.{VeloxParquetWriterInjects, 
VeloxRowSplitter}
 import org.apache.spark.sql.expression.UDFResolver
 import org.apache.spark.sql.internal.{GlutenConfigUtil, StaticSQLConf}
+import org.apache.spark.sql.internal.SparkConfigUtil._
 import org.apache.spark.util.{SparkDirectoryUtil, SparkResourceUtil, 
SparkShutdownManagerUtil}
 
 import org.apache.commons.lang3.StringUtils
@@ -59,18 +60,15 @@ class VeloxListenerApi extends ListenerApi with Logging {
     // When the Velox cache is enabled, the Velox file handle cache should 
also be enabled.
     // Otherwise, a 'reference id not found' error may occur.
     if (
-      conf.getBoolean(COLUMNAR_VELOX_CACHE_ENABLED.key, false) &&
-      !conf.getBoolean(COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED.key, false)
+      conf.get(COLUMNAR_VELOX_CACHE_ENABLED) &&
+      !conf.get(COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED)
     ) {
       throw new IllegalArgumentException(
         s"${COLUMNAR_VELOX_CACHE_ENABLED.key} and " +
           s"${COLUMNAR_VELOX_FILE_HANDLE_CACHE_ENABLED.key} should be enabled 
together.")
     }
 
-    if (
-      conf.getBoolean(COLUMNAR_VELOX_CACHE_ENABLED.key, false) &&
-      conf.getSizeAsBytes(LOAD_QUANTUM.key, LOAD_QUANTUM.defaultValueString) > 
8 * 1024 * 1024
-    ) {
+    if (conf.get(COLUMNAR_VELOX_CACHE_ENABLED) && conf.get(LOAD_QUANTUM) > 8 * 
1024 * 1024) {
       throw new IllegalArgumentException(
         s"Velox currently only support up to 8MB load quantum size " +
           s"on SSD cache enabled by ${COLUMNAR_VELOX_CACHE_ENABLED.key}, " +
@@ -100,13 +98,11 @@ class VeloxListenerApi extends ListenerApi with Logging {
           s" the recommended size 
${ByteUnit.BYTE.toMiB(desiredOverheadSize)}MiB." +
           s" This may cause OOM.")
     }
-    conf.set(GlutenConfig.COLUMNAR_OVERHEAD_SIZE_IN_BYTES.key, 
overheadSize.toString)
+    conf.set(GlutenConfig.COLUMNAR_OVERHEAD_SIZE_IN_BYTES, overheadSize)
 
     // Sql table cache serializer.
-    if (conf.getBoolean(GlutenConfig.COLUMNAR_TABLE_CACHE_ENABLED.key, 
defaultValue = false)) {
-      conf.set(
-        StaticSQLConf.SPARK_CACHE_SERIALIZER.key,
-        classOf[ColumnarCachedBatchSerializer].getName)
+    if (conf.get(GlutenConfig.COLUMNAR_TABLE_CACHE_ENABLED)) {
+      conf.set(StaticSQLConf.SPARK_CACHE_SERIALIZER, 
classOf[ColumnarCachedBatchSerializer].getName)
     }
 
     // Static initializers for driver.
@@ -154,8 +150,8 @@ class VeloxListenerApi extends ListenerApi with Logging {
   private def initialize(conf: SparkConf, isDriver: Boolean): Unit = {
     // Sets this configuration only once, since not undoable.
     // DebugInstance should be created first.
-    if (conf.getBoolean(GlutenConfig.DEBUG_KEEP_JNI_WORKSPACE.key, 
defaultValue = false)) {
-      val debugDir = conf.get(GlutenConfig.DEBUG_KEEP_JNI_WORKSPACE_DIR.key)
+    if (conf.get(GlutenConfig.DEBUG_KEEP_JNI_WORKSPACE)) {
+      val debugDir = conf.get(GlutenConfig.DEBUG_KEEP_JNI_WORKSPACE_DIR)
       JniWorkspace.enableDebug(debugDir)
     } else {
       JniWorkspace.initializeDefault(
@@ -202,11 +198,11 @@ class VeloxListenerApi extends ListenerApi with Logging {
     SharedLibraryLoader.load(conf, loader)
 
     // Load backend libraries.
-    val libPath = conf.get(GlutenConfig.GLUTEN_LIB_PATH.key, StringUtils.EMPTY)
+    val libPath = conf.get(GlutenConfig.GLUTEN_LIB_PATH)
     if (StringUtils.isNotBlank(libPath)) { // Path based load. Ignore all 
other loadees.
       JniLibLoader.loadFromPath(libPath)
     } else {
-      val baseLibName = conf.get(GlutenConfig.GLUTEN_LIB_NAME.key, "gluten")
+      val baseLibName = conf.get(GlutenConfig.GLUTEN_LIB_NAME)
       loader.load(s"$platformLibDir/${System.mapLibraryName(baseLibName)}")
       
loader.load(s"$platformLibDir/${System.mapLibraryName(VeloxBackend.BACKEND_NAME)}")
     }
@@ -224,8 +220,7 @@ class VeloxListenerApi extends ListenerApi with Logging {
   }
 
   private def addIfNeedMemoryDumpShutdownHook(conf: SparkConf): Unit = {
-    val memoryDumpOnExit =
-      conf.get(MEMORY_DUMP_ON_EXIT.key, 
MEMORY_DUMP_ON_EXIT.defaultValueString).toBoolean
+    val memoryDumpOnExit = conf.get(MEMORY_DUMP_ON_EXIT)
     if (memoryDumpOnExit) {
       SparkShutdownManagerUtil.addHook(
         () => {
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/utils/SharedLibraryLoader.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/utils/SharedLibraryLoader.scala
index 3632202de9..109f3f014a 100755
--- 
a/backends-velox/src/main/scala/org/apache/gluten/utils/SharedLibraryLoader.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/utils/SharedLibraryLoader.scala
@@ -21,6 +21,7 @@ import org.apache.gluten.exception.GlutenException
 import org.apache.gluten.jni.JniLibLoader
 
 import org.apache.spark.SparkConf
+import org.apache.spark.sql.internal.SparkConfigUtil._
 
 import scala.sys.process._
 
@@ -30,9 +31,7 @@ trait SharedLibraryLoader {
 
 object SharedLibraryLoader {
   def load(conf: SparkConf, jni: JniLibLoader): Unit = {
-    val shouldLoad = conf.getBoolean(
-      GlutenConfig.GLUTEN_LOAD_LIB_FROM_JAR.key,
-      GlutenConfig.GLUTEN_LOAD_LIB_FROM_JAR.defaultValue.get)
+    val shouldLoad = conf.get(GlutenConfig.GLUTEN_LOAD_LIB_FROM_JAR)
     if (!shouldLoad) {
       return
     }
@@ -54,9 +53,9 @@ object SharedLibraryLoader {
   }
 
   private def find(conf: SparkConf): SharedLibraryLoader = {
-    val systemName = conf.getOption(GlutenConfig.GLUTEN_LOAD_LIB_OS.key)
+    val systemName = conf.get(GlutenConfig.GLUTEN_LOAD_LIB_OS)
     val loader = if (systemName.isDefined) {
-      val systemVersion = 
conf.getOption(GlutenConfig.GLUTEN_LOAD_LIB_OS_VERSION.key)
+      val systemVersion = conf.get(GlutenConfig.GLUTEN_LOAD_LIB_OS_VERSION)
       if (systemVersion.isEmpty) {
         throw new GlutenException(
           s"${GlutenConfig.GLUTEN_LOAD_LIB_OS_VERSION.key} must be specified 
when specifies the " +
diff --git a/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala 
b/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala
index 611e4e398c..0208d6d0db 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala
@@ -33,7 +33,8 @@ import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.softaffinity.SoftAffinityListener
 import org.apache.spark.sql.execution.adaptive.GlutenCostEvaluator
 import org.apache.spark.sql.execution.ui.{GlutenSQLAppStatusListener, 
GlutenUIUtils}
-import org.apache.spark.sql.internal.{SparkConfigUtil, SQLConf}
+import org.apache.spark.sql.internal.SparkConfigUtil._
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.StaticSQLConf.SPARK_SESSION_EXTENSIONS
 import org.apache.spark.task.TaskResources
 import org.apache.spark.util.SparkResourceUtil
@@ -62,11 +63,7 @@ private[gluten] class GlutenDriverPlugin extends 
DriverPlugin with Logging {
 
     // Register Gluten listeners
     GlutenSQLAppStatusListener.register(sc)
-    if (
-      conf.getBoolean(
-        GLUTEN_SOFT_AFFINITY_ENABLED.key,
-        GLUTEN_SOFT_AFFINITY_ENABLED.defaultValue.get)
-    ) {
+    if (conf.get(GLUTEN_SOFT_AFFINITY_ENABLED)) {
       SoftAffinityListener.register(sc)
     }
 
@@ -137,19 +134,13 @@ private[gluten] class GlutenDriverPlugin extends 
DriverPlugin with Logging {
   }
 
   private def checkOffHeapSettings(conf: SparkConf): Unit = {
-    if (
-      conf.getBoolean(
-        DYNAMIC_OFFHEAP_SIZING_ENABLED.key,
-        DYNAMIC_OFFHEAP_SIZING_ENABLED.defaultValue.get)
-    ) {
+    if (conf.get(DYNAMIC_OFFHEAP_SIZING_ENABLED)) {
       // When dynamic off-heap sizing is enabled, off-heap mode is not 
strictly required to be
       // enabled. Skip the check.
       return
     }
 
-    if (
-      conf.getBoolean(COLUMNAR_MEMORY_UNTRACKED.key, 
COLUMNAR_MEMORY_UNTRACKED.defaultValue.get)
-    ) {
+    if (conf.get(COLUMNAR_MEMORY_UNTRACKED)) {
       // When untracked memory mode is enabled, off-heap mode is not strictly 
required to be
       // enabled. Skip the check.
       return
@@ -169,22 +160,17 @@ private[gluten] class GlutenDriverPlugin extends 
DriverPlugin with Logging {
 
   private def setPredefinedConfigs(conf: SparkConf): Unit = {
     // Spark SQL extensions
-    val extensionSeq =
-      SparkConfigUtil.getEntryValue(conf, 
SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty)
+    val extensionSeq = conf.get(SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty)
     if 
(!extensionSeq.toSet.contains(GlutenSessionExtensions.GLUTEN_SESSION_EXTENSION_NAME))
 {
       conf.set(
-        SPARK_SESSION_EXTENSIONS.key,
-        (extensionSeq :+ 
GlutenSessionExtensions.GLUTEN_SESSION_EXTENSION_NAME).mkString(","))
+        SPARK_SESSION_EXTENSIONS,
+        extensionSeq :+ GlutenSessionExtensions.GLUTEN_SESSION_EXTENSION_NAME)
     }
 
     // adaptive custom cost evaluator class
-    val enableGlutenCostEvaluator = conf.getBoolean(
-      GlutenConfig.COST_EVALUATOR_ENABLED.key,
-      GlutenConfig.COST_EVALUATOR_ENABLED.defaultValue.get)
+    val enableGlutenCostEvaluator = 
conf.get(GlutenConfig.COST_EVALUATOR_ENABLED)
     if (enableGlutenCostEvaluator) {
-      conf.set(
-        SQLConf.ADAPTIVE_CUSTOM_COST_EVALUATOR_CLASS.key,
-        classOf[GlutenCostEvaluator].getName)
+      conf.set(SQLConf.ADAPTIVE_CUSTOM_COST_EVALUATOR_CLASS, 
classOf[GlutenCostEvaluator].getName)
     }
 
     // check memory off-heap enabled and size.
@@ -194,39 +180,33 @@ private[gluten] class GlutenDriverPlugin extends 
DriverPlugin with Logging {
     val offHeapSize = conf.getSizeAsBytes(SPARK_OFFHEAP_SIZE_KEY)
 
     // Set off-heap size in bytes.
-    conf.set(COLUMNAR_OFFHEAP_SIZE_IN_BYTES.key, offHeapSize.toString)
+    conf.set(COLUMNAR_OFFHEAP_SIZE_IN_BYTES, offHeapSize)
 
     // Set off-heap size in bytes per task.
     val taskSlots = SparkResourceUtil.getTaskSlots(conf)
-    conf.set(NUM_TASK_SLOTS_PER_EXECUTOR.key, taskSlots.toString)
+    conf.set(NUM_TASK_SLOTS_PER_EXECUTOR, taskSlots)
     val offHeapPerTask = offHeapSize / taskSlots
-    conf.set(COLUMNAR_TASK_OFFHEAP_SIZE_IN_BYTES.key, offHeapPerTask.toString)
+    conf.set(COLUMNAR_TASK_OFFHEAP_SIZE_IN_BYTES, offHeapPerTask)
 
     // Pessimistic off-heap sizes, with the assumption that all non-borrowable 
storage memory
     // determined by spark.memory.storageFraction was used.
     val fraction = 1.0d - conf.getDouble("spark.memory.storageFraction", 0.5d)
     val conservativeOffHeapPerTask = (offHeapSize * fraction).toLong / 
taskSlots
-    conf.set(
-      COLUMNAR_CONSERVATIVE_TASK_OFFHEAP_SIZE_IN_BYTES.key,
-      conservativeOffHeapPerTask.toString)
+    conf.set(COLUMNAR_CONSERVATIVE_TASK_OFFHEAP_SIZE_IN_BYTES, 
conservativeOffHeapPerTask)
 
     // Disable vanilla columnar readers, to prevent columnar-to-columnar 
conversions.
     // FIXME: Do we still need this trick since
     //  https://github.com/apache/incubator-gluten/pull/1931 was merged?
-    if (
-      !conf.getBoolean(
-        VANILLA_VECTORIZED_READERS_ENABLED.key,
-        VANILLA_VECTORIZED_READERS_ENABLED.defaultValue.get)
-    ) {
+    if (!conf.get(VANILLA_VECTORIZED_READERS_ENABLED)) {
       // FIXME Hongze 22/12/06
       //  BatchScan.scala in shim was not always loaded by class loader.
       //  The file should be removed and the "ClassCastException" issue caused 
by
       //  spark.sql.<format>.enableVectorizedReader=true should be fixed in 
another way.
       //  Before the issue is fixed we force the use of vanilla row reader by 
using
       //  the following statement.
-      conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "false")
-      conf.set(SQLConf.ORC_VECTORIZED_READER_ENABLED.key, "false")
-      conf.set(SQLConf.CACHE_VECTORIZED_READER_ENABLED.key, "false")
+      conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED, false)
+      conf.set(SQLConf.ORC_VECTORIZED_READER_ENABLED, false)
+      conf.set(SQLConf.CACHE_VECTORIZED_READER_ENABLED, false)
     }
   }
 }
diff --git 
a/gluten-core/src/main/scala/org/apache/spark/sql/internal/SparkConfigUtil.scala
 
b/gluten-core/src/main/scala/org/apache/spark/sql/internal/SparkConfigUtil.scala
index 945174073e..47c3a0b7f1 100644
--- 
a/gluten-core/src/main/scala/org/apache/spark/sql/internal/SparkConfigUtil.scala
+++ 
b/gluten-core/src/main/scala/org/apache/spark/sql/internal/SparkConfigUtil.scala
@@ -16,11 +16,64 @@
  */
 package org.apache.spark.sql.internal
 
+import org.apache.gluten.config.ConfigEntry
+
 import org.apache.spark.SparkConf
-import org.apache.spark.internal.config.ConfigEntry
+import org.apache.spark.internal.config.{ConfigEntry => SparkConfigEntry, 
OptionalConfigEntry}
 
 object SparkConfigUtil {
-  def getEntryValue[T](conf: SparkConf, entry: ConfigEntry[T]): T = {
+
+  implicit class RichSparkConf(val conf: SparkConf) {
+    def get[T](entry: SparkConfigEntry[T]): T = {
+      SparkConfigUtil.get(conf, entry)
+    }
+
+    def get[T](entry: ConfigEntry[T]): T = {
+      SparkConfigUtil.get(conf, entry)
+    }
+
+    def set[T](entry: SparkConfigEntry[T], value: T): SparkConf = {
+      SparkConfigUtil.set(conf, entry, value)
+    }
+
+    def set[T](entry: OptionalConfigEntry[T], value: T): SparkConf = {
+      SparkConfigUtil.set(conf, entry, value)
+    }
+
+    def set[T](entry: ConfigEntry[T], value: T): SparkConf = {
+      SparkConfigUtil.set(conf, entry, value)
+    }
+  }
+
+  def get[T](conf: SparkConf, entry: SparkConfigEntry[T]): T = {
     conf.get(entry)
   }
+
+  def get[T](conf: SparkConf, entry: ConfigEntry[T]): T = {
+    entry.valueConverter(conf.get(entry.key, entry.defaultValueString))
+  }
+
+  def get[T](conf: java.util.Map[String, String], entry: SparkConfigEntry[T]): 
T = {
+    entry.valueConverter(conf.getOrDefault(entry.key, 
entry.defaultValueString))
+  }
+
+  def get[T](conf: java.util.Map[String, String], entry: ConfigEntry[T]): T = {
+    entry.valueConverter(conf.getOrDefault(entry.key, 
entry.defaultValueString))
+  }
+
+  def set[T](conf: SparkConf, entry: SparkConfigEntry[T], value: T): SparkConf 
= {
+    conf.set(entry, value)
+  }
+
+  def set[T](conf: SparkConf, entry: OptionalConfigEntry[T], value: T): 
SparkConf = {
+    conf.set(entry, value)
+  }
+
+  def set[T](conf: SparkConf, entry: ConfigEntry[T], value: T): SparkConf = {
+    value match {
+      case Some(v) => conf.set(entry.key, v.toString)
+      case None | null => conf.set(entry.key, null)
+      case _ => conf.set(entry.key, value.toString)
+    }
+  }
 }
diff --git 
a/gluten-core/src/main/scala/org/apache/spark/sql/internal/SparkConfigUtil.scala
 
b/gluten-core/src/test/scala/org/apache/gluten/config/SparkConfigUtilSuite.scala
similarity index 54%
copy from 
gluten-core/src/main/scala/org/apache/spark/sql/internal/SparkConfigUtil.scala
copy to 
gluten-core/src/test/scala/org/apache/gluten/config/SparkConfigUtilSuite.scala
index 945174073e..a9a15f46f4 100644
--- 
a/gluten-core/src/main/scala/org/apache/spark/sql/internal/SparkConfigUtil.scala
+++ 
b/gluten-core/src/test/scala/org/apache/gluten/config/SparkConfigUtilSuite.scala
@@ -14,13 +14,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.spark.sql.internal
+package org.apache.gluten.config
 
 import org.apache.spark.SparkConf
-import org.apache.spark.internal.config.ConfigEntry
+import org.apache.spark.sql.internal.SparkConfigUtil._
+import org.apache.spark.sql.internal.SQLConf
 
-object SparkConfigUtil {
-  def getEntryValue[T](conf: SparkConf, entry: ConfigEntry[T]): T = {
-    conf.get(entry)
+import org.scalatest.funsuite.AnyFunSuiteLike
+
+class SparkConfigUtilSuite extends AnyFunSuiteLike {
+
+  test("SparkConfigUtil.get and set methods") {
+    val conf = new SparkConf()
+    conf.set(GlutenConfig.SHUFFLE_WRITER_BUFFER_SIZE, Some(1024 * 1024))
+    assert(conf.get(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD) === 10L * 1024 * 
1024)
+    assert(conf.get(GlutenConfig.GLUTEN_UI_ENABLED) === true)
+    assert(conf.get(GlutenConfig.TEXT_INPUT_ROW_MAX_BLOCK_SIZE) === 8L * 1024)
+    assert(conf.get(GlutenConfig.SHUFFLE_WRITER_BUFFER_SIZE) === Some(1024 * 
1024))
   }
 }
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UDFMappings.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UDFMappings.scala
index 568688c0bd..baaa6b2d64 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UDFMappings.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UDFMappings.scala
@@ -20,6 +20,7 @@ import org.apache.gluten.config.GlutenConfig
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.internal.SparkConfigUtil._
 
 import org.apache.commons.lang3.StringUtils
 
@@ -58,19 +59,19 @@ object UDFMappings extends Logging {
   }
 
   def loadFromSparkConf(conf: SparkConf): Unit = {
-    val strHiveUDFs = conf.get(GlutenConfig.GLUTEN_SUPPORTED_HIVE_UDFS.key, "")
+    val strHiveUDFs = conf.get(GlutenConfig.GLUTEN_SUPPORTED_HIVE_UDFS)
     if (!StringUtils.isBlank(strHiveUDFs)) {
       parseStringToMap(strHiveUDFs, hiveUDFMap)
       logDebug(s"loaded hive udf mappings:${hiveUDFMap.toString}")
     }
 
-    val strPythonUDFs = 
conf.get(GlutenConfig.GLUTEN_SUPPORTED_PYTHON_UDFS.key, "")
+    val strPythonUDFs = conf.get(GlutenConfig.GLUTEN_SUPPORTED_PYTHON_UDFS)
     if (!StringUtils.isBlank(strPythonUDFs)) {
       parseStringToMap(strPythonUDFs, pythonUDFMap)
       logDebug(s"loaded python udf mappings:${pythonUDFMap.toString}")
     }
 
-    val strScalaUDFs = conf.get(GlutenConfig.GLUTEN_SUPPORTED_SCALA_UDFS.key, 
"")
+    val strScalaUDFs = conf.get(GlutenConfig.GLUTEN_SUPPORTED_SCALA_UDFS)
     if (!StringUtils.isBlank(strScalaUDFs)) {
       parseStringToMap(strScalaUDFs, scalaUDFMap)
       logDebug(s"loaded scala udf mappings:${scalaUDFMap.toString}")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: Add a RichSparkConf to simplify interoperations gluten config entries (#9914)

Reply via email to