(incubator-gluten) branch main updated: [VL] Fix warning when spark.gluten.sql.columnarToRowMemoryThreshold is not set (#6866)

hongze Thu, 15 Aug 2024 22:24:14 -0700

This is an automated email from the ASF dual-hosted git repository.

hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new a038e9332 [VL] Fix warning when spark.gluten.sql.columnarToRowMemoryThreshold is not set (#6866)
a038e9332 is described below

commit a038e9332a7b06f7e9e31892d0544c715b78a1c8
Author: Hongze Zhang <[email protected]>
AuthorDate: Fri Aug 16 13:23:58 2024 +0800

    [VL] Fix warning when spark.gluten.sql.columnarToRowMemoryThreshold is not set (#6866)
---
 .../org/apache/gluten/execution/VeloxTPCHSuite.scala      |  2 +-
 cpp/core/config/GlutenConfig.h                            |  1 -
 cpp/core/jni/JniWrapper.cc                                | 15 ++-------------
 .../src/main/scala/org/apache/gluten/GlutenConfig.scala   | 11 +++++------
 4 files changed, 8 insertions(+), 21 deletions(-)

diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala
index 22f96bbbc..0e94c242c 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala
@@ -255,7 +255,7 @@ class VeloxTPCHDistinctSpillSuite extends 
VeloxTPCHTableSupport {
     super.sparkConf
       .set("spark.memory.offHeap.size", "50m")
       .set("spark.gluten.memory.overAcquiredMemoryRatio", "0.9") // to trigger 
distinct spill early
-      .set(GlutenConfig.GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY, "8k")
+      .set(GlutenConfig.GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key, "8k")
   }
 
   test("distinct spill") {
diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index e4f5a884b..057d85930 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -57,7 +57,6 @@ const std::string kGzipWindowSize4k = "4096";
 const std::string kParquetCompressionCodec = 
"spark.sql.parquet.compression.codec";
 
 const std::string kColumnarToRowMemoryThreshold = 
"spark.gluten.sql.columnarToRowMemoryThreshold";
-const std::string kColumnarToRowMemoryDefaultThreshold = "67108864"; // 64MB
 
 const std::string kUGIUserName = "spark.gluten.ugi.username";
 const std::string kUGITokens = "spark.gluten.ugi.tokens";
diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index 5c2752f18..4be5e9142 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -534,19 +534,8 @@ 
Java_org_apache_gluten_vectorized_NativeColumnarToRowJniWrapper_nativeColumnarTo
   auto& conf = ctx->getConfMap();
   int64_t column2RowMemThreshold;
   auto it = conf.find(kColumnarToRowMemoryThreshold);
-  bool confIsLegal =
-      ((it == conf.end()) ? false : std::all_of(it->second.begin(), 
it->second.end(), [](unsigned char c) {
-        return std::isdigit(c);
-      }));
-  if (confIsLegal) {
-    column2RowMemThreshold = std::stoll(it->second);
-  } else {
-    LOG(INFO)
-        << "Because the spark.gluten.sql.columnarToRowMemoryThreshold 
configuration item is invalid, the kColumnarToRowMemoryDefaultThreshold default 
value is used, which is "
-        << kColumnarToRowMemoryDefaultThreshold << " byte";
-    column2RowMemThreshold = std::stoll(kColumnarToRowMemoryDefaultThreshold);
-  }
-
+  GLUTEN_CHECK(!(it == conf.end()), "Required key not found in runtime config: 
" + kColumnarToRowMemoryThreshold);
+  column2RowMemThreshold = std::stoll(it->second);
   // Convert the native batch to Spark unsafe row.
   return 
ctx->saveObject(ctx->createColumnar2RowConverter(column2RowMemThreshold));
   JNI_METHOD_END(kInvalidObjectHandle)
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala 
b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index b1ef4be5c..0146c3604 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -587,9 +587,6 @@ object GlutenConfig {
 
   val GLUTEN_SHUFFLE_WRITER_MERGE_THRESHOLD = 
"spark.gluten.sql.columnar.shuffle.merge.threshold"
 
-  // Columnar to row memory threshold.
-  val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY = 
"spark.gluten.sql.columnarToRowMemoryThreshold"
-
   // Controls whether to load DLL from jars. User can get dependent native 
libs packed into a jar
   // by executing dev/package.sh. Then, with that jar configured, Gluten can 
load the native libs
   // at runtime. This config is just for velox backend. And it is NOT 
applicable to the situation
@@ -654,7 +651,6 @@ object GlutenConfig {
       GLUTEN_SAVE_DIR,
       GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
       GLUTEN_MAX_BATCH_SIZE_KEY,
-      GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY,
       GLUTEN_SHUFFLE_WRITER_BUFFER_SIZE,
       SQLConf.SESSION_LOCAL_TIMEZONE.key,
       GLUTEN_DEFAULT_SESSION_TIMEZONE_KEY,
@@ -690,7 +686,10 @@ object GlutenConfig {
       (SQLConf.IGNORE_MISSING_FILES.key, 
SQLConf.IGNORE_MISSING_FILES.defaultValueString),
       (
         COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.key,
-        COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString)
+        COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString),
+      (
+        GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key,
+        GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.defaultValue.get.toString)
     )
     keyWithDefault.forEach(e => nativeConfMap.put(e._1, conf.getOrElse(e._1, 
e._2)))
 
@@ -1123,7 +1122,7 @@ object GlutenConfig {
       .createWithDefault(4096)
 
   val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD =
-    buildConf(GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY)
+    buildConf("spark.gluten.sql.columnarToRowMemoryThreshold")
       .internal()
       .bytesConf(ByteUnit.BYTE)
       .createWithDefaultString("64MB")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [VL] Fix warning when spark.gluten.sql.columnarToRowMemoryThreshold is not set (#6866)

Reply via email to