This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new a038e9332 [VL] Fix warning when spark.gluten.sql.columnarToRowMemoryThreshold is not set (#6866)
a038e9332 is described below
commit a038e9332a7b06f7e9e31892d0544c715b78a1c8
Author: Hongze Zhang <[email protected]>
AuthorDate: Fri Aug 16 13:23:58 2024 +0800
[VL] Fix warning when spark.gluten.sql.columnarToRowMemoryThreshold is not set (#6866)
---
.../org/apache/gluten/execution/VeloxTPCHSuite.scala | 2 +-
cpp/core/config/GlutenConfig.h | 1 -
cpp/core/jni/JniWrapper.cc | 15 ++-------------
.../src/main/scala/org/apache/gluten/GlutenConfig.scala | 11 +++++------
4 files changed, 8 insertions(+), 21 deletions(-)
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala
index 22f96bbbc..0e94c242c 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala
@@ -255,7 +255,7 @@ class VeloxTPCHDistinctSpillSuite extends VeloxTPCHTableSupport {
super.sparkConf
.set("spark.memory.offHeap.size", "50m")
.set("spark.gluten.memory.overAcquiredMemoryRatio", "0.9") // to trigger distinct spill early
- .set(GlutenConfig.GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY, "8k")
+ .set(GlutenConfig.GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key, "8k")
}
test("distinct spill") {
diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index e4f5a884b..057d85930 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -57,7 +57,6 @@ const std::string kGzipWindowSize4k = "4096";
const std::string kParquetCompressionCodec = "spark.sql.parquet.compression.codec";
const std::string kColumnarToRowMemoryThreshold = "spark.gluten.sql.columnarToRowMemoryThreshold";
-const std::string kColumnarToRowMemoryDefaultThreshold = "67108864"; // 64MB
const std::string kUGIUserName = "spark.gluten.ugi.username";
const std::string kUGITokens = "spark.gluten.ugi.tokens";
diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index 5c2752f18..4be5e9142 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -534,19 +534,8 @@ Java_org_apache_gluten_vectorized_NativeColumnarToRowJniWrapper_nativeColumnarTo
auto& conf = ctx->getConfMap();
int64_t column2RowMemThreshold;
auto it = conf.find(kColumnarToRowMemoryThreshold);
- bool confIsLegal =
- ((it == conf.end()) ? false : std::all_of(it->second.begin(), it->second.end(), [](unsigned char c) {
- return std::isdigit(c);
- }));
- if (confIsLegal) {
- column2RowMemThreshold = std::stoll(it->second);
- } else {
- LOG(INFO)
- << "Because the spark.gluten.sql.columnarToRowMemoryThreshold configuration item is invalid, the kColumnarToRowMemoryDefaultThreshold default value is used, which is "
- << kColumnarToRowMemoryDefaultThreshold << " byte";
- column2RowMemThreshold = std::stoll(kColumnarToRowMemoryDefaultThreshold);
- }
-
+ GLUTEN_CHECK(!(it == conf.end()), "Required key not found in runtime config: " + kColumnarToRowMemoryThreshold);
+ column2RowMemThreshold = std::stoll(it->second);
// Convert the native batch to Spark unsafe row.
return ctx->saveObject(ctx->createColumnar2RowConverter(column2RowMemThreshold));
JNI_METHOD_END(kInvalidObjectHandle)
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index b1ef4be5c..0146c3604 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -587,9 +587,6 @@ object GlutenConfig {
val GLUTEN_SHUFFLE_WRITER_MERGE_THRESHOLD =
"spark.gluten.sql.columnar.shuffle.merge.threshold"
- // Columnar to row memory threshold.
- val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY = "spark.gluten.sql.columnarToRowMemoryThreshold"
-
// Controls whether to load DLL from jars. User can get dependent native libs packed into a jar
// by executing dev/package.sh. Then, with that jar configured, Gluten can load the native libs
// at runtime. This config is just for velox backend. And it is NOT applicable to the situation
@@ -654,7 +651,6 @@ object GlutenConfig {
GLUTEN_SAVE_DIR,
GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
GLUTEN_MAX_BATCH_SIZE_KEY,
- GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY,
GLUTEN_SHUFFLE_WRITER_BUFFER_SIZE,
SQLConf.SESSION_LOCAL_TIMEZONE.key,
GLUTEN_DEFAULT_SESSION_TIMEZONE_KEY,
@@ -690,7 +686,10 @@ object GlutenConfig {
(SQLConf.IGNORE_MISSING_FILES.key,
SQLConf.IGNORE_MISSING_FILES.defaultValueString),
(
COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.key,
- COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString)
+ COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString),
+ (
+ GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key,
+ GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.defaultValue.get.toString)
)
keyWithDefault.forEach(e => nativeConfMap.put(e._1, conf.getOrElse(e._1, e._2)))
@@ -1123,7 +1122,7 @@ object GlutenConfig {
.createWithDefault(4096)
val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD =
- buildConf(GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY)
+ buildConf("spark.gluten.sql.columnarToRowMemoryThreshold")
.internal()
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("64MB")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]