This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 42ae6b2af5 [VL] Change loadQuantum default value to 8MB from 256MB
(#8186)
42ae6b2af5 is described below
commit 42ae6b2af5d106db415166bec80ad415837de099
Author: Kaifei Yi <[email protected]>
AuthorDate: Tue Dec 10 10:26:16 2024 +0800
[VL] Change loadQuantum default value to 8MB from 256MB (#8186)
---
.../scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala | 2 +-
cpp/velox/compute/VeloxBackend.cc | 3 ++-
shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala | 3 ++-
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
index cb3eeaec6d..77372d47cb 100644
---
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala
@@ -29,7 +29,7 @@ class VeloxParquetReadSuite extends
VeloxWholeStageTransformerSuite {
override protected def sparkConf: SparkConf = {
super.sparkConf
- .set(GlutenConfig.LOAD_QUANTUM.key, "128m")
+ .set(GlutenConfig.LOAD_QUANTUM.key, "8m")
}
testWithSpecifiedSparkVersion("read example parquet files", Some("3.5"),
Some("3.5")) {
diff --git a/cpp/velox/compute/VeloxBackend.cc
b/cpp/velox/compute/VeloxBackend.cc
index c453b9981f..10f7768d6b 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -277,8 +277,9 @@ void VeloxBackend::initConnector() {
backendConf_->get<std::string>(kMaxCoalescedDistance, "512KB"); // 512KB
connectorConfMap[velox::connector::hive::HiveConfig::kPrefetchRowGroups] =
backendConf_->get<std::string>(kPrefetchRowGroups, "1");
+ // Velox currently only supports up to 8MB load quantum size on SSD.
connectorConfMap[velox::connector::hive::HiveConfig::kLoadQuantum] =
- backendConf_->get<std::string>(kLoadQuantum, "268435456"); // 256M
+ backendConf_->get<std::string>(kLoadQuantum, "8388608"); // 8M
connectorConfMap[velox::connector::hive::HiveConfig::kFooterEstimatedSize] =
backendConf_->get<std::string>(kDirectorySizeGuess, "32768"); // 32K
connectorConfMap[velox::connector::hive::HiveConfig::kFilePreloadThreshold] =
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index 4f243f03fb..98a9e82b2c 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -2097,12 +2097,13 @@ object GlutenConfig {
.intConf
.createWithDefault(1)
+ // Velox currently only supports up to 8MB load quantum size on SSD.
val LOAD_QUANTUM =
buildStaticConf("spark.gluten.sql.columnar.backend.velox.loadQuantum")
.internal()
.doc("Set the load quantum for velox file scan")
.bytesConf(ByteUnit.BYTE)
- .createWithDefaultString("256MB")
+ .createWithDefaultString("8MB")
val MAX_COALESCED_DISTANCE_BYTES =
buildStaticConf("spark.gluten.sql.columnar.backend.velox.maxCoalescedDistance")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]