This is an automated email from the ASF dual-hosted git repository.
felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 8060cea49 [VL] Add 3 configs of spill (#5088)
8060cea49 is described below
commit 8060cea49874587e1831f729d6a3e3c9718e657a
Author: BInwei Yang <[email protected]>
AuthorDate: Tue Apr 2 09:54:02 2024 -0700
[VL] Add 3 configs of spill (#5088)
Add 3 spill configs:
spark.gluten.sql.columnar.backend.velox.MaxSpillRunRows
spark.gluten.sql.columnar.backend.velox.MaxSpillBytes
spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize
---
cpp/velox/compute/WholeStageResultIterator.cc | 14 +++++++++++
.../scala/org/apache/gluten/GlutenConfig.scala | 27 ++++++++++++++++++++++
2 files changed, 41 insertions(+)
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc
b/cpp/velox/compute/WholeStageResultIterator.cc
index 14044782b..89b77ac85 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -44,10 +44,18 @@ const uint32_t kSpillThreadNumDefaultValue = 0;
const std::string kAggregationSpillEnabled =
"spark.gluten.sql.columnar.backend.velox.aggregationSpillEnabled";
const std::string kJoinSpillEnabled =
"spark.gluten.sql.columnar.backend.velox.joinSpillEnabled";
const std::string kOrderBySpillEnabled =
"spark.gluten.sql.columnar.backend.velox.orderBySpillEnabled";
+
+// spill config
+// refer to
+//
https://github.com/facebookincubator/velox/blob/95f3e80e77d046c12fbc79dc529366be402e9c2b/velox/docs/configs.rst#spilling
const std::string kMaxSpillLevel =
"spark.gluten.sql.columnar.backend.velox.maxSpillLevel";
const std::string kMaxSpillFileSize =
"spark.gluten.sql.columnar.backend.velox.maxSpillFileSize";
const std::string kSpillStartPartitionBit =
"spark.gluten.sql.columnar.backend.velox.spillStartPartitionBit";
const std::string kSpillPartitionBits =
"spark.gluten.sql.columnar.backend.velox.spillPartitionBits";
+const std::string kMaxSpillRunRows =
"spark.gluten.sql.columnar.backend.velox.MaxSpillRunRows";
+const std::string kMaxSpillBytes =
"spark.gluten.sql.columnar.backend.velox.MaxSpillBytes";
+const std::string kSpillWriteBufferSize =
"spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize";
+
const std::string kSpillableReservationGrowthPct =
"spark.gluten.sql.columnar.backend.velox.spillableReservationGrowthPct";
const std::string kSpillCompressionKind = "spark.io.compression.codec";
@@ -513,6 +521,12 @@ std::unordered_map<std::string, std::string>
WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kMaxSpillLevel] =
std::to_string(veloxCfg_->get<int32_t>(kMaxSpillLevel, 4));
configs[velox::core::QueryConfig::kMaxSpillFileSize] =
std::to_string(veloxCfg_->get<uint64_t>(kMaxSpillFileSize, 20L * 1024
* 1024));
+ configs[velox::core::QueryConfig::kMaxSpillRunRows] =
+ std::to_string(veloxCfg_->get<uint64_t>(kMaxSpillRunRows, 12L * 1024 *
1024));
+ configs[velox::core::QueryConfig::kMaxSpillBytes] =
+ std::to_string(veloxCfg_->get<uint64_t>(kMaxSpillBytes,
107374182400LL));
+ configs[velox::core::QueryConfig::kSpillWriteBufferSize] =
+ std::to_string(veloxCfg_->get<uint64_t>(kSpillWriteBufferSize, 4L *
1024 * 1024));
configs[velox::core::QueryConfig::kSpillStartPartitionBit] =
std::to_string(veloxCfg_->get<uint8_t>(kSpillStartPartitionBit, 29));
configs[velox::core::QueryConfig::kJoinSpillPartitionBits] =
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index 130dbf7bc..d2b0fd78f 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -254,6 +254,12 @@ class GlutenConfig(conf: SQLConf) extends Logging {
def veloxSpillFileSystem: String =
conf.getConf(COLUMNAR_VELOX_SPILL_FILE_SYSTEM)
+ def veloxMaxSpillRunRows: Long =
conf.getConf(COLUMNAR_VELOX_MAX_SPILL_RUN_ROWS)
+
+ def veloxMaxSpillBytes: Long = conf.getConf(COLUMNAR_VELOX_MAX_SPILL_BYTES)
+
+ def veloxMaxWriteBufferSize: Long =
conf.getConf(COLUMNAR_VELOX_MAX_SPILL_WRITE_BUFFER_SIZE)
+
def veloxBloomFilterExpectedNumItems: Long =
conf.getConf(COLUMNAR_VELOX_BLOOM_FILTER_EXPECTED_NUM_ITEMS)
@@ -1265,6 +1271,27 @@ object GlutenConfig {
.checkValues(Set("local", "heap-over-local"))
.createWithDefaultString("local")
+ val COLUMNAR_VELOX_MAX_SPILL_RUN_ROWS =
+ buildConf("spark.gluten.sql.columnar.backend.velox.maxSpillRunRows")
+ .internal()
+ .doc("The maximum row size of a single spill run")
+ .bytesConf(ByteUnit.BYTE)
+ .createWithDefaultString("12M")
+
+ val COLUMNAR_VELOX_MAX_SPILL_BYTES =
+ buildConf("spark.gluten.sql.columnar.backend.velox.maxSpillBytes")
+ .internal()
+ .doc("The maximum file size of a query")
+ .bytesConf(ByteUnit.BYTE)
+ .createWithDefaultString("100G")
+
+ val COLUMNAR_VELOX_MAX_SPILL_WRITE_BUFFER_SIZE =
+ buildConf("spark.gluten.sql.columnar.backend.velox.spillWriteBufferSize")
+ .internal()
+ .doc("The maximum write buffer size")
+ .bytesConf(ByteUnit.BYTE)
+ .createWithDefaultString("4M")
+
val MAX_PARTITION_PER_WRITERS_SESSION =
buildConf("spark.gluten.sql.columnar.backend.velox.maxPartitionsPerWritersSession")
.internal()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]