This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 3d21141f0 [GLUTEN-7243][VL] Fix Q97 cross-task spilling hangs (#7244)
3d21141f0 is described below
commit 3d21141f0872d1a130994a7db4080feb3ba8781f
Author: Hongze Zhang <[email protected]>
AuthorDate: Sat Sep 14 20:57:02 2024 +0800
[GLUTEN-7243][VL] Fix Q97 cross-task spilling hangs (#7244)
Closes #7243
---
.github/workflows/velox_backend.yml | 1 -
cpp/velox/compute/WholeStageResultIterator.cc | 19 +++++++++++++------
ep/build-velox/src/get_velox.sh | 2 +-
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml
index 572a2a2d8..cd0fd1cd4 100644
--- a/.github/workflows/velox_backend.yml
+++ b/.github/workflows/velox_backend.yml
@@ -391,7 +391,6 @@ jobs:
-d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0
- name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low
memory, IO threads off
continue-on-error: true # OOM
- timeout-minutes: 15 #
https://github.com/apache/incubator-gluten/issues/7243
run: |
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index 4aa3449b6..1da5633df 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -231,20 +231,27 @@ int64_t WholeStageResultIterator::spillFixedSize(int64_t size) {
std::string poolName{pool->root()->name() + "/" + pool->name()};
std::string logPrefix{"Spill[" + poolName + "]: "};
int64_t shrunken = memoryManager_->shrink(size);
- // todo return the actual spilled size?
if (spillStrategy_ == "auto") {
+ if (task_->numThreads() != 0) {
+ // Task should have zero running threads, otherwise there's a
+ // possibility that this spill call hangs. See https://github.com/apache/incubator-gluten/issues/7243.
+ // As of now, non-zero running threads usually happen when:
+ // 1. Task A spills task B;
+ // 2. Task A tries to grow buffers created by task B, during which spill is requested on task A again.
+ VLOG(2) << logPrefix << "Spill is requested on a task " << task_->taskId()
+ << " that has non-zero running threads, which is not currently supported. Skipping.";
+ return shrunken;
+ }
int64_t remaining = size - shrunken;
- LOG(INFO) << logPrefix << "Trying to request spilling for " << remaining
<< " bytes...";
+ LOG(INFO) << logPrefix << "Trying to request spill for " << remaining << "
bytes...";
auto* mm = memoryManager_->getMemoryManager();
- uint64_t spilledOut = mm->arbitrator()->shrinkCapacity(remaining); // this
conducts spilling
+ uint64_t spilledOut = mm->arbitrator()->shrinkCapacity(remaining); // this
conducts spill
LOG(INFO) << logPrefix << "Successfully spilled out " << spilledOut << "
bytes.";
uint64_t total = shrunken + spilledOut;
VLOG(2) << logPrefix << "Successfully reclaimed total " << total << "
bytes.";
return total;
- } else {
- LOG(WARNING) << "Spill-to-disk was disabled since " << kSpillStrategy << "
was not configured.";
}
-
+ LOG(WARNING) << "Spill-to-disk was disabled since " << kSpillStrategy << "
was not configured.";
VLOG(2) << logPrefix << "Successfully reclaimed total " << shrunken << "
bytes.";
return shrunken;
}
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 28228224b..8a580e7ba 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
set -exu
VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_09_14
+VELOX_BRANCH=2024_09_14-2
VELOX_HOME=""
OS=`uname -s`
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]