This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 2bc4c828e05b [SPARK-49385][K8S] Fix `getReusablePVCs` to use
`podCreationTimeout` instead of `podAllocationDelay`
2bc4c828e05b is described below
commit 2bc4c828e05bfd8f4ee417a6d5eff1612c0be8e5
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Aug 25 20:37:47 2024 -0700
[SPARK-49385][K8S] Fix `getReusablePVCs` to use `podCreationTimeout`
instead of `podAllocationDelay`
This PR aims to use `podCreationTimeout` instead of `podAllocationDelay`
when `getReusablePVCs` excludes the newly created PVCs of previous batches.
K8s control plane pod creation can be delayed for unknown reasons.
So, `podAllocationDelay` (default: 1s) is not long enough to conclude that the
previous allocation batch's pods have been created with their PVCs. We had better
wait until `podCreationTimeout`.
This affects only the initial set of executors because the baseline is
the PVC's `getCreationTimestamp`. So, this fixes only the buggy situation where a PVC
is shared by two executors due to a long-pending executor pod.
Pass the CIs with newly updated test cases.
No.
Closes #47867 from dongjoon-hyun/SPARK-49385.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit f59607995bbd18cb881d062b65e32ff97c133107)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala | 2 +-
.../spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
index 25970e918ec4..0ace14e2aaaf 100644
---
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
+++
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
@@ -424,7 +424,7 @@ class ExecutorPodsAllocator(
val reusablePVCs = createdPVCs
.filterNot(pvc => pvcsInUse.contains(pvc.getMetadata.getName))
.filter(pvc => now -
Instant.parse(pvc.getMetadata.getCreationTimestamp).toEpochMilli
- > podAllocationDelay)
+ > podCreationTimeout)
logInfo(s"Found ${reusablePVCs.size} reusable PVCs from
${createdPVCs.size} PVCs")
reusablePVCs
} catch {
diff --git
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
index a5b904b8cd95..f202499c849e 100644
---
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
+++
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
@@ -768,7 +768,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with
BeforeAndAfter {
val pvc = persistentVolumeClaim("pvc-0", "gp2", "200Gi")
pvc.getMetadata
- .setCreationTimestamp(Instant.now().minus(podAllocationDelay + 1,
MILLIS).toString)
+ .setCreationTimestamp(Instant.now().minus(podCreationTimeout + 1,
MILLIS).toString)
when(persistentVolumeClaimList.getItems).thenReturn(Seq(pvc).asJava)
when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]),
meq(secMgr),
meq(kubernetesClient), any(classOf[ResourceProfile])))
@@ -846,7 +846,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with
BeforeAndAfter {
val pvc2 = persistentVolumeClaim("pvc-2", "gp2", "200Gi")
val now = Instant.now()
- pvc1.getMetadata.setCreationTimestamp(now.minus(2 * podAllocationDelay,
MILLIS).toString)
+ pvc1.getMetadata.setCreationTimestamp(now.minus(podCreationTimeout + 1,
MILLIS).toString)
pvc2.getMetadata.setCreationTimestamp(now.toString)
when(persistentVolumeClaimList.getItems).thenReturn(Seq(pvc1, pvc2).asJava)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]