This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new a0dd0ab7ad7f [SPARK-49385][K8S] Fix `getReusablePVCs` to use 
`podCreationTimeout` instead of `podAllocationDelay`
a0dd0ab7ad7f is described below

commit a0dd0ab7ad7fcac0e9cf04335adfaeed8be74207
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Aug 25 20:37:47 2024 -0700

    [SPARK-49385][K8S] Fix `getReusablePVCs` to use `podCreationTimeout` 
instead of `podAllocationDelay`
    
    This PR aims to use `podCreationTimeout` instead of `podAllocationDelay` 
when `getReusablePVCs` excludes the newly created PVCs of previous batches.
    
    K8s control plane pod creation can be delayed for unknown reasons. 
So, `podAllocationDelay` (default: 1s) is insufficient to conclude that the 
previous allocation batch's pods have been created with their PVCs. We had 
better wait until `podCreationTimeout` has elapsed.
    
    This affects only the initial set of executors because the baseline is 
the PVC's `getCreationTimestamp`. So, this fixes only a buggy situation where 
a PVC is shared by two executors due to a long-pending executor pod.
    
    Pass the CIs with newly updated test cases.
    
    No.
    
    Closes #47867 from dongjoon-hyun/SPARK-49385.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit f59607995bbd18cb881d062b65e32ff97c133107)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala    | 2 +-
 .../spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
index 7174ba95927e..bbd594ed8b3f 100644
--- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
+++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
@@ -394,7 +394,7 @@ class ExecutorPodsAllocator(
         val reusablePVCs = createdPVCs
           .filterNot(pvc => pvcsInUse.contains(pvc.getMetadata.getName))
           .filter(pvc => now - 
Instant.parse(pvc.getMetadata.getCreationTimestamp).toEpochMilli
-            > podAllocationDelay)
+            > podCreationTimeout)
         logInfo(s"Found ${reusablePVCs.size} reusable PVCs from 
${createdPVCs.size} PVCs")
         reusablePVCs
       } catch {
diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
index 83946c74a701..bd3ab173fa73 100644
--- 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
+++ 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
@@ -728,7 +728,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with 
BeforeAndAfter {
 
     val pvc = persistentVolumeClaim("pvc-0", "gp2", "200Gi")
     pvc.getMetadata
-      .setCreationTimestamp(Instant.now().minus(podAllocationDelay + 1, 
MILLIS).toString)
+      .setCreationTimestamp(Instant.now().minus(podCreationTimeout + 1, 
MILLIS).toString)
     when(persistentVolumeClaimList.getItems).thenReturn(Seq(pvc).asJava)
     
when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]), 
meq(secMgr),
         meq(kubernetesClient), any(classOf[ResourceProfile])))
@@ -806,7 +806,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with 
BeforeAndAfter {
     val pvc2 = persistentVolumeClaim("pvc-2", "gp2", "200Gi")
 
     val now = Instant.now()
-    pvc1.getMetadata.setCreationTimestamp(now.minus(2 * podAllocationDelay, 
MILLIS).toString)
+    pvc1.getMetadata.setCreationTimestamp(now.minus(podCreationTimeout + 1, 
MILLIS).toString)
     pvc2.getMetadata.setCreationTimestamp(now.toString)
 
     when(persistentVolumeClaimList.getItems).thenReturn(Seq(pvc1, pvc2).asJava)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to