This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f59607995bbd [SPARK-49385][K8S] Fix `getReusablePVCs` to use 
`podCreationTimeout` instead of `podAllocationDelay`
f59607995bbd is described below

commit f59607995bbd18cb881d062b65e32ff97c133107
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Aug 25 20:37:47 2024 -0700

    [SPARK-49385][K8S] Fix `getReusablePVCs` to use `podCreationTimeout` 
instead of `podAllocationDelay`
    
    ### What changes were proposed in this pull request?
    
    This PR aims to use `podCreationTimeout` instead of `podAllocationDelay` 
when `getReusablePVCs` excludes the newly created PVCs of previous batches.
    
    ### Why are the changes needed?
    
    Pod creation by the K8s control plane can be delayed for unknown reasons.
So, `podAllocationDelay` (default: 1s) is too short to guarantee that the previous
allocation batch's pods have been created with their PVCs. We should wait until
`podCreationTimeout` has elapsed instead.
    
    ### Does this PR introduce _any_ user-facing change?
    
    This affects only the initial set of executors because the baseline is
the PVC's `getCreationTimestamp`. So, this fixes only a buggy situation where a PVC
is shared by two executors due to a long-pending executor pod.
    
    ### How was this patch tested?
    
    Pass the CIs with newly updated test cases.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #47867 from dongjoon-hyun/SPARK-49385.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala    | 2 +-
 .../spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
index ef3547fd389f..8bc6e9340871 100644
--- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
+++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
@@ -430,7 +430,7 @@ class ExecutorPodsAllocator(
         val reusablePVCs = createdPVCs
           .filterNot(pvc => pvcsInUse.contains(pvc.getMetadata.getName))
           .filter(pvc => now - 
Instant.parse(pvc.getMetadata.getCreationTimestamp).toEpochMilli
-            > podAllocationDelay)
+            > podCreationTimeout)
         logInfo(log"Found ${MDC(LogKeys.COUNT, reusablePVCs.size)} reusable 
PVCs from " +
           log"${MDC(LogKeys.TOTAL, createdPVCs.size)} PVCs")
         reusablePVCs
diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
index 74a56729e86d..093f5ef3bcb7 100644
--- 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
+++ 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
@@ -771,7 +771,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with 
BeforeAndAfter {
 
     val pvc = persistentVolumeClaim("pvc-0", "gp2", "200Gi")
     pvc.getMetadata
-      .setCreationTimestamp(Instant.now().minus(podAllocationDelay + 1, 
MILLIS).toString)
+      .setCreationTimestamp(Instant.now().minus(podCreationTimeout + 1, 
MILLIS).toString)
     when(persistentVolumeClaimList.getItems).thenReturn(Seq(pvc).asJava)
     
when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]), 
meq(secMgr),
         meq(kubernetesClient), any(classOf[ResourceProfile])))
@@ -849,7 +849,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with 
BeforeAndAfter {
     val pvc2 = persistentVolumeClaim("pvc-2", "gp2", "200Gi")
 
     val now = Instant.now()
-    pvc1.getMetadata.setCreationTimestamp(now.minus(2 * podAllocationDelay, 
MILLIS).toString)
+    pvc1.getMetadata.setCreationTimestamp(now.minus(podCreationTimeout + 1, 
MILLIS).toString)
     pvc2.getMetadata.setCreationTimestamp(now.toString)
 
     when(persistentVolumeClaimList.getItems).thenReturn(Seq(pvc1, pvc2).asJava)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to