This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f59607995bbd [SPARK-49385][K8S] Fix `getReusablePVCs` to use
`podCreationTimeout` instead of `podAllocationDelay`
f59607995bbd is described below
commit f59607995bbd18cb881d062b65e32ff97c133107
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Aug 25 20:37:47 2024 -0700
[SPARK-49385][K8S] Fix `getReusablePVCs` to use `podCreationTimeout`
instead of `podAllocationDelay`
### What changes were proposed in this pull request?
This PR aims to use `podCreationTimeout` instead of `podAllocationDelay`
when `getReusablePVCs` excludes the newly created PVCs of previous batches.
### Why are the changes needed?
K8s control plane pod creation can be delayed for unknown reasons.
So, `podAllocationDelay` (default: 1s) is too short to conclude that the previous
allocation batch's pods have been created with their PVCs. We had better wait until
`podCreationTimeout`.
### Does this PR introduce _any_ user-facing change?
This affects only the initial set of executors because the baseline is
the PVC's `getCreationTimestamp`. So, this fixes only a buggy situation where a PVC
is shared by two executors due to a long-pending executor pod.
### How was this patch tested?
Pass the CIs with newly updated test cases.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47867 from dongjoon-hyun/SPARK-49385.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala | 2 +-
.../spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
index ef3547fd389f..8bc6e9340871 100644
---
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
+++
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala
@@ -430,7 +430,7 @@ class ExecutorPodsAllocator(
val reusablePVCs = createdPVCs
.filterNot(pvc => pvcsInUse.contains(pvc.getMetadata.getName))
.filter(pvc => now -
Instant.parse(pvc.getMetadata.getCreationTimestamp).toEpochMilli
- > podAllocationDelay)
+ > podCreationTimeout)
logInfo(log"Found ${MDC(LogKeys.COUNT, reusablePVCs.size)} reusable
PVCs from " +
log"${MDC(LogKeys.TOTAL, createdPVCs.size)} PVCs")
reusablePVCs
diff --git
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
index 74a56729e86d..093f5ef3bcb7 100644
---
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
+++
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
@@ -771,7 +771,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with
BeforeAndAfter {
val pvc = persistentVolumeClaim("pvc-0", "gp2", "200Gi")
pvc.getMetadata
- .setCreationTimestamp(Instant.now().minus(podAllocationDelay + 1,
MILLIS).toString)
+ .setCreationTimestamp(Instant.now().minus(podCreationTimeout + 1,
MILLIS).toString)
when(persistentVolumeClaimList.getItems).thenReturn(Seq(pvc).asJava)
when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]),
meq(secMgr),
meq(kubernetesClient), any(classOf[ResourceProfile])))
@@ -849,7 +849,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with
BeforeAndAfter {
val pvc2 = persistentVolumeClaim("pvc-2", "gp2", "200Gi")
val now = Instant.now()
- pvc1.getMetadata.setCreationTimestamp(now.minus(2 * podAllocationDelay,
MILLIS).toString)
+ pvc1.getMetadata.setCreationTimestamp(now.minus(podCreationTimeout + 1,
MILLIS).toString)
pvc2.getMetadata.setCreationTimestamp(now.toString)
when(persistentVolumeClaimList.getItems).thenReturn(Seq(pvc1, pvc2).asJava)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]