This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a6c724ed816 [SPARK-39450][K8S] Reuse PVCs by default
a6c724ed816 is described below

commit a6c724ed816bd83cc6b3f5178b462e63b8e85972
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sat Jun 11 23:08:46 2022 -0700

    [SPARK-39450][K8S] Reuse PVCs by default
    
    ### What changes were proposed in this pull request?
    
    This PR aims to reuse PVCs by default via setting the followings.
    ```
    spark.kubernetes.driver.ownPersistentVolumeClaim=true
    spark.kubernetes.driver.reusePersistentVolumeClaim=true
    ```
    
    ### Why are the changes needed?
    
    By reusing PVCs (and PVs), we can remove the overhead of PV creation and 
removal.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. The migration guide is updated with this change.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    Closes #36849 from dongjoon-hyun/SPARK-39450.
    
    Authored-by: Dongjoon Hyun <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 docs/core-migration-guide.md                                       | 4 ++++
 docs/running-on-kubernetes.md                                      | 4 ++--
 .../core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala   | 4 ++--
 .../spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala   | 7 ++++---
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md
index 745b80d6eec..1a16b8f112a 100644
--- a/docs/core-migration-guide.md
+++ b/docs/core-migration-guide.md
@@ -22,6 +22,10 @@ license: |
 * Table of contents
 {:toc}
 
+## Upgrading from Core 3.3 to 3.4
+
+- Since Spark 3.4, Spark driver will own `PersistentVolumeClaim`s and try to 
reuse if they are not assigned to live executors. To restore the behavior 
before Spark 3.4, you can set 
`spark.kubernetes.driver.ownPersistentVolumeClaim` to `false` and 
`spark.kubernetes.driver.reusePersistentVolumeClaim` to `false`.
+
 ## Upgrading from Core 3.2 to 3.3
 
 - Since Spark 3.3, Spark migrates its log4j dependency from 1.x to 2.x because 
log4j 1.x has reached end of life and is no longer supported by the community. 
Vulnerabilities reported after August 2015 against log4j 1.x were not checked 
and will not be fixed. Users should rewrite original log4j properties files 
using log4j2 syntax (XML, JSON, YAML, or properties format). Spark rewrites the 
`conf/log4j.properties.template` which is included in Spark distribution, to 
`conf/log4j2.properties [...]
diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md
index 3445e22f434..d141fbd804a 100644
--- a/docs/running-on-kubernetes.md
+++ b/docs/running-on-kubernetes.md
@@ -1395,7 +1395,7 @@ See the [configuration page](configuration.html) for 
information on Spark config
 </tr>
 <tr>
   <td><code>spark.kubernetes.driver.ownPersistentVolumeClaim</code></td>
-  <td><code>false</code></td>
+  <td><code>true</code></td>
   <td>
     If true, driver pod becomes the owner of on-demand persistent volume 
claims instead of the executor pods
   </td>
@@ -1403,7 +1403,7 @@ See the [configuration page](configuration.html) for 
information on Spark config
 </tr>
 <tr>
   <td><code>spark.kubernetes.driver.reusePersistentVolumeClaim</code></td>
-  <td><code>false</code></td>
+  <td><code>true</code></td>
   <td>
     If true, driver pod tries to reuse driver-owned on-demand persistent 
volume claims
    of the deleted executor pods if they exist. This can be useful to reduce 
executor pod
diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
index 7930cd0ce15..168c86ecb3a 100644
--- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
+++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala
@@ -69,7 +69,7 @@ private[spark] object Config extends Logging {
         "instead of the executor pods")
       .version("3.2.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   val KUBERNETES_DRIVER_REUSE_PVC =
     ConfigBuilder("spark.kubernetes.driver.reusePersistentVolumeClaim")
@@ -83,7 +83,7 @@ private[spark] object Config extends Logging {
         s"sometimes. This config requires 
${KUBERNETES_DRIVER_OWN_PVC.key}=true.")
       .version("3.2.0")
       .booleanConf
-      .createWithDefault(false)
+      .createWithDefault(true)
 
   val KUBERNETES_NAMESPACE =
     ConfigBuilder("spark.kubernetes.namespace")
diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
index d263bd00731..87bd8ef3d9d 100644
--- 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
+++ 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala
@@ -117,6 +117,10 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite 
with BeforeAndAfter {
       conf, secMgr, executorBuilder, kubernetesClient, snapshotsStore, 
waitForExecutorPodsClock)
     when(schedulerBackend.getExecutorIds).thenReturn(Seq.empty)
     podsAllocatorUnderTest.start(TEST_SPARK_APP_ID, schedulerBackend)
+    
when(kubernetesClient.persistentVolumeClaims()).thenReturn(persistentVolumeClaims)
+    when(persistentVolumeClaims.withLabel(any(), 
any())).thenReturn(labeledPersistentVolumeClaims)
+    
when(labeledPersistentVolumeClaims.list()).thenReturn(persistentVolumeClaimList)
+    
when(persistentVolumeClaimList.getItems).thenReturn(Seq.empty[PersistentVolumeClaim].asJava)
   }
 
   test("SPARK-36052: test splitSlots") {
@@ -694,9 +698,6 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with 
BeforeAndAfter {
       .set(s"$prefix.option.sizeLimit", "200Gi")
       .set(s"$prefix.option.storageClass", "gp2")
 
-    
when(kubernetesClient.persistentVolumeClaims()).thenReturn(persistentVolumeClaims)
-    when(persistentVolumeClaims.withLabel(any(), 
any())).thenReturn(labeledPersistentVolumeClaims)
-    
when(labeledPersistentVolumeClaims.list()).thenReturn(persistentVolumeClaimList)
     when(persistentVolumeClaimList.getItems)
       .thenReturn(Seq(persistentVolumeClaim("pvc-0", "gp2", "200Gi")).asJava)
     
when(executorBuilder.buildFromFeatures(any(classOf[KubernetesExecutorConf]), 
meq(secMgr),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to