This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new bb2a25ed93fc [SPARK-53080][CORE][CONNECT] Support `cleanDirectory` in `SparkFileUtils` and `JavaUtils`
bb2a25ed93fc is described below

commit bb2a25ed93fcb0fe5f42be777b0d5f1f75c1f450
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Sun Aug 3 07:44:38 2025 -0700

    [SPARK-53080][CORE][CONNECT] Support `cleanDirectory` in `SparkFileUtils` and `JavaUtils`
    
    ### What changes were proposed in this pull request?
    
    This PR aims to support `cleanDirectory` in `SparkFileUtils` and `JavaUtils`.
    
    ### Why are the changes needed?
    
    To provide a better implementation.
    
    **BEFORE**
    
    ```scala
    scala> spark.time(org.apache.commons.io.FileUtils.cleanDirectory(new java.io.File("/tmp/spark")))
    Time taken: 1731 ms
    ```
    
    **AFTER**
    
    ```scala
    scala> spark.time(org.apache.spark.network.util.JavaUtils.cleanDirectory(new java.io.File("/tmp/spark")))
    Time taken: 1247 ms
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No behavior change.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #51792 from dongjoon-hyun/SPARK-53080.
    
    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../org/apache/spark/network/util/JavaUtils.java   | 24 ++++++++++++++++++++++
 .../org/apache/spark/util/SparkFileUtils.scala     |  5 +++++
 scalastyle-config.xml                              |  7 ++++++-
 .../org/apache/spark/sql/connect/ml/MLCache.scala  |  3 +--
 4 files changed, 36 insertions(+), 3 deletions(-)

diff --git 
a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java 
b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
index d84d16794d3a..6027a1d120da 100644
--- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -145,6 +145,30 @@ public class JavaUtils {
     return size.get();
   }
 
+  public static void cleanDirectory(File dir) throws IOException {
+    if (dir == null || !dir.exists() || !dir.isDirectory()) {
+      throw new IllegalArgumentException("Invalid input directory " + dir);
+    }
+    cleanDirectory(dir.toPath());
+  }
+
+  private static void cleanDirectory(Path rootDir) throws IOException {
+    Files.walkFileTree(rootDir, new SimpleFileVisitor<Path>() {
+      @Override
+      public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) 
throws IOException {
+        Files.delete(file);
+        return FileVisitResult.CONTINUE;
+      }
+
+      @Override
+      public FileVisitResult postVisitDirectory(Path dir, IOException e) 
throws IOException {
+        if (e != null) throw e;
+        if (!dir.equals(rootDir)) Files.delete(dir);
+        return FileVisitResult.CONTINUE;
+      }
+    });
+  }
+
   /**
    * Delete a file or directory and its contents recursively.
    * Don't follow directories if they are symlinks.
diff --git 
a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala 
b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
index 964211a49c71..2026b9c84329 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
@@ -119,6 +119,11 @@ private[spark] trait SparkFileUtils extends Logging {
     createDirectory(root, namePrefix)
   }
 
+  /** Delete recursively while keeping the given directory itself. */
+  def cleanDirectory(dir: File): Unit = {
+    JavaUtils.cleanDirectory(dir)
+  }
+
   /**
    * Delete a file or directory and its contents recursively.
    * Don't follow directories if they are symlinks.
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 3605daa081c4..190279ad229b 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -302,13 +302,18 @@ This file is divided into 3 sections:
     <customMessage>Use Files.write instead.</customMessage>
   </check>
 
+  <check customId="cleanDirectory" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex"> 
FileUtils\.cleanDirectory</parameter></parameters>
+    <customMessage>Use cleanDirectory of 
JavaUtils/SparkFileUtils/Utils</customMessage>
+  </check>
+
   <check customId="deleteRecursively" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
     <parameters><parameter 
name="regex">FileUtils\.deleteDirectory</parameter></parameters>
     <customMessage>Use deleteRecursively of SparkFileUtils or 
Utils</customMessage>
   </check>
 
   <check customId="deleteQuietly" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
-    <parameters><parameter 
name="regex">FileUtils\.deleteQuietly</parameter></parameters>
+    <parameters><parameter name="regex"> 
FileUtils\.deleteQuietly</parameter></parameters>
     <customMessage>Use deleteQuietly of 
JavaUtils/SparkFileUtils/Utils</customMessage>
   </check>
 
diff --git 
a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala
 
b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala
index 4c1ded31e800..f944110a54c7 100644
--- 
a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala
+++ 
b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala
@@ -25,7 +25,6 @@ import java.util.concurrent.atomic.AtomicLong
 import scala.collection.mutable
 
 import com.google.common.cache.{CacheBuilder, RemovalNotification}
-import org.apache.commons.io.FileUtils
 
 import org.apache.spark.SparkException
 import org.apache.spark.internal.Logging
@@ -243,7 +242,7 @@ private[connect] class MLCache(sessionHolder: 
SessionHolder) extends Logging {
     val size = cachedModel.size()
     cachedModel.clear()
     if (getMemoryControlEnabled) {
-      FileUtils.cleanDirectory(new File(offloadedModelsDir.toString))
+      SparkFileUtils.cleanDirectory(new File(offloadedModelsDir.toString))
     }
     size
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to