This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new bb2a25ed93fc [SPARK-53080][CORE][CONNECT] Support `cleanDirectory` in `SparkFileUtils` and `JavaUtils` bb2a25ed93fc is described below commit bb2a25ed93fcb0fe5f42be777b0d5f1f75c1f450 Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Sun Aug 3 07:44:38 2025 -0700 [SPARK-53080][CORE][CONNECT] Support `cleanDirectory` in `SparkFileUtils` and `JavaUtils` ### What changes were proposed in this pull request? This PR aims to support `cleanDirectory` in `SparkFileUtils` and `JavaUtils`. ### Why are the changes needed? To provide a faster implementation than Apache Commons IO's `FileUtils.cleanDirectory`, as the timings below show. **BEFORE** ```scala scala> spark.time(org.apache.commons.io.FileUtils.cleanDirectory(new java.io.File("/tmp/spark"))) Time taken: 1731 ms ``` **AFTER** ```scala scala> spark.time(org.apache.spark.network.util.JavaUtils.cleanDirectory(new java.io.File("/tmp/spark"))) Time taken: 1247 ms ``` ### Does this PR introduce _any_ user-facing change? No behavior change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51792 from dongjoon-hyun/SPARK-53080. 
Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../org/apache/spark/network/util/JavaUtils.java | 24 ++++++++++++++++++++++ .../org/apache/spark/util/SparkFileUtils.scala | 5 +++++ scalastyle-config.xml | 7 ++++++- .../org/apache/spark/sql/connect/ml/MLCache.scala | 3 +-- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java index d84d16794d3a..6027a1d120da 100644 --- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java +++ b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java @@ -145,6 +145,30 @@ public class JavaUtils { return size.get(); } + public static void cleanDirectory(File dir) throws IOException { + if (dir == null || !dir.exists() || !dir.isDirectory()) { + throw new IllegalArgumentException("Invalid input directory " + dir); + } + cleanDirectory(dir.toPath()); + } + + private static void cleanDirectory(Path rootDir) throws IOException { + Files.walkFileTree(rootDir, new SimpleFileVisitor<Path>() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException { + if (e != null) throw e; + if (!dir.equals(rootDir)) Files.delete(dir); + return FileVisitResult.CONTINUE; + } + }); + } + /** * Delete a file or directory and its contents recursively. * Don't follow directories if they are symlinks. 
diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala index 964211a49c71..2026b9c84329 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala @@ -119,6 +119,11 @@ private[spark] trait SparkFileUtils extends Logging { createDirectory(root, namePrefix) } + /** Delete recursively while keeping the given directory itself. */ + def cleanDirectory(dir: File): Unit = { + JavaUtils.cleanDirectory(dir) + } + /** * Delete a file or directory and its contents recursively. * Don't follow directories if they are symlinks. diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 3605daa081c4..190279ad229b 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -302,13 +302,18 @@ This file is divided into 3 sections: <customMessage>Use Files.write instead.</customMessage> </check> + <check customId="cleanDirectory" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex"> FileUtils\.cleanDirectory</parameter></parameters> + <customMessage>Use cleanDirectory of JavaUtils/SparkFileUtils/Utils</customMessage> + </check> + <check customId="deleteRecursively" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> <parameters><parameter name="regex">FileUtils\.deleteDirectory</parameter></parameters> <customMessage>Use deleteRecursively of SparkFileUtils or Utils</customMessage> </check> <check customId="deleteQuietly" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> - <parameters><parameter name="regex">FileUtils\.deleteQuietly</parameter></parameters> + <parameters><parameter name="regex"> FileUtils\.deleteQuietly</parameter></parameters> <customMessage>Use deleteQuietly of JavaUtils/SparkFileUtils/Utils</customMessage> </check> diff --git 
a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala index 4c1ded31e800..f944110a54c7 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala @@ -25,7 +25,6 @@ import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable import com.google.common.cache.{CacheBuilder, RemovalNotification} -import org.apache.commons.io.FileUtils import org.apache.spark.SparkException import org.apache.spark.internal.Logging @@ -243,7 +242,7 @@ private[connect] class MLCache(sessionHolder: SessionHolder) extends Logging { val size = cachedModel.size() cachedModel.clear() if (getMemoryControlEnabled) { - FileUtils.cleanDirectory(new File(offloadedModelsDir.toString)) + SparkFileUtils.cleanDirectory(new File(offloadedModelsDir.toString)) } size } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org