This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d2187f4b4ea1 [SPARK-53135][CORE][SQL] Support `copyURLToFile` in `SparkFileUtils` and `JavaUtils` d2187f4b4ea1 is described below commit d2187f4b4ea1fc4a5086a9657330d265b677efb5 Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Tue Aug 5 19:06:19 2025 -0700 [SPARK-53135][CORE][SQL] Support `copyURLToFile` in `SparkFileUtils` and `JavaUtils` ### What changes were proposed in this pull request? This PR aims to support `copyURLToFile` in `SparkFileUtils` and `JavaUtils`. ### Why are the changes needed? To improve Spark file utility features. ### Does this PR introduce _any_ user-facing change? No behavior change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51861 from dongjoon-hyun/SPARK-53135. Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../main/java/org/apache/spark/network/util/JavaUtils.java | 14 ++++++++++++++ .../main/scala/org/apache/spark/util/SparkFileUtils.scala | 6 +++++- core/src/test/scala/org/apache/spark/SparkFunSuite.scala | 3 +-- scalastyle-config.xml | 5 +++++ .../spark/sql/execution/datasources/orc/OrcTest.scala | 4 ++-- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java index 554bd7f937af..7ee80104611c 100644 --- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java +++ b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java @@ -18,6 +18,7 @@ package org.apache.spark.network.util; import java.io.*; +import java.net.URL; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; import java.nio.charset.StandardCharsets; @@ -585,6 +586,19 @@ public class JavaUtils { } } + /** + * Copy the content of a URL into a file. 
+ */ + public static void copyURLToFile(URL url, File file) throws IOException { + if (url == null || file == null || (file.exists() && file.isDirectory())) { + throw new IllegalArgumentException("Invalid input " + url + " or " + file); + } + Files.createDirectories(file.getParentFile().toPath()); + try (InputStream in = url.openStream()) { + Files.copy(in, file.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + } + public static String join(List<Object> arr, String sep) { if (arr == null) return ""; StringJoiner joiner = new StringJoiner(sep == null ? "" : sep); diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala index 8da9332baa03..7ce83474089c 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala @@ -17,7 +17,7 @@ package org.apache.spark.util import java.io.File -import java.net.{URI, URISyntaxException} +import java.net.{URI, URISyntaxException, URL} import java.nio.file.{Files, Path, StandardCopyOption} import java.nio.file.attribute.FileTime @@ -199,6 +199,10 @@ private[spark] trait SparkFileUtils extends Logging { Files.copy(src.toPath(), dst.toPath(), StandardCopyOption.REPLACE_EXISTING) } + def copyURLToFile(url: URL, file: File): Unit = { + JavaUtils.copyURLToFile(url, file) + } + /** Return true if the content of the files are equal or they both don't exist */ def contentEquals(file1: File, file2: File): Boolean = { if (file1 == null && file2 != null || file1 != null && file2 == null) { diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala index 0f80c5e65696..3d28bde1fb2b 100644 --- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala @@ -26,7 +26,6 @@ import scala.annotation.tailrec import 
scala.collection.mutable.ArrayBuffer import scala.jdk.CollectionConverters._ -import org.apache.commons.io.FileUtils import org.apache.logging.log4j._ import org.apache.logging.log4j.core.{LogEvent, Logger, LoggerContext} import org.apache.logging.log4j.core.appender.AbstractAppender @@ -126,7 +125,7 @@ abstract class SparkFunSuite // copy it into a temporary one for accessing it from the dependent module. val file = File.createTempFile("test-resource", suffix) file.deleteOnExit() - FileUtils.copyURLToFile(url, file) + Utils.copyURLToFile(url, file) file } diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 5cb1b4ecfea7..d665536845b4 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -342,6 +342,11 @@ This file is divided into 3 sections: <customMessage>Use copyFile of JavaUtils/SparkFileUtils/Utils instead.</customMessage> </check> + <check customId="copyURLToFile" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">\bFileUtils\.copyURLToFile\b</parameter></parameters> + <customMessage>Use copyURLToFile of JavaUtils instead.</customMessage> + </check> + <check customId="copyFile" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> <parameters><parameter name="regex">\bFileUtils\.copyFile\b</parameter></parameters> <customMessage>Use copyFile of SparkFileUtils or Utils instead.</customMessage> diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index b96a61962a70..7f95c1fe85d7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -22,7 +22,6 @@ import java.io.File import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag -import org.apache.commons.io.FileUtils import 
org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.{Column, DataFrame, QueryTest} @@ -35,6 +34,7 @@ import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION import org.apache.spark.util.ArrayImplicits._ +import org.apache.spark.util.Utils /** * OrcTest @@ -143,7 +143,7 @@ trait OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfter // Copy to avoid URISyntaxException when `sql/hive` accesses the resources in `sql/core` val file = File.createTempFile("orc-test", ".orc") file.deleteOnExit(); - FileUtils.copyURLToFile(url, file) + Utils.copyURLToFile(url, file) spark.read.orc(file.getAbsolutePath) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org