This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d2187f4b4ea1 [SPARK-53135][CORE][SQL] Support `copyURLToFile` in 
`SparkFileUtils` and `JavaUtils`
d2187f4b4ea1 is described below

commit d2187f4b4ea1fc4a5086a9657330d265b677efb5
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Tue Aug 5 19:06:19 2025 -0700

    [SPARK-53135][CORE][SQL] Support `copyURLToFile` in `SparkFileUtils` and 
`JavaUtils`
    
    ### What changes were proposed in this pull request?
    
    This PR aims to support `copyURLToFile` in `SparkFileUtils` and `JavaUtils`.
    
    ### Why are the changes needed?
    
    To improve Spark file utility features.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No behavior change.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #51861 from dongjoon-hyun/SPARK-53135.
    
    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../main/java/org/apache/spark/network/util/JavaUtils.java | 14 ++++++++++++++
 .../main/scala/org/apache/spark/util/SparkFileUtils.scala  |  6 +++++-
 core/src/test/scala/org/apache/spark/SparkFunSuite.scala   |  3 +--
 scalastyle-config.xml                                      |  5 +++++
 .../spark/sql/execution/datasources/orc/OrcTest.scala      |  4 ++--
 5 files changed, 27 insertions(+), 5 deletions(-)

diff --git 
a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java 
b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
index 554bd7f937af..7ee80104611c 100644
--- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -18,6 +18,7 @@
 package org.apache.spark.network.util;
 
 import java.io.*;
+import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.charset.StandardCharsets;
@@ -585,6 +586,19 @@ public class JavaUtils {
     }
   }
 
+  /**
+   * Copy the content of a URL into a file.
+   */
+  public static void copyURLToFile(URL url, File file) throws IOException {
+    if (url == null || file == null || (file.exists() && file.isDirectory())) {
+      throw new IllegalArgumentException("Invalid input " + url + " or " + 
file);
+    }
+    Files.createDirectories(file.getParentFile().toPath());
+    try (InputStream in = url.openStream()) {
+      Files.copy(in, file.toPath(), StandardCopyOption.REPLACE_EXISTING);
+    }
+  }
+
   public static String join(List<Object> arr, String sep) {
     if (arr == null) return "";
     StringJoiner joiner = new StringJoiner(sep == null ? "" : sep);
diff --git 
a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala 
b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
index 8da9332baa03..7ce83474089c 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala
@@ -17,7 +17,7 @@
 package org.apache.spark.util
 
 import java.io.File
-import java.net.{URI, URISyntaxException}
+import java.net.{URI, URISyntaxException, URL}
 import java.nio.file.{Files, Path, StandardCopyOption}
 import java.nio.file.attribute.FileTime
 
@@ -199,6 +199,10 @@ private[spark] trait SparkFileUtils extends Logging {
     Files.copy(src.toPath(), dst.toPath(), StandardCopyOption.REPLACE_EXISTING)
   }
 
+  def copyURLToFile(url: URL, file: File): Unit = {
+    JavaUtils.copyURLToFile(url, file)
+  }
+
   /** Return true if the content of the files are equal or they both don't 
exist */
   def contentEquals(file1: File, file2: File): Boolean = {
     if (file1 == null && file2 != null || file1 != null && file2 == null) {
diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala 
b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
index 0f80c5e65696..3d28bde1fb2b 100644
--- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
+++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala
@@ -26,7 +26,6 @@ import scala.annotation.tailrec
 import scala.collection.mutable.ArrayBuffer
 import scala.jdk.CollectionConverters._
 
-import org.apache.commons.io.FileUtils
 import org.apache.logging.log4j._
 import org.apache.logging.log4j.core.{LogEvent, Logger, LoggerContext}
 import org.apache.logging.log4j.core.appender.AbstractAppender
@@ -126,7 +125,7 @@ abstract class SparkFunSuite
     // copy it into a temporary one for accessing it from the dependent module.
     val file = File.createTempFile("test-resource", suffix)
     file.deleteOnExit()
-    FileUtils.copyURLToFile(url, file)
+    Utils.copyURLToFile(url, file)
     file
   }
 
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 5cb1b4ecfea7..d665536845b4 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -342,6 +342,11 @@ This file is divided into 3 sections:
     <customMessage>Use copyFile of JavaUtils/SparkFileUtils/Utils 
instead.</customMessage>
   </check>
 
+  <check customId="copyURLToFile" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter 
name="regex">\bFileUtils\.copyURLToFile\b</parameter></parameters>
+    <customMessage>Use copyURLToFile of JavaUtils/SparkFileUtils/Utils instead.</customMessage>
+  </check>
+
   <check customId="copyFile" level="error" 
class="org.scalastyle.file.RegexChecker" enabled="true">
     <parameters><parameter 
name="regex">\bFileUtils\.copyFile\b</parameter></parameters>
     <customMessage>Use copyFile of SparkFileUtils or Utils 
instead.</customMessage>
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala
index b96a61962a70..7f95c1fe85d7 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala
@@ -22,7 +22,6 @@ import java.io.File
 import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.TypeTag
 
-import org.apache.commons.io.FileUtils
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.spark.sql.{Column, DataFrame, QueryTest}
@@ -35,6 +34,7 @@ import 
org.apache.spark.sql.execution.datasources.v2.orc.OrcScan
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION
 import org.apache.spark.util.ArrayImplicits._
+import org.apache.spark.util.Utils
 
 /**
  * OrcTest
@@ -143,7 +143,7 @@ trait OrcTest extends QueryTest with 
FileBasedDataSourceTest with BeforeAndAfter
     // Copy to avoid URISyntaxException when `sql/hive` accesses the resources 
in `sql/core`
     val file = File.createTempFile("orc-test", ".orc")
     file.deleteOnExit();
-    FileUtils.copyURLToFile(url, file)
+    Utils.copyURLToFile(url, file)
     spark.read.orc(file.getAbsolutePath)
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to