This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new ccff9980762f [SPARK-53210][CORE][SQL][DSTREAM][YARN] Use Java `Files.write(String)?` instead of `com.google.common.io.Files.write`
ccff9980762f is described below

commit ccff9980762fd83f2619532028c04cb030b632bb
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Sat Aug 9 16:07:57 2025 +0800

    [SPARK-53210][CORE][SQL][DSTREAM][YARN] Use Java `Files.write(String)?` instead of `com.google.common.io.Files.write`

    ### What changes were proposed in this pull request?

    This PR aims to use Java `Files.write` instead of `com.google.common.io.Files.write`.

    ### Why are the changes needed?

    The Java native API is faster than the Guava API.

    ```scala
    scala> val a = new Array[Byte](2_000_000_000)

    scala> spark.time(java.nio.file.Files.write(java.nio.file.Path.of("/tmp/a"), a))
    Time taken: 560 ms

    scala> spark.time(com.google.common.io.Files.write(a, new java.io.File("/tmp/a")))
    Time taken: 685 ms
    ```

    ### Does this PR introduce _any_ user-facing change?

    No. This is a test-only change.

    ### How was this patch tested?

    Pass the CIs.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #51935 from dongjoon-hyun/SPARK-53210.

    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: yangjie01 <yangji...@baidu.com>
---
 .../org/apache/spark/network/sasl/SparkSaslSuite.java      |  6 +++---
 .../src/test/java/test/org/apache/spark/JavaAPISuite.java  |  8 ++++----
 .../apache/spark/executor/ExecutorClassLoaderSuite.scala   |  5 ++---
 .../spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala       |  5 ++---
 .../scala/org/apache/spark/sql/hive/InsertSuite.scala      |  4 ++--
 .../org/apache/spark/streaming/InputStreamsSuite.scala     | 15 +++++++--------
 6 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
index 48376e5bf2d4..715b4489edbd 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
@@ -23,6 +23,7 @@ import static org.mockito.Mockito.*;
 import java.io.File;
 import java.lang.reflect.Method;
 import java.nio.ByteBuffer;
+import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -36,7 +37,6 @@ import java.util.concurrent.atomic.AtomicReference;
 import javax.security.sasl.SaslException;

 import com.google.common.collect.ImmutableMap;
-import com.google.common.io.Files;
 import io.netty.buffer.ByteBuf;
 import io.netty.buffer.Unpooled;
 import io.netty.channel.Channel;
@@ -220,7 +220,7 @@ public class SparkSaslSuite {
     byte[] data = new byte[8 * 1024];
     new Random().nextBytes(data);
-    Files.write(data, file);
+    Files.write(file.toPath(), data);

     SaslEncryptionBackend backend = mock(SaslEncryptionBackend.class);
     // It doesn't really matter what we return here, as long as it's not null.
@@ -261,7 +261,7 @@ public class SparkSaslSuite {
     byte[] data = new byte[8 * 1024];
     new Random().nextBytes(data);
-    Files.write(data, file);
+    Files.write(file.toPath(), data);

     ctx = new SaslTestCtx(rpcHandler, true, false, testConf);
diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
index 57eafc38d3a6..4b5aeeec2392 100644
--- a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
@@ -21,6 +21,7 @@ import java.io.*;
 import java.nio.channels.FileChannel;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -49,7 +50,6 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 import com.google.common.base.Throwables;
-import com.google.common.io.Files;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
@@ -960,7 +960,7 @@ public class JavaAPISuite implements Serializable {
     rdd.saveAsTextFile(outputDir);
     // Read the plain text file and check it's OK
     File outputFile = new File(outputDir, "part-00000");
-    String content = java.nio.file.Files.readString(outputFile.toPath());
+    String content = Files.readString(outputFile.toPath());
     assertEquals("1\n2\n3\n4\n", content);
     // Also try reading it in as a text file RDD
     List<String> expected = Arrays.asList("1", "2", "3", "4");
@@ -977,8 +977,8 @@ public class JavaAPISuite implements Serializable {
     String path1 = new Path(tempDirName, "part-00000").toUri().getPath();
     String path2 = new Path(tempDirName, "part-00001").toUri().getPath();
-    Files.write(content1, new File(path1));
-    Files.write(content2, new File(path2));
+    Files.write(new File(path1).toPath(), content1);
+    Files.write(new File(path2).toPath(), content2);

     Map<String, String> container = new HashMap<>();
     container.put(path1, new Text(content1).toString());
diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorClassLoaderSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorClassLoaderSuite.scala
index 22a3ab075cf8..fbb52971960c 100644
--- a/core/src/test/scala/org/apache/spark/executor/ExecutorClassLoaderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/executor/ExecutorClassLoaderSuite.scala
@@ -22,14 +22,13 @@ import java.lang.reflect.InvocationTargetException
 import java.net.{URI, URL, URLClassLoader}
 import java.nio.channels.{FileChannel, ReadableByteChannel}
 import java.nio.charset.StandardCharsets
-import java.nio.file.{Paths, StandardOpenOption}
+import java.nio.file.{Files, Paths, StandardOpenOption}
 import java.util
 import java.util.Collections
 import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider}

 import scala.io.Source

-import com.google.common.io.Files
 import org.mockito.ArgumentMatchers.{any, anyString}
 import org.mockito.Mockito._
 import org.mockito.invocation.InvocationOnMock
@@ -65,7 +64,7 @@ class ExecutorClassLoaderSuite
     urls2 = List(tempDir2.toURI.toURL).toArray
     childClassNames.foreach(TestUtils.createCompiledClass(_, tempDir1, "1"))
     parentResourceNames.foreach { x =>
-      Files.write("resource".getBytes(StandardCharsets.UTF_8), new File(tempDir2, x))
+      Files.writeString(new File(tempDir2, x).toPath, "resource")
     }
     parentClassNames.foreach(TestUtils.createCompiledClass(_, tempDir2, "2"))
   }
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
index fe50d118a57e..a6b9caae8d36 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
@@ -18,9 +18,8 @@ package org.apache.spark.deploy.yarn

 import java.io.{File, IOException}
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files

-import com.google.common.io.Files
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.scalatest.matchers.must.Matchers
@@ -54,7 +53,7 @@ class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with ResetSys
     val args = Array("arg1", "${arg.2}", "\"arg3\"", "'arg4'", "$arg5", "\\arg6")

     try {
       val argLine = args.map(a => YarnSparkHadoopUtil.escapeForShell(a)).mkString(" ")
-      Files.write(("bash -c \"echo " + argLine + "\"").getBytes(StandardCharsets.UTF_8), scriptFile)
+      Files.writeString(scriptFile.toPath, "bash -c \"echo " + argLine + "\"")
       scriptFile.setExecutable(true)

       val proc = Runtime.getRuntime().exec(Array(scriptFile.getAbsolutePath()))
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
index 7ac2f5feb97c..f9c001a1a077 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
@@ -18,9 +18,9 @@ package org.apache.spark.sql.hive

 import java.io.File
+import java.nio.file.Files
 import java.util.Locale

-import com.google.common.io.Files
 import org.apache.hadoop.fs.Path
 import org.scalatest.BeforeAndAfter
@@ -824,7 +824,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter
     withTempDir { dir =>
       val file = new File(dir, "test.hex")
       val hex = "AABBCC"
-      Files.write(Hex.unhex(hex), file)
+      Files.write(file.toPath, Hex.unhex(hex))
       val path = file.getParent
       sql(s"create table t1 (c string) STORED AS TEXTFILE location '$path'")
       checkAnswer(
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
index 42d3e5eb4995..d9a084e482eb 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
@@ -20,14 +20,13 @@ package org.apache.spark.streaming
 import java.io.{BufferedWriter, File, OutputStreamWriter}
 import java.net.{ServerSocket, Socket, SocketException}
 import java.nio.charset.StandardCharsets
-import java.nio.file.Files.writeString
+import java.nio.file.Files
 import java.util.concurrent._
 import java.util.concurrent.atomic.AtomicInteger

 import scala.collection.mutable
 import scala.jdk.CollectionConverters._

-import com.google.common.io.Files
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.io.{LongWritable, Text}
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
@@ -132,7 +131,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     val batchDuration = Seconds(2)
     // Create a file that exists before the StreamingContext is created:
     val existingFile = new File(testDir, "0")
-    writeString(existingFile.toPath, "0\n")
+    Files.writeString(existingFile.toPath, "0\n")
     assert(existingFile.setLastModified(10000) && existingFile.lastModified === 10000)

     // Set up the streaming context and input streams
@@ -156,7 +155,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     for (i <- 0 until numCopies) {
       Thread.sleep(batchDuration.milliseconds)
       val file = new File(testDir, i.toString)
-      Files.write(input.map(b => (b + i).toByte), file)
+      Files.write(file.toPath, input.map(b => (b + i).toByte))
       assert(file.setLastModified(clock.getTimeMillis()))
       assert(file.lastModified === clock.getTimeMillis())
       logInfo(s"Created file $file")
@@ -191,7 +190,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {

     // Create a file that exists before the StreamingContext is created:
     val existingFile = new File(testDir, "0")
-    writeString(existingFile.toPath, "0\n")
+    Files.writeString(existingFile.toPath, "0\n")
     assert(existingFile.setLastModified(10000) && existingFile.lastModified === 10000)

     val pathWithWildCard = testDir.toString + "/*/"
@@ -215,7 +214,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     def createFileAndAdvanceTime(data: Int, dir: File): Unit = {
       val file = new File(testSubDir1, data.toString)
-      writeString(file.toPath, s"$data\n")
+      Files.writeString(file.toPath, s"$data\n")
       assert(file.setLastModified(clock.getTimeMillis()))
       assert(file.lastModified === clock.getTimeMillis())
       logInfo(s"Created file $file")
@@ -478,7 +477,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     val batchDuration = Seconds(2)
     // Create a file that exists before the StreamingContext is created:
     val existingFile = new File(testDir, "0")
-    writeString(existingFile.toPath, "0\n")
+    Files.writeString(existingFile.toPath, "0\n")
     assert(existingFile.setLastModified(10000) && existingFile.lastModified === 10000)

     // Set up the streaming context and input streams
@@ -502,7 +501,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     val input = Seq(1, 2, 3, 4, 5)
     input.foreach { i =>
       val file = new File(testDir, i.toString)
-      writeString(file.toPath, s"$i\n")
+      Files.writeString(file.toPath, s"$i\n")
       assert(file.setLastModified(clock.getTimeMillis()))
       assert(file.lastModified === clock.getTimeMillis())
       logInfo("Created file " + file)
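The diff above applies two mechanical substitutions: Guava's `Files.write(bytes, file)` becomes `java.nio.file.Files.write(path, bytes)` (note the reversed argument order, target first), and Guava byte-encoded string writes become `Files.writeString(path, text)`. The following standalone Scala sketch illustrates both patterns under illustrative assumptions; the temporary directory, file names, and contents are made up for the example and are not taken from the Spark test suites.

```scala
import java.io.File
import java.nio.file.Files

object FilesWriteMigrationSketch {
  def main(args: Array[String]): Unit = {
    // Illustrative temp directory and file names, not from the Spark tests.
    val dir = Files.createTempDirectory("spark-53210-sketch").toFile
    val binFile = new File(dir, "data.bin")
    val txtFile = new File(dir, "resource.txt")
    val bytes = Array.tabulate[Byte](16)(_.toByte)

    // Guava style (removed by this commit): com.google.common.io.Files.write(bytes, binFile)
    // JDK NIO style: the target Path comes first, then the payload.
    Files.write(binFile.toPath, bytes)

    // Guava style: Files.write("resource".getBytes(java.nio.charset.StandardCharsets.UTF_8), txtFile)
    // JDK NIO style (Java 11+): Files.writeString writes UTF-8 text without manual encoding.
    Files.writeString(txtFile.toPath, "resource")

    // Round-trip both files to confirm the writes.
    assert(Files.readAllBytes(binFile.toPath).sameElements(bytes))
    assert(Files.readString(txtFile.toPath) == "resource")
  }
}
```

Both `Files.write(Path, Array[Byte])` and `Files.writeString(Path, CharSequence)` create the file if it does not exist and truncate it otherwise, which matches the overwrite behavior the Guava calls provided in these tests.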