This is an automated email from the ASF dual-hosted git repository.

yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new ccff9980762f [SPARK-53210][CORE][SQL][DSTREAM][YARN] Use Java `Files.write(String)?` instead of `com.google.common.io.Files.write`
ccff9980762f is described below

commit ccff9980762fd83f2619532028c04cb030b632bb
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Sat Aug 9 16:07:57 2025 +0800

    [SPARK-53210][CORE][SQL][DSTREAM][YARN] Use Java `Files.write(String)?` instead of `com.google.common.io.Files.write`

    ### What changes were proposed in this pull request?

    This PR aims to use Java `Files.write` instead of `com.google.common.io.Files.write`.

    ### Why are the changes needed?

    The Java native API is faster than the Guava API.

    ```scala
    scala> val a = new Array[Byte](2_000_000_000)

    scala> spark.time(java.nio.file.Files.write(java.nio.file.Path.of("/tmp/a"), a))
    Time taken: 560 ms

    scala> spark.time(com.google.common.io.Files.write(a, new java.io.File("/tmp/a")))
    Time taken: 685 ms
    ```

    ### Does this PR introduce _any_ user-facing change?

    No. This is a test-only change.

    ### How was this patch tested?

    Pass the CIs.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #51935 from dongjoon-hyun/SPARK-53210.

    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: yangjie01 <yangji...@baidu.com>
---
 .../org/apache/spark/network/sasl/SparkSaslSuite.java      |  6 +++---
 .../src/test/java/test/org/apache/spark/JavaAPISuite.java  |  8 ++++----
 .../apache/spark/executor/ExecutorClassLoaderSuite.scala   |  5 ++---
 .../spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala       |  5 ++---
 .../scala/org/apache/spark/sql/hive/InsertSuite.scala      |  4 ++--
 .../org/apache/spark/streaming/InputStreamsSuite.scala     | 15 +++++++--------
 6 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
index 48376e5bf2d4..715b4489edbd 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
@@ -23,6 +23,7 @@ import static org.mockito.Mockito.*;
 import java.io.File;
 import java.lang.reflect.Method;
 import java.nio.ByteBuffer;
+import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -36,7 +37,6 @@ import java.util.concurrent.atomic.AtomicReference;
 import javax.security.sasl.SaslException;

 import com.google.common.collect.ImmutableMap;
-import com.google.common.io.Files;
 import io.netty.buffer.ByteBuf;
 import io.netty.buffer.Unpooled;
 import io.netty.channel.Channel;
@@ -220,7 +220,7 @@ public class SparkSaslSuite {
     byte[] data = new byte[8 * 1024];
     new Random().nextBytes(data);
-    Files.write(data, file);
+    Files.write(file.toPath(), data);

     SaslEncryptionBackend backend = mock(SaslEncryptionBackend.class);
     // It doesn't really matter what we return here, as long as it's not null.
@@ -261,7 +261,7 @@ public class SparkSaslSuite {
     byte[] data = new byte[8 * 1024];
     new Random().nextBytes(data);
-    Files.write(data, file);
+    Files.write(file.toPath(), data);

     ctx = new SaslTestCtx(rpcHandler, true, false, testConf);
diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
index 57eafc38d3a6..4b5aeeec2392 100644
--- a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
@@ -21,6 +21,7 @@ import java.io.*;
 import java.nio.channels.FileChannel;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -49,7 +50,6 @@ import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 import com.google.common.base.Throwables;
-import com.google.common.io.Files;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
@@ -960,7 +960,7 @@ public class JavaAPISuite implements Serializable {
     rdd.saveAsTextFile(outputDir);
     // Read the plain text file and check it's OK
     File outputFile = new File(outputDir, "part-00000");
-    String content = java.nio.file.Files.readString(outputFile.toPath());
+    String content = Files.readString(outputFile.toPath());
     assertEquals("1\n2\n3\n4\n", content);
     // Also try reading it in as a text file RDD
     List<String> expected = Arrays.asList("1", "2", "3", "4");
@@ -977,8 +977,8 @@ public class JavaAPISuite implements Serializable {
     String path1 = new Path(tempDirName, "part-00000").toUri().getPath();
     String path2 = new Path(tempDirName, "part-00001").toUri().getPath();
-    Files.write(content1, new File(path1));
-    Files.write(content2, new File(path2));
+    Files.write(new File(path1).toPath(), content1);
+    Files.write(new File(path2).toPath(), content2);

     Map<String, String> container = new HashMap<>();
     container.put(path1, new Text(content1).toString());
diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorClassLoaderSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorClassLoaderSuite.scala
index 22a3ab075cf8..fbb52971960c 100644
--- a/core/src/test/scala/org/apache/spark/executor/ExecutorClassLoaderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/executor/ExecutorClassLoaderSuite.scala
@@ -22,14 +22,13 @@ import java.lang.reflect.InvocationTargetException
 import java.net.{URI, URL, URLClassLoader}
 import java.nio.channels.{FileChannel, ReadableByteChannel}
 import java.nio.charset.StandardCharsets
-import java.nio.file.{Paths, StandardOpenOption}
+import java.nio.file.{Files, Paths, StandardOpenOption}
 import java.util
 import java.util.Collections
 import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider}

 import scala.io.Source

-import com.google.common.io.Files
 import org.mockito.ArgumentMatchers.{any, anyString}
 import org.mockito.Mockito._
 import org.mockito.invocation.InvocationOnMock
@@ -65,7 +64,7 @@ class ExecutorClassLoaderSuite
     urls2 = List(tempDir2.toURI.toURL).toArray
     childClassNames.foreach(TestUtils.createCompiledClass(_, tempDir1, "1"))
     parentResourceNames.foreach { x =>
-      Files.write("resource".getBytes(StandardCharsets.UTF_8), new File(tempDir2, x))
+      Files.writeString(new File(tempDir2, x).toPath, "resource")
     }
     parentClassNames.foreach(TestUtils.createCompiledClass(_, tempDir2, "2"))
   }
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
index fe50d118a57e..a6b9caae8d36 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
@@ -18,9 +18,8 @@ package org.apache.spark.deploy.yarn

 import java.io.{File, IOException}
-import java.nio.charset.StandardCharsets
+import java.nio.file.Files

-import com.google.common.io.Files
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.scalatest.matchers.must.Matchers
@@ -54,7 +53,7 @@ class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with ResetSys
     val args = Array("arg1", "${arg.2}", "\"arg3\"", "'arg4'", "$arg5", "\\arg6")

     try {
       val argLine = args.map(a => YarnSparkHadoopUtil.escapeForShell(a)).mkString(" ")
-      Files.write(("bash -c \"echo " + argLine + "\"").getBytes(StandardCharsets.UTF_8), scriptFile)
+      Files.writeString(scriptFile.toPath, "bash -c \"echo " + argLine + "\"")
       scriptFile.setExecutable(true)

       val proc = Runtime.getRuntime().exec(Array(scriptFile.getAbsolutePath()))
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
index 7ac2f5feb97c..f9c001a1a077 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala
@@ -18,9 +18,9 @@ package org.apache.spark.sql.hive

 import java.io.File
+import java.nio.file.Files
 import java.util.Locale

-import com.google.common.io.Files
 import org.apache.hadoop.fs.Path
 import org.scalatest.BeforeAndAfter
@@ -824,7 +824,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter
     withTempDir { dir =>
       val file = new File(dir, "test.hex")
       val hex = "AABBCC"
-      Files.write(Hex.unhex(hex), file)
+      Files.write(file.toPath, Hex.unhex(hex))
       val path = file.getParent
       sql(s"create table t1 (c string) STORED AS TEXTFILE location '$path'")
       checkAnswer(
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
index 42d3e5eb4995..d9a084e482eb 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
@@ -20,14 +20,13 @@ package org.apache.spark.streaming
 import java.io.{BufferedWriter, File, OutputStreamWriter}
 import java.net.{ServerSocket, Socket, SocketException}
 import java.nio.charset.StandardCharsets
-import java.nio.file.Files.writeString
+import java.nio.file.Files
 import java.util.concurrent._
 import java.util.concurrent.atomic.AtomicInteger

 import scala.collection.mutable
 import scala.jdk.CollectionConverters._

-import com.google.common.io.Files
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.io.{LongWritable, Text}
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
@@ -132,7 +131,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     val batchDuration = Seconds(2)
     // Create a file that exists before the StreamingContext is created:
     val existingFile = new File(testDir, "0")
-    writeString(existingFile.toPath, "0\n")
+    Files.writeString(existingFile.toPath, "0\n")
     assert(existingFile.setLastModified(10000) && existingFile.lastModified === 10000)

     // Set up the streaming context and input streams
@@ -156,7 +155,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     for (i <- 0 until numCopies) {
       Thread.sleep(batchDuration.milliseconds)
       val file = new File(testDir, i.toString)
-      Files.write(input.map(b => (b + i).toByte), file)
+      Files.write(file.toPath, input.map(b => (b + i).toByte))
       assert(file.setLastModified(clock.getTimeMillis()))
       assert(file.lastModified === clock.getTimeMillis())
       logInfo(s"Created file $file")
@@ -191,7 +190,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {

     // Create a file that exists before the StreamingContext is created:
     val existingFile = new File(testDir, "0")
-    writeString(existingFile.toPath, "0\n")
+    Files.writeString(existingFile.toPath, "0\n")
     assert(existingFile.setLastModified(10000) && existingFile.lastModified === 10000)

     val pathWithWildCard = testDir.toString + "/*/"
@@ -215,7 +214,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     def createFileAndAdvanceTime(data: Int, dir: File): Unit = {
       val file = new File(testSubDir1, data.toString)
-      writeString(file.toPath, s"$data\n")
+      Files.writeString(file.toPath, s"$data\n")
       assert(file.setLastModified(clock.getTimeMillis()))
       assert(file.lastModified === clock.getTimeMillis())
       logInfo(s"Created file $file")
@@ -478,7 +477,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     val batchDuration = Seconds(2)
     // Create a file that exists before the StreamingContext is created:
     val existingFile = new File(testDir, "0")
-    writeString(existingFile.toPath, "0\n")
+    Files.writeString(existingFile.toPath, "0\n")
     assert(existingFile.setLastModified(10000) && existingFile.lastModified === 10000)

     // Set up the streaming context and input streams
@@ -502,7 +501,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
     val input = Seq(1, 2, 3, 4, 5)
     input.foreach { i =>
       val file = new File(testDir, i.toString)
-      writeString(file.toPath, s"$i\n")
+      Files.writeString(file.toPath, s"$i\n")
       assert(file.setLastModified(clock.getTimeMillis()))
       assert(file.lastModified === clock.getTimeMillis())
       logInfo("Created file " + file)
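The diff above applies two mechanical substitutions: Guava's `Files.write(bytes, file)` becomes `java.nio.file.Files.write(path, bytes)` (note the reversed argument order, target first), and Guava byte-encoded string writes become `Files.writeString(path, text)`. The following standalone Scala sketch illustrates both patterns under illustrative assumptions; the temporary directory, file names, and contents are made up for the example and are not taken from the Spark test suites.

```scala
import java.io.File
import java.nio.file.Files

object FilesWriteMigrationSketch {
  def main(args: Array[String]): Unit = {
    // Illustrative temp directory and file names, not from the Spark tests.
    val dir = Files.createTempDirectory("spark-53210-sketch").toFile
    val binFile = new File(dir, "data.bin")
    val txtFile = new File(dir, "resource.txt")
    val bytes = Array.tabulate[Byte](16)(_.toByte)

    // Guava style (removed by this commit): com.google.common.io.Files.write(bytes, binFile)
    // JDK NIO style: the target Path comes first, then the payload.
    Files.write(binFile.toPath, bytes)

    // Guava style: Files.write("resource".getBytes(java.nio.charset.StandardCharsets.UTF_8), txtFile)
    // JDK NIO style (Java 11+): Files.writeString writes UTF-8 text without manual encoding.
    Files.writeString(txtFile.toPath, "resource")

    // Round-trip both files to confirm the writes.
    assert(Files.readAllBytes(binFile.toPath).sameElements(bytes))
    assert(Files.readString(txtFile.toPath) == "resource")
  }
}
```

Both `Files.write(Path, Array[Byte])` and `Files.writeString(Path, CharSequence)` create the file if it does not exist and truncate it otherwise, which matches the overwrite behavior the Guava calls provided in these tests.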