This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b206304b897a [SPARK-53082][CORE][SQL][TESTS] Use Java `Files.readString` instead of `FileUtils.readFileToString` b206304b897a is described below commit b206304b897a606dc924eb70a6ed0861efd7782a Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Sat Aug 2 23:43:53 2025 -0700 [SPARK-53082][CORE][SQL][TESTS] Use Java `Files.readString` instead of `FileUtils.readFileToString` ### What changes were proposed in this pull request? This PR aims to use Java `Files.readString` instead of `FileUtils.readFileToString`. ### Why are the changes needed? `Files.readString` is roughly 2.4 times faster (347 ms vs 144 ms in the benchmark below). **BEFORE** ```scala scala> spark.time(org.apache.commons.io.FileUtils.readFileToString(new java.io.File("/tmp/500000000_byte")).length) Time taken: 347 ms ``` **AFTER** ```scala scala> spark.time(java.nio.file.Files.readString(java.nio.file.Path.of("/tmp/500000000_byte")).length) Time taken: 144 ms ``` ### Does this PR introduce _any_ user-facing change? No behavior change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51794 from dongjoon-hyun/SPARK-53082. 
Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- core/src/test/scala/org/apache/spark/SparkFunSuite.scala | 2 +- core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 4 ++-- scalastyle-config.xml | 5 +++++ .../src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala | 7 +++---- .../scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala | 3 ++- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala index 7bb5b9929177..0f80c5e65696 100644 --- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala @@ -205,7 +205,7 @@ abstract class SparkFunSuite logInfo("\n\n===== EXTRA LOGS FOR THE FAILED TEST\n") workerLogfiles.foreach { logFile => logInfo(s"\n----- Logfile: ${logFile.getAbsolutePath()}") - logInfo(FileUtils.readFileToString(logFile, "UTF-8")) + logInfo(Files.readString(logFile.toPath)) } } } diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 43755387c9ea..c92cb4eb6be1 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1293,8 +1293,8 @@ class SparkSubmitSuite // The path and filename are preserved. 
assert(outputUri.getPath.endsWith(new Path(sourceUri).getName)) - assert(FileUtils.readFileToString(new File(outputUri.getPath), StandardCharsets.UTF_8) === - FileUtils.readFileToString(new File(sourceUri.getPath), StandardCharsets.UTF_8)) + assert(Files.readString(new File(outputUri.getPath).toPath) === + Files.readString(new File(sourceUri.getPath).toPath)) } private def deleteTempOutputFile(outputPath: String): Unit = { diff --git a/scalastyle-config.xml b/scalastyle-config.xml index c8dd063efb21..9f6fb363830c 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -287,6 +287,11 @@ This file is divided into 3 sections: <customMessage>Use Files.readAllLines instead.</customMessage> </check> + <check customId="readLines" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> + <parameters><parameter name="regex">FileUtils\.readFileToString</parameter></parameters> + <customMessage>Use Files.readString instead.</customMessage> + </check> + <check customId="writeLines" level="error" class="org.scalastyle.file.RegexChecker" enabled="true"> <parameters><parameter name="regex">FileUtils\.writeLines</parameter></parameters> <customMessage>Use Files.write instead.</customMessage> diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index 62a1c5211b77..59bc28c1c242 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -104,9 +104,9 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { private def isApproved( dir: File, actualSimplifiedPlan: String, actualExplain: String): Boolean = { val simplifiedFile = new File(dir, "simplified.txt") - val expectedSimplified = FileUtils.readFileToString(simplifiedFile, StandardCharsets.UTF_8) + val expectedSimplified = Files.readString(simplifiedFile.toPath) lazy val explainFile = new 
File(dir, "explain.txt") - lazy val expectedExplain = FileUtils.readFileToString(explainFile, StandardCharsets.UTF_8) + lazy val expectedExplain = Files.readString(explainFile.toPath) expectedSimplified == actualSimplifiedPlan && expectedExplain == actualExplain } @@ -150,8 +150,7 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { val actualSimplifiedFile = new File(tempDir, s"$name.actual.simplified.txt") val actualExplainFile = new File(tempDir, s"$name.actual.explain.txt") - val approvedSimplified = FileUtils.readFileToString( - approvedSimplifiedFile, StandardCharsets.UTF_8) + val approvedSimplified = Files.readString(approvedSimplifiedFile.toPath) // write out for debugging Files.writeString(actualSimplifiedFile.toPath(), actualSimplified, StandardCharsets.UTF_8) Files.writeString(actualExplainFile.toPath(), explain, StandardCharsets.UTF_8) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index a1623b9da750..1ccd19962831 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.streaming import java.io.File import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.Files import java.util.Collections import java.util.concurrent.CountDownLatch @@ -1109,7 +1110,7 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi // Ideally we should copy "_spark_metadata" directly like what the user is supposed to do to // migrate to new version. However, in our test, "tempDir" will be different in each run and // we need to fix the absolute path in the metadata to match "tempDir". 
- val sparkMetadata = FileUtils.readFileToString(new File(legacySparkMetadataDir, "0"), UTF_8) + val sparkMetadata = Files.readString(new File(legacySparkMetadataDir, "0").toPath) FileUtils.write( new File(legacySparkMetadataDir, "0"), sparkMetadata.replaceAll("TEMPDIR", dir.getCanonicalPath), UTF_8) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org