(spark) branch master updated: [SPARK-53090][CORE][SS][TESTS] Use Java `OutputStream.write` instead of `IOUtils.write`

dongjoon Sun, 03 Aug 2025 17:10:00 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new a8e98340be51 [SPARK-53090][CORE][SS][TESTS] Use Java `OutputStream.write` instead of `IOUtils.write`
a8e98340be51 is described below

commit a8e98340be5189d4be7f3dfe1863faddc3e574bc
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Sun Aug 3 17:09:45 2025 -0700

    [SPARK-53090][CORE][SS][TESTS] Use Java `OutputStream.write` instead of `IOUtils.write`
    
    ### What changes were proposed in this pull request?
    
    This PR aims to use Java `OutputStream.write` instead of `IOUtils.write`.
    
    ### Why are the changes needed?
    
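    To use a faster implementation for our use cases: in the benchmark below, writing a 400-million-character string takes about 270 ms with Java `OutputStream.write` versus about 1070 ms with `IOUtils.write`.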
    
    ```scala
    scala> val s = "a".repeat(400_000_000)
    
    scala> spark.time(new java.io.FileOutputStream("/tmp/a").write(s.getBytes()))
    Time taken: 270 ms
    
    scala> spark.time(org.apache.commons.io.IOUtils.write(s, new java.io.FileOutputStream("/tmp/a")))
    Time taken: 1070 ms
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #51804 from dongjoon-hyun/SPARK-53090.
    
    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 core/src/test/scala/org/apache/spark/util/UtilsSuite.scala           | 3 +--
 scalastyle-config.xml                                                | 5 +++++
 .../test/scala/org/apache/spark/streaming/InputStreamsSuite.scala    | 3 +--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 33739e8129e5..53be7da17d8c 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -32,7 +32,6 @@ import scala.collection.mutable.ListBuffer
 import scala.util.{Random, Try}
 
 import com.google.common.io.Files
-import org.apache.commons.io.IOUtils
 import org.apache.commons.math3.stat.inference.ChiSquareTest
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
@@ -343,7 +342,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties {
     } else {
       new FileOutputStream(path)
     }
-    IOUtils.write(content, outputStream)
+    outputStream.write(content)
     outputStream.close()
     content.length
   }
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 914861a14ea8..d0e15998c36a 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
@@ -627,6 +627,11 @@ This file is divided into 3 sections:
     <customMessage>Use toString of SparkStreamUtils or Utils instead.</customMessage>
   </check>
 
+  <check customId="ioutilswrite" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
+    <parameters><parameter name="regex">\bIOUtils\.write\b</parameter></parameters>
+    <customMessage>Use Java `write` instead.</customMessage>
+  </check>
+
   <check customId="maputils" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
     <parameters><parameter name="regex">org\.apache\.commons\.collections4\.MapUtils\b</parameter></parameters>
     <customMessage>Use org.apache.spark.util.collection.Utils instead.</customMessage>
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
index 64335a96045b..70ed0a4b4bd8 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala
@@ -27,7 +27,6 @@ import scala.collection.mutable
 import scala.jdk.CollectionConverters._
 
 import com.google.common.io.Files
-import org.apache.commons.io.IOUtils
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.io.{LongWritable, Text}
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
@@ -264,7 +263,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter {
 
         def write(path: Path, text: String): Unit = {
           val out = fs.create(path, true)
-          IOUtils.write(text, out, StandardCharsets.UTF_8)
+          out.write(text.getBytes(StandardCharsets.UTF_8))
           out.close()
         }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-53090][CORE][SS][TESTS] Use Java `OutputStream.write` instead of `IOUtils.write`

Reply via email to