This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 44fd3587868e [SPARK-53185][CORE][YARN][TESTS] Use `SparkStreamUtils.toString` instead of `ByteStreams.toByteArray`
44fd3587868e is described below

commit 44fd3587868e4735838cc184973fe5cc85259063
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Thu Aug 7 17:08:33 2025 -0700

    [SPARK-53185][CORE][YARN][TESTS] Use `SparkStreamUtils.toString` instead of `ByteStreams.toByteArray`
    
    ### What changes were proposed in this pull request?
    
    This PR aims to use `SparkStreamUtils.toString` instead of the `new String(ByteStreams.toByteArray(...))` pattern.
    
    ```scala
    -  val decrypted = new String(ByteStreams.toByteArray(in), UTF_8)
    -  assert(content === decrypted)
    +  assert(content === Utils.toString(in))
    ```
    
    ### Why are the changes needed?
    
    To simplify the code, because an equivalent helper already exists:
    
    
https://github.com/apache/spark/blob/c77f316cae6032171936587a2dba1d0a633879ae/common/utils/src/main/scala/org/apache/spark/util/SparkStreamUtils.scala#L109-L111
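
    For reference, a minimal sketch of what the reused helper is assumed to do (a hypothetical `streamToString` standing in for the linked `SparkStreamUtils.toString`): drain the stream and decode the bytes as UTF-8.

    ```scala
    import java.io.{ByteArrayInputStream, InputStream}
    import java.nio.charset.StandardCharsets

    object StreamToStringSketch {
      // Hypothetical stand-in for SparkStreamUtils.toString: drain the
      // stream and decode as UTF-8 (assumed behavior, per the link above).
      def streamToString(in: InputStream): String =
        new String(in.readAllBytes(), StandardCharsets.UTF_8)

      def main(args: Array[String]): Unit = {
        // With such a helper, the old two-step pattern collapses to one call:
        val in = new ByteArrayInputStream("hello".getBytes(StandardCharsets.UTF_8))
        assert(streamToString(in) == "hello")
      }
    }
    ```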
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, this is a test-only change.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #51912 from dongjoon-hyun/SPARK-53185.
    
    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../org/apache/spark/deploy/history/FsHistoryProviderSuite.scala    | 4 +---
 .../scala/org/apache/spark/deploy/history/HistoryServerSuite.scala  | 6 +-----
 .../scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala    | 4 ++--
 .../org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala     | 6 +++---
 4 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 563fc85c8ceb..0e282822e63e 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -26,7 +26,6 @@ import java.util.zip.{ZipInputStream, ZipOutputStream}
 
 import scala.concurrent.duration._
 
-import com.google.common.io.ByteStreams
 import org.apache.hadoop.fs.{FileStatus, FileSystem, FSDataInputStream, Path}
 import org.apache.hadoop.hdfs.{DFSInputStream, DistributedFileSystem}
 import org.apache.hadoop.ipc.{CallerContext => HadoopCallerContext}
@@ -707,9 +706,8 @@ abstract class FsHistoryProviderSuite extends SparkFunSuite with Matchers with P
       var entry = inputStream.getNextEntry
       entry should not be null
       while (entry != null) {
-        val actual = new String(ByteStreams.toByteArray(inputStream), StandardCharsets.UTF_8)
         val expected = Files.readString(logs.find(_.getName == entry.getName).get.toPath)
-        actual should be (expected)
+        Utils.toString(inputStream) should be (expected)
         totalEntries += 1
         entry = inputStream.getNextEntry
       }
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
index 9d064d31672f..0a564f571521 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala
@@ -18,14 +18,12 @@ package org.apache.spark.deploy.history
 
 import java.io.{File, FileInputStream, FileWriter, InputStream, IOException}
 import java.net.{HttpURLConnection, URI, URL}
-import java.nio.charset.StandardCharsets
 import java.nio.file.Files
 import java.util.zip.ZipInputStream
 
 import scala.concurrent.duration._
 import scala.jdk.CollectionConverters._
 
-import com.google.common.io.ByteStreams
 import jakarta.servlet._
 import jakarta.servlet.http.{HttpServletRequest, HttpServletRequestWrapper, HttpServletResponse}
 import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
@@ -309,9 +307,7 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with
         val expectedFile = {
           new File(logDir, entry.getName)
         }
-        val expected = Files.readString(expectedFile.toPath)
-        val actual = new String(ByteStreams.toByteArray(zipStream), StandardCharsets.UTF_8)
-        actual should be (expected)
+        Utils.toString(zipStream) should be (Files.readString(expectedFile.toPath))
         filesCompared += 1
       }
       entry = zipStream.getNextEntry
diff --git a/core/src/test/scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala b/core/src/test/scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala
index e3171116a3e1..83a9eb1df98f 100644
--- a/core/src/test/scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/security/CryptoStreamUtilsSuite.scala
@@ -30,6 +30,7 @@ import org.apache.spark.network.util.CryptoUtils
 import org.apache.spark.security.CryptoStreamUtils._
 import org.apache.spark.serializer.{JavaSerializer, SerializerManager}
 import org.apache.spark.storage.TempShuffleBlockId
+import org.apache.spark.util.Utils
 
 class CryptoStreamUtilsSuite extends SparkFunSuite {
 
@@ -116,8 +117,7 @@ class CryptoStreamUtilsSuite extends SparkFunSuite {
 
       val in = CryptoStreamUtils.createCryptoInputStream(new ByteArrayInputStream(encrypted),
         sc.conf, SparkEnv.get.securityManager.getIOEncryptionKey().get)
-      val decrypted = new String(ByteStreams.toByteArray(in), UTF_8)
-      assert(content === decrypted)
+      assert(content === Utils.toString(in))
     } finally {
       sc.stop()
     }
diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
index 562ce3b5bdba..fe50d118a57e 100644
--- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
+++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.deploy.yarn
 import java.io.{File, IOException}
 import java.nio.charset.StandardCharsets
 
-import com.google.common.io.{ByteStreams, Files}
+import com.google.common.io.Files
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType
 import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.scalatest.matchers.must.Matchers
@@ -58,8 +58,8 @@ class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with ResetSys
       scriptFile.setExecutable(true)
 
       val proc = Runtime.getRuntime().exec(Array(scriptFile.getAbsolutePath()))
-      val out = new String(ByteStreams.toByteArray(proc.getInputStream())).trim()
-      val err = new String(ByteStreams.toByteArray(proc.getErrorStream()))
+      val out = Utils.toString(proc.getInputStream()).trim()
+      val err = Utils.toString(proc.getErrorStream())
       val exitCode = proc.waitFor()
       exitCode should be (0)
       out should be (args.mkString(" "))

