(spark) branch master updated: [SPARK-53246][TEST] remove class files for ReplSuite

wenchen Tue, 12 Aug 2025 07:34:48 -0700

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 554f6b64f1e2 [SPARK-53246][TEST] remove class files for ReplSuite
554f6b64f1e2 is described below

commit 554f6b64f1e2b2346499f6d3340a3695244bfc84
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Tue Aug 12 22:34:08 2025 +0800

    [SPARK-53246][TEST] remove class files for ReplSuite
    
    ### What changes were proposed in this pull request?
    
    Keeping compiled class files in the Spark repo poses a security concern. This PR removes one class file for `ReplSuite` and generates it dynamically.
    
    ### Why are the changes needed?
    
    test cleanup
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    N/A
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    no
    
    Closes #51974 from cloud-fan/minor.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 dev/test-classes.txt                               |   1 -
 repl/src/test/resources/IntSumUdf.class            | Bin 1333 -> 0 bytes
 .../org/apache/spark/repl}/IntSumUdf.scala         |   2 +
 .../scala/org/apache/spark/repl/ReplSuite.scala    | 126 +++++++++++----------
 4 files changed, 69 insertions(+), 60 deletions(-)

diff --git a/dev/test-classes.txt b/dev/test-classes.txt
index 5315c970c5ba..2dc6b290ad4f 100644
--- a/dev/test-classes.txt
+++ b/dev/test-classes.txt
@@ -1,4 +1,3 @@
-repl/src/test/resources/IntSumUdf.class
 sql/core/src/test/resources/artifact-tests/Hello.class
 sql/core/src/test/resources/artifact-tests/IntSumUdf.class
 sql/core/src/test/resources/artifact-tests/smallClassFile.class
diff --git a/repl/src/test/resources/IntSumUdf.class b/repl/src/test/resources/IntSumUdf.class
deleted file mode 100644
index 75a41446cfca..000000000000
Binary files a/repl/src/test/resources/IntSumUdf.class and /dev/null differ
diff --git a/repl/src/test/resources/IntSumUdf.scala b/repl/src/test/scala/org/apache/spark/repl/IntSumUdf.scala
similarity index 96%
rename from repl/src/test/resources/IntSumUdf.scala
rename to repl/src/test/scala/org/apache/spark/repl/IntSumUdf.scala
index 9678caaed5db..9f2767127765 100644
--- a/repl/src/test/resources/IntSumUdf.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/IntSumUdf.scala
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 
+package org.apache.spark.repl
+
 import org.apache.spark.sql.api.java.UDF2
 
 class IntSumUdf extends UDF2[Long, Long, Long] {
diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index 02555fdd1535..4471f93840e2 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -398,68 +398,76 @@ class ReplSuite extends SparkFunSuite {
   }
 
   test("register UDF via SparkSession.addArtifact") {
-    val artifactPath = new File("src/test/resources").toPath
-    val intSumUdfPath = artifactPath.resolve("IntSumUdf.class")
-    assume(intSumUdfPath.toFile.exists)
-    val output = runInterpreterInPasteMode("local",
-      s"""
-         |import org.apache.spark.sql.api.java.UDF2
-         |import org.apache.spark.sql.types.DataTypes
-         |
-         |spark.addArtifact("${intSumUdfPath.toString}")
-         |
-         |spark.udf.registerJava("intSum", "IntSumUdf", DataTypes.LongType)
-         |
-         |val r = spark.range(5)
-         |  .withColumn("id2", col("id") + 1)
-         |  .selectExpr("intSum(id, id2)")
-         |  .collect()
-         |assert(r.map(_.getLong(0)).toSeq == Seq(1, 3, 5, 7, 9))
-         |
-      """.stripMargin)
-    assertContains("Array([1], [3], [5], [7], [9])", output)
-    assertDoesNotContain("error:", output)
-    assertDoesNotContain("Exception", output)
-    assertDoesNotContain("assertion failed", output)
-
-    // The UDF should not work in a new REPL session.
-    val anotherOutput = runInterpreterInPasteMode("local",
-      s"""
-         |val r = spark.range(5)
-         |  .withColumn("id2", col("id") + 1)
-         |  .selectExpr("intSum(id, id2)")
-         |  .collect()
-         |
-      """.stripMargin)
-    assertContains(
-      "[UNRESOLVED_ROUTINE] Cannot resolve routine `intSum` on search path",
-      anotherOutput)
+    withTempDir { tempDir =>
+      val clsName = "org.apache.spark.repl.IntSumUdf"
+      val intSumUdfStream = classOf[IntSumUdf]
+        .getResourceAsStream("/" + clsName.replace(".", "/") + ".class")
+      val intSumUdfPath = new File(tempDir, "IntSumUdf.class")
+      Files.copy(intSumUdfStream, intSumUdfPath.toPath)
+      val output = runInterpreterInPasteMode("local",
+        s"""
+           |import org.apache.spark.sql.api.java.UDF2
+           |import org.apache.spark.sql.types.DataTypes
+           |
+           |spark.addArtifact("${intSumUdfPath.toString}")
+           |
+           |spark.udf.registerJava("intSum", "$clsName", DataTypes.LongType)
+           |
+           |val r = spark.range(5)
+           |  .withColumn("id2", col("id") + 1)
+           |  .selectExpr("intSum(id, id2)")
+           |  .collect()
+           |assert(r.map(_.getLong(0)).toSeq == Seq(1, 3, 5, 7, 9))
+           |
+        """.stripMargin)
+      assertContains("Array([1], [3], [5], [7], [9])", output)
+      assertDoesNotContain("error:", output)
+      assertDoesNotContain("Exception", output)
+      assertDoesNotContain("assertion failed", output)
+
+      // The UDF should not work in a new REPL session.
+      val anotherOutput = runInterpreterInPasteMode("local",
+        s"""
+           |val r = spark.range(5)
+           |  .withColumn("id2", col("id") + 1)
+           |  .selectExpr("intSum(id, id2)")
+           |  .collect()
+           |
+        """.stripMargin)
+      assertContains(
+        "[UNRESOLVED_ROUTINE] Cannot resolve routine `intSum` on search path",
+        anotherOutput)
+    }
   }
 
   test("register a class via SparkSession.addArtifact") {
-    val artifactPath = new File("src/test/resources").toPath
-    val intSumUdfPath = artifactPath.resolve("IntSumUdf.class")
-    assume(intSumUdfPath.toFile.exists)
-    val output = runInterpreterInPasteMode("local",
-      s"""
-         |import org.apache.spark.sql.functions.udf
-         |
-         |spark.addArtifact("${intSumUdfPath.toString}")
-         |
-         |val intSumUdf = udf((x: Long, y: Long) => new IntSumUdf().call(x, y))
-         |spark.udf.register("intSum", intSumUdf)
-         |
-         |val r = spark.range(5)
-         |  .withColumn("id2", col("id") + 1)
-         |  .selectExpr("intSum(id, id2)")
-         |  .collect()
-         |assert(r.map(_.getLong(0)).toSeq == Seq(1, 3, 5, 7, 9))
-         |
-      """.stripMargin)
-    assertContains("Array([1], [3], [5], [7], [9])", output)
-    assertDoesNotContain("error:", output)
-    assertDoesNotContain("Exception", output)
-    assertDoesNotContain("assertion failed", output)
+    withTempDir { tempDir =>
+      val clsName = "org.apache.spark.repl.IntSumUdf"
+      val intSumUdfStream = classOf[IntSumUdf]
+        .getResourceAsStream("/" + clsName.replace(".", "/") + ".class")
+      val intSumUdfPath = new File(tempDir, "IntSumUdf.class")
+      Files.copy(intSumUdfStream, intSumUdfPath.toPath)
+      val output = runInterpreterInPasteMode("local",
+        s"""
+           |import org.apache.spark.sql.functions.udf
+           |
+           |spark.addArtifact("${intSumUdfPath.toString}")
+           |
+           |val intSumUdf = udf((x: Long, y: Long) => new $clsName().call(x, y))
+           |spark.udf.register("intSum", intSumUdf)
+           |
+           |val r = spark.range(5)
+           |  .withColumn("id2", col("id") + 1)
+           |  .selectExpr("intSum(id, id2)")
+           |  .collect()
+           |assert(r.map(_.getLong(0)).toSeq == Seq(1, 3, 5, 7, 9))
+           |
+        """.stripMargin)
+      assertContains("Array([1], [3], [5], [7], [9])", output)
+      assertDoesNotContain("error:", output)
+      assertDoesNotContain("Exception", output)
+      assertDoesNotContain("assertion failed", output)
+    }
   }
 
   test("SPARK-53129: spark-shell imports java.net._ by default") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-53246][TEST] remove class files for ReplSuite

Reply via email to