This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 554f6b64f1e2 [SPARK-53246][TEST] remove class files for ReplSuite 554f6b64f1e2 is described below commit 554f6b64f1e2b2346499f6d3340a3695244bfc84 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Tue Aug 12 22:34:08 2025 +0800 [SPARK-53246][TEST] remove class files for ReplSuite ### What changes were proposed in this pull request? Keeping compiled class files in the Spark repo has a security concern. This PR removes one class file for `ReplSuite` and generates it dynamically. ### Why are the changes needed? test cleanup ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? N/A ### Was this patch authored or co-authored using generative AI tooling? no Closes #51974 from cloud-fan/minor. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- dev/test-classes.txt | 1 - repl/src/test/resources/IntSumUdf.class | Bin 1333 -> 0 bytes .../org/apache/spark/repl}/IntSumUdf.scala | 2 + .../scala/org/apache/spark/repl/ReplSuite.scala | 126 +++++++++++---------- 4 files changed, 69 insertions(+), 60 deletions(-) diff --git a/dev/test-classes.txt b/dev/test-classes.txt index 5315c970c5ba..2dc6b290ad4f 100644 --- a/dev/test-classes.txt +++ b/dev/test-classes.txt @@ -1,4 +1,3 @@ -repl/src/test/resources/IntSumUdf.class sql/core/src/test/resources/artifact-tests/Hello.class sql/core/src/test/resources/artifact-tests/IntSumUdf.class sql/core/src/test/resources/artifact-tests/smallClassFile.class diff --git a/repl/src/test/resources/IntSumUdf.class b/repl/src/test/resources/IntSumUdf.class deleted file mode 100644 index 75a41446cfca..000000000000 Binary files a/repl/src/test/resources/IntSumUdf.class and /dev/null differ diff --git a/repl/src/test/resources/IntSumUdf.scala b/repl/src/test/scala/org/apache/spark/repl/IntSumUdf.scala similarity index 96% rename from repl/src/test/resources/IntSumUdf.scala rename to 
repl/src/test/scala/org/apache/spark/repl/IntSumUdf.scala index 9678caaed5db..9f2767127765 100644 --- a/repl/src/test/resources/IntSumUdf.scala +++ b/repl/src/test/scala/org/apache/spark/repl/IntSumUdf.scala @@ -15,6 +15,8 @@ * limitations under the License. */ +package org.apache.spark.repl + import org.apache.spark.sql.api.java.UDF2 class IntSumUdf extends UDF2[Long, Long, Long] { diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 02555fdd1535..4471f93840e2 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -398,68 +398,76 @@ class ReplSuite extends SparkFunSuite { } test("register UDF via SparkSession.addArtifact") { - val artifactPath = new File("src/test/resources").toPath - val intSumUdfPath = artifactPath.resolve("IntSumUdf.class") - assume(intSumUdfPath.toFile.exists) - val output = runInterpreterInPasteMode("local", - s""" - |import org.apache.spark.sql.api.java.UDF2 - |import org.apache.spark.sql.types.DataTypes - | - |spark.addArtifact("${intSumUdfPath.toString}") - | - |spark.udf.registerJava("intSum", "IntSumUdf", DataTypes.LongType) - | - |val r = spark.range(5) - | .withColumn("id2", col("id") + 1) - | .selectExpr("intSum(id, id2)") - | .collect() - |assert(r.map(_.getLong(0)).toSeq == Seq(1, 3, 5, 7, 9)) - | - """.stripMargin) - assertContains("Array([1], [3], [5], [7], [9])", output) - assertDoesNotContain("error:", output) - assertDoesNotContain("Exception", output) - assertDoesNotContain("assertion failed", output) - - // The UDF should not work in a new REPL session. 
- val anotherOutput = runInterpreterInPasteMode("local", - s""" - |val r = spark.range(5) - | .withColumn("id2", col("id") + 1) - | .selectExpr("intSum(id, id2)") - | .collect() - | - """.stripMargin) - assertContains( - "[UNRESOLVED_ROUTINE] Cannot resolve routine `intSum` on search path", - anotherOutput) + withTempDir { tempDir => + val clsName = "org.apache.spark.repl.IntSumUdf" + val intSumUdfStream = classOf[IntSumUdf] + .getResourceAsStream("/" + clsName.replace(".", "/") + ".class") + val intSumUdfPath = new File(tempDir, "IntSumUdf.class") + Files.copy(intSumUdfStream, intSumUdfPath.toPath) + val output = runInterpreterInPasteMode("local", + s""" + |import org.apache.spark.sql.api.java.UDF2 + |import org.apache.spark.sql.types.DataTypes + | + |spark.addArtifact("${intSumUdfPath.toString}") + | + |spark.udf.registerJava("intSum", "$clsName", DataTypes.LongType) + | + |val r = spark.range(5) + | .withColumn("id2", col("id") + 1) + | .selectExpr("intSum(id, id2)") + | .collect() + |assert(r.map(_.getLong(0)).toSeq == Seq(1, 3, 5, 7, 9)) + | + """.stripMargin) + assertContains("Array([1], [3], [5], [7], [9])", output) + assertDoesNotContain("error:", output) + assertDoesNotContain("Exception", output) + assertDoesNotContain("assertion failed", output) + + // The UDF should not work in a new REPL session. 
+ val anotherOutput = runInterpreterInPasteMode("local", + s""" + |val r = spark.range(5) + | .withColumn("id2", col("id") + 1) + | .selectExpr("intSum(id, id2)") + | .collect() + | + """.stripMargin) + assertContains( + "[UNRESOLVED_ROUTINE] Cannot resolve routine `intSum` on search path", + anotherOutput) + } } test("register a class via SparkSession.addArtifact") { - val artifactPath = new File("src/test/resources").toPath - val intSumUdfPath = artifactPath.resolve("IntSumUdf.class") - assume(intSumUdfPath.toFile.exists) - val output = runInterpreterInPasteMode("local", - s""" - |import org.apache.spark.sql.functions.udf - | - |spark.addArtifact("${intSumUdfPath.toString}") - | - |val intSumUdf = udf((x: Long, y: Long) => new IntSumUdf().call(x, y)) - |spark.udf.register("intSum", intSumUdf) - | - |val r = spark.range(5) - | .withColumn("id2", col("id") + 1) - | .selectExpr("intSum(id, id2)") - | .collect() - |assert(r.map(_.getLong(0)).toSeq == Seq(1, 3, 5, 7, 9)) - | - """.stripMargin) - assertContains("Array([1], [3], [5], [7], [9])", output) - assertDoesNotContain("error:", output) - assertDoesNotContain("Exception", output) - assertDoesNotContain("assertion failed", output) + withTempDir { tempDir => + val clsName = "org.apache.spark.repl.IntSumUdf" + val intSumUdfStream = classOf[IntSumUdf] + .getResourceAsStream("/" + clsName.replace(".", "/") + ".class") + val intSumUdfPath = new File(tempDir, "IntSumUdf.class") + Files.copy(intSumUdfStream, intSumUdfPath.toPath) + val output = runInterpreterInPasteMode("local", + s""" + |import org.apache.spark.sql.functions.udf + | + |spark.addArtifact("${intSumUdfPath.toString}") + | + |val intSumUdf = udf((x: Long, y: Long) => new $clsName().call(x, y)) + |spark.udf.register("intSum", intSumUdf) + | + |val r = spark.range(5) + | .withColumn("id2", col("id") + 1) + | .selectExpr("intSum(id, id2)") + | .collect() + |assert(r.map(_.getLong(0)).toSeq == Seq(1, 3, 5, 7, 9)) + | + """.stripMargin) + 
assertContains("Array([1], [3], [5], [7], [9])", output) + assertDoesNotContain("error:", output) + assertDoesNotContain("Exception", output) + assertDoesNotContain("assertion failed", output) + } } test("SPARK-53129: spark-shell imports java.net._ by default") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org