This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 697596bf9357 [MINOR][PYTHON][TESTS] Enable `test_udf_cache` parity test
697596bf9357 is described below

commit 697596bf93575fb97c744a746a1ff97d73e022ec
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu Apr 11 08:25:37 2024 +0800

    [MINOR][PYTHON][TESTS] Enable `test_udf_cache` parity test

    ### What changes were proposed in this pull request?
    Enable `test_udf_cache` parity test

    ### Why are the changes needed?
    test coverage

    ### Does this PR introduce _any_ user-facing change?
    no

    ### How was this patch tested?
    ci

    ### Was this patch authored or co-authored using generative AI tooling?
    no

    Closes #45980 from zhengruifeng/enable_test_udf_cache.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/sql/tests/connect/test_parity_udf.py |  4 ----
 python/pyspark/sql/tests/test_udf.py                | 20 ++++++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/python/pyspark/sql/tests/connect/test_parity_udf.py b/python/pyspark/sql/tests/connect/test_parity_udf.py
index c6534a725104..17d7ae0eb9fc 100644
--- a/python/pyspark/sql/tests/connect/test_parity_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_udf.py
@@ -52,10 +52,6 @@ class UDFParityTests(BaseUDFTestsMixin, ReusedConnectTestCase):
     def test_broadcast_in_udf(self):
         super().test_broadcast_in_udf()
 
-    @unittest.skip("Spark Connect does not support cache() but the test depends on it.")
-    def test_udf_cache(self):
-        super().test_udf_cache()
-
     @unittest.skip("Requires JVM access.")
     def test_udf_defers_judf_initialization(self):
         super().test_udf_defers_judf_initialization()
diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
index d76572531b73..e8dc8a024b0e 100644
--- a/python/pyspark/sql/tests/test_udf.py
+++ b/python/pyspark/sql/tests/test_udf.py
@@ -22,6 +22,8 @@ import shutil
 import tempfile
 import unittest
 import datetime
+import io
+from contextlib import redirect_stdout
 
 from pyspark.sql import SparkSession, Column, Row
 from pyspark.sql.functions import col, udf, assert_true, lit, rand
@@ -817,14 +819,16 @@ class BaseUDFTestsMixin(object):
         df = self.spark.range(1)
         df.select(udf(func)("id")).cache()
 
-        self.assertEqual(
-            df.select(udf(func)("id"))
-            ._jdf.queryExecution()
-            .withCachedData()
-            .getClass()
-            .getSimpleName(),
-            "InMemoryRelation",
-        )
+        with io.StringIO() as buf, redirect_stdout(buf):
+            df.select(udf(func)("id")).explain()
+            # == Physical Plan ==
+            # InMemoryTableScan [func(id)#30]
+            #    +- InMemoryRelation [func(id)#30], StorageLevel(...)
+            #          +- *(2) Project [pythonUDF0#5 AS func(id)#3]
+            #             +- BatchEvalPython [func(id#0L)#2], [pythonUDF0#5]
+            #                +- *(1) Range (0, 1, step=1, splits=12)
+            self.assertEqual(1, buf.getvalue().count("InMemoryTableScan"))
+            self.assertEqual(1, buf.getvalue().count("InMemoryRelation"))
 
     # SPARK-34545
     def test_udf_input_serialization_valuecompare_disabled(self):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org