This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 697596bf9357 [MINOR][PYTHON][TESTS] Enable `test_udf_cache` parity test
697596bf9357 is described below
commit 697596bf93575fb97c744a746a1ff97d73e022ec
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Apr 11 08:25:37 2024 +0800
[MINOR][PYTHON][TESTS] Enable `test_udf_cache` parity test
### What changes were proposed in this pull request?
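Enable the `test_udf_cache` parity test for Spark Connect by removing its `unittest.skip` override in `test_parity_udf.py`. The shared test in `test_udf.py` now asserts on the captured `explain()` output (one `InMemoryTableScan` and one `InMemoryRelation`) instead of inspecting the plan through `_jdf`.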
### Why are the changes needed?
To improve test coverage: `test_udf_cache` was previously skipped in the Spark Connect parity suite.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #45980 from zhengruifeng/enable_test_udf_cache.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/tests/connect/test_parity_udf.py | 4 ----
python/pyspark/sql/tests/test_udf.py | 20 ++++++++++++--------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/python/pyspark/sql/tests/connect/test_parity_udf.py b/python/pyspark/sql/tests/connect/test_parity_udf.py
index c6534a725104..17d7ae0eb9fc 100644
--- a/python/pyspark/sql/tests/connect/test_parity_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_udf.py
@@ -52,10 +52,6 @@ class UDFParityTests(BaseUDFTestsMixin, ReusedConnectTestCase):
def test_broadcast_in_udf(self):
super().test_broadcast_in_udf()
- @unittest.skip("Spark Connect does not support cache() but the test depends on it.")
- def test_udf_cache(self):
- super().test_udf_cache()
-
@unittest.skip("Requires JVM access.")
def test_udf_defers_judf_initialization(self):
super().test_udf_defers_judf_initialization()
diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
index d76572531b73..e8dc8a024b0e 100644
--- a/python/pyspark/sql/tests/test_udf.py
+++ b/python/pyspark/sql/tests/test_udf.py
@@ -22,6 +22,8 @@ import shutil
import tempfile
import unittest
import datetime
+import io
+from contextlib import redirect_stdout
from pyspark.sql import SparkSession, Column, Row
from pyspark.sql.functions import col, udf, assert_true, lit, rand
@@ -817,14 +819,16 @@ class BaseUDFTestsMixin(object):
df = self.spark.range(1)
df.select(udf(func)("id")).cache()
- self.assertEqual(
- df.select(udf(func)("id"))
- ._jdf.queryExecution()
- .withCachedData()
- .getClass()
- .getSimpleName(),
- "InMemoryRelation",
- )
+ with io.StringIO() as buf, redirect_stdout(buf):
+ df.select(udf(func)("id")).explain()
+ # == Physical Plan ==
+ # InMemoryTableScan [func(id)#30]
+ # +- InMemoryRelation [func(id)#30], StorageLevel(...)
+ # +- *(2) Project [pythonUDF0#5 AS func(id)#3]
+ # +- BatchEvalPython [func(id#0L)#2], [pythonUDF0#5]
+ # +- *(1) Range (0, 1, step=1, splits=12)
+ self.assertEqual(1, buf.getvalue().count("InMemoryTableScan"))
+ self.assertEqual(1, buf.getvalue().count("InMemoryRelation"))
# SPARK-34545
def test_udf_input_serialization_valuecompare_disabled(self):
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]