This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 47afefb6679 [SPARK-41980][CONNECT][TESTS] Enable 
test_functions_broadcast in functions parity test
47afefb6679 is described below

commit 47afefb66794df16399e34791a68bdec9885778e
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Jan 12 10:31:40 2023 +0900

    [SPARK-41980][CONNECT][TESTS] Enable test_functions_broadcast in functions 
parity test
    
    ### What changes were proposed in this pull request?
    
    This PR enables `test_functions_broadcast` back by avoiding `_jdf` access 
in the original test.
    
    ### Why are the changes needed?
    
    For test coverage and feature parity.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, test-only.
    
    ### How was this patch tested?
    
    Fixed unittests.
    
    Closes #39500 from HyukjinKwon/SPARK-41980.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/sql/tests/connect/test_parity_functions.py |  6 ------
 python/pyspark/sql/tests/test_functions.py                | 14 +++++++++-----
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/python/pyspark/sql/tests/connect/test_parity_functions.py 
b/python/pyspark/sql/tests/connect/test_parity_functions.py
index 2f6ed05559f..dd7229d158f 100644
--- a/python/pyspark/sql/tests/connect/test_parity_functions.py
+++ b/python/pyspark/sql/tests/connect/test_parity_functions.py
@@ -40,12 +40,6 @@ class FunctionsParityTests(FunctionsTestsMixin, 
ReusedConnectTestCase):
     def test_function_parity(self):
         super().test_function_parity()
 
-    @unittest.skip(
-        "Spark Connect does not support Spark Context, _jdf but the test 
depends on that."
-    )
-    def test_functions_broadcast(self):
-        super().test_functions_broadcast()
-
     @unittest.skip("Spark Connect does not support Spark Context but the test 
depends on that.")
     def test_input_file_name_reset_for_rdd(self):
         super().test_input_file_name_reset_for_rdd()
diff --git a/python/pyspark/sql/tests/test_functions.py 
b/python/pyspark/sql/tests/test_functions.py
index 4db1eed1eb1..38a4e3e6644 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -16,6 +16,8 @@
 #
 
 import datetime
+import io
+from contextlib import redirect_stdout
 from inspect import getmembers, isfunction
 from itertools import chain
 import re
@@ -452,15 +454,17 @@ class FunctionsTestsMixin:
         df2 = self.spark.createDataFrame([(1, "1"), (2, "2")], ("key", 
"value"))
 
         # equijoin - should be converted into broadcast join
-        plan1 = df1.join(broadcast(df2), 
"key")._jdf.queryExecution().executedPlan()
-        self.assertEqual(1, plan1.toString().count("BroadcastHashJoin"))
+        with io.StringIO() as buf, redirect_stdout(buf):
+            df1.join(broadcast(df2), "key").explain(True)
+            self.assertGreaterEqual(buf.getvalue().count("Broadcast"), 1)
 
         # no join key -- should not be a broadcast join
-        plan2 = 
df1.crossJoin(broadcast(df2))._jdf.queryExecution().executedPlan()
-        self.assertEqual(0, plan2.toString().count("BroadcastHashJoin"))
+        with io.StringIO() as buf, redirect_stdout(buf):
+            df1.crossJoin(broadcast(df2)).explain(True)
+            self.assertGreaterEqual(buf.getvalue().count("Broadcast"), 1)
 
         # planner should not crash without a join
-        broadcast(df1)._jdf.queryExecution().executedPlan()
+        broadcast(df1).explain(True)
 
     def test_first_last_ignorenulls(self):
         from pyspark.sql import functions


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to