This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new d0af25f4137 [SPARK-42267][CONNECT][PYTHON] `DataFrame.join` should 
standardize the JoinType string
d0af25f4137 is described below

commit d0af25f413787bc3451116a447e0eef488b532bd
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Wed Feb 8 17:28:54 2023 +0900

    [SPARK-42267][CONNECT][PYTHON] `DataFrame.join` should standardize the 
JoinType string
    
    ### What changes were proposed in this pull request?
    Standardize the JoinType string in `DataFrame.join` (lower-case it and 
remove underscores) before building the join plan.
    
    This is consistent with PySpark; see 
https://github.com/apache/spark/blob/05c0fa573881b49d8ead9a5e16071190e5841e1b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala#L25
    
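    ### Why are the changes needed?
    Join types written with underscores, such as `left_outer`, are rejected 
even though the error message lists them as supported: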
    ```
    >>> df = spark.range(1)
    >>> df2 = spark.range(2)
    >>> df.join(df2, how="left_outer")
    Traceback (most recent call last):
    File "<stdin>", line 1, in <module>
    File "/Users/xinrong.meng/spark/python/pyspark/sql/connect/dataframe.py", 
line 438, in join
    plan.Join(left=self._plan, right=other._plan, on=on, how=how),
    File "/Users/xinrong.meng/spark/python/pyspark/sql/connect/plan.py", line 
730, in __init__
    raise NotImplementedError(
    NotImplementedError:
    Unsupported join type: left_outer. Supported join types include:
    "inner", "outer", "full", "fullouter", "full_outer",
    "leftouter", "left", "left_outer", "rightouter",
    "right", "right_outer", "leftsemi", "left_semi",
    "semi", "leftanti", "left_anti", "anti", "cross",
    ```
    
    ### Does this PR introduce _any_ user-facing change?
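    Yes. Join type strings are now lower-cased and stripped of underscores, so 
variants such as `left_outer` are accepted by `DataFrame.join`.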
    
    ### How was this patch tested?
    Updated the existing unit tests.
    
    Closes #39938 from zhengruifeng/connect_join_types.
    
    Authored-by: Ruifeng Zheng <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
    (cherry picked from commit f24ce658a71fdb57440f0caff1d3369ade4f688a)
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/sql/connect/dataframe.py               |  2 ++
 python/pyspark/sql/tests/connect/test_connect_plan.py | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/connect/dataframe.py 
b/python/pyspark/sql/connect/dataframe.py
index 0c3eb5b8934..40625f77df5 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -436,6 +436,8 @@ class DataFrame:
             raise Exception("Cannot join when self._plan is empty.")
         if other._plan is None:
             raise Exception("Cannot join when other._plan is empty.")
+        if how is not None and isinstance(how, str):
+            how = how.lower().replace("_", "")
 
         return DataFrame.withPlan(
             plan.Join(left=self._plan, right=other._plan, on=on, how=how),
diff --git a/python/pyspark/sql/tests/connect/test_connect_plan.py 
b/python/pyspark/sql/tests/connect/test_connect_plan.py
index 9cb7c5dac1e..026f286cc56 100644
--- a/python/pyspark/sql/tests/connect/test_connect_plan.py
+++ b/python/pyspark/sql/tests/connect/test_connect_plan.py
@@ -685,10 +685,21 @@ class SparkConnectPlanTests(PlanOnlyTestFixture):
             (None, proto.Join.JoinType.JOIN_TYPE_INNER),
             ("inner", proto.Join.JoinType.JOIN_TYPE_INNER),
             ("outer", proto.Join.JoinType.JOIN_TYPE_FULL_OUTER),
+            ("full", proto.Join.JoinType.JOIN_TYPE_FULL_OUTER),
+            ("fullouter", proto.Join.JoinType.JOIN_TYPE_FULL_OUTER),
+            ("full_outer", proto.Join.JoinType.JOIN_TYPE_FULL_OUTER),
+            ("left", proto.Join.JoinType.JOIN_TYPE_LEFT_OUTER),
             ("leftouter", proto.Join.JoinType.JOIN_TYPE_LEFT_OUTER),
+            ("left_outer", proto.Join.JoinType.JOIN_TYPE_LEFT_OUTER),
+            ("right", proto.Join.JoinType.JOIN_TYPE_RIGHT_OUTER),
             ("rightouter", proto.Join.JoinType.JOIN_TYPE_RIGHT_OUTER),
-            ("leftanti", proto.Join.JoinType.JOIN_TYPE_LEFT_ANTI),
+            ("right_outer", proto.Join.JoinType.JOIN_TYPE_RIGHT_OUTER),
+            ("semi", proto.Join.JoinType.JOIN_TYPE_LEFT_SEMI),
             ("leftsemi", proto.Join.JoinType.JOIN_TYPE_LEFT_SEMI),
+            ("left_semi", proto.Join.JoinType.JOIN_TYPE_LEFT_SEMI),
+            ("anti", proto.Join.JoinType.JOIN_TYPE_LEFT_ANTI),
+            ("leftanti", proto.Join.JoinType.JOIN_TYPE_LEFT_ANTI),
+            ("left_anti", proto.Join.JoinType.JOIN_TYPE_LEFT_ANTI),
             ("cross", proto.Join.JoinType.JOIN_TYPE_CROSS),
         ]:
             joined_df = df_left.join(df_right, on=col("name"), 
how=join_type_str)._plan.to_proto(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to