This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e6bbd3172940 [SPARK-45984][PYTHON][DOCS] Refine docstring of `DataFrame.intersectAll` e6bbd3172940 is described below commit e6bbd317294032d3352e09c01bf9b640dae46f71 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Sat Nov 18 10:14:53 2023 -0800 [SPARK-45984][PYTHON][DOCS] Refine docstring of `DataFrame.intersectAll` ### What changes were proposed in this pull request? This PR proposes to improve the docstring of `DataFrame.intersectAll`. ### Why are the changes needed? For end users, and better usability of PySpark. ### Does this PR introduce _any_ user-facing change? Yes, it fixes the user facing documentation. ### How was this patch tested? Manually tested. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43883 from HyukjinKwon/SPARK-45984. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- python/pyspark/sql/dataframe.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index f83f75a47896..8d7c7d70a501 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -4791,9 +4791,12 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): Examples -------- + Example 1: Intersecting two DataFrames with the same schema + >>> df1 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3), ("c", 4)], ["C1", "C2"]) >>> df2 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3)], ["C1", "C2"]) - >>> df1.intersectAll(df2).sort("C1", "C2").show() + >>> result_df = df1.intersectAll(df2).sort("C1", "C2") + >>> result_df.show() +---+---+ | C1| C2| +---+---+ @@ -4801,6 +4804,31 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): | a| 1| | b| 3| +---+---+ + + Example 2: Intersecting two DataFrames with different schemas + + >>> df1 = spark.createDataFrame([(1, "A"), (2, 
"B")], ["id", "value"]) + >>> df2 = spark.createDataFrame([(2, "B"), (3, "C")], ["id", "value"]) + >>> result_df = df1.intersectAll(df2).sort("id", "value") + >>> result_df.show() + +---+-----+ + | id|value| + +---+-----+ + | 2| B| + +---+-----+ + + Example 3: Intersecting all rows from two DataFrames with mismatched columns + + >>> df1 = spark.createDataFrame([(1, 2), (1, 2), (3, 4)], ["A", "B"]) + >>> df2 = spark.createDataFrame([(1, 2), (1, 2)], ["C", "D"]) + >>> result_df = df1.intersectAll(df2).sort("A", "B") + >>> result_df.show() + +---+---+ + | A| B| + +---+---+ + | 1| 2| + | 1| 2| + +---+---+ """ return DataFrame(self._jdf.intersectAll(other._jdf), self.sparkSession) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org