This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new b2aa17ca5afd [SPARK-45985][PYTHON][DOCS] Refine docstring of `DataFrame.intersect`
b2aa17ca5afd is described below

commit b2aa17ca5afd6ab1d5e47b0e1722193465684f36
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Sat Nov 18 10:16:02 2023 -0800

    [SPARK-45985][PYTHON][DOCS] Refine docstring of `DataFrame.intersect`

    ### What changes were proposed in this pull request?

    This PR proposes to improve the docstring of `DataFrame.intersect`.

    ### Why are the changes needed?

    For end users, and better usability of PySpark.

    ### Does this PR introduce _any_ user-facing change?

    Yes, it fixes the user facing documentation.

    ### How was this patch tested?

    Manually tested.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #43884 from HyukjinKwon/SPARK-45985.

    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 python/pyspark/sql/dataframe.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 8d7c7d70a501..01067bd4c482 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -4755,14 +4755,41 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
 
         Examples
         --------
+        Example 1: Intersecting two DataFrames with the same schema
+
         >>> df1 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3), ("c", 4)], ["C1", "C2"])
         >>> df2 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3)], ["C1", "C2"])
-        >>> df1.intersect(df2).sort(df1.C1.desc()).show()
+        >>> result_df = df1.intersect(df2).sort("C1", "C2")
+        >>> result_df.show()
         +---+---+
         | C1| C2|
         +---+---+
-        |  b|  3|
         |  a|  1|
+        |  b|  3|
+        +---+---+
+
+        Example 2: Intersecting two DataFrames with different schemas
+
+        >>> df1 = spark.createDataFrame([(1, "A"), (2, "B")], ["id", "value"])
+        >>> df2 = spark.createDataFrame([(2, "B"), (3, "C")], ["id", "value"])
+        >>> result_df = df1.intersect(df2).sort("id", "value")
+        >>> result_df.show()
+        +---+-----+
+        | id|value|
+        +---+-----+
+        |  2|    B|
+        +---+-----+
+
+        Example 3: Intersecting all rows from two DataFrames with mismatched columns
+
+        >>> df1 = spark.createDataFrame([(1, 2), (1, 2), (3, 4)], ["A", "B"])
+        >>> df2 = spark.createDataFrame([(1, 2), (1, 2)], ["C", "D"])
+        >>> result_df = df1.intersect(df2).sort("A", "B")
+        >>> result_df.show()
+        +---+---+
+        |  A|  B|
+        +---+---+
+        |  1|  2|
         +---+---+
         """
         return DataFrame(self._jdf.intersect(other._jdf), self.sparkSession)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org