This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 76acd12a73cb [SPARK-45218][PYTHON][DOCS] Refine docstring of Column.isin
76acd12a73cb is described below

commit 76acd12a73cb824f38eaf350f143b8f94585f299
Author: allisonwang-db <allison.w...@databricks.com>
AuthorDate: Thu Sep 21 07:58:13 2023 +0800

    [SPARK-45218][PYTHON][DOCS] Refine docstring of Column.isin

    ### What changes were proposed in this pull request?

    This PR refines the docstring of `Column.isin` by updating the examples.

    ### Why are the changes needed?

    To improve PySpark documentation.

    ### Does this PR introduce _any_ user-facing change?

    No

    ### How was this patch tested?

    doctest

    ### Was this patch authored or co-authored using generative AI tooling?

    No

    Closes #43001 from allisonwang-db/spark-45218-refine-isin.

    Authored-by: allisonwang-db <allison.w...@databricks.com>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/sql/column.py | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index d91cfdf52951..203e53474f74 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -962,8 +962,9 @@ class Column:
 
         Parameters
         ----------
-        cols
-            The result will only be true at a location if any value matches in the Column.
+        cols : Any
+            The values to compare with the column values. The result will only be true at a location
+            if any value matches in the Column.
 
         Returns
         -------
@@ -972,12 +973,35 @@ class Column:
 
         Examples
         --------
-        >>> df = spark.createDataFrame(
-        ...      [(2, "Alice"), (5, "Bob")], ["age", "name"])
-        >>> df[df.name.isin("Bob", "Mike")].collect()
-        [Row(age=5, name='Bob')]
-        >>> df[df.age.isin([1, 2, 3])].collect()
-        [Row(age=2, name='Alice')]
+        >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob"), (8, "Mike")], ["age", "name"])
+
+        Example 1: Filter rows with names in the specified values
+
+        >>> df[df.name.isin("Bob", "Mike")].show()
+        +---+----+
+        |age|name|
+        +---+----+
+        |  5| Bob|
+        |  8|Mike|
+        +---+----+
+
+        Example 2: Filter rows with ages in the specified list
+
+        >>> df[df.age.isin([1, 2, 3])].show()
+        +---+-----+
+        |age| name|
+        +---+-----+
+        |  2|Alice|
+        +---+-----+
+
+        Example 3: Filter rows with names not in the specified values
+
+        >>> df[~df.name.isin("Alice", "Bob")].show()
+        +---+----+
+        |age|name|
+        +---+----+
+        |  8|Mike|
+        +---+----+
         """
         if len(cols) == 1 and isinstance(cols[0], (list, set)):
             cols = cast(Tuple, cols[0])


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org