This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 12fd2a0b007 [SPARK-44889][PYTHON][CONNECT] Fix docstring of `monotonically_increasing_id`
12fd2a0b007 is described below

commit 12fd2a0b00752a93c4df5c9a22477245f228ee73
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Mon Aug 21 18:21:35 2023 +0800

    [SPARK-44889][PYTHON][CONNECT] Fix docstring of `monotonically_increasing_id`

    ### What changes were proposed in this pull request?
    Fix the docstring of `monotonically_increasing_id`.

    ### Why are the changes needed?
    1. Use `from pyspark.sql import functions as F` to avoid an implicit wildcard import.
    2. Use DataFrame APIs instead of RDD APIs, so the docstring can be reused in Connect.

    After this fix, all docstrings are reused between vanilla PySpark and the Spark Connect Python Client.

    ### Does this PR introduce _any_ user-facing change?
    Yes.

    ### How was this patch tested?
    CI

    ### Was this patch authored or co-authored using generative AI tooling?
    No

    Closes #42582 from zhengruifeng/fix_monotonically_increasing_id_docstring.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
    (cherry picked from commit 72c62b6596d21e975c5597f8fff84b1a9d070a02)
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/sql/connect/functions.py |  3 ---
 python/pyspark/sql/functions.py         | 19 ++++++++++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index bfffc86c953..9d01073e5da 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -3900,9 +3900,6 @@ def _test() -> None:
 
     globs = pyspark.sql.connect.functions.__dict__.copy()
 
-    # Spark Connect does not support Spark Context but the test depends on that.
-    del pyspark.sql.connect.functions.monotonically_increasing_id.__doc__
-
     globs["spark"] = (
         PySparkSession.builder.appName("sql.connect.functions tests")
         .remote("local[4]")
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 39458a009d4..c31a00121ef 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -4311,9 +4311,22 @@ def monotonically_increasing_id() -> Column:
 
     Examples
     --------
-    >>> df0 = sc.parallelize(range(2), 2).mapPartitions(lambda x: [(1,), (2,), (3,)]).toDF(['col1'])
-    >>> df0.select(monotonically_increasing_id().alias('id')).collect()
-    [Row(id=0), Row(id=1), Row(id=2), Row(id=8589934592), Row(id=8589934593), Row(id=8589934594)]
+    >>> from pyspark.sql import functions as F
+    >>> spark.range(0, 10, 1, 2).select(F.monotonically_increasing_id()).show()
+    +-----------------------------+
+    |monotonically_increasing_id()|
+    +-----------------------------+
+    |                            0|
+    |                            1|
+    |                            2|
+    |                            3|
+    |                            4|
+    |                   8589934592|
+    |                   8589934593|
+    |                   8589934594|
+    |                   8589934595|
+    |                   8589934596|
+    +-----------------------------+
     """
     return _invoke_function("monotonically_increasing_id")
 
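A note for readers puzzled by the jump from 4 to 8589934592 in the new doctest output: it is by design. Per the function's documented contract, the generated 64-bit ID puts the partition index in the upper 31 bits and the record number within each partition in the lower 33 bits, so the first row of the second partition gets 1 << 33 = 8589934592. Below is a minimal sketch decoding both fields; it is not part of the patch, and the session setup and output column names (id_partition, id_record) are illustrative only:

    # Illustrative sketch (not from this patch): split a generated ID back
    # into its partition index (upper 31 bits) and per-partition record
    # number (lower 33 bits).
    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    spark = SparkSession.builder.master("local[4]").appName("mii_sketch").getOrCreate()

    # Same input as the new doctest: ten rows spread over two partitions.
    df = spark.range(0, 10, 1, 2).select(
        F.spark_partition_id().alias("partition"),
        F.monotonically_increasing_id().alias("id"),
    )

    decoded = df.withColumn("id_partition", F.shiftright("id", 33)).withColumn(
        "id_record", F.col("id").bitwiseAND((1 << 33) - 1)
    )
    decoded.show()
    # id_partition matches spark_partition_id() for every row, and id_record
    # counts 0..4 within each partition.

    spark.stop()

This also shows why the new doctest pins the partition count: the absolute ID values depend on how rows are partitioned, so the example uses spark.range(0, 10, 1, 2) to keep the output deterministic.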