This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 83556c49c87  [SPARK-44891][PYTHON][CONNECT] Enable Doctests of `rand`, `randn` and `log`
83556c49c87 is described below

commit 83556c49c87c6f29a5e5c8d3708ad4d881ebe738
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Tue Aug 22 07:47:57 2023 +0800

    [SPARK-44891][PYTHON][CONNECT] Enable Doctests of `rand`, `randn` and `log`

    ### What changes were proposed in this pull request?

    I went through all the skipped doctests in `pyspark.sql.functions` and found that the doctests of `rand`, `randn` and `log` can be enabled by making them deterministic (see the sketch after the diffstat below):
    - specify the `numPartitions` in `spark.range` for `rand` and `randn`;
    - change the input values for `log`.

    ### Why are the changes needed?

    To enable the doctests of `rand`, `randn` and `log` and improve test coverage.

    ### Does this PR introduce _any_ user-facing change?

    Yes, the example outputs in the docstrings change.

    ### How was this patch tested?

    The newly enabled doctests.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #42584 from zhengruifeng/make_doctest_deterministic.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
    (cherry picked from commit 290b6327faadb5bbe25e9243955d3cf0c4ca4cfa)
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/sql/functions.py | 59 +++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 29 deletions(-)
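The `numPartitions` tweak works because Spark seeds `rand` and `randn` per
partition: the partition index is folded into the seed, so the value a row
gets depends on which partition it lands in. Pinning `spark.range` to a
single partition fixes that assignment, which makes the seeded output stable
across runs and safe to check verbatim. A minimal sketch of the idea, with
an illustrative local session that is not part of this commit:

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    spark = SparkSession.builder.master("local[*]").getOrCreate()

    # spark.range(start, end, step, numPartitions): the trailing 1 pins both
    # rows to one partition, so F.rand(seed=42) always draws from the same
    # (seed, partition) stream and .show() prints the same two values.
    spark.range(0, 2, 1, 1).withColumn("rand", F.rand(seed=42) * 3).show()

    # Without numPartitions, spark.range(2) splits the rows according to the
    # session's default parallelism, so the same seed can print different
    # values on different machines -- the reason the old doctests carried
    # a +SKIP directive.
    spark.range(2).withColumn("rand", F.rand(seed=42) * 3).show()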
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index c31a00121ef..b285550d024 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -4615,13 +4615,13 @@ def rand(seed: Optional[int] = None) -> Column:
 
     Examples
     --------
-    >>> df = spark.range(2)
-    >>> df.withColumn('rand', rand(seed=42) * 3).show() # doctest: +SKIP
+    >>> from pyspark.sql import functions as F
+    >>> spark.range(0, 2, 1, 1).withColumn('rand', F.rand(seed=42) * 3).show()
     +---+------------------+
     | id|              rand|
     +---+------------------+
-    |  0|1.4385751892400076|
-    |  1|1.7082186019706387|
+    |  0|1.8575681106759028|
+    |  1|1.5288056527339444|
     +---+------------------+
     """
     if seed is not None:
@@ -4656,14 +4656,14 @@ def randn(seed: Optional[int] = None) -> Column:
 
     Examples
     --------
-    >>> df = spark.range(2)
-    >>> df.withColumn('randn', randn(seed=42)).show() # doctest: +SKIP
-    +---+--------------------+
-    | id|               randn|
-    +---+--------------------+
-    |  0|-0.04167221574820542|
-    |  1| 0.15241403986452778|
-    +---+--------------------+
+    >>> from pyspark.sql import functions as F
+    >>> spark.range(0, 2, 1, 1).withColumn('randn', F.randn(seed=42)).show()
+    +---+------------------+
+    | id|             randn|
+    +---+------------------+
+    |  0| 2.384479054241165|
+    |  1|0.1920934041293524|
+    +---+------------------+
     """
     if seed is not None:
         return _invoke_function("randn", seed)
@@ -5158,26 +5158,27 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = Non
 
     Examples
     --------
-    >>> df = spark.createDataFrame([10, 100, 1000], "INT")
-    >>> df.select(log(10.0, df.value).alias('ten')).show() # doctest: +SKIP
-    +---+
-    |ten|
-    +---+
-    |1.0|
-    |2.0|
-    |3.0|
-    +---+
+    >>> from pyspark.sql import functions as F
+    >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (4) AS t(value)")
+    >>> df.select(F.log(2.0, df.value).alias('log2_value')).show()
+    +----------+
+    |log2_value|
+    +----------+
+    |       0.0|
+    |       1.0|
+    |       2.0|
+    +----------+
 
     And Natural logarithm
 
-    >>> df.select(log(df.value)).show() # doctest: +SKIP
-    +-----------------+
-    |        ln(value)|
-    +-----------------+
-    |2.302585092994046|
-    |4.605170185988092|
-    |4.605170185988092|
-    +-----------------+
+    >>> df.select(F.log(df.value).alias('ln_value')).show()
+    +------------------+
+    |          ln_value|
+    +------------------+
+    |               0.0|
+    |0.6931471805599453|
+    |1.3862943611198906|
+    +------------------+
     """
     if arg2 is None:
         return _invoke_function_over_columns("log", cast("ColumnOrName", arg1))
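The change to `log` addresses a different source of flakiness: floating-point
rounding. The assumption here is that `LOG(10.0, value)` boils down to
ln(value)/ln(10.0) in double precision (mirrored below with Python's `math`
module rather than Spark itself); for value = 1000 that quotient prints as
2.9999999999999996 rather than the 3.0 the old table claimed. With base 2.0
and inputs 1, 2 and 4 the quotients are exact, so the expected output can be
checked verbatim. A rough sketch of the arithmetic:

    import math

    # math.log(x, base) is documented to compute log(x)/log(base), the same
    # shape of computation as SQL LOG(base, x).
    print([math.log(v, 2.0) for v in (1, 2, 4)])  # [0.0, 1.0, 2.0] -- exact
    print(math.log(1000, 10))  # 2.9999999999999996, one ulp short of 3.0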