This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9f3e9500661 [SPARK-40010][PYTHON][DOCS] Make pyspark.sql.window examples self-contained
9f3e9500661 is described below

commit 9f3e950066142a8f588b88102d2ac92fdb45af98
Author: Qian.Sun <[email protected]>
AuthorDate: Wed Aug 10 11:43:37 2022 +0900

    [SPARK-40010][PYTHON][DOCS] Make pyspark.sql.window examples self-contained
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to improve the examples in `pyspark.sql.window` by making each example self-contained, with a brief explanation and a slightly more realistic example.
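
    For instance, each updated example now creates its own data and assumes only the
    `spark` session that the PySpark shell provides; a minimal sketch of the pattern:

        from pyspark.sql import Window
        from pyspark.sql import functions as func

        df = spark.createDataFrame(
            [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"])
        # Sum "id" over the current row and the next row within each "category" partition.
        window = Window.partitionBy("category").orderBy("id").rowsBetween(Window.currentRow, 1)
        df.withColumn("sum", func.sum("id").over(window)).show()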
    
    ### Why are the changes needed?
    
    To make the documentation more readable and to allow the examples to be copied and pasted directly into the PySpark shell.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, it changes the documentation.
    
    ### How was this patch tested?
    
    Manually ran each doctest.
    
    Closes #37450 from dcoliversun/SPARK-40010.
    
    Authored-by: Qian.Sun <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/sql/window.py | 50 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py
index b8bc90f458c..f895e2010ce 100644
--- a/python/pyspark/sql/window.py
+++ b/python/pyspark/sql/window.py
@@ -128,11 +128,23 @@ class Window:
         --------
         >>> from pyspark.sql import Window
         >>> from pyspark.sql import functions as func
-        >>> from pyspark.sql import SQLContext
-        >>> sc = SparkContext.getOrCreate()
-        >>> sqlContext = SQLContext(sc)
-        >>> tup = [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")]
-        >>> df = sqlContext.createDataFrame(tup, ["id", "category"])
+        >>> df = spark.createDataFrame(
+        ...      [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"])
+        >>> df.show()
+        +---+--------+
+        | id|category|
+        +---+--------+
+        |  1|       a|
+        |  1|       a|
+        |  2|       a|
+        |  1|       b|
+        |  2|       b|
+        |  3|       b|
+        +---+--------+
+
+        Calculate sum of ``id`` in the range from currentRow to currentRow + 1
+        in partition ``category``
+
+        >>> window = Window.partitionBy("category").orderBy("id").rowsBetween(Window.currentRow, 1)
         >>> df.withColumn("sum", func.sum("id").over(window)).sort("id", "category", "sum").show()
         +---+--------+---+
@@ -196,11 +208,23 @@ class Window:
         --------
         >>> from pyspark.sql import Window
         >>> from pyspark.sql import functions as func
-        >>> from pyspark.sql import SQLContext
-        >>> sc = SparkContext.getOrCreate()
-        >>> sqlContext = SQLContext(sc)
-        >>> tup = [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")]
-        >>> df = sqlContext.createDataFrame(tup, ["id", "category"])
+        >>> df = spark.createDataFrame(
+        ...      [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"])
+        >>> df.show()
+        +---+--------+
+        | id|category|
+        +---+--------+
+        |  1|       a|
+        |  1|       a|
+        |  2|       a|
+        |  1|       b|
+        |  2|       b|
+        |  3|       b|
+        +---+--------+
+
+        Calculate sum of ``id`` in the range from ``id`` of currentRow to ``id`` of currentRow + 1
+        in partition ``category``
+
+        >>> window = Window.partitionBy("category").orderBy("id").rangeBetween(Window.currentRow, 1)
         >>> df.withColumn("sum", func.sum("id").over(window)).sort("id", "category").show()
         +---+--------+---+
@@ -329,13 +353,17 @@ class WindowSpec:
 
 def _test() -> None:
     import doctest
+    from pyspark.sql import SparkSession
     import pyspark.sql.window
 
-    SparkContext("local[4]", "PythonTest")
     globs = pyspark.sql.window.__dict__.copy()
+    spark = SparkSession.builder.master("local[4]").appName("sql.window tests").getOrCreate()
+    globs["spark"] = spark
+
     (failure_count, test_count) = doctest.testmod(
         pyspark.sql.window, globs=globs, optionflags=doctest.NORMALIZE_WHITESPACE
     )
+    spark.stop()
     if failure_count:
         sys.exit(-1)
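
A standalone sketch of the two window frames the new doctests exercise. The session
setup mirrors the updated _test() harness; the sums noted in the comments follow from
the ROWS/RANGE frame semantics, and the app name and variable names are illustrative:

    from pyspark.sql import SparkSession, Window
    from pyspark.sql import functions as func

    spark = SparkSession.builder.master("local[1]").appName("window-demo").getOrCreate()
    df = spark.createDataFrame(
        [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"])

    # ROWS frame: the current row and the next row within the partition.
    # For category "a" (ids 1, 1, 2) the two id=1 rows get sums 2 and 3; the id=2 row gets 2.
    rows_win = Window.partitionBy("category").orderBy("id").rowsBetween(Window.currentRow, 1)

    # RANGE frame: every row whose id lies in [current id, current id + 1].
    # For category "a" both id=1 rows see ids {1, 1, 2} and get 4; the id=2 row gets 2.
    range_win = Window.partitionBy("category").orderBy("id").rangeBetween(Window.currentRow, 1)

    df.select(
        "id", "category",
        func.sum("id").over(rows_win).alias("rows_sum"),
        func.sum("id").over(range_win).alias("range_sum"),
    ).orderBy("category", "id").show()

    spark.stop()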
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
