This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 28b0c2c11d68 [SPARK-46398][PYTHON][TESTS] Test rangeBetween window function (pyspark.sql.window)
28b0c2c11d68 is described below
commit 28b0c2c11d681d9051a7b82b63e9aaaa29bfecef
Author: Xinrong Meng <[email protected]>
AuthorDate: Thu Dec 21 08:16:49 2023 +0900
[SPARK-46398][PYTHON][TESTS] Test rangeBetween window function (pyspark.sql.window)
### What changes were proposed in this pull request?
Add a test for the rangeBetween window function (pyspark.sql.window).
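For readers unfamiliar with rangeBetween, below is a minimal sketch of the pattern the new test exercises; it is not part of the commit, and it assumes a locally created SparkSession (the names `spark`, `df`, and `w` are illustrative only):

```python
import datetime

from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

# Illustrative only: a local SparkSession, not the test fixture from the suite.
spark = SparkSession.builder.master("local[1]").getOrCreate()

df = spark.createDataFrame(
    [
        (datetime.datetime(2023, 1, 1), 20),
        (datetime.datetime(2023, 1, 2), 22),
        (datetime.datetime(2023, 1, 3), 21),
    ],
    ["date", "temperature"],
)

# rangeBetween frames are defined over the numeric ORDER BY value, so the date
# is cast to epoch seconds and the trailing 3-day frame spans -3 * 86400 .. 0.
w = Window.orderBy(F.col("date").cast("timestamp").cast("long")).rangeBetween(-3 * 86400, 0)
df.withColumn("3_day_avg_temp", F.avg("temperature").over(w)).show()
```

The cast to long is the key design choice: rangeBetween cannot frame over a timestamp directly, so the test expresses the window bounds in seconds.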
### Why are the changes needed?
This is a subtask of [SPARK-46041](https://issues.apache.org/jira/browse/SPARK-46041) to improve test coverage.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Test change only.
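For reference, the updated suite can typically be run from a Spark source checkout with the Python test runner; the invocation below is a common pattern and is not stated in this commit:

```
./python/run-tests --testnames 'pyspark.sql.tests.test_functions'
```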
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44339 from xinrong-meng/test_window_func.
Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/tests/test_functions.py | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index 5352ee04d7fe..df1ddd0301ad 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -997,6 +997,34 @@ class FunctionsTestsMixin:
         for r, ex in zip(rs, expected):
             self.assertEqual(tuple(r), ex[: len(r)])
 
+    def test_window_functions_moving_average(self):
+        data = [
+            (datetime.datetime(2023, 1, 1), 20),
+            (datetime.datetime(2023, 1, 2), 22),
+            (datetime.datetime(2023, 1, 3), 21),
+            (datetime.datetime(2023, 1, 4), 23),
+            (datetime.datetime(2023, 1, 5), 24),
+            (datetime.datetime(2023, 1, 6), 26),
+        ]
+        df = self.spark.createDataFrame(data, ["date", "temperature"])
+
+        def to_sec(i):
+            return i * 86400
+
+        w = Window.orderBy(F.col("date").cast("timestamp").cast("long")).rangeBetween(-to_sec(3), 0)
+        res = df.withColumn("3_day_avg_temp", F.avg("temperature").over(w))
+        rs = sorted(res.collect())
+        expected = [
+            (datetime.datetime(2023, 1, 1, 0, 0), 20, 20.0),
+            (datetime.datetime(2023, 1, 2, 0, 0), 22, 21.0),
+            (datetime.datetime(2023, 1, 3, 0, 0), 21, 21.0),
+            (datetime.datetime(2023, 1, 4, 0, 0), 23, 21.5),
+            (datetime.datetime(2023, 1, 5, 0, 0), 24, 22.5),
+            (datetime.datetime(2023, 1, 6, 0, 0), 26, 23.5),
+        ]
+        for r, ex in zip(rs, expected):
+            self.assertEqual(tuple(r), ex[: len(r)])
+
     def test_window_time(self):
         df = self.spark.createDataFrame(
             [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], ["date", "val"]