Github user zero323 commented on a diff in the pull request:
https://github.com/apache/spark/pull/16123#discussion_r90725698
--- Diff: python/pyspark/sql/tests.py ---
@@ -1980,6 +1980,41 @@ def
assert_runs_only_one_job_stage_and_task(job_group_name, f):
# Regression test for SPARK-17514: limit(n).collect() should the
perform same as take(n)
assert_runs_only_one_job_stage_and_task("collect_limit", lambda:
df.limit(1).collect())
+ @unittest.skipIf(sys.version_info < (3, 3), "Unittest < 3.3 doesn't
support mocking")
+ def test_unbounded_frames(self):
+ from unittest.mock import patch
+ from pyspark.sql import functions as F
+ from pyspark.sql import window
+ import importlib
+
+ df = self.spark.range(0, 3)
+
+ def rows_frame_match():
+ return "ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED
FOLLOWING" in df.select(
+ F.count("*").over(window.Window.rowsBetween(-sys.maxsize,
sys.maxsize))
+ ).columns[0]
+
+ def range_frame_match():
+ return "RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED
FOLLOWING" in df.select(
+ F.count("*").over(window.Window.rangeBetween(-sys.maxsize,
sys.maxsize))
+ ).columns[0]
+
+ with patch("sys.maxsize", 2 ** 31 - 1):
+ importlib.reload(window)
--- End diff --
This is something I don't like, but it looks better than the alternatives:
- Converting thresholds to methods so current `sys.maxsize` is picked up.
- Mocking thresholds as well.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]