This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new aee49e16188 [SPARK-41301][CONNECT] Homogenize Behavior for SparkSession.range() aee49e16188 is described below commit aee49e161887e3dc15701d2f1c98ddf75e3ceeac Author: Martin Grund <martin.gr...@databricks.com> AuthorDate: Mon Nov 28 14:47:40 2022 -0400 [SPARK-41301][CONNECT] Homogenize Behavior for SparkSession.range() ### What changes were proposed in this pull request? In PySpark, the `end` parameter of `SparkSession.range` is optional: when it is omitted, the first argument is interpreted as `end` and `start` implicitly defaults to `0`. This patch makes Spark Connect behave the same way. ### Why are the changes needed? Compatibility. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests (UT) Closes #38822 from grundprinzip/SPARK-41301. Authored-by: Martin Grund <martin.gr...@databricks.com> Signed-off-by: Herman van Hovell <her...@databricks.com> --- python/pyspark/sql/connect/session.py | 10 ++++++++-- python/pyspark/sql/tests/connect/test_connect_basic.py | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index c9b76cf47f9..2e24f4e7971 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -257,7 +257,7 @@ class SparkSession(object): def range( self, start: int, - end: int, + end: Optional[int] = None, step: int = 1, numPartitions: Optional[int] = None, ) -> DataFrame: @@ -283,6 +283,12 @@ class SparkSession(object): ------- :class:`DataFrame` """ + if end is None: + actual_end = start + start = 0 + else: + actual_end = end + return DataFrame.withPlan( - Range(start=start, end=end, step=step, num_partitions=numPartitions), self + Range(start=start, end=actual_end, step=step, num_partitions=numPartitions), self ) diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index 
e0f5f23fdb4..028819a88ca 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -363,6 +363,10 @@ class SparkConnectTests(SparkConnectSQLTestCase): self.connect.range(start=0, end=10, step=3, numPartitions=2).toPandas(), self.spark.range(start=0, end=10, step=3, numPartitions=2).toPandas(), ) + # SPARK-41301 + self.assert_eq( + self.connect.range(10).toPandas(), self.connect.range(start=0, end=10).toPandas() + ) def test_create_global_temp_view(self): # SPARK-41127: test global temp view creation. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org