(spark) branch master updated: [SPARK-55675][PS][TESTS] Fix tests for window functions

gurwls223 Tue, 24 Feb 2026 22:57:22 -0800

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new b29324602b03 [SPARK-55675][PS][TESTS] Fix tests for window functions
b29324602b03 is described below

commit b29324602b038489bde38c57794c2d4e228634f2
Author: Takuya Ueshin <[email protected]>
AuthorDate: Wed Feb 25 15:55:57 2026 +0900

    [SPARK-55675][PS][TESTS] Fix tests for window functions
    
    ### What changes were proposed in this pull request?
    
    Fixes tests for window functions.
    
    ### Why are the changes needed?
    
    There are test failures related to window functions.
    
    - `pyspark.pandas.tests.frame.test_interpolate`
    - `pyspark.pandas.tests.series.test_interpolate`
    - `pyspark.pandas.tests.window.test_expanding_adv`
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Updated the related tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Codex (GPT-5.3-Codex)
    
    Closes #54469 from ueshin/issues/SPARK-55675/window.
    
    Authored-by: Takuya Ueshin <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../pyspark/pandas/tests/frame/test_interpolate.py | 28 +++++++++++++++-------
 .../pandas/tests/series/test_interpolate.py        | 28 +++++++++++++++-------
 .../pyspark/pandas/tests/window/test_expanding.py  | 15 +++++++-----
 .../pandas/tests/window/test_expanding_adv.py      | 11 ++++++---
 4 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/python/pyspark/pandas/tests/frame/test_interpolate.py b/python/pyspark/pandas/tests/frame/test_interpolate.py
index d61addb3dcc5..2b65d09f006d 100644
--- a/python/pyspark/pandas/tests/frame/test_interpolate.py
+++ b/python/pyspark/pandas/tests/frame/test_interpolate.py
@@ -18,6 +18,7 @@ import numpy as np
 import pandas as pd
 
 import pyspark.pandas as ps
+from pyspark.loose_version import LooseVersion
 from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
 
 
@@ -35,14 +36,25 @@ class FrameInterpolateMixin:
             (4, "backward", "inside"),
             (5, "both", "inside"),
         ]:
-            self.assert_eq(
-                psobj.interpolate(
-                    limit=limit, limit_direction=limit_direction, limit_area=limit_area
-                ).sort_index(),
-                pobj.interpolate(
-                    limit=limit, limit_direction=limit_direction, limit_area=limit_area
-                ).sort_index(),
-            )
+            # pandas 3.0.0 can raise when limit >= len(obj) in interpolate edge cases.
+            effective_limit = limit
+            if LooseVersion(pd.__version__) >= "3.0.0":
+                effective_limit = min(limit, len(pobj) - 1)
+            with self.subTest(
+                limit=effective_limit, limit_direction=limit_direction, limit_area=limit_area
+            ):
+                self.assert_eq(
+                    psobj.interpolate(
+                        limit=effective_limit,
+                        limit_direction=limit_direction,
+                        limit_area=limit_area,
+                    ).sort_index(),
+                    pobj.interpolate(
+                        limit=effective_limit,
+                        limit_direction=limit_direction,
+                        limit_area=limit_area,
+                    ).sort_index(),
+                )
 
     def test_interpolate(self):
         pdf = pd.DataFrame(
diff --git a/python/pyspark/pandas/tests/series/test_interpolate.py b/python/pyspark/pandas/tests/series/test_interpolate.py
index 57b6a57f6c62..87155f9f33b8 100644
--- a/python/pyspark/pandas/tests/series/test_interpolate.py
+++ b/python/pyspark/pandas/tests/series/test_interpolate.py
@@ -18,6 +18,7 @@ import numpy as np
 import pandas as pd
 
 import pyspark.pandas as ps
+from pyspark.loose_version import LooseVersion
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 
 
@@ -35,14 +36,25 @@ class SeriesInterpolateMixin:
             (4, "backward", "inside"),
             (5, "both", "inside"),
         ]:
-            self.assert_eq(
-                psobj.interpolate(
-                    limit=limit, limit_direction=limit_direction, limit_area=limit_area
-                ).sort_index(),
-                pobj.interpolate(
-                    limit=limit, limit_direction=limit_direction, limit_area=limit_area
-                ).sort_index(),
-            )
+            # pandas 3.0.0 can raise when limit >= len(obj) in interpolate edge cases.
+            effective_limit = limit
+            if LooseVersion(pd.__version__) >= "3.0.0":
+                effective_limit = min(limit, len(pobj) - 1)
+            with self.subTest(
+                limit=effective_limit, limit_direction=limit_direction, limit_area=limit_area
+            ):
+                self.assert_eq(
+                    psobj.interpolate(
+                        limit=effective_limit,
+                        limit_direction=limit_direction,
+                        limit_area=limit_area,
+                    ).sort_index(),
+                    pobj.interpolate(
+                        limit=effective_limit,
+                        limit_direction=limit_direction,
+                        limit_area=limit_area,
+                    ).sort_index(),
+                )
 
     def test_interpolate(self):
         pser = pd.Series(
diff --git a/python/pyspark/pandas/tests/window/test_expanding.py b/python/pyspark/pandas/tests/window/test_expanding.py
index 61e9f48a8a2f..90b8803881b1 100644
--- a/python/pyspark/pandas/tests/window/test_expanding.py
+++ b/python/pyspark/pandas/tests/window/test_expanding.py
@@ -23,17 +23,18 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase
 
 
 class ExpandingTestingFuncMixin:
-    def _test_expanding_func(self, ps_func, pd_func=None):
+    def _test_expanding_func(self, ps_func, pd_func=None, *, int_almost=False, float_almost=False):
         if not pd_func:
             pd_func = ps_func
         if isinstance(pd_func, str):
             pd_func = self.convert_str_to_lambda(pd_func)
         if isinstance(ps_func, str):
             ps_func = self.convert_str_to_lambda(ps_func)
+
         pser = pd.Series([1, 2, 3, 7, 9, 8], index=np.random.rand(6), name="a")
         psser = ps.from_pandas(pser)
-        self.assert_eq(ps_func(psser.expanding(2)), pd_func(pser.expanding(2)), almost=True)
-        self.assert_eq(ps_func(psser.expanding(2)), pd_func(pser.expanding(2)), almost=True)
+        self.assert_eq(ps_func(psser.expanding(2)), pd_func(pser.expanding(2)), almost=int_almost)
+        self.assert_eq(ps_func(psser.expanding(2)), pd_func(pser.expanding(2)), almost=int_almost)
 
         # Multiindex
         pser = pd.Series(
@@ -46,14 +47,16 @@ class ExpandingTestingFuncMixin:
             {"a": [1.0, 2.0, 3.0, 2.0], "b": [4.0, 2.0, 3.0, 1.0]}, index=np.random.rand(4)
         )
         psdf = ps.from_pandas(pdf)
-        self.assert_eq(ps_func(psdf.expanding(2)), pd_func(pdf.expanding(2)))
-        self.assert_eq(ps_func(psdf.expanding(2)).sum(), pd_func(pdf.expanding(2)).sum())
+        self.assert_eq(ps_func(psdf.expanding(2)), pd_func(pdf.expanding(2)), almost=float_almost)
+        self.assert_eq(
+            ps_func(psdf.expanding(2)).sum(), pd_func(pdf.expanding(2)).sum(), almost=float_almost
+        )
 
         # Multiindex column
         columns = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")])
         pdf.columns = columns
         psdf.columns = columns
-        self.assert_eq(ps_func(psdf.expanding(2)), pd_func(pdf.expanding(2)))
+        self.assert_eq(ps_func(psdf.expanding(2)), pd_func(pdf.expanding(2)), almost=float_almost)
 
 
 class ExpandingMixin(ExpandingTestingFuncMixin):
diff --git a/python/pyspark/pandas/tests/window/test_expanding_adv.py b/python/pyspark/pandas/tests/window/test_expanding_adv.py
index 5ee43f97e6fb..554c11e46b22 100644
--- a/python/pyspark/pandas/tests/window/test_expanding_adv.py
+++ b/python/pyspark/pandas/tests/window/test_expanding_adv.py
@@ -15,6 +15,9 @@
 # limitations under the License.
 #
 
+import pandas as pd
+
+from pyspark.loose_version import LooseVersion
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 from pyspark.pandas.tests.window.test_expanding import ExpandingTestingFuncMixin
 
@@ -27,13 +30,15 @@ class ExpandingAdvMixin(ExpandingTestingFuncMixin):
         self._test_expanding_func("std")
 
     def test_expanding_var(self):
-        self._test_expanding_func("var")
+        self._test_expanding_func("var", int_almost=True)
 
     def test_expanding_skew(self):
-        self._test_expanding_func("skew")
+        self._test_expanding_func("skew", int_almost=True)
 
     def test_expanding_kurt(self):
-        self._test_expanding_func("kurt")
+        self._test_expanding_func(
+            "kurt", int_almost=True, float_almost=LooseVersion(pd.__version__) >= "3.0.0"
+        )
 
 
 class ExpandingAdvTests(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-55675][PS][TESTS] Fix tests for window functions

Reply via email to