This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b29324602b03 [SPARK-55675][PS][TESTS] Fix tests for window functions
b29324602b03 is described below
commit b29324602b038489bde38c57794c2d4e228634f2
Author: Takuya Ueshin <[email protected]>
AuthorDate: Wed Feb 25 15:55:57 2026 +0900
[SPARK-55675][PS][TESTS] Fix tests for window functions
### What changes were proposed in this pull request?
Fixes tests for window functions.
### Why are the changes needed?
There are test failures related to window functions in the following test modules:
- `pyspark.pandas.tests.frame.test_interpolate`
- `pyspark.pandas.tests.series.test_interpolate`
- `pyspark.pandas.tests.window.test_expanding_adv`
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Updated the related tests.
### Was this patch authored or co-authored using generative AI tooling?
Codex (GPT-5.3-Codex)
Closes #54469 from ueshin/issues/SPARK-55675/window.
Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../pyspark/pandas/tests/frame/test_interpolate.py | 28 +++++++++++++++-------
.../pandas/tests/series/test_interpolate.py | 28 +++++++++++++++-------
.../pyspark/pandas/tests/window/test_expanding.py | 15 +++++++-----
.../pandas/tests/window/test_expanding_adv.py | 11 ++++++---
4 files changed, 57 insertions(+), 25 deletions(-)
diff --git a/python/pyspark/pandas/tests/frame/test_interpolate.py b/python/pyspark/pandas/tests/frame/test_interpolate.py
index d61addb3dcc5..2b65d09f006d 100644
--- a/python/pyspark/pandas/tests/frame/test_interpolate.py
+++ b/python/pyspark/pandas/tests/frame/test_interpolate.py
@@ -18,6 +18,7 @@ import numpy as np
import pandas as pd
import pyspark.pandas as ps
+from pyspark.loose_version import LooseVersion
from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils
@@ -35,14 +36,25 @@ class FrameInterpolateMixin:
(4, "backward", "inside"),
(5, "both", "inside"),
]:
- self.assert_eq(
- psobj.interpolate(
- limit=limit, limit_direction=limit_direction,
limit_area=limit_area
- ).sort_index(),
- pobj.interpolate(
- limit=limit, limit_direction=limit_direction,
limit_area=limit_area
- ).sort_index(),
- )
+ # pandas 3.0.0 can raise when limit >= len(obj) in interpolate
edge cases.
+ effective_limit = limit
+ if LooseVersion(pd.__version__) >= "3.0.0":
+ effective_limit = min(limit, len(pobj) - 1)
+ with self.subTest(
+ limit=effective_limit, limit_direction=limit_direction,
limit_area=limit_area
+ ):
+ self.assert_eq(
+ psobj.interpolate(
+ limit=effective_limit,
+ limit_direction=limit_direction,
+ limit_area=limit_area,
+ ).sort_index(),
+ pobj.interpolate(
+ limit=effective_limit,
+ limit_direction=limit_direction,
+ limit_area=limit_area,
+ ).sort_index(),
+ )
def test_interpolate(self):
pdf = pd.DataFrame(
diff --git a/python/pyspark/pandas/tests/series/test_interpolate.py b/python/pyspark/pandas/tests/series/test_interpolate.py
index 57b6a57f6c62..87155f9f33b8 100644
--- a/python/pyspark/pandas/tests/series/test_interpolate.py
+++ b/python/pyspark/pandas/tests/series/test_interpolate.py
@@ -18,6 +18,7 @@ import numpy as np
import pandas as pd
import pyspark.pandas as ps
+from pyspark.loose_version import LooseVersion
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@@ -35,14 +36,25 @@ class SeriesInterpolateMixin:
(4, "backward", "inside"),
(5, "both", "inside"),
]:
- self.assert_eq(
- psobj.interpolate(
- limit=limit, limit_direction=limit_direction,
limit_area=limit_area
- ).sort_index(),
- pobj.interpolate(
- limit=limit, limit_direction=limit_direction,
limit_area=limit_area
- ).sort_index(),
- )
+ # pandas 3.0.0 can raise when limit >= len(obj) in interpolate
edge cases.
+ effective_limit = limit
+ if LooseVersion(pd.__version__) >= "3.0.0":
+ effective_limit = min(limit, len(pobj) - 1)
+ with self.subTest(
+ limit=effective_limit, limit_direction=limit_direction,
limit_area=limit_area
+ ):
+ self.assert_eq(
+ psobj.interpolate(
+ limit=effective_limit,
+ limit_direction=limit_direction,
+ limit_area=limit_area,
+ ).sort_index(),
+ pobj.interpolate(
+ limit=effective_limit,
+ limit_direction=limit_direction,
+ limit_area=limit_area,
+ ).sort_index(),
+ )
def test_interpolate(self):
pser = pd.Series(
diff --git a/python/pyspark/pandas/tests/window/test_expanding.py b/python/pyspark/pandas/tests/window/test_expanding.py
index 61e9f48a8a2f..90b8803881b1 100644
--- a/python/pyspark/pandas/tests/window/test_expanding.py
+++ b/python/pyspark/pandas/tests/window/test_expanding.py
@@ -23,17 +23,18 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase
class ExpandingTestingFuncMixin:
- def _test_expanding_func(self, ps_func, pd_func=None):
+ def _test_expanding_func(self, ps_func, pd_func=None, *, int_almost=False,
float_almost=False):
if not pd_func:
pd_func = ps_func
if isinstance(pd_func, str):
pd_func = self.convert_str_to_lambda(pd_func)
if isinstance(ps_func, str):
ps_func = self.convert_str_to_lambda(ps_func)
+
pser = pd.Series([1, 2, 3, 7, 9, 8], index=np.random.rand(6), name="a")
psser = ps.from_pandas(pser)
- self.assert_eq(ps_func(psser.expanding(2)),
pd_func(pser.expanding(2)), almost=True)
- self.assert_eq(ps_func(psser.expanding(2)),
pd_func(pser.expanding(2)), almost=True)
+ self.assert_eq(ps_func(psser.expanding(2)),
pd_func(pser.expanding(2)), almost=int_almost)
+ self.assert_eq(ps_func(psser.expanding(2)),
pd_func(pser.expanding(2)), almost=int_almost)
# Multiindex
pser = pd.Series(
@@ -46,14 +47,16 @@ class ExpandingTestingFuncMixin:
{"a": [1.0, 2.0, 3.0, 2.0], "b": [4.0, 2.0, 3.0, 1.0]},
index=np.random.rand(4)
)
psdf = ps.from_pandas(pdf)
- self.assert_eq(ps_func(psdf.expanding(2)), pd_func(pdf.expanding(2)))
- self.assert_eq(ps_func(psdf.expanding(2)).sum(),
pd_func(pdf.expanding(2)).sum())
+ self.assert_eq(ps_func(psdf.expanding(2)), pd_func(pdf.expanding(2)),
almost=float_almost)
+ self.assert_eq(
+ ps_func(psdf.expanding(2)).sum(), pd_func(pdf.expanding(2)).sum(),
almost=float_almost
+ )
# Multiindex column
columns = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")])
pdf.columns = columns
psdf.columns = columns
- self.assert_eq(ps_func(psdf.expanding(2)), pd_func(pdf.expanding(2)))
+ self.assert_eq(ps_func(psdf.expanding(2)), pd_func(pdf.expanding(2)),
almost=float_almost)
class ExpandingMixin(ExpandingTestingFuncMixin):
diff --git a/python/pyspark/pandas/tests/window/test_expanding_adv.py b/python/pyspark/pandas/tests/window/test_expanding_adv.py
index 5ee43f97e6fb..554c11e46b22 100644
--- a/python/pyspark/pandas/tests/window/test_expanding_adv.py
+++ b/python/pyspark/pandas/tests/window/test_expanding_adv.py
@@ -15,6 +15,9 @@
# limitations under the License.
#
+import pandas as pd
+
+from pyspark.loose_version import LooseVersion
from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.pandas.tests.window.test_expanding import
ExpandingTestingFuncMixin
@@ -27,13 +30,15 @@ class ExpandingAdvMixin(ExpandingTestingFuncMixin):
self._test_expanding_func("std")
def test_expanding_var(self):
- self._test_expanding_func("var")
+ self._test_expanding_func("var", int_almost=True)
def test_expanding_skew(self):
- self._test_expanding_func("skew")
+ self._test_expanding_func("skew", int_almost=True)
def test_expanding_kurt(self):
- self._test_expanding_func("kurt")
+ self._test_expanding_func(
+ "kurt", int_almost=True, float_almost=LooseVersion(pd.__version__)
>= "3.0.0"
+ )
class ExpandingAdvTests(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]