commit python-pandas for openSUSE:Factory

Source-Sync Sat, 27 Jun 2026 09:04:26 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-pandas for openSUSE:Factory 
checked in at 2026-06-27 18:03:04
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-pandas (Old)
 and      /work/SRC/openSUSE:Factory/.python-pandas.new.11887 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-pandas"

Sat Jun 27 18:03:04 2026 rev:82 rq:1361563 version:2.3.3

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-pandas/python-pandas.changes      
2026-06-23 17:40:35.933721471 +0200
+++ /work/SRC/openSUSE:Factory/.python-pandas.new.11887/python-pandas.changes   
2026-06-27 18:03:23.546528220 +0200
@@ -1,0 +2,6 @@
+Mon Jun 22 09:30:04 UTC 2026 - Josef Melcr <[email protected]>
+
+- Add upstream pandas-pr62863.patch and pandas-pr63143.patch to
+  fix testsuite failures with GCC 16
+
+-------------------------------------------------------------------

New:
----
  pandas-pr62863.patch
  pandas-pr63143.patch

----------(New B)----------
  New:
- Add upstream pandas-pr62863.patch and pandas-pr63143.patch to
  fix testsuite failures with GCC 16
  New:
- Add upstream pandas-pr62863.patch and pandas-pr63143.patch to
  fix testsuite failures with GCC 16
----------(New E)----------

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-pandas.spec ++++++
--- /var/tmp/diff_new_pack.RDnFoj/_old  2026-06-27 18:03:29.138715684 +0200
+++ /var/tmp/diff_new_pack.RDnFoj/_new  2026-06-27 18:03:29.150716087 +0200
@@ -75,6 +75,10 @@
 Patch2:         pandas-pr62553-numexpr.patch
 # PATCH-FIX-UPSTREAM pandas-pr63406-meson-types.patch gh#pandas-dev/pandas#63406 BLD: newer versions of meson are pickier about types
 Patch3:         pandas-pr63406-meson-types.patch
+# PATCH-FIX-UPSTREAM pandas-pr62863.patch gh#pandas-dev/pandas#62863 BUG: fix polluted window in skewness computation
+Patch4:         pandas-pr62863.patch
+# PATCH-FIX-UPSTREAM pandas-pr63143.patch gh#pandas-dev/pandas#63143 BUG: fix polluted window in rolling kurt
+Patch5:         pandas-pr63143.patch
 %if !%{with test}
 BuildRequires:  %{python_module Cython >= 3.0.5}
 BuildRequires:  %{python_module devel >= 3.9}

++++++ pandas-pr62863.patch ++++++
>From 88c276a489abd1742c84ca1cafedb1b9e93f083a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <[email protected]>
Date: Wed, 5 Nov 2025 21:06:16 -0300
Subject: [PATCH] BUG: fix polluted window in skewness computation (#62863)

---
 doc/source/whatsnew/v3.0.0.rst       |   1 +
 pandas/_libs/window/aggregations.pyx | 202 ++++++++++++++-------------
 pandas/tests/window/test_rolling.py  |  37 +++--
 3 files changed, 130 insertions(+), 110 deletions(-)

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index 0c8ea28b60ce8..dccd93e8dafd9 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -1,6 +1,7 @@
 # cython: boundscheck=False, wraparound=False, cdivision=True
 
 from libc.math cimport (
+    fabs,
     round,
     signbit,
     sqrt,
@@ -60,6 +61,12 @@ cdef:
     float64_t MAXfloat64 = np.inf
 
     float64_t NaN = <float64_t>np.nan
+    float64_t EpsF64 = np.finfo(np.float64).eps
+
+    # Consider an operation ill-conditioned if
+    # it will only have up to 3 significant digits in base 10 remaining.
+    # https://en.wikipedia.org/wiki/Condition_number
+    float64_t InvCondTol = EpsF64 * 1e3
 
 cdef bint is_monotonic_increasing_start_end_bounds(
     ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
@@ -482,18 +489,15 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
 
 
 cdef float64_t calc_skew(int64_t minp, int64_t nobs,
-                         float64_t x, float64_t xx, float64_t xxx,
+                         float64_t mean, float64_t m2, float64_t m3,
                          int64_t num_consecutive_same_value
                          ) noexcept nogil:
     cdef:
         float64_t result, dnobs
-        float64_t A, B, C, R
+        float64_t moments_ratio, correction
 
     if nobs >= minp:
         dnobs = <float64_t>nobs
-        A = x / dnobs
-        B = xx / dnobs - A * A
-        C = xxx / dnobs - A * A * A - 3 * A * B
 
         if nobs < 3:
             result = NaN
@@ -501,21 +505,21 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
         # uniform case, force result to be 0
         elif num_consecutive_same_value >= nobs:
             result = 0.0
-        # #18044: with uniform distribution, floating issue will
-        #         cause B != 0. and cause the result is a very
+        # #18044: with degenerate distribution, floating issue will
+        #         cause m2 != 0. and cause the result is a very
         #         large number.
         #
         #         in core/nanops.py nanskew/nankurt call the function
         #         _zero_out_fperr(m2) to fix floating error.
         #         if the variance is less than 1e-14, it could be
         #         treat as zero, here we follow the original
-        #         skew/kurt behaviour to check B <= 1e-14
-        elif B <= 1e-14:
+        #         skew/kurt behaviour to check m2 <= n * 1e-14
+        elif m2 <= dnobs * 1e-14:
             result = NaN
         else:
-            R = sqrt(B)
-            result = ((sqrt(dnobs * (dnobs - 1.)) * C) /
-                      ((dnobs - 2) * R * R * R))
+            moments_ratio = m3 / (m2 * sqrt(m2))
+            correction = dnobs * sqrt((dnobs - 1)) / (dnobs - 2)
+            result = moments_ratio * correction
     else:
         result = NaN
 
@@ -523,34 +527,36 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
 
 
 cdef void add_skew(float64_t val, int64_t *nobs,
-                   float64_t *x, float64_t *xx,
-                   float64_t *xxx,
-                   float64_t *compensation_x,
-                   float64_t *compensation_xx,
-                   float64_t *compensation_xxx,
+                   float64_t *mean, float64_t *m2,
+                   float64_t *m3,
+                   bint *numerically_unstable,
                    int64_t *num_consecutive_same_value,
                    float64_t *prev_value,
                    ) noexcept nogil:
     """ add a value from the skew calc """
     cdef:
-        float64_t y, t
+        float64_t n, delta, delta_n, term1, m3_update, new_m3
+
+    # Formulas adapted from
+    # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
 
     # Not NaN
     if val == val:
-        nobs[0] = nobs[0] + 1
-
-        y = val - compensation_x[0]
-        t = x[0] + y
-        compensation_x[0] = t - x[0] - y
-        x[0] = t
-        y = val * val - compensation_xx[0]
-        t = xx[0] + y
-        compensation_xx[0] = t - xx[0] - y
-        xx[0] = t
-        y = val * val * val - compensation_xxx[0]
-        t = xxx[0] + y
-        compensation_xxx[0] = t - xxx[0] - y
-        xxx[0] = t
+        nobs[0] += 1
+        n = <float64_t>(nobs[0])
+        delta = val - mean[0]
+        delta_n = delta / n
+        term1 = delta * delta_n * (n - 1.0)
+
+        m3_update = delta_n * (term1 * (n - 2.0) - 3.0 * m2[0])
+        new_m3 = m3[0] + m3_update
+        if (fabs(m3_update) + fabs(m3[0])) * InvCondTol > fabs(new_m3):
+            # possible catastrophic cancellation
+            numerically_unstable[0] = True
+
+        m3[0] = new_m3
+        m2[0] += term1
+        mean[0] += delta_n
 
         # GH#42064, record num of same values to remove floating point artifacts
         if val == prev_value[0]:
@@ -562,69 +568,63 @@ cdef void add_skew(float64_t val, int64_t *nobs,
 
 
 cdef void remove_skew(float64_t val, int64_t *nobs,
-                      float64_t *x, float64_t *xx,
-                      float64_t *xxx,
-                      float64_t *compensation_x,
-                      float64_t *compensation_xx,
-                      float64_t *compensation_xxx) noexcept nogil:
+                      float64_t *mean, float64_t *m2,
+                      float64_t *m3,
+                      bint *numerically_unstable) noexcept nogil:
     """ remove a value from the skew calc """
     cdef:
-        float64_t y, t
+        float64_t n, delta, delta_n, term1, m3_update, new_m3
+
+    # This is the online update for the central moments
+    # when we remove an observation.
+    #
+    # δ = x - m_{n+1}
+    # m_{n} = m_{n+1} - (δ / n)
+    # m²_n = Σ_{i=1}^{n+1}(x_i - m_{n})² - (x - m_{n})² # uses new mean
+    #      = m²_{n+1} - (δ²/n)*(n+1)
+    # m³_n = Σ_{i=1}^{n+1}(x_i - m_{n})³ - (x - m_{n})³ # uses new mean
+    #      = m³_{n+1} - (δ³/n²)*(n+1)*(n+2) + 3 * m²_{n+1}*(δ/n)
 
     # Not NaN
     if val == val:
-        nobs[0] = nobs[0] - 1
+        nobs[0] -= 1
+        n = <float64_t>(nobs[0])
+        delta = val - mean[0]
+        delta_n = delta / n
+        term1 = delta_n * delta * (n + 1.0)
 
-        y = - val - compensation_x[0]
-        t = x[0] + y
-        compensation_x[0] = t - x[0] - y
-        x[0] = t
-        y = - val * val - compensation_xx[0]
-        t = xx[0] + y
-        compensation_xx[0] = t - xx[0] - y
-        xx[0] = t
-        y = - val * val * val - compensation_xxx[0]
-        t = xxx[0] + y
-        compensation_xxx[0] = t - xxx[0] - y
-        xxx[0] = t
+        m3_update = delta_n * (term1 * (n + 2.0) - 3.0 * m2[0])
+        new_m3 = m3[0] - m3_update
+
+        if (fabs(m3_update) + fabs(m3[0])) * InvCondTol > fabs(new_m3):
+            # possible catastrophic cancellation
+            numerically_unstable[0] = True
+
+        m3[0] = new_m3
+        m2[0] -= term1
+        mean[0] -= delta_n
 
 
-def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
+def roll_skew(const float64_t[:] values, ndarray[int64_t] start,
               ndarray[int64_t] end, int64_t minp) -> np.ndarray:
     cdef:
         Py_ssize_t i, j
-        float64_t val, min_val, mean_val, sum_val = 0
-        float64_t compensation_xxx_add, compensation_xxx_remove
-        float64_t compensation_xx_add, compensation_xx_remove
-        float64_t compensation_x_add, compensation_x_remove
-        float64_t x, xx, xxx
+        float64_t val
+        float64_t mean, m2, m3
         float64_t prev_value
-        int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0
+        int64_t nobs = 0, N = len(start)
         int64_t s, e, num_consecutive_same_value
-        ndarray[float64_t] output, values_copy
+        ndarray[float64_t] output
         bint is_monotonic_increasing_bounds
+        bint requires_recompute, numerically_unstable = False
 
     minp = max(minp, 3)
     is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
         start, end
     )
     output = np.empty(N, dtype=np.float64)
-    min_val = np.nanmin(values)
-    values_copy = np.copy(values)
 
     with nogil:
-        for i in range(0, V):
-            val = values_copy[i]
-            if val == val:
-                nobs_mean += 1
-                sum_val += val
-        mean_val = sum_val / nobs_mean
-        # Other cases would lead to imprecision for smallest values
-        if min_val - mean_val > -1e5:
-            mean_val = round(mean_val)
-            for i in range(0, V):
-                values_copy[i] = values_copy[i] - mean_val
-
         for i in range(0, N):
 
             s = start[i]
@@ -632,46 +632,48 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
 
             # Over the first window, observations can only be added
             # never removed
-            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
-
-                prev_value = values[s]
-                num_consecutive_same_value = 0
-
-                compensation_xxx_add = compensation_xxx_remove = 0
-                compensation_xx_add = compensation_xx_remove = 0
-                compensation_x_add = compensation_x_remove = 0
-                x = xx = xxx = 0
-                nobs = 0
-                for j in range(s, e):
-                    val = values_copy[j]
-                    add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add,
-                             &compensation_xx_add, &compensation_xxx_add,
-                             &num_consecutive_same_value, &prev_value)
-
-            else:
+            requires_recompute = (
+                i == 0
+                or not is_monotonic_increasing_bounds
+                or s >= end[i - 1]
+            )
 
+            if not requires_recompute:
                 # After the first window, observations can both be added
                 # and removed
                 # calculate deletes
                 for j in range(start[i - 1], s):
-                    val = values_copy[j]
-                    remove_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_remove,
-                                &compensation_xx_remove, &compensation_xxx_remove)
+                    val = values[j]
+                    remove_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable)
 
                 # calculate adds
                 for j in range(end[i - 1], e):
-                    val = values_copy[j]
-                    add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add,
-                             &compensation_xx_add, &compensation_xxx_add,
+                    val = values[j]
+                    add_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable,
                              &num_consecutive_same_value, &prev_value)
 
-            output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value)
+            if requires_recompute or numerically_unstable:
+
+                prev_value = values[s]
+                num_consecutive_same_value = 0
+
+                mean = m2 = m3 = 0.0
+                nobs = 0
+
+                for j in range(s, e):
+                    val = values[j]
+                    add_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable,
+                             &num_consecutive_same_value, &prev_value)
+
+                numerically_unstable = False
+
+            output[i] = calc_skew(minp, nobs, mean, m2, m3, num_consecutive_same_value)
 
             if not is_monotonic_increasing_bounds:
                 nobs = 0
-                x = 0.0
-                xx = 0.0
-                xxx = 0.0
+                mean = 0.0
+                m2 = 0.0
+                m3 = 0.0
 
     return output
 
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index 3003b142edd3b..5b00aeed79db6 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -1175,7 +1175,9 @@ def test_rolling_decreasing_indices(method):
     increasing = getattr(df.rolling(window=5), method)()
     decreasing = getattr(df_reverse.rolling(window=5), method)()
 
-    assert np.abs(decreasing.values[::-1][:-4] - increasing.values[4:]).max() < 1e-12
+    tm.assert_almost_equal(
+        decreasing.values[::-1][:-4], increasing.values[4:], atol=1e-12
+    )
 
 
 @pytest.mark.parametrize(
@@ -1441,17 +1443,30 @@ def test_rolling_skew_kurt_numerical_stability(method):
 
 
 @pytest.mark.parametrize(
-    ("method", "values"),
+    ("method", "data", "values"),
     [
-        ("skew", [2.0, 0.854563, 0.0, 1.999984]),
-        ("kurt", [4.0, -1.289256, -1.2, 3.999946]),
+        (
+            "skew",
+            [3000000, 1, 1, 2, 3, 4, 999],
+            [np.nan] * 3 + [2.0, 0.854563, 0.0, 1.999984],
+        ),
+        (
+            "skew",
+            [1e6, -1e6, 1, 2, 3, 4, 5, 6],
+            [np.nan] * 3 + [-5.51135192e-06, -2.0, 0.0, 0.0, 0.0],
+        ),
+        (
+            "kurt",
+            [3000000, 1, 1, 2, 3, 4, 999],
+            [np.nan] * 3 + [4.0, -1.289256, -1.2, 3.999946],
+        ),
     ],
 )
-def test_rolling_skew_kurt_large_value_range(method, values):
-    # GH: 37557
-    s = Series([3000000, 1, 1, 2, 3, 4, 999])
+def test_rolling_skew_kurt_large_value_range(method, data, values):
+    # GH: 37557, 47461
+    s = Series(data)
     result = getattr(s.rolling(4), method)()
-    expected = Series([np.nan] * 3 + values)
+    expected = Series(values)
     tm.assert_series_equal(result, expected)
 
 
@@ -1837,9 +1852,11 @@ def test_rolling_skew_kurt_floating_artifacts():
     sr = Series([1 / 3, 4, 0, 0, 0, 0, 0])
     r = sr.rolling(4)
     result = r.skew()
-    assert (result[-2:] == 0).all()
+    expected = Series([np.nan, np.nan, np.nan, 1.9619045191072484, 2.0, 0.0, 0.0])
+    tm.assert_series_equal(result, expected)
     result = r.kurt()
-    assert (result[-2:] == -3).all()
+    expected = Series([np.nan, np.nan, np.nan, 3.8636048803878786, 4.0, -3.0, -3.0])
+    tm.assert_series_equal(result, expected)
 
 
 def test_numeric_only_frame(arithmetic_win_operators, numeric_only):

++++++ pandas-pr63143.patch ++++++
>From 55864a0a2a8833b9b00b17236aa93c1b859051ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <[email protected]>
Date: Mon, 1 Dec 2025 16:14:20 -0300
Subject: [PATCH] BUG: fix polluted window in rolling kurt (#63143)

---
 doc/source/whatsnew/v3.0.0.rst       |   3 +-
 pandas/_libs/window/aggregations.pyx | 220 +++++++++++++--------------
 pandas/tests/window/test_rolling.py  |  27 +++-
 3 files changed, 130 insertions(+), 120 deletions(-)

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index 89530c6c9c46c..84fa55448fd76 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -2,7 +2,6 @@
 
 from libc.math cimport (
     fabs,
-    round,
     signbit,
     sqrt,
 )
@@ -683,30 +682,22 @@ def roll_skew(const float64_t[:] values, ndarray[int64_t] start,
 
 
 cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
-                         float64_t x, float64_t xx,
-                         float64_t xxx, float64_t xxxx,
+                         float64_t m2, float64_t m4,
                          int64_t num_consecutive_same_value,
                          ) noexcept nogil:
     cdef:
-        float64_t result, dnobs
-        float64_t A, B, C, D, R, K
+        float64_t result, dnobs, term1, term2, inner, correction
+        float64_t moments_ratio
 
     if nobs >= minp:
         if nobs < 4:
             result = NaN
         # GH 42064 46431
-        # uniform case, force result to be -3.
+        # degenerate case, force result to be -3.
         elif num_consecutive_same_value >= nobs:
             result = -3.
         else:
             dnobs = <float64_t>nobs
-            A = x / dnobs
-            R = A * A
-            B = xx / dnobs - R
-            R = R * A
-            C = xxx / dnobs - R - 3 * A * B
-            R = R * A
-            D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
 
             # #18044: with uniform distribution, floating issue will
             #         cause B != 0. and cause the result is a very
@@ -716,12 +707,17 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
             #         _zero_out_fperr(m2) to fix floating error.
             #         if the variance is less than 1e-14, it could be
             #         treat as zero, here we follow the original
-            #         skew/kurt behaviour to check B <= 1e-14
-            if B <= 1e-14:
+            #         skew/kurt behaviour to check m2 <= n * 1e-14
+            if m2 <= dnobs * 1e-14:
                 result = NaN
             else:
-                K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
-                result = K / ((dnobs - 2.) * (dnobs - 3.))
+                moments_ratio = m4 / (m2 * m2)
+                term1 = dnobs * (dnobs + 1.0) * moments_ratio
+                term2 = 3.0 * (dnobs - 1.0)
+                inner = term1 - term2
+
+                correction = (dnobs - 1.0) / ((dnobs - 2.0) * (dnobs - 3.0))
+                result = correction * inner
     else:
         result = NaN
 
@@ -729,39 +725,43 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
 
 
 cdef void add_kurt(float64_t val, int64_t *nobs,
-                   float64_t *x, float64_t *xx,
-                   float64_t *xxx, float64_t *xxxx,
-                   float64_t *compensation_x,
-                   float64_t *compensation_xx,
-                   float64_t *compensation_xxx,
-                   float64_t *compensation_xxxx,
+                   float64_t *mean, float64_t *m2,
+                   float64_t *m3, float64_t *m4,
+                   bint *numerically_unstable,
                    int64_t *num_consecutive_same_value,
                    float64_t *prev_value
                    ) noexcept nogil:
     """ add a value from the kurotic calc """
     cdef:
-        float64_t y, t
+        float64_t n, delta, delta_n, term1, m4_update, new_m4
+
+    # Formulas adapted from
+    # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
 
     # Not NaN
     if val == val:
-        nobs[0] = nobs[0] + 1
+        nobs[0] += 1
+        n = <float64_t>(nobs[0])
+        delta = val - mean[0]
+        delta_n = delta / n
+        term1 = delta * delta_n * (n - 1.0)
+
+        m4_update = delta_n * (
+                -4.0 * m3[0]
+                + delta_n * (
+                    6 * m2[0] + term1 * (n * n - 3.0 * n + 3.0)
+                    )
+                )
+        new_m4 = m4[0] + m4_update
+
+        if (fabs(m4_update) + fabs(m4[0])) * InvCondTol > fabs(new_m4):
+            # possible catastrophic cancellation
+            numerically_unstable[0] = True
 
-        y = val - compensation_x[0]
-        t = x[0] + y
-        compensation_x[0] = t - x[0] - y
-        x[0] = t
-        y = val * val - compensation_xx[0]
-        t = xx[0] + y
-        compensation_xx[0] = t - xx[0] - y
-        xx[0] = t
-        y = val * val * val - compensation_xxx[0]
-        t = xxx[0] + y
-        compensation_xxx[0] = t - xxx[0] - y
-        xxx[0] = t
-        y = val * val * val * val - compensation_xxxx[0]
-        t = xxxx[0] + y
-        compensation_xxxx[0] = t - xxxx[0] - y
-        xxxx[0] = t
+        m4[0] = new_m4
+        m3[0] += delta_n * (term1 * (n - 2.0) - 3.0 * m2[0])
+        m2[0] += term1
+        mean[0] += delta_n
 
         # GH#42064, record num of same values to remove floating point artifacts
         if val == prev_value[0]:
@@ -773,75 +773,60 @@ cdef void add_kurt(float64_t val, int64_t *nobs,
 
 
 cdef void remove_kurt(float64_t val, int64_t *nobs,
-                      float64_t *x, float64_t *xx,
-                      float64_t *xxx, float64_t *xxxx,
-                      float64_t *compensation_x,
-                      float64_t *compensation_xx,
-                      float64_t *compensation_xxx,
-                      float64_t *compensation_xxxx) noexcept nogil:
+                      float64_t *mean, float64_t *m2,
+                      float64_t *m3, float64_t *m4,
+                      bint *numerically_unstable,
+                      ) noexcept nogil:
     """ remove a value from the kurotic calc """
     cdef:
-        float64_t y, t
+        float64_t n, delta, delta_n, term1, m4_update, new_m4
 
     # Not NaN
     if val == val:
-        nobs[0] = nobs[0] - 1
+        nobs[0] -= 1
+        n = <float64_t>(nobs[0])
+        delta = val - mean[0]
+        delta_n = delta / n
+        term1 = delta_n * delta * (n + 1.0)
+
+        m4_update = delta_n * (
+                4.0 * m3[0]
+                + delta_n * (
+                    6.0 * m2[0]
+                    - term1 * (n * n + 3.0 * n + 3.0)
+                    )
+                )
+        new_m4 = m4[0] + m4_update
+
+        if (fabs(m4_update) + fabs(m4[0])) * InvCondTol > fabs(new_m4):
+            # possible catastrophic cancellation
+            numerically_unstable[0] = True
 
-        y = - val - compensation_x[0]
-        t = x[0] + y
-        compensation_x[0] = t - x[0] - y
-        x[0] = t
-        y = - val * val - compensation_xx[0]
-        t = xx[0] + y
-        compensation_xx[0] = t - xx[0] - y
-        xx[0] = t
-        y = - val * val * val - compensation_xxx[0]
-        t = xxx[0] + y
-        compensation_xxx[0] = t - xxx[0] - y
-        xxx[0] = t
-        y = - val * val * val * val - compensation_xxxx[0]
-        t = xxxx[0] + y
-        compensation_xxxx[0] = t - xxxx[0] - y
-        xxxx[0] = t
-
-
-def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
+        m4[0] = new_m4
+        m3[0] -= delta_n * (term1 * (n + 2.0) - 3.0 * m2[0])
+        m2[0] -= term1
+        mean[0] -= delta_n
+
+
+def roll_kurt(const float64_t[:] values, ndarray[int64_t] start,
               ndarray[int64_t] end, int64_t minp) -> np.ndarray:
     cdef:
         Py_ssize_t i, j
-        float64_t val, mean_val, min_val, sum_val = 0
-        float64_t compensation_xxxx_add, compensation_xxxx_remove
-        float64_t compensation_xxx_remove, compensation_xxx_add
-        float64_t compensation_xx_remove, compensation_xx_add
-        float64_t compensation_x_remove, compensation_x_add
-        float64_t x, xx, xxx, xxxx
+        float64_t mean, m2, m3, m4
         float64_t prev_value
         int64_t nobs, s, e, num_consecutive_same_value
-        int64_t N = len(start), V = len(values), nobs_mean = 0
-        ndarray[float64_t] output, values_copy
+        int64_t N = len(start)
+        ndarray[float64_t] output
         bint is_monotonic_increasing_bounds
+        bint requires_recompute, numerically_unstable = False
 
     minp = max(minp, 4)
     is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
         start, end
     )
     output = np.empty(N, dtype=np.float64)
-    values_copy = np.copy(values)
-    min_val = np.nanmin(values)
 
     with nogil:
-        for i in range(0, V):
-            val = values_copy[i]
-            if val == val:
-                nobs_mean += 1
-                sum_val += val
-        mean_val = sum_val / nobs_mean
-        # Other cases would lead to imprecision for smallest values
-        if min_val - mean_val > -1e4:
-            mean_val = round(mean_val)
-            for i in range(0, V):
-                values_copy[i] = values_copy[i] - mean_val
-
         for i in range(0, N):
 
             s = start[i]
@@ -849,49 +834,48 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
 
             # Over the first window, observations can only be added
             # never removed
-            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
-
-                prev_value = values[s]
-                num_consecutive_same_value = 0
-
-                compensation_xxxx_add = compensation_xxxx_remove = 0
-                compensation_xxx_remove = compensation_xxx_add = 0
-                compensation_xx_remove = compensation_xx_add = 0
-                compensation_x_remove = compensation_x_add = 0
-                x = xx = xxx = xxxx = 0
-                nobs = 0
-                for j in range(s, e):
-                    add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
-                             &compensation_x_add, &compensation_xx_add,
-                             &compensation_xxx_add, &compensation_xxxx_add,
-                             &num_consecutive_same_value, &prev_value)
+            requires_recompute = (
+                i == 0
+                or not is_monotonic_increasing_bounds
+                or s >= end[i - 1]
+            )
 
-            else:
+            if not requires_recompute:
 
                 # After the first window, observations can both be added
                 # and removed
                 # calculate deletes
                 for j in range(start[i - 1], s):
-                    remove_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
-                                &compensation_x_remove, &compensation_xx_remove,
-                                &compensation_xxx_remove, &compensation_xxxx_remove)
+                    remove_kurt(values[j], &nobs, &mean, &m2, &m3, &m4,
+                                &numerically_unstable)
 
                 # calculate adds
                 for j in range(end[i - 1], e):
-                    add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
-                             &compensation_x_add, &compensation_xx_add,
-                             &compensation_xxx_add, &compensation_xxxx_add,
+                    add_kurt(values[j], &nobs, &mean, &m2, &m3, &m4,
+                             &numerically_unstable,
+                             &num_consecutive_same_value, &prev_value)
+
+            if requires_recompute or numerically_unstable:
+
+                prev_value = values[s]
+                num_consecutive_same_value = 0
+
+                mean = m2 = m3 = m4 = 0.0
+                nobs = 0
+                for j in range(s, e):
+                    add_kurt(values[j], &nobs, &mean, &m2, &m3, &m4,
+                             &numerically_unstable,
                              &num_consecutive_same_value, &prev_value)
 
-            output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx,
+            output[i] = calc_kurt(minp, nobs, m2, m4,
                                   num_consecutive_same_value)
 
             if not is_monotonic_increasing_bounds:
                 nobs = 0
-                x = 0.0
-                xx = 0.0
-                xxx = 0.0
-                xxxx = 0.0
+                mean = 0.0
+                m2 = 0.0
+                m3 = 0.0
+                m4 = 0.0
 
     return output
 
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index 91e1dd1a50719..9efc3f473cfcb 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -1516,16 +1516,41 @@ def test_rolling_skew_kurt_numerical_stability(method):
             [3000000, 1, 1, 2, 3, 4, 999],
             [np.nan] * 3 + [4.0, -1.289256, -1.2, 3.999946],
         ),
+        (
+            "kurt",
+            [1e6, -1e6, 1, 2, 3, 4, 5, 6],
+            [np.nan] * 3 + [1.5, 4.0, -1.2, -1.2, -1.2],
+        ),
     ],
 )
 def test_rolling_skew_kurt_large_value_range(method, data, values):
-    # GH: 37557, 47461
+    # GH: 37557, 47461, 61416
     s = Series(data)
     result = getattr(s.rolling(4), method)()
     expected = Series(values)
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize("method", ["skew", "kurt"])
+def test_same_result_with_different_lengths(method):
+    # GH-54380
+    len_smaller = 10
+    len_bigger = 12
+    window_size = 8
+
+    rng = np.random.default_rng(2)
+    data = rng.normal(loc=0.0, scale=1e3, size=len_bigger)
+    window_smaller = Series(data[:len_smaller]).rolling(window_size)
+    window_bigger = Series(data).rolling(window_size)
+
+    result_smaller = getattr(window_smaller, method)()
+    result_bigger = getattr(window_bigger, method)()
+
+    result_bigger_trimmed = result_bigger[:len_smaller]
+
+    tm.assert_series_equal(result_smaller, result_bigger_trimmed, check_exact=True)
+
+
 def test_invalid_method():
     with pytest.raises(ValueError, match="method must be 'table' or 'single"):
         Series(range(1)).rolling(1, method="foo")

commit python-pandas for openSUSE:Factory

Reply via email to