Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-pandas for openSUSE:Factory checked in at 2026-06-27 18:03:04 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-pandas (Old) and /work/SRC/openSUSE:Factory/.python-pandas.new.11887 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-pandas" Sat Jun 27 18:03:04 2026 rev:82 rq:1361563 version:2.3.3 Changes: -------- --- /work/SRC/openSUSE:Factory/python-pandas/python-pandas.changes 2026-06-23 17:40:35.933721471 +0200 +++ /work/SRC/openSUSE:Factory/.python-pandas.new.11887/python-pandas.changes 2026-06-27 18:03:23.546528220 +0200 @@ -1,0 +2,6 @@ +Mon Jun 22 09:30:04 UTC 2026 - Josef Melcr <[email protected]> + +- Add upstream pandas-pr62863.patch and pandas-pr63143.patch to + fix testsuite failures with GCC 16 + +------------------------------------------------------------------- New: ---- pandas-pr62863.patch pandas-pr63143.patch ----------(New B)---------- New: - Add upstream pandas-pr62863.patch and pandas-pr63143.patch to fix testsuite failures with GCC 16 New: - Add upstream pandas-pr62863.patch and pandas-pr63143.patch to fix testsuite failures with GCC 16 ----------(New E)---------- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-pandas.spec ++++++ --- /var/tmp/diff_new_pack.RDnFoj/_old 2026-06-27 18:03:29.138715684 +0200 +++ /var/tmp/diff_new_pack.RDnFoj/_new 2026-06-27 18:03:29.150716087 +0200 @@ -75,6 +75,10 @@ Patch2: pandas-pr62553-numexpr.patch # PATCH-FIX-UPSTREAM pandas-pr63406-meson-types.patch gh#pandas-dev/pandas#63406 BLD: newer versions of meson are pickier about types Patch3: pandas-pr63406-meson-types.patch +# PATCH-FIX-UPSTREAM pandas-pr62863.patch gh#pandas-dev/pandas#62863 BUG: fix polluted window in skewness computation +Patch4: pandas-pr62863.patch +# PATCH-FIX-UPSTREAM pandas-pr63143.patch gh#pandas-dev/pandas#63143 BUG: fix polluted window in rolling kurt +Patch5: pandas-pr63143.patch %if !%{with test} BuildRequires: %{python_module Cython >= 3.0.5} BuildRequires: %{python_module devel >= 3.9} ++++++ pandas-pr62863.patch ++++++ >From 88c276a489abd1742c84ca1cafedb1b9e93f083a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <[email protected]> Date: Wed, 5 Nov 2025 21:06:16 -0300 Subject: [PATCH] BUG: fix polluted window in skewness computation (#62863) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/window/aggregations.pyx | 202 ++++++++++++++------------- pandas/tests/window/test_rolling.py | 37 +++-- 3 files changed, 130 insertions(+), 110 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 0c8ea28b60ce8..dccd93e8dafd9 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1,6 +1,7 @@ # cython: boundscheck=False, wraparound=False, cdivision=True from libc.math cimport ( + fabs, round, signbit, sqrt, @@ -60,6 +61,12 @@ cdef: float64_t MAXfloat64 = np.inf float64_t NaN = <float64_t>np.nan + float64_t EpsF64 = np.finfo(np.float64).eps + + # Consider an operation ill-conditioned if + # it will only have up to 3 significant digits in base 10 remaining. + # https://en.wikipedia.org/wiki/Condition_number + float64_t InvCondTol = EpsF64 * 1e3 cdef bint is_monotonic_increasing_start_end_bounds( ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end @@ -482,18 +489,15 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, cdef float64_t calc_skew(int64_t minp, int64_t nobs, - float64_t x, float64_t xx, float64_t xxx, + float64_t mean, float64_t m2, float64_t m3, int64_t num_consecutive_same_value ) noexcept nogil: cdef: float64_t result, dnobs - float64_t A, B, C, R + float64_t moments_ratio, correction if nobs >= minp: dnobs = <float64_t>nobs - A = x / dnobs - B = xx / dnobs - A * A - C = xxx / dnobs - A * A * A - 3 * A * B if nobs < 3: result = NaN @@ -501,21 +505,21 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs, # uniform case, force result to be 0 elif num_consecutive_same_value >= nobs: result = 0.0 - # #18044: with uniform distribution, floating issue will - # cause B != 0. and cause the result is a very + # #18044: with degenerate distribution, floating issue will + # cause m2 != 0. and cause the result is a very # large number. # # in core/nanops.py nanskew/nankurt call the function # _zero_out_fperr(m2) to fix floating error. # if the variance is less than 1e-14, it could be # treat as zero, here we follow the original - # skew/kurt behaviour to check B <= 1e-14 - elif B <= 1e-14: + # skew/kurt behaviour to check m2 <= n * 1e-14 + elif m2 <= dnobs * 1e-14: result = NaN else: - R = sqrt(B) - result = ((sqrt(dnobs * (dnobs - 1.)) * C) / - ((dnobs - 2) * R * R * R)) + moments_ratio = m3 / (m2 * sqrt(m2)) + correction = dnobs * sqrt((dnobs - 1)) / (dnobs - 2) + result = moments_ratio * correction else: result = NaN @@ -523,34 +527,36 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs, cdef void add_skew(float64_t val, int64_t *nobs, - float64_t *x, float64_t *xx, - float64_t *xxx, - float64_t *compensation_x, - float64_t *compensation_xx, - float64_t *compensation_xxx, + float64_t *mean, float64_t *m2, + float64_t *m3, + bint *numerically_unstable, int64_t *num_consecutive_same_value, float64_t *prev_value, ) noexcept nogil: """ add a value from the skew calc """ cdef: - float64_t y, t + float64_t n, delta, delta_n, term1, m3_update, new_m3 + + # Formulas adapted from + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics # Not NaN if val == val: - nobs[0] = nobs[0] + 1 - - y = val - compensation_x[0] - t = x[0] + y - compensation_x[0] = t - x[0] - y - x[0] = t - y = val * val - compensation_xx[0] - t = xx[0] + y - compensation_xx[0] = t - xx[0] - y - xx[0] = t - y = val * val * val - compensation_xxx[0] - t = xxx[0] + y - compensation_xxx[0] = t - xxx[0] - y - xxx[0] = t + nobs[0] += 1 + n = <float64_t>(nobs[0]) + delta = val - mean[0] + delta_n = delta / n + term1 = delta * delta_n * (n - 1.0) + + m3_update = delta_n * (term1 * (n - 2.0) - 3.0 * m2[0]) + new_m3 = m3[0] + m3_update + if (fabs(m3_update) + fabs(m3[0])) * InvCondTol > fabs(new_m3): + # possible catastrophic cancellation + numerically_unstable[0] = True + + m3[0] = new_m3 + m2[0] += term1 + mean[0] += delta_n # GH#42064, record num of same values to remove floating point artifacts if val == prev_value[0]: @@ -562,69 +568,63 @@ cdef void add_skew(float64_t val, int64_t *nobs, cdef void remove_skew(float64_t val, int64_t *nobs, - float64_t *x, float64_t *xx, - float64_t *xxx, - float64_t *compensation_x, - float64_t *compensation_xx, - float64_t *compensation_xxx) noexcept nogil: + float64_t *mean, float64_t *m2, + float64_t *m3, + bint *numerically_unstable) noexcept nogil: """ remove a value from the skew calc """ cdef: - float64_t y, t + float64_t n, delta, delta_n, term1, m3_update, new_m3 + + # This is the online update for the central moments + # when we remove an observation. + # + # δ = x - m_{n+1} + # m_{n} = m_{n+1} - (δ / n) + # m²_n = Σ_{i=1}^{n+1}(x_i - m_{n})² - (x - m_{n})² # uses new mean + # = m²_{n+1} - (δ²/n)*(n+1) + # m³_n = Σ_{i=1}^{n+1}(x_i - m_{n})³ - (x - m_{n})³ # uses new mean + # = m³_{n+1} - (δ³/n²)*(n+1)*(n+2) + 3 * m²_{n+1}*(δ/n) # Not NaN if val == val: - nobs[0] = nobs[0] - 1 + nobs[0] -= 1 + n = <float64_t>(nobs[0]) + delta = val - mean[0] + delta_n = delta / n + term1 = delta_n * delta * (n + 1.0) - y = - val - compensation_x[0] - t = x[0] + y - compensation_x[0] = t - x[0] - y - x[0] = t - y = - val * val - compensation_xx[0] - t = xx[0] + y - compensation_xx[0] = t - xx[0] - y - xx[0] = t - y = - val * val * val - compensation_xxx[0] - t = xxx[0] + y - compensation_xxx[0] = t - xxx[0] - y - xxx[0] = t + m3_update = delta_n * (term1 * (n + 2.0) - 3.0 * m2[0]) + new_m3 = m3[0] - m3_update + + if (fabs(m3_update) + fabs(m3[0])) * InvCondTol > fabs(new_m3): + # possible catastrophic cancellation + numerically_unstable[0] = True + + m3[0] = new_m3 + m2[0] -= term1 + mean[0] -= delta_n -def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, +def roll_skew(const float64_t[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j - float64_t val, min_val, mean_val, sum_val = 0 - float64_t compensation_xxx_add, compensation_xxx_remove - float64_t compensation_xx_add, compensation_xx_remove - float64_t compensation_x_add, compensation_x_remove - float64_t x, xx, xxx + float64_t val + float64_t mean, m2, m3 float64_t prev_value - int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0 + int64_t nobs = 0, N = len(start) int64_t s, e, num_consecutive_same_value - ndarray[float64_t] output, values_copy + ndarray[float64_t] output bint is_monotonic_increasing_bounds + bint requires_recompute, numerically_unstable = False minp = max(minp, 3) is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( start, end ) output = np.empty(N, dtype=np.float64) - min_val = np.nanmin(values) - values_copy = np.copy(values) with nogil: - for i in range(0, V): - val = values_copy[i] - if val == val: - nobs_mean += 1 - sum_val += val - mean_val = sum_val / nobs_mean - # Other cases would lead to imprecision for smallest values - if min_val - mean_val > -1e5: - mean_val = round(mean_val) - for i in range(0, V): - values_copy[i] = values_copy[i] - mean_val - for i in range(0, N): s = start[i] @@ -632,46 +632,48 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, # Over the first window, observations can only be added # never removed - if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: - - prev_value = values[s] - num_consecutive_same_value = 0 - - compensation_xxx_add = compensation_xxx_remove = 0 - compensation_xx_add = compensation_xx_remove = 0 - compensation_x_add = compensation_x_remove = 0 - x = xx = xxx = 0 - nobs = 0 - for j in range(s, e): - val = values_copy[j] - add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add, - &compensation_xx_add, &compensation_xxx_add, - &num_consecutive_same_value, &prev_value) - - else: + requires_recompute = ( + i == 0 + or not is_monotonic_increasing_bounds + or s >= end[i - 1] + ) + if not requires_recompute: # After the first window, observations can both be added # and removed # calculate deletes for j in range(start[i - 1], s): - val = values_copy[j] - remove_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_remove, - &compensation_xx_remove, &compensation_xxx_remove) + val = values[j] + remove_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable) # calculate adds for j in range(end[i - 1], e): - val = values_copy[j] - add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add, - &compensation_xx_add, &compensation_xxx_add, + val = values[j] + add_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable, &num_consecutive_same_value, &prev_value) - output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value) + if requires_recompute or numerically_unstable: + + prev_value = values[s] + num_consecutive_same_value = 0 + + mean = m2 = m3 = 0.0 + nobs = 0 + + for j in range(s, e): + val = values[j] + add_skew(val, &nobs, &mean, &m2, &m3, &numerically_unstable, + &num_consecutive_same_value, &prev_value) + + numerically_unstable = False + + output[i] = calc_skew(minp, nobs, mean, m2, m3, num_consecutive_same_value) if not is_monotonic_increasing_bounds: nobs = 0 - x = 0.0 - xx = 0.0 - xxx = 0.0 + mean = 0.0 + m2 = 0.0 + m3 = 0.0 return output diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 3003b142edd3b..5b00aeed79db6 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1175,7 +1175,9 @@ def test_rolling_decreasing_indices(method): increasing = getattr(df.rolling(window=5), method)() decreasing = getattr(df_reverse.rolling(window=5), method)() - assert np.abs(decreasing.values[::-1][:-4] - increasing.values[4:]).max() < 1e-12 + tm.assert_almost_equal( + decreasing.values[::-1][:-4], increasing.values[4:], atol=1e-12 + ) @pytest.mark.parametrize( @@ -1441,17 +1443,30 @@ def test_rolling_skew_kurt_numerical_stability(method): @pytest.mark.parametrize( - ("method", "values"), + ("method", "data", "values"), [ - ("skew", [2.0, 0.854563, 0.0, 1.999984]), - ("kurt", [4.0, -1.289256, -1.2, 3.999946]), + ( + "skew", + [3000000, 1, 1, 2, 3, 4, 999], + [np.nan] * 3 + [2.0, 0.854563, 0.0, 1.999984], + ), + ( + "skew", + [1e6, -1e6, 1, 2, 3, 4, 5, 6], + [np.nan] * 3 + [-5.51135192e-06, -2.0, 0.0, 0.0, 0.0], + ), + ( + "kurt", + [3000000, 1, 1, 2, 3, 4, 999], + [np.nan] * 3 + [4.0, -1.289256, -1.2, 3.999946], + ), ], ) -def test_rolling_skew_kurt_large_value_range(method, values): - # GH: 37557 - s = Series([3000000, 1, 1, 2, 3, 4, 999]) +def test_rolling_skew_kurt_large_value_range(method, data, values): + # GH: 37557, 47461 + s = Series(data) result = getattr(s.rolling(4), method)() - expected = Series([np.nan] * 3 + values) + expected = Series(values) tm.assert_series_equal(result, expected) @@ -1837,9 +1852,11 @@ def test_rolling_skew_kurt_floating_artifacts(): sr = Series([1 / 3, 4, 0, 0, 0, 0, 0]) r = sr.rolling(4) result = r.skew() - assert (result[-2:] == 0).all() + expected = Series([np.nan, np.nan, np.nan, 1.9619045191072484, 2.0, 0.0, 0.0]) + tm.assert_series_equal(result, expected) result = r.kurt() - assert (result[-2:] == -3).all() + expected = Series([np.nan, np.nan, np.nan, 3.8636048803878786, 4.0, -3.0, -3.0]) + tm.assert_series_equal(result, expected) def test_numeric_only_frame(arithmetic_win_operators, numeric_only): ++++++ pandas-pr63143.patch ++++++ >From 55864a0a2a8833b9b00b17236aa93c1b859051ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Kothe?= <[email protected]> Date: Mon, 1 Dec 2025 16:14:20 -0300 Subject: [PATCH] BUG: fix polluted window in rolling kurt (#63143) --- doc/source/whatsnew/v3.0.0.rst | 3 +- pandas/_libs/window/aggregations.pyx | 220 +++++++++++++-------------- pandas/tests/window/test_rolling.py | 27 +++- 3 files changed, 130 insertions(+), 120 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 89530c6c9c46c..84fa55448fd76 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -2,7 +2,6 @@ from libc.math cimport ( fabs, - round, signbit, sqrt, ) @@ -683,30 +682,22 @@ def roll_skew(const float64_t[:] values, ndarray[int64_t] start, cdef float64_t calc_kurt(int64_t minp, int64_t nobs, - float64_t x, float64_t xx, - float64_t xxx, float64_t xxxx, + float64_t m2, float64_t m4, int64_t num_consecutive_same_value, ) noexcept nogil: cdef: - float64_t result, dnobs - float64_t A, B, C, D, R, K + float64_t result, dnobs, term1, term2, inner, correction + float64_t moments_ratio if nobs >= minp: if nobs < 4: result = NaN # GH 42064 46431 - # uniform case, force result to be -3. + # degenerate case, force result to be -3. elif num_consecutive_same_value >= nobs: result = -3. else: dnobs = <float64_t>nobs - A = x / dnobs - R = A * A - B = xx / dnobs - R - R = R * A - C = xxx / dnobs - R - 3 * A * B - R = R * A - D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A # #18044: with uniform distribution, floating issue will # cause B != 0. and cause the result is a very @@ -716,12 +707,17 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs, # _zero_out_fperr(m2) to fix floating error. # if the variance is less than 1e-14, it could be # treat as zero, here we follow the original - # skew/kurt behaviour to check B <= 1e-14 - if B <= 1e-14: + # skew/kurt behaviour to check m2 <= n * 1e-14 + if m2 <= dnobs * 1e-14: result = NaN else: - K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2) - result = K / ((dnobs - 2.) * (dnobs - 3.)) + moments_ratio = m4 / (m2 * m2) + term1 = dnobs * (dnobs + 1.0) * moments_ratio + term2 = 3.0 * (dnobs - 1.0) + inner = term1 - term2 + + correction = (dnobs - 1.0) / ((dnobs - 2.0) * (dnobs - 3.0)) + result = correction * inner else: result = NaN @@ -729,39 +725,43 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs, cdef void add_kurt(float64_t val, int64_t *nobs, - float64_t *x, float64_t *xx, - float64_t *xxx, float64_t *xxxx, - float64_t *compensation_x, - float64_t *compensation_xx, - float64_t *compensation_xxx, - float64_t *compensation_xxxx, + float64_t *mean, float64_t *m2, + float64_t *m3, float64_t *m4, + bint *numerically_unstable, int64_t *num_consecutive_same_value, float64_t *prev_value ) noexcept nogil: """ add a value from the kurotic calc """ cdef: - float64_t y, t + float64_t n, delta, delta_n, term1, m4_update, new_m4 + + # Formulas adapted from + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics # Not NaN if val == val: - nobs[0] = nobs[0] + 1 + nobs[0] += 1 + n = <float64_t>(nobs[0]) + delta = val - mean[0] + delta_n = delta / n + term1 = delta * delta_n * (n - 1.0) + + m4_update = delta_n * ( + -4.0 * m3[0] + + delta_n * ( + 6 * m2[0] + term1 * (n * n - 3.0 * n + 3.0) + ) + ) + new_m4 = m4[0] + m4_update + + if (fabs(m4_update) + fabs(m4[0])) * InvCondTol > fabs(new_m4): + # possible catastrophic cancellation + numerically_unstable[0] = True - y = val - compensation_x[0] - t = x[0] + y - compensation_x[0] = t - x[0] - y - x[0] = t - y = val * val - compensation_xx[0] - t = xx[0] + y - compensation_xx[0] = t - xx[0] - y - xx[0] = t - y = val * val * val - compensation_xxx[0] - t = xxx[0] + y - compensation_xxx[0] = t - xxx[0] - y - xxx[0] = t - y = val * val * val * val - compensation_xxxx[0] - t = xxxx[0] + y - compensation_xxxx[0] = t - xxxx[0] - y - xxxx[0] = t + m4[0] = new_m4 + m3[0] += delta_n * (term1 * (n - 2.0) - 3.0 * m2[0]) + m2[0] += term1 + mean[0] += delta_n # GH#42064, record num of same values to remove floating point artifacts if val == prev_value[0]: @@ -773,75 +773,60 @@ cdef void add_kurt(float64_t val, int64_t *nobs, cdef void remove_kurt(float64_t val, int64_t *nobs, - float64_t *x, float64_t *xx, - float64_t *xxx, float64_t *xxxx, - float64_t *compensation_x, - float64_t *compensation_xx, - float64_t *compensation_xxx, - float64_t *compensation_xxxx) noexcept nogil: + float64_t *mean, float64_t *m2, + float64_t *m3, float64_t *m4, + bint *numerically_unstable, + ) noexcept nogil: """ remove a value from the kurotic calc """ cdef: - float64_t y, t + float64_t n, delta, delta_n, term1, m4_update, new_m4 # Not NaN if val == val: - nobs[0] = nobs[0] - 1 + nobs[0] -= 1 + n = <float64_t>(nobs[0]) + delta = val - mean[0] + delta_n = delta / n + term1 = delta_n * delta * (n + 1.0) + + m4_update = delta_n * ( + 4.0 * m3[0] + + delta_n * ( + 6.0 * m2[0] + - term1 * (n * n + 3.0 * n + 3.0) + ) + ) + new_m4 = m4[0] + m4_update + + if (fabs(m4_update) + fabs(m4[0])) * InvCondTol > fabs(new_m4): + # possible catastrophic cancellation + numerically_unstable[0] = True - y = - val - compensation_x[0] - t = x[0] + y - compensation_x[0] = t - x[0] - y - x[0] = t - y = - val * val - compensation_xx[0] - t = xx[0] + y - compensation_xx[0] = t - xx[0] - y - xx[0] = t - y = - val * val * val - compensation_xxx[0] - t = xxx[0] + y - compensation_xxx[0] = t - xxx[0] - y - xxx[0] = t - y = - val * val * val * val - compensation_xxxx[0] - t = xxxx[0] + y - compensation_xxxx[0] = t - xxxx[0] - y - xxxx[0] = t - - -def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, + m4[0] = new_m4 + m3[0] -= delta_n * (term1 * (n + 2.0) - 3.0 * m2[0]) + m2[0] -= term1 + mean[0] -= delta_n + + +def roll_kurt(const float64_t[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp) -> np.ndarray: cdef: Py_ssize_t i, j - float64_t val, mean_val, min_val, sum_val = 0 - float64_t compensation_xxxx_add, compensation_xxxx_remove - float64_t compensation_xxx_remove, compensation_xxx_add - float64_t compensation_xx_remove, compensation_xx_add - float64_t compensation_x_remove, compensation_x_add - float64_t x, xx, xxx, xxxx + float64_t mean, m2, m3, m4 float64_t prev_value int64_t nobs, s, e, num_consecutive_same_value - int64_t N = len(start), V = len(values), nobs_mean = 0 - ndarray[float64_t] output, values_copy + int64_t N = len(start) + ndarray[float64_t] output bint is_monotonic_increasing_bounds + bint requires_recompute, numerically_unstable = False minp = max(minp, 4) is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( start, end ) output = np.empty(N, dtype=np.float64) - values_copy = np.copy(values) - min_val = np.nanmin(values) with nogil: - for i in range(0, V): - val = values_copy[i] - if val == val: - nobs_mean += 1 - sum_val += val - mean_val = sum_val / nobs_mean - # Other cases would lead to imprecision for smallest values - if min_val - mean_val > -1e4: - mean_val = round(mean_val) - for i in range(0, V): - values_copy[i] = values_copy[i] - mean_val - for i in range(0, N): s = start[i] @@ -849,49 +834,48 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, # Over the first window, observations can only be added # never removed - if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]: - - prev_value = values[s] - num_consecutive_same_value = 0 - - compensation_xxxx_add = compensation_xxxx_remove = 0 - compensation_xxx_remove = compensation_xxx_add = 0 - compensation_xx_remove = compensation_xx_add = 0 - compensation_x_remove = compensation_x_add = 0 - x = xx = xxx = xxxx = 0 - nobs = 0 - for j in range(s, e): - add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, - &compensation_x_add, &compensation_xx_add, - &compensation_xxx_add, &compensation_xxxx_add, - &num_consecutive_same_value, &prev_value) + requires_recompute = ( + i == 0 + or not is_monotonic_increasing_bounds + or s >= end[i - 1] + ) - else: + if not requires_recompute: # After the first window, observations can both be added # and removed # calculate deletes for j in range(start[i - 1], s): - remove_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, - &compensation_x_remove, &compensation_xx_remove, - &compensation_xxx_remove, &compensation_xxxx_remove) + remove_kurt(values[j], &nobs, &mean, &m2, &m3, &m4, + &numerically_unstable) # calculate adds for j in range(end[i - 1], e): - add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx, - &compensation_x_add, &compensation_xx_add, - &compensation_xxx_add, &compensation_xxxx_add, + add_kurt(values[j], &nobs, &mean, &m2, &m3, &m4, + &numerically_unstable, + &num_consecutive_same_value, &prev_value) + + if requires_recompute or numerically_unstable: + + prev_value = values[s] + num_consecutive_same_value = 0 + + mean = m2 = m3 = m4 = 0.0 + nobs = 0 + for j in range(s, e): + add_kurt(values[j], &nobs, &mean, &m2, &m3, &m4, + &numerically_unstable, &num_consecutive_same_value, &prev_value) - output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, + output[i] = calc_kurt(minp, nobs, m2, m4, num_consecutive_same_value) if not is_monotonic_increasing_bounds: nobs = 0 - x = 0.0 - xx = 0.0 - xxx = 0.0 - xxxx = 0.0 + mean = 0.0 + m2 = 0.0 + m3 = 0.0 + m4 = 0.0 return output diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 91e1dd1a50719..9efc3f473cfcb 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1516,16 +1516,41 @@ def test_rolling_skew_kurt_numerical_stability(method): [3000000, 1, 1, 2, 3, 4, 999], [np.nan] * 3 + [4.0, -1.289256, -1.2, 3.999946], ), + ( + "kurt", + [1e6, -1e6, 1, 2, 3, 4, 5, 6], + [np.nan] * 3 + [1.5, 4.0, -1.2, -1.2, -1.2], + ), ], ) def test_rolling_skew_kurt_large_value_range(method, data, values): - # GH: 37557, 47461 + # GH: 37557, 47461, 61416 s = Series(data) result = getattr(s.rolling(4), method)() expected = Series(values) tm.assert_series_equal(result, expected) [email protected]("method", ["skew", "kurt"]) +def test_same_result_with_different_lengths(method): + # GH-54380 + len_smaller = 10 + len_bigger = 12 + window_size = 8 + + rng = np.random.default_rng(2) + data = rng.normal(loc=0.0, scale=1e3, size=len_bigger) + window_smaller = Series(data[:len_smaller]).rolling(window_size) + window_bigger = Series(data).rolling(window_size) + + result_smaller = getattr(window_smaller, method)() + result_bigger = getattr(window_bigger, method)() + + result_bigger_trimmed = result_bigger[:len_smaller] + + tm.assert_series_equal(result_smaller, result_bigger_trimmed, check_exact=True) + + def test_invalid_method(): with pytest.raises(ValueError, match="method must be 'table' or 'single"): Series(range(1)).rolling(1, method="foo")
