https://github.com/python/cpython/commit/0823f4361850145152a94e9086bede6a000d8a4a
commit: 0823f4361850145152a94e9086bede6a000d8a4a
branch: main
author: Raymond Hettinger <rhettin...@users.noreply.github.com>
committer: rhettinger <rhettin...@users.noreply.github.com>
date: 2024-04-15T10:08:21-05:00
summary:

gh-115532: Minor tweaks to kde() (gh-117897)

files:
M Doc/library/statistics.rst
M Lib/statistics.py

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 197c123f8356d8..873ccd650f45cd 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -1163,7 +1163,7 @@ accurately approximated inverse cumulative distribution 
function.
 .. testcode::
 
    from random import choice, random, seed
-   from math import sqrt, log, pi, tan, asin
+   from math import sqrt, log, pi, tan, asin, cos, acos
    from statistics import NormalDist
 
    kernel_invcdfs = {
@@ -1172,6 +1172,7 @@ accurately approximated inverse cumulative distribution 
function.
        'sigmoid': lambda p: log(tan(p * pi/2)),
        'rectangular': lambda p: 2*p - 1,
        'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
+       'parabolic': lambda p: 2 * cos((acos(2*p-1) + pi) / 3),
        'cosine': lambda p: 2*asin(2*p - 1)/pi,
    }
 
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 58fb31def8896e..fc00891b083dc3 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -919,13 +919,13 @@ def kde(data, h, kernel='normal', *, cumulative=False):
             sqrt2pi = sqrt(2 * pi)
             sqrt2 = sqrt(2)
             K = lambda t: exp(-1/2 * t * t) / sqrt2pi
-            I = lambda t: 1/2 * (1.0 + erf(t / sqrt2))
+            W = lambda t: 1/2 * (1.0 + erf(t / sqrt2))
             support = None
 
         case 'logistic':
             # 1.0 / (exp(t) + 2.0 + exp(-t))
             K = lambda t: 1/2 / (1.0 + cosh(t))
-            I = lambda t: 1.0 - 1.0 / (exp(t) + 1.0)
+            W = lambda t: 1.0 - 1.0 / (exp(t) + 1.0)
             support = None
 
         case 'sigmoid':
@@ -933,39 +933,39 @@ def kde(data, h, kernel='normal', *, cumulative=False):
             c1 = 1 / pi
             c2 = 2 / pi
             K = lambda t: c1 / cosh(t)
-            I = lambda t: c2 * atan(exp(t))
+            W = lambda t: c2 * atan(exp(t))
             support = None
 
         case 'rectangular' | 'uniform':
             K = lambda t: 1/2
-            I = lambda t: 1/2 * t + 1/2
+            W = lambda t: 1/2 * t + 1/2
             support = 1.0
 
         case 'triangular':
             K = lambda t: 1.0 - abs(t)
-            I = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2
+            W = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2
             support = 1.0
 
         case 'parabolic' | 'epanechnikov':
             K = lambda t: 3/4 * (1.0 - t * t)
-            I = lambda t: -1/4 * t**3 + 3/4 * t + 1/2
+            W = lambda t: -1/4 * t**3 + 3/4 * t + 1/2
             support = 1.0
 
         case 'quartic' | 'biweight':
             K = lambda t: 15/16 * (1.0 - t * t) ** 2
-            I = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2
+            W = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2
             support = 1.0
 
         case 'triweight':
             K = lambda t: 35/32 * (1.0 - t * t) ** 3
-            I = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2
+            W = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2
             support = 1.0
 
         case 'cosine':
             c1 = pi / 4
             c2 = pi / 2
             K = lambda t: c1 * cos(c2 * t)
-            I = lambda t: 1/2 * sin(c2 * t) + 1/2
+            W = lambda t: 1/2 * sin(c2 * t) + 1/2
             support = 1.0
 
         case _:
@@ -974,10 +974,14 @@ def kde(data, h, kernel='normal', *, cumulative=False):
     if support is None:
 
         def pdf(x):
+            n = len(data)
             return sum(K((x - x_i) / h) for x_i in data) / (n * h)
 
         def cdf(x):
-            return sum(I((x - x_i) / h) for x_i in data) / n
+
+            n = len(data)
+            return sum(W((x - x_i) / h) for x_i in data) / n
+
 
     else:
 
@@ -985,16 +989,24 @@ def cdf(x):
         bandwidth = h * support
 
         def pdf(x):
+            nonlocal n, sample
+            if len(data) != n:
+                sample = sorted(data)
+                n = len(data)
             i = bisect_left(sample, x - bandwidth)
             j = bisect_right(sample, x + bandwidth)
             supported = sample[i : j]
             return sum(K((x - x_i) / h) for x_i in supported) / (n * h)
 
         def cdf(x):
+            nonlocal n, sample
+            if len(data) != n:
+                sample = sorted(data)
+                n = len(data)
             i = bisect_left(sample, x - bandwidth)
             j = bisect_right(sample, x + bandwidth)
             supported = sample[i : j]
-            return sum((I((x - x_i) / h) for x_i in supported), i) / n
+            return sum((W((x - x_i) / h) for x_i in supported), i) / n
 
     if cumulative:
         cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}'

_______________________________________________
Python-checkins mailing list -- python-checkins@python.org
To unsubscribe send an email to python-checkins-le...@python.org
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: arch...@mail-archive.com

Reply via email to