Revision: 8127
          http://matplotlib.svn.sourceforge.net/matplotlib/?rev=8127&view=rev
Author:   astraw
Date:     2010-02-12 02:21:05 +0000 (Fri, 12 Feb 2010)

Log Message:
-----------
Add option to bootstrap confidence intervals for boxplot (Paul Hobson)

Modified Paths:
--------------
    trunk/matplotlib/CHANGELOG
    trunk/matplotlib/lib/matplotlib/axes.py

Added Paths:
-----------
    trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py

Modified: trunk/matplotlib/CHANGELOG
===================================================================
--- trunk/matplotlib/CHANGELOG  2010-02-11 13:15:28 UTC (rev 8126)
+++ trunk/matplotlib/CHANGELOG  2010-02-12 02:21:05 UTC (rev 8127)
@@ -1,3 +1,7 @@
+2010-02-11 Added 'bootstrap' option to boxplot. This allows bootstrap
+           estimates of median confidence intervals. Based on an
+           initial patch by Paul Hobson. - ADS
+
 2010-02-06 Added setup.cfg "basedirlist" option to override setting
            in setupext.py "basedir" dictionary; added "gnu0"
            platform requested by Benjamin Drung. - EF

Added: trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py
===================================================================
--- trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py   (rev 0)
+++ trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py   2010-02-12 02:21:05 UTC (rev 8127)
@@ -0,0 +1,27 @@
+import matplotlib.pyplot as plt
+import matplotlib.transforms as mtransforms
+import numpy as np
+
+np.random.seed(2)
+inc = 0.1
+e1 = np.random.uniform(0,1, size=(500,))
+e2 = np.random.uniform(0,1, size=(500,))
+e3 = np.random.uniform(0,1 + inc, size=(500,))
+e4 = np.random.uniform(0,1 + 2*inc, size=(500,))
+
+treatments = [e1,e2,e3,e4]
+
+fig = plt.figure()
+ax = fig.add_subplot(111)
+pos = np.array(range(len(treatments)))+1
+bp = ax.boxplot( treatments, sym='k+', patch_artist=True,
+                 positions=pos, notch=1, bootstrap=5000 )
+text_transform= mtransforms.blended_transform_factory(ax.transData,
+                                                     ax.transAxes)
+ax.set_xlabel('treatment')
+ax.set_ylabel('response')
+ax.set_ylim(-0.2, 1.4)
+plt.setp(bp['whiskers'], color='k',  linestyle='-' )
+plt.setp(bp['fliers'], markersize=3.0)
+fig.subplots_adjust(right=0.99,top=0.99)
+plt.show()

Modified: trunk/matplotlib/lib/matplotlib/axes.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/axes.py     2010-02-11 13:15:28 UTC (rev 8126)
+++ trunk/matplotlib/lib/matplotlib/axes.py     2010-02-12 02:21:05 UTC (rev 8127)
@@ -4881,7 +4881,8 @@
         return (l0, caplines, barcols)
 
     def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
-                positions=None, widths=None, patch_artist=False):
+                positions=None, widths=None, patch_artist=False,
+                bootstrap=None):
         """
         call signature::
 
@@ -4910,6 +4911,16 @@
         a function of the inner quartile range.  They extend to the
         most extreme data point within ( ``whis*(75%-25%)`` ) data range.
 
+        *bootstrap* (default None) specifies whether to bootstrap the
+        confidence intervals around the median for notched
+        boxplots. If bootstrap==None, no bootstrapping is performed,
+        and notches are calculated using a Gaussian-based asymptotic
+        approximation (see McGill, R., Tukey, J.W., and Larsen, W.A.,
+        1978, and Kendall and Stuart, 1967). Otherwise, bootstrap
+        specifies the number of times to bootstrap the median to
+        determine it's 95% confidence intervals. Values between 1000
+        and 10000 are recommended.
+
         *positions* (default 1,2,...,n) sets the horizontal positions of
         the boxes. The ticks and limits are automatically set to match
         the positions.
@@ -5021,8 +5032,33 @@
                 med_x = [box_x_min, box_x_max]
             # calculate 'notch' plot
             else:
-                notch_max = med + 1.57*iq/np.sqrt(row)
-                notch_min = med - 1.57*iq/np.sqrt(row)
+                if bootstrap is not None:
+                    # Do a bootstrap estimate of notch locations.
+                    def bootstrapMedian(data, N=5000):
+                        # determine 95% confidence intervals of the median
+                        M = len(data)
+                        percentile = [2.5,97.5]
+                        estimate = np.zeros(N)
+                        for n in range(N):
+                            bsIndex = np.random.random_integers(0,M-1,M)
+                            bsData = data[bsIndex]
+                            estimate[n] = mlab.prctile(bsData, 50)
+                        CI = mlab.prctile(estimate, percentile)
+                        return CI
+
+                    # get conf. intervals around median
+                    CI = bootstrapMedian(d, N=bootstrap)
+                    notch_max = CI[1]
+                    notch_min = CI[0]
+                else:
+                    # Estimate notch locations using Gaussian-based
+                    # asymptotic approximation.
+                    #
+                    # For discussion: McGill, R., Tukey, J.W.,
+                    # and Larsen, W.A. (1978) "Variations of
+                    # Boxplots", The American Statistician, 32:12-16.
+                    notch_max = med + 1.57*iq/np.sqrt(row)
+                    notch_min = med - 1.57*iq/np.sqrt(row)
                 # make our notched box vectors
                 box_x = [box_x_min, box_x_max, box_x_max, cap_x_max, box_x_max,
                          box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min,


This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.

------------------------------------------------------------------------------
SOLARIS 10 is the OS for Data Centers - provides features such as DTrace,
Predictive Self Healing and Award Winning ZFS. Get Solaris 10 NOW
http://p.sf.net/sfu/solaris-dev2dev
_______________________________________________
Matplotlib-checkins mailing list
Matplotlib-checkins@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/matplotlib-checkins

Reply via email to