Revision: 8127 http://matplotlib.svn.sourceforge.net/matplotlib/?rev=8127&view=rev Author: astraw Date: 2010-02-12 02:21:05 +0000 (Fri, 12 Feb 2010)
Log Message: ----------- Add option to bootstrap confidence intervals for boxplot (Paul Hobson) Modified Paths: -------------- trunk/matplotlib/CHANGELOG trunk/matplotlib/lib/matplotlib/axes.py Added Paths: ----------- trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py Modified: trunk/matplotlib/CHANGELOG =================================================================== --- trunk/matplotlib/CHANGELOG 2010-02-11 13:15:28 UTC (rev 8126) +++ trunk/matplotlib/CHANGELOG 2010-02-12 02:21:05 UTC (rev 8127) @@ -1,3 +1,7 @@ +2010-02-11 Added 'bootstrap' option to boxplot. This allows bootstrap + estimates of median confidence intervals. Based on an + initial patch by Paul Hobson. - ADS + 2010-02-06 Added setup.cfg "basedirlist" option to override setting in setupext.py "basedir" dictionary; added "gnu0" platform requested by Benjamin Drung. - EF Added: trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py =================================================================== --- trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py (rev 0) +++ trunk/matplotlib/examples/pylab_examples/boxplot_demo3.py 2010-02-12 02:21:05 UTC (rev 8127) @@ -0,0 +1,27 @@ +import matplotlib.pyplot as plt +import matplotlib.transforms as mtransforms +import numpy as np + +np.random.seed(2) +inc = 0.1 +e1 = np.random.uniform(0,1, size=(500,)) +e2 = np.random.uniform(0,1, size=(500,)) +e3 = np.random.uniform(0,1 + inc, size=(500,)) +e4 = np.random.uniform(0,1 + 2*inc, size=(500,)) + +treatments = [e1,e2,e3,e4] + +fig = plt.figure() +ax = fig.add_subplot(111) +pos = np.array(range(len(treatments)))+1 +bp = ax.boxplot( treatments, sym='k+', patch_artist=True, + positions=pos, notch=1, bootstrap=5000 ) +text_transform= mtransforms.blended_transform_factory(ax.transData, + ax.transAxes) +ax.set_xlabel('treatment') +ax.set_ylabel('response') +ax.set_ylim(-0.2, 1.4) +plt.setp(bp['whiskers'], color='k', linestyle='-' ) +plt.setp(bp['fliers'], markersize=3.0) +fig.subplots_adjust(right=0.99,top=0.99) +plt.show() Modified: trunk/matplotlib/lib/matplotlib/axes.py =================================================================== --- trunk/matplotlib/lib/matplotlib/axes.py 2010-02-11 13:15:28 UTC (rev 8126) +++ trunk/matplotlib/lib/matplotlib/axes.py 2010-02-12 02:21:05 UTC (rev 8127) @@ -4881,7 +4881,8 @@ return (l0, caplines, barcols) def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5, - positions=None, widths=None, patch_artist=False): + positions=None, widths=None, patch_artist=False, + bootstrap=None): """ call signature:: @@ -4910,6 +4911,16 @@ a function of the inner quartile range. They extend to the most extreme data point within ( ``whis*(75%-25%)`` ) data range. + *bootstrap* (default None) specifies whether to bootstrap the + confidence intervals around the median for notched + boxplots. If bootstrap==None, no bootstrapping is performed, + and notches are calculated using a Gaussian-based asymptotic + approximation (see McGill, R., Tukey, J.W., and Larsen, W.A., + 1978, and Kendall and Stuart, 1967). Otherwise, bootstrap + specifies the number of times to bootstrap the median to + determine it's 95% confidence intervals. Values between 1000 + and 10000 are recommended. + *positions* (default 1,2,...,n) sets the horizontal positions of the boxes. The ticks and limits are automatically set to match the positions. @@ -5021,8 +5032,33 @@ med_x = [box_x_min, box_x_max] # calculate 'notch' plot else: - notch_max = med + 1.57*iq/np.sqrt(row) - notch_min = med - 1.57*iq/np.sqrt(row) + if bootstrap is not None: + # Do a bootstrap estimate of notch locations. + def bootstrapMedian(data, N=5000): + # determine 95% confidence intervals of the median + M = len(data) + percentile = [2.5,97.5] + estimate = np.zeros(N) + for n in range(N): + bsIndex = np.random.random_integers(0,M-1,M) + bsData = data[bsIndex] + estimate[n] = mlab.prctile(bsData, 50) + CI = mlab.prctile(estimate, percentile) + return CI + + # get conf. intervals around median + CI = bootstrapMedian(d, N=bootstrap) + notch_max = CI[1] + notch_min = CI[0] + else: + # Estimate notch locations using Gaussian-based + # asymptotic approximation. + # + # For discussion: McGill, R., Tukey, J.W., + # and Larsen, W.A. (1978) "Variations of + # Boxplots", The American Statistician, 32:12-16. + notch_max = med + 1.57*iq/np.sqrt(row) + notch_min = med - 1.57*iq/np.sqrt(row) # make our notched box vectors box_x = [box_x_min, box_x_max, box_x_max, cap_x_max, box_x_max, box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ SOLARIS 10 is the OS for Data Centers - provides features such as DTrace, Predictive Self Healing and Award Winning ZFS. Get Solaris 10 NOW http://p.sf.net/sfu/solaris-dev2dev _______________________________________________ Matplotlib-checkins mailing list Matplotlib-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/matplotlib-checkins