This is an automated email from the git hooks/post-receive script. yoh pushed a commit to annotated tag v0.1 in repository python-mne.
commit 70fb56b2450fee3fbd018d6d9a31c76ae98266b8 Author: Alexandre Gramfort <[email protected]> Date: Wed Feb 16 16:57:37 2011 -0500 ENH : new cluster level non-parametric statistic on 1D data --- examples/stats/plot_cluster_stats_evoked.py | 82 +++++++++++++++ mne/stats/__init__.py | 1 + mne/stats/cluster_level.py | 157 ++++++++++++++++++++++++++++ mne/stats/permutations.py | 63 ++--------- mne/stats/tests/test_cluster_level.py | 39 +++++++ mne/stats/tests/test_permutations.py | 12 +-- 6 files changed, 293 insertions(+), 61 deletions(-) diff --git a/examples/stats/plot_cluster_stats_evoked.py b/examples/stats/plot_cluster_stats_evoked.py new file mode 100644 index 0000000..7215200 --- /dev/null +++ b/examples/stats/plot_cluster_stats_evoked.py @@ -0,0 +1,82 @@ +""" +======================================================= +Permutation T-test on sensor data with 1D cluster level +======================================================= + +One tests if the evoked response is significantly different +between conditions. Multiple comparison problem is adressed +with cluster level permutation test. + +""" + +# Authors: Alexandre Gramfort <[email protected]> +# +# License: BSD (3-clause) + +print __doc__ + +import numpy as np + +import mne +from mne import fiff +from mne.stats import permutation_1d_cluster_test +from mne.datasets import sample + +############################################################################### +# Set parameters +data_path = sample.data_path('..') +raw_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw.fif' +event_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw-eve.fif' +event_id = 1 +tmin = -0.2 +tmax = 0.5 + +# Setup for reading the raw data +raw = fiff.setup_read_raw(raw_fname) +events = mne.read_events(event_fname) + +channel = 'MEG 1332' +include = [channel] + +############################################################################### +# Read epochs for the channel of interest +picks = fiff.pick_types(raw['info'], meg=False, include=include) +event_id = 1 +data1, times, channel_names = mne.read_epochs(raw, events, event_id, + tmin, tmax, picks=picks, baseline=(None, 0)) +condition1 = np.squeeze(np.array([d['epoch'] for d in data1])) # as 3D matrix + +event_id = 2 +data2, times, channel_names = mne.read_epochs(raw, events, event_id, + tmin, tmax, picks=picks, baseline=(None, 0)) +condition2 = np.squeeze(np.array([d['epoch'] for d in data2])) # as 3D matrix + +############################################################################### +# Compute statistic +threshold = 6.0 +T_obs, clusters, cluster_p_values, H0 = \ + permutation_1d_cluster_test([condition1, condition2], + n_permutations=1000, threshold=threshold, tail=1) + +############################################################################### +# Plot +import pylab as pl +pl.close('all') +pl.subplot(211) +pl.title('Channel : ' + channel) +pl.plot(times, condition1.mean(axis=0) - condition2.mean(axis=0), + label="ERF Contrast (Event 1 - Event 2)") +pl.ylabel("MEG (T / m)") +pl.legend() +pl.subplot(212) +for i_c, (start, stop) in enumerate(clusters): + if cluster_p_values[i_c] <= 0.05: + h = pl.axvspan(times[start], times[stop-1], color='r', alpha=0.3) + else: + pl.axvspan(times[start], times[stop-1], color=(0.3, 0.3, 0.3), + alpha=0.3) +hf = pl.plot(times, T_obs, 'g') +pl.legend((h, ), ('cluster p-value < 0.05', )) +pl.xlabel("time (ms)") +pl.ylabel("f-values") +pl.show() diff --git a/mne/stats/__init__.py b/mne/stats/__init__.py index 954fed4..4b782c8 100644 --- a/mne/stats/__init__.py +++ b/mne/stats/__init__.py @@ -1 +1,2 @@ from .permutations import permutation_t_test +from .cluster_level import permutation_1d_cluster_test diff --git a/mne/stats/cluster_level.py b/mne/stats/cluster_level.py new file mode 100644 index 0000000..b93c3f7 --- /dev/null +++ b/mne/stats/cluster_level.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Author: Thorsten Kranz <[email protected]> +# Alexandre Gramfort <[email protected]> +# +# License: Simplified BSD + +import numpy as np +from scipy import stats +from scipy.stats import percentileofscore + + +def f_oneway(*args): + """Call scipy.stats.f_oneway, but return only f-value""" + return stats.f_oneway(*args)[0] + + +def _find_clusters(x, threshold, tail=0): + """For a given 1d-array (test statistic), find all clusters which + are above/below a certain threshold. Returns a list of 2-tuples. + + Parameters + ---------- + x: 1D array + Data + threshold: float + Where to threshold the statistic + tail : -1 | 0 | 1 + Type of comparison + + Returns + ------- + clusters: list of tuples + Each tuple is a pair of indices (begin/end of cluster) + + Example + ------- + >>> _find_clusters([1, 2, 3, 1], 1.9, tail=1) + [(1, 3)] + >>> _find_clusters([2, 2, 3, 1], 1.9, tail=1) + [(0, 3)] + >>> _find_clusters([1, 2, 3, 2], 1.9, tail=1) + [(1, 4)] + >>> _find_clusters([1, -2, 3, 1], 1.9, tail=0) + [(1, 3)] + >>> _find_clusters([1, -2, -3, 1], -1.9, tail=-1) + [(1, 3)] + """ + if not tail in [-1, 0, 1]: + raise ValueError('invalid tail parameter') + + x = np.asanyarray(x) + + x = np.concatenate([np.array([threshold]), x, np.array([threshold])]) + if tail == -1: + x_in = (x < threshold).astype(np.int) + elif tail == 1: + x_in = (x > threshold).astype(np.int) + else: + x_in = (np.abs(x) > threshold).astype(np.int) + + x_switch = np.diff(x_in) + in_points = np.where(x_switch > 0)[0] + out_points = np.where(x_switch < 0)[0] + clusters = zip(in_points, out_points) + return clusters + + +def permutation_1d_cluster_test(X, stat_fun=f_oneway, threshold=1.67, + n_permutations=1000, tail=0): + """Cluster-level statistical permutation test + + For a list of 2d-arrays of data, e.g. power values, calculate some + statistics for each timepoint (dim 1) over groups. Do a cluster + analysis with permutation test for calculating corrected p-values + + Parameters + ---------- + X: list + List of 2d-arrays containing the data, dim 1: timepoints, dim 2: + elements of groups + stat_fun : callable + function called to calculate statistics, must accept 1d-arrays as + arguments (default: scipy.stats.f_oneway) + tail : -1 or 0 or 1 (default = 0) + If tail is 1, the statistic is thresholded above threshold. + If tail is -1, the statistic is thresholded below threshold. + If tail is 0, the statistic is thresholded on both sides of + the distribution. + + Returns + ------- + T_obs : array of shape [n_tests] + T-statistic observerd for all variables + clusters: list of tuples + Each tuple is a pair of indices (begin/end of cluster) + cluster_pv: array + P-value for each cluster + H0 : array of shape [n_permutations] + Max cluster level stats observed under permutation. + + Notes + ----- + Reference: + Cluster permutation algorithm as described in + Maris/Oostenveld (2007), + "Nonparametric statistical testing of EEG- and MEG-data" + Journal of Neuroscience Methods, Vol. 164, No. 1., pp. 177-190. + doi:10.1016/j.jneumeth.2007.03.024 + """ + X_full = np.concatenate(X, axis=0) + n_samples_total = X_full.shape[0] + n_samples_per_condition = [x.shape[0] for x in X] + + # Step 1: Calculate Anova (or other stat_fun) for original data + # ------------------------------------------------------------- + T_obs = stat_fun(*X) + + clusters = _find_clusters(T_obs, threshold, tail) + + splits_idx = np.cumsum(n_samples_per_condition)[:-1] + # Step 2: If we have some clusters, repeat process on permutated data + # ------------------------------------------------------------------- + if len(clusters) > 0: + cluster_stats = [np.sum(T_obs[c[0]:c[1]]) for c in clusters] + cluster_pv = np.ones(len(clusters), dtype=np.float) + H0 = np.zeros(n_permutations) # histogram + for i_s in range(n_permutations): + # make list of indices for random data split + indices_lists = np.split(np.random.permutation(n_samples_total), + splits_idx) + + X_shuffle_list = [X_full[indices] for indices in indices_lists] + T_obs_surr = stat_fun(*X_shuffle_list) + clusters_perm = _find_clusters(T_obs_surr, threshold, tail) + + if len(clusters_perm) > 0: + cluster_stats_perm = [np.sum(T_obs_surr[c[0]:c[1]]) + for c in clusters_perm] + H0[i_s] = max(cluster_stats_perm) + else: + H0[i_s] = 0 + + # for each cluster in original data, calculate p-value as percentile + # of its cluster statistics within all cluster statistics in surrogate + # data + cluster_pv[:] = [percentileofscore(H0, + cluster_stats[i_cl]) + for i_cl in range(len(clusters))] + cluster_pv[:] = (100.0 - cluster_pv[:]) / 100.0 # from pct to fraction + return T_obs, clusters, cluster_pv, H0 + else: + return T_obs, np.array([]), np.array([]), np.array([]) + + +permutation_1d_cluster_test.__test__ = False diff --git a/mne/stats/permutations.py b/mne/stats/permutations.py index e32be53..8b1cda7 100644 --- a/mne/stats/permutations.py +++ b/mne/stats/permutations.py @@ -78,12 +78,12 @@ def permutation_t_test(X, n_permutations=10000, tail=0): Returns ------- + T_obs : array of shape [n_tests] + T-statistic observed for all variables + p_values : array of shape [n_tests] P-values for all the tests (aka variables) - T0 : array of shape [n_tests] - T-statistic for all variables - H0 : array of shape [n_permutations] T-statistic obtained by permutations and t-max trick for multiple comparison. @@ -109,7 +109,7 @@ def permutation_t_test(X, n_permutations=10000, tail=0): mu0 = np.mean(X, axis=0) dof_scaling = sqrt(n_samples / (n_samples - 1.0)) std0 = np.sqrt(X2 - mu0**2) * dof_scaling # get std with variance splitting - T0 = np.mean(X, axis=0) / (std0 / sqrt(n_samples)) + T_obs = np.mean(X, axis=0) / (std0 / sqrt(n_samples)) if do_exact: perms = bin_perm_rep(n_samples, a=1, b=-1)[1:,:] @@ -124,60 +124,13 @@ def permutation_t_test(X, n_permutations=10000, tail=0): scaling = float(n_permutations + 1) if tail == 0: - p_values = 1.0 - np.searchsorted(H0, np.abs(T0)) / scaling + p_values = 1.0 - np.searchsorted(H0, np.abs(T_obs)) / scaling elif tail == 1: - p_values = 1.0 - np.searchsorted(H0, T0) / scaling + p_values = 1.0 - np.searchsorted(H0, T_obs) / scaling elif tail == -1: - p_values = 1.0 - np.searchsorted(H0, -T0) / scaling + p_values = 1.0 - np.searchsorted(H0, -T_obs) / scaling - return p_values, T0, H0 + return T_obs, p_values, H0 permutation_t_test.__test__ = False # for nosetests - -if __name__ == '__main__': - # 1 sample t-test - n_samples, n_tests = 30, 5 - n_permutations = 50000 - # n_permutations = 'exact' - X = np.random.randn(n_samples, n_tests) - X[:,:2] += 0.6 - p_values, T0, H0 = permutation_t_test(X, n_permutations, tail=1) - is_significant = p_values < 0.05 - print 80*"-" - print "-------- 1-sample t-test :" - print "T stats : ", T0 - print "p_values : ", p_values - print "Is significant : ", is_significant - - print 80*"-" - print "-------- Comparison analytic vs permutation :" - p_values, T0, H0 = permutation_t_test(X, n_permutations, tail=1) - print "--- permutation_t_test :" - print "T stats : ", T0 - print "p_values : ", p_values - print "Is significant : ", is_significant - - from scipy import stats - T0, p_values = stats.ttest_1samp(X[:,0], 0) - print "--- scipy.stats.ttest_1samp :" - print "T stats : ", T0 - print "p_values : ", p_values - - # 2 samples t-test - X1 = np.random.randn(n_samples, n_tests) - X2 = np.random.randn(n_samples, n_tests) - X1[:,:2] += 2 - p_values, T0, H0 = permutation_t_test(X1 - X2, n_permutations) - print 80*"-" - print "-------- 2-samples t-test :" - print "T stats : ", T0 - print "p_values : ", p_values - print "Is significant : ", is_significant - - # import pylab as pl - # pl.close('all') - # pl.hist(H0) - # y_min, y_max = pl.ylim() - # pl.vlines(T0, y_min, y_max, color='g', linewidth=2, linestyle='--') - # pl.show() diff --git a/mne/stats/tests/test_cluster_level.py b/mne/stats/tests/test_cluster_level.py new file mode 100644 index 0000000..9b6b999 --- /dev/null +++ b/mne/stats/tests/test_cluster_level.py @@ -0,0 +1,39 @@ +import numpy as np +from numpy.testing import assert_equal + +from ..cluster_level import permutation_1d_cluster_test + + +def test_permutation_t_test(): + """Test T-test based on permutations + """ + noiselevel = 20 + + normfactor = np.hanning(20).sum() + + condition1 = np.random.randn(40, 350) * noiselevel + for c in condition1: + c[:] = np.convolve(c, np.hanning(20), mode="same") / normfactor + + condition2 = np.random.randn(33, 350) * noiselevel + for c in condition2: + c[:] = np.convolve(c, np.hanning(20), mode="same") / normfactor + + pseudoekp = 5 * np.hanning(150)[None,:] + condition1[:, 100:250] += pseudoekp + condition2[:, 100:250] -= pseudoekp + + T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test( + [condition1, condition2], n_permutations=500, + tail=1) + assert_equal(np.sum(cluster_p_values < 0.05), 1) + + T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test( + [condition1, condition2], n_permutations=500, + tail=0) + assert_equal(np.sum(cluster_p_values < 0.05), 1) + + T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test( + [condition1, condition2], n_permutations=500, + tail=-1) + assert_equal(np.sum(cluster_p_values < 0.05), 0) diff --git a/mne/stats/tests/test_permutations.py b/mne/stats/tests/test_permutations.py index 3161dd1..768dbf8 100644 --- a/mne/stats/tests/test_permutations.py +++ b/mne/stats/tests/test_permutations.py @@ -15,20 +15,20 @@ def test_permutation_t_test(): X = np.random.randn(n_samples, n_tests) X[:,:2] += 1 - p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=0) + T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=0) is_significant = p_values < 0.05 assert_array_equal(is_significant, [True, True, False, False, False]) - p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=1) + T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=1) is_significant = p_values < 0.05 assert_array_equal(is_significant, [True, True, False, False, False]) - p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=-1) + T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=-1) is_significant = p_values < 0.05 assert_array_equal(is_significant, [False, False, False, False, False]) X = np.random.randn(18, 1) - p_values, T0, H0 = permutation_t_test(X[:, [0]], n_permutations='all') - T0_scipy, p_values_scipy = stats.ttest_1samp(X[:, 0], 0) - assert_almost_equal(T0[0], T0_scipy, 8) + T_obs, p_values, H0 = permutation_t_test(X[:, [0]], n_permutations='all') + T_obs_scipy, p_values_scipy = stats.ttest_1samp(X[:, 0], 0) + assert_almost_equal(T_obs[0], T_obs_scipy, 8) assert_almost_equal(p_values[0], p_values_scipy, 2) -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-mne.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
