This is an automated email from the git hooks/post-receive script. yoh pushed a commit to annotated tag v0.1 in repository python-mne.
commit b9017b33da5fb95dca197d2e8cddb05493ec03dd Author: Alexandre Gramfort <[email protected]> Date: Mon Mar 7 11:10:10 2011 -0500 towards 2D permutation cluster stats WIP --- examples/stats/plot_cluster_stats_evoked.py | 4 +- mne/stats/__init__.py | 2 +- mne/stats/cluster_level.py | 180 +++++++++++++++------------- mne/stats/parametric.py | 79 ++++++++++++ mne/stats/tests/test_cluster_level.py | 8 +- mne/tfr.py | 24 ++++ 6 files changed, 205 insertions(+), 92 deletions(-) diff --git a/examples/stats/plot_cluster_stats_evoked.py b/examples/stats/plot_cluster_stats_evoked.py index c6dd8fd..53dbe64 100644 --- a/examples/stats/plot_cluster_stats_evoked.py +++ b/examples/stats/plot_cluster_stats_evoked.py @@ -19,7 +19,7 @@ import numpy as np import mne from mne import fiff -from mne.stats import permutation_1d_cluster_test +from mne.stats import permutation_cluster_test from mne.datasets import sample ############################################################################### @@ -55,7 +55,7 @@ condition2 = np.squeeze(np.array([d['epoch'] for d in data2])) # as 3D matrix # Compute statistic threshold = 6.0 T_obs, clusters, cluster_p_values, H0 = \ - permutation_1d_cluster_test([condition1, condition2], + permutation_cluster_test([condition1, condition2], n_permutations=1000, threshold=threshold, tail=1) ############################################################################### diff --git a/mne/stats/__init__.py b/mne/stats/__init__.py index 4b782c8..2d810de 100644 --- a/mne/stats/__init__.py +++ b/mne/stats/__init__.py @@ -1,2 +1,2 @@ from .permutations import permutation_t_test -from .cluster_level import permutation_1d_cluster_test +from .cluster_level import permutation_cluster_test diff --git a/mne/stats/cluster_level.py b/mne/stats/cluster_level.py index 70d9973..f1a5bbb 100644 --- a/mne/stats/cluster_level.py +++ b/mne/stats/cluster_level.py @@ -7,24 +7,11 @@ # License: Simplified BSD import numpy as np -from scipy import stats, ndimage +from scipy import 
ndimage from scipy.stats import percentileofscore -from scikits.learn.feature_selection import univariate_selection - -def f_oneway(*args): - """Call scipy.stats.f_oneway, but return only f-value""" - return univariate_selection.f_oneway(*args)[0] - # return stats.f_oneway(*args)[0] - -# def best_component(x, threshold, tail=0): -# if tail == -1: -# x_in = x < threshold -# elif tail == 1: -# x_in = x > threshold -# else: -# x_in = np.abs(x) > threshold -# labels, n_labels = ndimage.label(x_in) -# return np.max(ndimage.measurements.sum(x, labels, index=range(1, n_labels+1))) + +from .parametric import f_oneway + def _find_clusters(x, threshold, tail=0): """For a given 1d-array (test statistic), find all clusters which @@ -41,21 +28,12 @@ def _find_clusters(x, threshold, tail=0): Returns ------- - clusters: list of tuples - Each tuple is a pair of indices (begin/end of cluster) + clusters: list of slices or list of arrays (boolean masks) + We use slices for 1D signals and mask to multidimensional + arrays. 
- Example - ------- - >>> _find_clusters([1, 2, 3, 1], 1.9, tail=1) - [(1, 3)] - >>> _find_clusters([2, 2, 3, 1], 1.9, tail=1) - [(0, 3)] - >>> _find_clusters([1, 2, 3, 2], 1.9, tail=1) - [(1, 4)] - >>> _find_clusters([1, -2, 3, 1], 1.9, tail=0) - [(1, 3)] - >>> _find_clusters([1, -2, -3, 1], -1.9, tail=-1) - [(1, 3)] + sums: array + Sum of x values in clusters """ if not tail in [-1, 0, 1]: raise ValueError('invalid tail parameter') @@ -70,10 +48,22 @@ def _find_clusters(x, threshold, tail=0): x_in = np.abs(x) > threshold labels, n_labels = ndimage.label(x_in) - return ndimage.find_objects(labels, n_labels) + if x.ndim == 1: + clusters = ndimage.find_objects(labels, n_labels) + sums = ndimage.measurements.sum(x, labels, index=range(1, n_labels+1)) + else: + clusters = list() + sums = np.empty(n_labels) + for l in range(1, n_labels+1): + c = labels == l + clusters.append(c) + sums[l-1] = np.sum(x[c]) + + return clusters, sums -def permutation_1d_cluster_test(X, stat_fun=f_oneway, threshold=1.67, + +def permutation_cluster_test(X, stat_fun=f_oneway, threshold=1.67, n_permutations=1000, tail=0): """Cluster-level statistical permutation test @@ -116,14 +106,13 @@ def permutation_1d_cluster_test(X, stat_fun=f_oneway, threshold=1.67, doi:10.1016/j.jneumeth.2007.03.024 """ X_full = np.concatenate(X, axis=0) - n_samples_total = X_full.shape[0] n_samples_per_condition = [x.shape[0] for x in X] # Step 1: Calculate Anova (or other stat_fun) for original data # ------------------------------------------------------------- T_obs = stat_fun(*X) - clusters = _find_clusters(T_obs, threshold, tail) + clusters, cluster_stats = _find_clusters(T_obs, threshold, tail) # make list of indices for random data split splits_idx = np.append([0], np.cumsum(n_samples_per_condition)) @@ -133,19 +122,16 @@ def permutation_1d_cluster_test(X, stat_fun=f_oneway, threshold=1.67, # Step 2: If we have some clusters, repeat process on permutated data # 
------------------------------------------------------------------- if len(clusters) > 0: - cluster_stats = [np.sum(T_obs[c]) for c in clusters] cluster_pv = np.ones(len(clusters), dtype=np.float) H0 = np.zeros(n_permutations) # histogram for i_s in range(n_permutations): np.random.shuffle(X_full) X_shuffle_list = [X_full[s] for s in slices] T_obs_surr = stat_fun(*X_shuffle_list) - clusters_perm = _find_clusters(T_obs_surr, threshold, tail) + _, perm_clusters_sums = _find_clusters(T_obs_surr, threshold, tail) - if len(clusters_perm) > 0: - cluster_stats_perm = [np.sum(T_obs_surr[c]) - for c in clusters_perm] - H0[i_s] = max(cluster_stats_perm) + if len(perm_clusters_sums) > 0: + H0[i_s] = np.max(perm_clusters_sums) else: H0[i_s] = 0 @@ -160,47 +146,71 @@ def permutation_1d_cluster_test(X, stat_fun=f_oneway, threshold=1.67, return T_obs, np.array([]), np.array([]), np.array([]) -permutation_1d_cluster_test.__test__ = False - -if __name__ == "__main__": - noiselevel = 30 - - normfactor = np.hanning(20).sum() - - condition1 = np.random.randn(50, 500) * noiselevel - for i in range(50): - condition1[i] = np.convolve(condition1[i], np.hanning(20), - mode="same") / normfactor - - condition2 = np.random.randn(43, 500) * noiselevel - for i in range(43): - condition2[i] = np.convolve(condition2[i], np.hanning(20), - mode="same") / normfactor - - pseudoekp = 5 * np.hanning(150)[None,:] - condition1[:, 100:250] += pseudoekp - condition2[:, 100:250] -= pseudoekp - - fs, cluster_times, cluster_p_values, histogram = permutation_1d_cluster_test( - [condition1, condition2], n_permutations=1000) - - # # Plotting for a better understanding - # import pylab as pl - # pl.close('all') - # pl.subplot(211) - # pl.plot(condition1.mean(axis=0), label="Condition 1") - # pl.plot(condition2.mean(axis=0), label="Condition 2") - # pl.ylabel("signal [a.u.]") - # pl.subplot(212) - # for i_c, c in enumerate(cluster_times): - # c = c[0] - # if cluster_p_values[i_c] <= 0.05: - # h = 
pl.axvspan(c.start, c.stop-1, color='r', alpha=0.3) - # else: - # pl.axvspan(c.start, c.stop-1, color=(0.3, 0.3, 0.3), alpha=0.3) - # hf = pl.plot(fs, 'g') - # pl.legend((h, ), ('cluster p-value < 0.05', )) - # pl.xlabel("time (ms)") - # pl.ylabel("f-values") - # pl.show() - +permutation_cluster_test.__test__ = False + +# if __name__ == "__main__": +# noiselevel = 30 +# np.random.seed(0) +# +# # 1D +# normfactor = np.hanning(20).sum() +# condition1 = np.random.randn(50, 300) * noiselevel +# for i in range(50): +# condition1[i] = np.convolve(condition1[i], np.hanning(20), +# mode="same") / normfactor +# condition2 = np.random.randn(43, 300) * noiselevel +# for i in range(43): +# condition2[i] = np.convolve(condition2[i], np.hanning(20), +# mode="same") / normfactor +# pseudoekp = 5 * np.hanning(150)[None,:] +# condition1[:, 100:250] += pseudoekp +# condition2[:, 100:250] -= pseudoekp +# +# # Make it 2D +# condition1 = np.tile(condition1[:,100:275,None], (1, 1, 15)) +# condition2 = np.tile(condition2[:,100:275,None], (1, 1, 15)) +# shape1 = condition1[..., :3].shape +# shape2 = condition2[..., :3].shape +# condition1[..., :3] = np.random.randn(*shape1) * noiselevel +# condition2[..., :3] = np.random.randn(*shape2) * noiselevel +# condition1[..., -3:] = np.random.randn(*shape1) * noiselevel +# condition2[..., -3:] = np.random.randn(*shape2) * noiselevel +# +# fs, clusters, cluster_p_values, histogram = permutation_cluster_test( +# [condition1, condition2], n_permutations=1000) +# +# # # Plotting for a better understanding +# # import pylab as pl +# # pl.close('all') +# # +# # if condition1.ndim == 2: +# # pl.subplot(211) +# # pl.plot(condition1.mean(axis=0), label="Condition 1") +# # pl.plot(condition2.mean(axis=0), label="Condition 2") +# # pl.ylabel("signal [a.u.]") +# # pl.subplot(212) +# # for i_c, c in enumerate(clusters): +# # c = c[0] +# # if cluster_p_values[i_c] <= 0.05: +# # h = pl.axvspan(c.start, c.stop-1, color='r', alpha=0.3) +# # else: +# # 
pl.axvspan(c.start, c.stop-1, color=(0.3, 0.3, 0.3), alpha=0.3) # # hf = pl.plot(fs, 'g') # # pl.legend((h, ), ('cluster p-value < 0.05', )) # # pl.xlabel("time (ms)") # # pl.ylabel("f-values") # # else: # # fs_plot = np.nan * np.ones_like(fs) # # for c, p_val in zip(clusters, cluster_p_values): # # if p_val <= 0.05: # # fs_plot[c] = fs[c] # # # # pl.imshow(fs.T, cmap=pl.cm.gray) # # pl.imshow(fs_plot.T, cmap=pl.cm.jet) # # # pl.imshow(fs.T, cmap=pl.cm.gray, alpha=0.6) # # # pl.imshow(fs_plot.T, cmap=pl.cm.jet, alpha=0.6) # # pl.xlabel('time') # # pl.ylabel('Freq') # # pl.colorbar() # # # # pl.show() diff --git a/mne/stats/parametric.py b/mne/stats/parametric.py new file mode 100644 index 0000000..a489a4e --- /dev/null +++ b/mne/stats/parametric.py @@ -0,0 +1,79 @@ +import numpy as np +from scipy import stats + +# The following function is a rewriting of scipy.stats.f_oneway +# Contrary to the scipy.stats.f_oneway implementation it does not +# copy the data while keeping the inputs unchanged. +def _f_oneway(*args): + """ + Performs a 1-way ANOVA. + + The one-way ANOVA tests the null hypothesis that 2 or more groups have + the same population mean. The test is applied to samples from two or + more groups, possibly with differing sizes. + + Parameters + ---------- + sample1, sample2, ... : array_like + The sample measurements should be given as arguments. + + Returns + ------- + F-value : float + The computed F-value of the test + p-value : float + The associated p-value from the F-distribution + + Notes + ----- + The ANOVA test has important assumptions that must be satisfied in order + for the associated p-value to be valid. + + 1. The samples are independent + 2. Each sample is from a normally distributed population + 3. The population standard deviations of the groups are all equal. This + property is known as homoscedasticity. 
+ + If these assumptions are not true for a given set of data, it may still be + possible to use the Kruskal-Wallis H-test (`stats.kruskal`_) although with + some loss of power + + The algorithm is from Heiman[2], pp.394-7. + + See scipy.stats.f_oneway that should give the same results while + being less efficient + + References + ---------- + .. [1] Lowry, Richard. "Concepts and Applications of Inferential + Statistics". Chapter 14. http://faculty.vassar.edu/lowry/ch14pt1.html + + .. [2] Heiman, G.W. Research Methods in Statistics. 2002. + + """ + n_classes = len(args) + n_samples_per_class = np.array([len(a) for a in args]) + n_samples = np.sum(n_samples_per_class) + ss_alldata = reduce(lambda x, y: x+y, [np.sum(a**2, axis=0) for a in args]) + sums_args = [np.sum(a, axis=0) for a in args] + square_of_sums_alldata = reduce(lambda x, y: x+y, sums_args)**2 + square_of_sums_args = [s**2 for s in sums_args] + sstot = ss_alldata - square_of_sums_alldata / float(n_samples) + ssbn = 0 + for k, _ in enumerate(args): + ssbn += square_of_sums_args[k] / n_samples_per_class[k] + ssbn -= square_of_sums_alldata / float(n_samples) + sswn = sstot - ssbn + dfbn = n_classes - 1 + dfwn = n_samples - n_classes + msb = ssbn / float(dfbn) + msw = sswn / float(dfwn) + f = msb / msw + prob = stats.fprob(dfbn, dfwn, f) + return f, prob + + +def f_oneway(*args): + """Call scipy.stats.f_oneway, but return only f-value""" + return _f_oneway(*args)[0] + diff --git a/mne/stats/tests/test_cluster_level.py b/mne/stats/tests/test_cluster_level.py index 9b6b999..3b9e3bf 100644 --- a/mne/stats/tests/test_cluster_level.py +++ b/mne/stats/tests/test_cluster_level.py @@ -1,7 +1,7 @@ import numpy as np from numpy.testing import assert_equal -from ..cluster_level import permutation_1d_cluster_test +from ..cluster_level import permutation_cluster_test def test_permutation_t_test(): @@ -23,17 +23,17 @@ def test_permutation_t_test(): condition1[:, 100:250] += pseudoekp condition2[:, 100:250] -= pseudoekp - 
T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test( + T_obs, clusters, cluster_p_values, hist = permutation_cluster_test( [condition1, condition2], n_permutations=500, tail=1) assert_equal(np.sum(cluster_p_values < 0.05), 1) - T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test( + T_obs, clusters, cluster_p_values, hist = permutation_cluster_test( [condition1, condition2], n_permutations=500, tail=0) assert_equal(np.sum(cluster_p_values < 0.05), 1) - T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test( + T_obs, clusters, cluster_p_values, hist = permutation_cluster_test( [condition1, condition2], n_permutations=500, tail=-1) assert_equal(np.sum(cluster_p_values < 0.05), 0) diff --git a/mne/tfr.py b/mne/tfr.py index 6a61cb4..65d42bd 100644 --- a/mne/tfr.py +++ b/mne/tfr.py @@ -189,6 +189,30 @@ def _time_frequency(X, Ws, use_fft): return psd, plf +def single_trial_power(epochs, Fs, frequencies, use_fft=True, n_cycles=25, + n_jobs=1): + """Compute time-frequency power on single epochs + """ + n_frequencies = len(frequencies) + n_epochs, n_channels, n_times = epochs.shape + + # Precompute wavelets for given frequency range to save time + Ws = morlet(Fs, frequencies, n_cycles=n_cycles) + + power = np.empty((n_epochs, n_channels, n_frequencies, n_times), + dtype=np.float) + + mode = 'same' + if use_fft: + _cwt = _cwt_fft + else: + _cwt = _cwt_convolve + + for k, e in enumerate(epochs): + mode = 'same' + power[k] = np.abs(_cwt(e, Ws, mode))**2 + + return power def time_frequency(epochs, Fs, frequencies, use_fft=True, n_cycles=25, n_jobs=1): -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-mne.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
