[med-svn] [python-mne] 79/376: ENH : new cluster level non-parametric statistic on 1D data

Yaroslav Halchenko Fri, 27 Nov 2015 09:24:37 -0800

This is an automated email from the git hooks/post-receive script.

yoh pushed a commit to annotated tag v0.1
in repository python-mne.


commit 70fb56b2450fee3fbd018d6d9a31c76ae98266b8
Author: Alexandre Gramfort <[email protected]>
Date:   Wed Feb 16 16:57:37 2011 -0500

    ENH : new cluster level non-parametric statistic on 1D data
---
 examples/stats/plot_cluster_stats_evoked.py |  82 +++++++++++++++
 mne/stats/__init__.py                       |   1 +
 mne/stats/cluster_level.py                  | 157 ++++++++++++++++++++++++++++
 mne/stats/permutations.py                   |  63 ++---------
 mne/stats/tests/test_cluster_level.py       |  39 +++++++
 mne/stats/tests/test_permutations.py        |  12 +--
 6 files changed, 293 insertions(+), 61 deletions(-)

diff --git a/examples/stats/plot_cluster_stats_evoked.py 
b/examples/stats/plot_cluster_stats_evoked.py
new file mode 100644
index 0000000..7215200
--- /dev/null
+++ b/examples/stats/plot_cluster_stats_evoked.py
@@ -0,0 +1,82 @@
+"""
+=======================================================
+Permutation T-test on sensor data with 1D cluster level
+=======================================================
+
+One tests if the evoked response is significantly different
+between conditions. Multiple comparison problem is adressed
+with cluster level permutation test.
+
+"""
+
+# Authors: Alexandre Gramfort <[email protected]>
+#
+# License: BSD (3-clause)
+
+print __doc__
+
+import numpy as np
+
+import mne
+from mne import fiff
+from mne.stats import permutation_1d_cluster_test
+from mne.datasets import sample
+
+###############################################################################
+# Set parameters
+data_path = sample.data_path('..')
+raw_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw.fif'
+event_fname = data_path + '/MEG/sample/sample_audvis_filt-0-40_raw-eve.fif'
+event_id = 1
+tmin = -0.2
+tmax = 0.5
+
+#   Setup for reading the raw data
+raw = fiff.setup_read_raw(raw_fname)
+events = mne.read_events(event_fname)
+
+channel = 'MEG 1332'
+include = [channel]
+
+###############################################################################
+# Read epochs for the channel of interest
+picks = fiff.pick_types(raw['info'], meg=False, include=include)
+event_id = 1
+data1, times, channel_names = mne.read_epochs(raw, events, event_id,
+                            tmin, tmax, picks=picks, baseline=(None, 0))
+condition1 = np.squeeze(np.array([d['epoch'] for d in data1])) # as 3D matrix
+
+event_id = 2
+data2, times, channel_names = mne.read_epochs(raw, events, event_id,
+                            tmin, tmax, picks=picks, baseline=(None, 0))
+condition2 = np.squeeze(np.array([d['epoch'] for d in data2])) # as 3D matrix
+
+###############################################################################
+# Compute statistic
+threshold = 6.0
+T_obs, clusters, cluster_p_values, H0 = \
+                permutation_1d_cluster_test([condition1, condition2],
+                            n_permutations=1000, threshold=threshold, tail=1)
+
+###############################################################################
+# Plot
+import pylab as pl
+pl.close('all')
+pl.subplot(211)
+pl.title('Channel : ' + channel)
+pl.plot(times, condition1.mean(axis=0) - condition2.mean(axis=0),
+        label="ERF Contrast (Event 1 - Event 2)")
+pl.ylabel("MEG (T / m)")
+pl.legend()
+pl.subplot(212)
+for i_c, (start, stop) in enumerate(clusters):
+    if cluster_p_values[i_c] <= 0.05:
+        h = pl.axvspan(times[start], times[stop-1], color='r', alpha=0.3)
+    else:
+        pl.axvspan(times[start], times[stop-1], color=(0.3, 0.3, 0.3),
+                   alpha=0.3)
+hf = pl.plot(times, T_obs, 'g')
+pl.legend((h, ), ('cluster p-value < 0.05', ))
+pl.xlabel("time (ms)")
+pl.ylabel("f-values")
+pl.show()
diff --git a/mne/stats/__init__.py b/mne/stats/__init__.py
index 954fed4..4b782c8 100644
--- a/mne/stats/__init__.py
+++ b/mne/stats/__init__.py
@@ -1 +1,2 @@
 from .permutations import permutation_t_test
+from .cluster_level import permutation_1d_cluster_test
diff --git a/mne/stats/cluster_level.py b/mne/stats/cluster_level.py
new file mode 100644
index 0000000..b93c3f7
--- /dev/null
+++ b/mne/stats/cluster_level.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# Author: Thorsten Kranz <[email protected]>
+#         Alexandre Gramfort <[email protected]>
+#
+# License: Simplified BSD
+
+import numpy as np
+from scipy import stats
+from scipy.stats import percentileofscore
+
+
+def f_oneway(*args):
+    """Call scipy.stats.f_oneway, but return only f-value"""
+    return stats.f_oneway(*args)[0]
+
+
+def _find_clusters(x, threshold, tail=0):
+    """For a given 1d-array (test statistic), find all clusters which
+    are above/below a certain threshold. Returns a list of 2-tuples.
+
+    Parameters
+    ----------
+    x: 1D array
+        Data
+    threshold: float
+        Where to threshold the statistic
+    tail : -1 | 0 | 1
+        Type of comparison
+
+    Returns
+    -------
+    clusters: list of tuples
+        Each tuple is a pair of indices (begin/end of cluster)
+
+    Example
+    -------
+    >>> _find_clusters([1, 2, 3, 1], 1.9, tail=1)
+    [(1, 3)]
+    >>> _find_clusters([2, 2, 3, 1], 1.9, tail=1)
+    [(0, 3)]
+    >>> _find_clusters([1, 2, 3, 2], 1.9, tail=1)
+    [(1, 4)]
+    >>> _find_clusters([1, -2, 3, 1], 1.9, tail=0)
+    [(1, 3)]
+    >>> _find_clusters([1, -2, -3, 1], -1.9, tail=-1)
+    [(1, 3)]
+    """
+    if not tail in [-1, 0, 1]:
+        raise ValueError('invalid tail parameter')
+
+    x = np.asanyarray(x)
+
+    x = np.concatenate([np.array([threshold]), x, np.array([threshold])])
+    if tail == -1:
+        x_in = (x < threshold).astype(np.int)
+    elif tail == 1:
+        x_in = (x > threshold).astype(np.int)
+    else:
+        x_in = (np.abs(x) > threshold).astype(np.int)
+
+    x_switch = np.diff(x_in)
+    in_points = np.where(x_switch > 0)[0]
+    out_points = np.where(x_switch < 0)[0]
+    clusters = zip(in_points, out_points)
+    return clusters
+
+
+def permutation_1d_cluster_test(X, stat_fun=f_oneway, threshold=1.67,
+                             n_permutations=1000, tail=0):
+    """Cluster-level statistical permutation test
+
+    For a list of 2d-arrays of data, e.g. power values, calculate some
+    statistics for each timepoint (dim 1) over groups.  Do a cluster
+    analysis with permutation test for calculating corrected p-values
+
+    Parameters
+    ----------
+    X: list
+        List of 2d-arrays containing the data, dim 1: timepoints, dim 2:
+        elements of groups
+    stat_fun : callable
+        function called to calculate statistics, must accept 1d-arrays as
+        arguments (default: scipy.stats.f_oneway)
+    tail : -1 or 0 or 1 (default = 0)
+        If tail is 1, the statistic is thresholded above threshold.
+        If tail is -1, the statistic is thresholded below threshold.
+        If tail is 0, the statistic is thresholded on both sides of
+        the distribution.
+
+    Returns
+    -------
+    T_obs : array of shape [n_tests]
+        T-statistic observerd for all variables
+    clusters: list of tuples
+        Each tuple is a pair of indices (begin/end of cluster)
+    cluster_pv: array
+        P-value for each cluster
+    H0 : array of shape [n_permutations]
+        Max cluster level stats observed under permutation.
+
+    Notes
+    -----
+    Reference:
+    Cluster permutation algorithm as described in
+    Maris/Oostenveld (2007),
+    "Nonparametric statistical testing of EEG- and MEG-data"
+    Journal of Neuroscience Methods, Vol. 164, No. 1., pp. 177-190.
+    doi:10.1016/j.jneumeth.2007.03.024
+    """
+    X_full = np.concatenate(X, axis=0)
+    n_samples_total = X_full.shape[0]
+    n_samples_per_condition = [x.shape[0] for x in X]
+
+    # Step 1: Calculate Anova (or other stat_fun) for original data
+    # -------------------------------------------------------------
+    T_obs = stat_fun(*X)
+
+    clusters = _find_clusters(T_obs, threshold, tail)
+
+    splits_idx = np.cumsum(n_samples_per_condition)[:-1]
+    # Step 2: If we have some clusters, repeat process on permutated data
+    # -------------------------------------------------------------------
+    if len(clusters) > 0:
+        cluster_stats = [np.sum(T_obs[c[0]:c[1]]) for c in clusters]
+        cluster_pv = np.ones(len(clusters), dtype=np.float)
+        H0 = np.zeros(n_permutations) # histogram
+        for i_s in range(n_permutations):
+            # make list of indices for random data split
+            indices_lists = np.split(np.random.permutation(n_samples_total),
+                                     splits_idx)
+
+            X_shuffle_list = [X_full[indices] for indices in indices_lists]
+            T_obs_surr = stat_fun(*X_shuffle_list)
+            clusters_perm = _find_clusters(T_obs_surr, threshold, tail)
+
+            if len(clusters_perm) > 0:
+                cluster_stats_perm = [np.sum(T_obs_surr[c[0]:c[1]])
+                                      for c in clusters_perm]
+                H0[i_s] = max(cluster_stats_perm)
+            else:
+                H0[i_s] = 0
+
+        # for each cluster in original data, calculate p-value as percentile
+        # of its cluster statistics within all cluster statistics in surrogate
+        # data
+        cluster_pv[:] = [percentileofscore(H0,
+                                           cluster_stats[i_cl])
+                          for i_cl in range(len(clusters))]
+        cluster_pv[:] = (100.0 - cluster_pv[:]) / 100.0 # from pct to fraction
+        return T_obs, clusters, cluster_pv, H0
+    else:
+        return T_obs, np.array([]), np.array([]), np.array([])
+
+
+permutation_1d_cluster_test.__test__ = False
diff --git a/mne/stats/permutations.py b/mne/stats/permutations.py
index e32be53..8b1cda7 100644
--- a/mne/stats/permutations.py
+++ b/mne/stats/permutations.py
@@ -78,12 +78,12 @@ def permutation_t_test(X, n_permutations=10000, tail=0):
 
     Returns
     -------
+    T_obs : array of shape [n_tests]
+        T-statistic observed for all variables
+
     p_values : array of shape [n_tests]
         P-values for all the tests (aka variables)
 
-    T0 : array of shape [n_tests]
-        T-statistic for all variables
-
     H0 : array of shape [n_permutations]
         T-statistic obtained by permutations and t-max trick for multiple
         comparison.
@@ -109,7 +109,7 @@ def permutation_t_test(X, n_permutations=10000, tail=0):
     mu0 = np.mean(X, axis=0)
     dof_scaling = sqrt(n_samples / (n_samples - 1.0))
     std0 = np.sqrt(X2 - mu0**2) * dof_scaling # get std with variance splitting
-    T0 = np.mean(X, axis=0) / (std0 / sqrt(n_samples))
+    T_obs = np.mean(X, axis=0) / (std0 / sqrt(n_samples))
 
     if do_exact:
         perms = bin_perm_rep(n_samples, a=1, b=-1)[1:,:]
@@ -124,60 +124,13 @@ def permutation_t_test(X, n_permutations=10000, tail=0):
     scaling = float(n_permutations + 1)
 
     if tail == 0:
-        p_values = 1.0 - np.searchsorted(H0, np.abs(T0)) / scaling
+        p_values = 1.0 - np.searchsorted(H0, np.abs(T_obs)) / scaling
     elif tail == 1:
-        p_values = 1.0 - np.searchsorted(H0, T0) / scaling
+        p_values = 1.0 - np.searchsorted(H0, T_obs) / scaling
     elif tail == -1:
-        p_values = 1.0 - np.searchsorted(H0, -T0) / scaling
+        p_values = 1.0 - np.searchsorted(H0, -T_obs) / scaling
 
-    return p_values, T0, H0
+    return T_obs, p_values, H0
 
 permutation_t_test.__test__ = False # for nosetests
 
-
-if __name__ == '__main__':
-    # 1 sample t-test
-    n_samples, n_tests = 30, 5
-    n_permutations = 50000
-    # n_permutations = 'exact'
-    X = np.random.randn(n_samples, n_tests)
-    X[:,:2] += 0.6
-    p_values, T0, H0 = permutation_t_test(X, n_permutations, tail=1)
-    is_significant = p_values < 0.05
-    print 80*"-"
-    print "-------- 1-sample t-test :"
-    print "T stats : ", T0
-    print "p_values : ", p_values
-    print "Is significant : ", is_significant
-
-    print 80*"-"
-    print "-------- Comparison analytic vs permutation :"
-    p_values, T0, H0 = permutation_t_test(X, n_permutations, tail=1)
-    print "--- permutation_t_test :"
-    print "T stats : ", T0
-    print "p_values : ", p_values
-    print "Is significant : ", is_significant
-
-    from scipy import stats
-    T0, p_values = stats.ttest_1samp(X[:,0], 0)
-    print "--- scipy.stats.ttest_1samp :"
-    print "T stats : ", T0
-    print "p_values : ", p_values
-
-    # 2 samples t-test
-    X1 = np.random.randn(n_samples, n_tests)
-    X2 = np.random.randn(n_samples, n_tests)
-    X1[:,:2] += 2
-    p_values, T0, H0 = permutation_t_test(X1 - X2, n_permutations)
-    print 80*"-"
-    print "-------- 2-samples t-test :"
-    print "T stats : ", T0
-    print "p_values : ", p_values
-    print "Is significant : ", is_significant
-
-    # import pylab as pl
-    # pl.close('all')
-    # pl.hist(H0)
-    # y_min, y_max = pl.ylim()
-    # pl.vlines(T0, y_min, y_max, color='g', linewidth=2, linestyle='--')
-    # pl.show()
diff --git a/mne/stats/tests/test_cluster_level.py 
b/mne/stats/tests/test_cluster_level.py
new file mode 100644
index 0000000..9b6b999
--- /dev/null
+++ b/mne/stats/tests/test_cluster_level.py
@@ -0,0 +1,39 @@
+import numpy as np
+from numpy.testing import assert_equal
+
+from ..cluster_level import permutation_1d_cluster_test
+
+
+def test_permutation_t_test():
+    """Test T-test based on permutations
+    """
+    noiselevel = 20
+
+    normfactor = np.hanning(20).sum()
+
+    condition1 = np.random.randn(40, 350) * noiselevel
+    for c in condition1:
+        c[:] = np.convolve(c, np.hanning(20), mode="same") / normfactor
+
+    condition2 = np.random.randn(33, 350) * noiselevel
+    for c in condition2:
+        c[:] = np.convolve(c, np.hanning(20), mode="same") / normfactor
+
+    pseudoekp = 5 * np.hanning(150)[None,:]
+    condition1[:, 100:250] += pseudoekp
+    condition2[:, 100:250] -= pseudoekp
+
+    T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test(
+                                [condition1, condition2], n_permutations=500,
+                                tail=1)
+    assert_equal(np.sum(cluster_p_values < 0.05), 1)
+
+    T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test(
+                                [condition1, condition2], n_permutations=500,
+                                tail=0)
+    assert_equal(np.sum(cluster_p_values < 0.05), 1)
+
+    T_obs, clusters, cluster_p_values, hist = permutation_1d_cluster_test(
+                                [condition1, condition2], n_permutations=500,
+                                tail=-1)
+    assert_equal(np.sum(cluster_p_values < 0.05), 0)
diff --git a/mne/stats/tests/test_permutations.py 
b/mne/stats/tests/test_permutations.py
index 3161dd1..768dbf8 100644
--- a/mne/stats/tests/test_permutations.py
+++ b/mne/stats/tests/test_permutations.py
@@ -15,20 +15,20 @@ def test_permutation_t_test():
     X = np.random.randn(n_samples, n_tests)
     X[:,:2] += 1
 
-    p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=0)
+    T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=0)
     is_significant = p_values < 0.05
     assert_array_equal(is_significant, [True, True, False, False, False])
 
-    p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=1)
+    T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=1)
     is_significant = p_values < 0.05
     assert_array_equal(is_significant, [True, True, False, False, False])
 
-    p_values, T0, H0 = permutation_t_test(X, n_permutations=999, tail=-1)
+    T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999, tail=-1)
     is_significant = p_values < 0.05
     assert_array_equal(is_significant, [False, False, False, False, False])
 
     X = np.random.randn(18, 1)
-    p_values, T0, H0 = permutation_t_test(X[:, [0]], n_permutations='all')
-    T0_scipy, p_values_scipy = stats.ttest_1samp(X[:, 0], 0)
-    assert_almost_equal(T0[0], T0_scipy, 8)
+    T_obs, p_values, H0 = permutation_t_test(X[:, [0]], n_permutations='all')
+    T_obs_scipy, p_values_scipy = stats.ttest_1samp(X[:, 0], 0)
+    assert_almost_equal(T_obs[0], T_obs_scipy, 8)
     assert_almost_equal(p_values[0], p_values_scipy, 2)

-- 
Alioth's /usr/local/bin/git-commit-notice on 
/srv/git.debian.org/git/debian-med/python-mne.git

_______________________________________________
debian-med-commit mailing list
[email protected]
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

[med-svn] [python-mne] 79/376: ENH : new cluster level non-parametric statistic on 1D data

Reply via email to