From e15292fc0bc8d5e32dd6d6099a839bf810963f3a Mon Sep 17 00:00:00 2001 From: Stefan Appelhoff Date: Mon, 28 Oct 2024 16:13:09 +0100 Subject: [PATCH] eegbci api: allow downloading multiple subjects (#12918) --- doc/changes/devel/12918.apichange.rst | 1 + doc/documentation/datasets.rst | 18 +++-- examples/decoding/decoding_csp_eeg.py | 11 +-- examples/preprocessing/eeg_bridging.py | 2 +- examples/preprocessing/muscle_ica.py | 2 +- .../time_frequency/time_frequency_erds.py | 2 +- mne/datasets/eegbci/eegbci.py | 80 +++++++++++++------ mne/datasets/eegbci/tests/test_eegbci.py | 18 ++++- mne/datasets/utils.py | 5 +- tutorials/forward/35_eeg_no_mri.py | 2 +- .../40_artifact_correction_ica.py | 4 +- 11 files changed, 98 insertions(+), 47 deletions(-) create mode 100644 doc/changes/devel/12918.apichange.rst diff --git a/doc/changes/devel/12918.apichange.rst b/doc/changes/devel/12918.apichange.rst new file mode 100644 index 00000000000..958662b1b6f --- /dev/null +++ b/doc/changes/devel/12918.apichange.rst @@ -0,0 +1 @@ +Deprecate ``subject`` parameter in favor of ``subjects`` in :func:`mne.datasets.eegbci.load_data`, by `Stefan Appelhoff`_. diff --git a/doc/documentation/datasets.rst b/doc/documentation/datasets.rst index 70da39cccd8..0a0280ae5ac 100644 --- a/doc/documentation/datasets.rst +++ b/doc/documentation/datasets.rst @@ -161,17 +161,23 @@ EEGBCI motor imagery ==================== :func:`mne.datasets.eegbci.load_data` -The EEGBCI dataset is documented in :footcite:`SchalkEtAl2004`. The data set is -available at PhysioNet :footcite:`GoldbergerEtAl2000`. The dataset contains -64-channel EEG recordings from 109 subjects and 14 runs on each subject in EDF+ -format. The recordings were made using the BCI2000 system. To load a subject, -do:: +The EEGBCI dataset is documented in :footcite:`SchalkEtAl2004` and on the +`PhysioNet documentation page `_. +The data set is available at PhysioNet :footcite:`GoldbergerEtAl2000`. +It contains 64-channel EEG recordings from 109 subjects and 14 runs on each +subject in EDF+ format. The recordings were made using the BCI2000 system. +To load a subject, do:: from mne.io import concatenate_raws, read_raw_edf from mne.datasets import eegbci - raw_fnames = eegbci.load_data(subject, runs) + subjects = [1] # may vary + runs = [4, 8, 12] # may vary + raw_fnames = eegbci.load_data(subjects, runs) raws = [read_raw_edf(f, preload=True) for f in raw_fnames] + # concatenate runs from subject raw = concatenate_raws(raws) + # make channel names follow standard conventions + eegbci.standardize(raw) .. topic:: Examples diff --git a/examples/decoding/decoding_csp_eeg.py b/examples/decoding/decoding_csp_eeg.py index 2ffd18d34b4..5859edde166 100644 --- a/examples/decoding/decoding_csp_eeg.py +++ b/examples/decoding/decoding_csp_eeg.py @@ -10,8 +10,9 @@ See https://en.wikipedia.org/wiki/Common_spatial_pattern and :footcite:`Koles1991`. The EEGBCI dataset is documented in -:footcite:`SchalkEtAl2004` and is available at PhysioNet -:footcite:`GoldbergerEtAl2000`. +:footcite:`SchalkEtAl2004` and on the +`PhysioNet documentation page `_. +The dataset is available at PhysioNet :footcite:`GoldbergerEtAl2000`. """ # Authors: Martin Billinger # @@ -40,15 +41,15 @@ # avoid classification of evoked responses by using epochs that start 1s after # cue onset. tmin, tmax = -1.0, 4.0 -subject = 1 +subjects = 1 runs = [6, 10, 14] # motor imagery: hands vs feet -raw_fnames = eegbci.load_data(subject, runs) +raw_fnames = eegbci.load_data(subjects, runs) raw = concatenate_raws([read_raw_edf(f, preload=True) for f in raw_fnames]) eegbci.standardize(raw) # set channel names montage = make_standard_montage("standard_1005") raw.set_montage(montage) -raw.annotations.rename(dict(T1="hands", T2="feet")) +raw.annotations.rename(dict(T1="hands", T2="feet")) # as documented on PhysioNet raw.set_eeg_reference(projection=True) # Apply band-pass filter diff --git a/examples/preprocessing/eeg_bridging.py b/examples/preprocessing/eeg_bridging.py index b0eb50a039d..37d85c55df6 100644 --- a/examples/preprocessing/eeg_bridging.py +++ b/examples/preprocessing/eeg_bridging.py @@ -65,7 +65,7 @@ raw_data = dict() # store infos for electrode positions for sub in range(1, 11): print(f"Computing electrode bridges for subject {sub}") - raw_fname = mne.datasets.eegbci.load_data(subject=sub, runs=(1,))[0] + raw_fname = mne.datasets.eegbci.load_data(subjects=sub, runs=(1,))[0] raw = mne.io.read_raw(raw_fname, preload=True, verbose=False) mne.datasets.eegbci.standardize(raw) # set channel names raw.set_montage(montage, verbose=False) diff --git a/examples/preprocessing/muscle_ica.py b/examples/preprocessing/muscle_ica.py index 64c14f5f5af..f61d1e22bc4 100644 --- a/examples/preprocessing/muscle_ica.py +++ b/examples/preprocessing/muscle_ica.py @@ -97,7 +97,7 @@ for sub in (1, 2): raw = mne.io.read_raw_edf( - mne.datasets.eegbci.load_data(subject=sub, runs=(1,))[0], preload=True + mne.datasets.eegbci.load_data(subjects=sub, runs=(1,))[0], preload=True ) mne.datasets.eegbci.standardize(raw) # set channel names montage = mne.channels.make_standard_montage("standard_1005") diff --git a/examples/time_frequency/time_frequency_erds.py b/examples/time_frequency/time_frequency_erds.py index 1d805121739..93272eb7aa3 100644 --- a/examples/time_frequency/time_frequency_erds.py +++ b/examples/time_frequency/time_frequency_erds.py @@ -50,7 +50,7 @@ # First, we load and preprocess the data. We use runs 6, 10, and 14 from # subject 1 (these runs contains hand and feet motor imagery). -fnames = eegbci.load_data(subject=1, runs=(6, 10, 14)) +fnames = eegbci.load_data(subjects=1, runs=(6, 10, 14)) raw = concatenate_raws([read_raw_edf(f, preload=True) for f in fnames]) raw.rename_channels(lambda x: x.strip(".")) # remove dots from channel names diff --git a/mne/datasets/eegbci/eegbci.py b/mne/datasets/eegbci/eegbci.py index 83dde035d1b..91d78f57a03 100644 --- a/mne/datasets/eegbci/eegbci.py +++ b/mne/datasets/eegbci/eegbci.py @@ -9,7 +9,7 @@ from os import path as op from pathlib import Path -from ...utils import _url_to_local_path, logger, verbose +from ...utils import _url_to_local_path, logger, verbose, warn from ..utils import _do_path_update, _downloader_params, _get_path, _log_time_size EEGMI_URL = "https://physionet.org/files/eegmmidb/1.0.0/" @@ -21,7 +21,9 @@ def data_path(url, path=None, force_update=False, update_path=None, *, verbose=N This is a low-level function useful for getting a local copy of a remote EEGBCI dataset :footcite:`SchalkEtAl2004`, which is also available at PhysioNet - :footcite:`GoldbergerEtAl2000`. + :footcite:`GoldbergerEtAl2000`. Metadata, such as the meaning of event markers + may be obtained from the + `PhysioNet documentation page `_. Parameters ---------- @@ -92,8 +94,10 @@ def data_path(url, path=None, force_update=False, update_path=None, *, verbose=N @verbose def load_data( - subject, - runs, + subjects=None, + runs=None, + *, + subject=None, path=None, force_update=False, update_path=None, @@ -103,14 +107,19 @@ def load_data( """Get paths to local copies of EEGBCI dataset files. This will fetch data for the EEGBCI dataset :footcite:`SchalkEtAl2004`, which is - also available at PhysioNet :footcite:`GoldbergerEtAl2000`. + also available at PhysioNet :footcite:`GoldbergerEtAl2000`. Metadata, such as the + meaning of event markers may be obtained from the + `PhysioNet documentation page `_. Parameters ---------- - subject : int - The subject to use. Can be in the range of 1-109 (inclusive). + subjects : int | list of int + The subjects to use. Can be in the range of 1-109 (inclusive). runs : int | list of int The runs to use (see Notes for details). + subject : int + This parameter is deprecated and will be removed in mne version 1.9. + Please use ``subjects`` instead. path : None | path-like Location of where to look for the EEGBCI data. If ``None``, the environment variable or config parameter ``MNE_DATASETS_EEGBCI_PATH`` is used. If neither @@ -149,11 +158,11 @@ def load_data( For example, one could do:: >>> from mne.datasets import eegbci - >>> eegbci.load_data(1, [6, 10, 14], "~/datasets") # doctest:+SKIP + >>> eegbci.load_data([1, 2], [6, 10, 14], "~/datasets") # doctest:+SKIP - This would download runs 6, 10, and 14 (hand/foot motor imagery) runs from subject 1 - in the EEGBCI dataset to "~/datasets" and prompt the user to store this path in the - config (if it does not already exist). + This would download runs 6, 10, and 14 (hand/foot motor imagery) runs from subjects + 1 and 2 in the EEGBCI dataset to "~/datasets" and prompt the user to store this path + in the config (if it does not already exist). References ---------- @@ -161,8 +170,27 @@ def load_data( """ import pooch + # XXX: Remove this with mne 1.9 ↓↓↓ + # Also remove the subject parameter at that point. + # Also remove the `None` default for subjects and runs params at that point. + if subject is not None: + subjects = subject + warn( + "The ``subject`` parameter is deprecated and will be removed in version " + "1.9. Use the ``subjects`` parameter (note the `s`) to suppress this " + "warning.", + FutureWarning, + ) + del subject + if subjects is None or runs is None: + raise ValueError("You must pass the parameters ``subjects`` and ``runs``.") + # ↑↑↑ + t0 = time.time() + if not hasattr(subjects, "__iter__"): + subjects = [subjects] + if not hasattr(runs, "__iter__"): runs = [runs] @@ -198,20 +226,22 @@ def load_data( # fetch the file(s) data_paths = [] sz = 0 - for run in runs: - file_part = f"S{subject:03d}/S{subject:03d}R{run:02d}.edf" - destination = Path(base_path, file_part) - data_paths.append(destination) - if destination.exists(): - if force_update: - destination.unlink() - else: - continue - if sz == 0: # log once - logger.info("Downloading EEGBCI data") - fetcher.fetch(file_part) - # update path in config if desired - sz += destination.stat().st_size + for subject in subjects: + for run in runs: + file_part = f"S{subject:03d}/S{subject:03d}R{run:02d}.edf" + destination = Path(base_path, file_part) + data_paths.append(destination) + if destination.exists(): + if force_update: + destination.unlink() + else: + continue + if sz == 0: # log once + logger.info("Downloading EEGBCI data") + fetcher.fetch(file_part) + # update path in config if desired + sz += destination.stat().st_size + _do_path_update(path, update_path, config_key, name) if sz > 0: _log_time_size(t0, sz) diff --git a/mne/datasets/eegbci/tests/test_eegbci.py b/mne/datasets/eegbci/tests/test_eegbci.py index 6ce58861c44..40ef5ee030f 100644 --- a/mne/datasets/eegbci/tests/test_eegbci.py +++ b/mne/datasets/eegbci/tests/test_eegbci.py @@ -2,12 +2,26 @@ # License: BSD-3-Clause # Copyright the MNE-Python contributors. +import pytest + from mne.datasets import eegbci def test_eegbci_download(tmp_path, fake_retrieve): """Test Sleep Physionet URL handling.""" - for subj in range(4): - fnames = eegbci.load_data(subj + 1, runs=[3], path=tmp_path, update_path=False) + subjects = range(1, 5) + for subj in subjects: + fnames = eegbci.load_data(subj, runs=[3], path=tmp_path, update_path=False) assert len(fnames) == 1, subj assert fake_retrieve.call_count == 4 + + # XXX: remove in version 1.9 + with pytest.warns(FutureWarning, match="The ``subject``"): + fnames = eegbci.load_data( + subject=subjects, runs=[3], path=tmp_path, update_path=False + ) + assert len(fnames) == 4 + + # XXX: remove in version 1.9 + with pytest.raises(ValueError, match="You must pass the parameters"): + fnames = eegbci.load_data(path=tmp_path, update_path=False) diff --git a/mne/datasets/utils.py b/mne/datasets/utils.py index 34dbf0ef803..1b8dc535daa 100644 --- a/mne/datasets/utils.py +++ b/mne/datasets/utils.py @@ -347,9 +347,8 @@ def _download_all_example_data(verbose=True): sleep_physionet, ) - eegbci.load_data(1, [6, 10, 14], update_path=True) - for subj in range(4): - eegbci.load_data(subj + 1, runs=[3], update_path=True) + eegbci.load_data(subjects=1, runs=[6, 10, 14], update_path=True) + eegbci.load_data(subjects=range(1, 5), runs=[3], update_path=True) logger.info("[done eegbci]") sleep_physionet.age.fetch_data(subjects=[0, 1], recording=[1]) diff --git a/tutorials/forward/35_eeg_no_mri.py b/tutorials/forward/35_eeg_no_mri.py index 0fca916cabf..10d13864670 100644 --- a/tutorials/forward/35_eeg_no_mri.py +++ b/tutorials/forward/35_eeg_no_mri.py @@ -48,7 +48,7 @@ # .. note:: See :ref:`plot_montage` to view all the standard EEG montages # available in MNE-Python. -(raw_fname,) = eegbci.load_data(subject=1, runs=[6]) +(raw_fname,) = eegbci.load_data(subjects=1, runs=[6]) raw = mne.io.read_raw_edf(raw_fname, preload=True) # Clean channel names to be able to use a standard 1005 montage diff --git a/tutorials/preprocessing/40_artifact_correction_ica.py b/tutorials/preprocessing/40_artifact_correction_ica.py index 87db02be1f8..5eeb7b79d64 100644 --- a/tutorials/preprocessing/40_artifact_correction_ica.py +++ b/tutorials/preprocessing/40_artifact_correction_ica.py @@ -532,9 +532,9 @@ raws = list() icas = list() -for subj in range(4): +for subj in range(1, 5): # EEGBCI subjects are 1-indexed; run 3 is a left/right hand movement task - fname = mne.datasets.eegbci.load_data(subj + 1, runs=[3])[0] + fname = mne.datasets.eegbci.load_data(subj, runs=[3])[0] raw = mne.io.read_raw_edf(fname).load_data().resample(50) # remove trailing `.` from channel names so we can set montage mne.datasets.eegbci.standardize(raw)