# Source code for mne_bids.stats

"""Some functions to extract stats from a BIDS dataset."""

# Authors: Alex Gramfort <alexandre.gramfort@inria.fr>
#
# License: BSD (3-clause)


from mne_bids import BIDSPath, get_datatypes
from mne_bids.config import EPHY_ALLOWED_DATATYPES


def count_events(root_or_path, datatype='auto'):
    """Count events present in dataset.

    Every matching ``*_events.tsv`` file is read, the rows are grouped by
    subject (and by session / run when those entities are present in the
    matched file names) and, when available, by ``trial_type``. The sizes
    of the groups are returned as a table with one block of columns per
    task.

    Parameters
    ----------
    root_or_path : str | pathlib.Path | mne_bids.BIDSPath
        If str or Path it is the root folder of the BIDS dataset.
        If a BIDSPath is passed it allows to limit the count
        to a subject, a session or a run by only considering
        the event files that match this BIDSPath. The passed object is
        copied and never modified.
    datatype : str
        Type of the data recording. Can be ``meg``, ``eeg``,
        ``ieeg`` or ``auto``. If ``auto`` and a :class:`mne_bids.BIDSPath`
        instance is passed as ``root_or_path`` which has a ``datatype``
        attribute set, then this data type will be used. Otherwise, only
        one data type should be present in the dataset to avoid any
        ambiguity.

    Returns
    -------
    counts : pandas.DataFrame
        The pandas dataframe containing all the counts of trial_type
        in all matching events.tsv files. The columns are a two-level
        MultiIndex whose first level is the task name.

    Raises
    ------
    ValueError
        If ``datatype`` is ``'auto'`` and zero or more than one supported
        data type is present in the dataset, if the requested data type is
        not supported, or if no events file matches.
    """
    import pandas as pd

    if isinstance(root_or_path, BIDSPath):
        # Work on a copy: the path is updated several times below.
        bids_path = root_or_path.copy()
    else:
        bids_path = BIDSPath(root=root_or_path)

    bids_path.update(suffix='events', extension='tsv')

    # A datatype already set on the BIDSPath takes precedence over
    # auto-detection.
    if datatype == 'auto' and bids_path.datatype is not None:
        datatype = bids_path.datatype

    if datatype == 'auto':
        # Only inspect the dataset when the datatype really has to be
        # inferred. Sorting keeps the error message deterministic.
        this_datatypes = sorted(
            set(get_datatypes(bids_path.root)).intersection(
                EPHY_ALLOWED_DATATYPES
            )
        )
        if len(this_datatypes) > 1:
            raise ValueError(
                f'Multiple datatypes present ({this_datatypes}). '
                f'You need to specify datatype (got: {datatype}).'
            )
        if not this_datatypes:
            raise ValueError('No valid datatype present.')

        datatype = this_datatypes[0]

    if datatype not in EPHY_ALLOWED_DATATYPES:
        raise ValueError(f'datatype ({datatype}) is not supported. '
                         f'It must be one of: {EPHY_ALLOWED_DATATYPES}')

    bids_path.update(datatype=datatype)

    tasks = sorted({bp.task for bp in bids_path.match()})

    all_counts = []

    for task in tasks:
        bids_path.update(task=task)

        all_df = []
        # Track which optional entities occur in the matched files rather
        # than relying on the loop variable after the loop has finished.
        has_session = False
        has_run = False
        for bp in bids_path.match():
            df = pd.read_csv(str(bp), delimiter='\t')
            df['subject'] = bp.subject
            if bp.session is not None:
                df['session'] = bp.session
                has_session = True
            if bp.run is not None:
                df['run'] = bp.run
                has_run = True
            all_df.append(df)

        if not all_df:
            continue

        df = pd.concat(all_df)
        groups = ['subject']
        if has_session:
            groups.append('session')
        if has_run:
            groups.append('run')

        if 'stim_type' in df.columns:
            # Deal with some old files that use stim_type rather than
            # trial_type
            df = df.rename(columns={"stim_type": "trial_type"})

        # There are datasets out there without a `trial_type` or `stim_type`
        # column.
        if 'trial_type' in df.columns:
            groups.append('trial_type')

        # The last grouping key (normally `trial_type`) is pivoted into
        # the columns.
        # NOTE(review): when `subject` is the only grouping key the result
        # of `size()` has a flat index and `unstack()` presumably fails --
        # confirm the intended output for that case.
        counts = df.groupby(groups).size()
        counts = counts.unstack()

        # Acquisition-skip annotations are not experimental events.
        if 'BAD_ACQ_SKIP' in counts.columns:
            counts = counts.drop('BAD_ACQ_SKIP', axis=1)

        # Prefix every column with the task name so that the per-task
        # tables can be concatenated side by side.
        counts.columns = pd.MultiIndex.from_arrays(
            [[task] * counts.shape[1], counts.columns]
        )

        all_counts.append(counts)

    if not all_counts:
        raise ValueError('No events files found.')

    counts = pd.concat(all_counts, axis=1)

    return counts