Source code for mne_bids.stats

"""Some functions to extract stats from a BIDS dataset."""

# Authors: The MNE-BIDS developers
# SPDX-License-Identifier: BSD-3-Clause

from mne_bids import BIDSPath, get_datatypes
from mne_bids.config import EPHY_ALLOWED_DATATYPES



[docs]
def count_events(root_or_path, datatype="auto"):
    """Count events present in dataset.

    Parameters
    ----------
    root_or_path : path-like | mne_bids.BIDSPath
        If str or Path it is the root folder of the BIDS dataset.
        If a BIDSPath is passed it allows to limit the count
        to a subject, a session or a run by only considering
        the event files that match this BIDSPath.
    datatype : str
        Type of the data recording. Can be ``meg``, ``eeg``,
        ``ieeg`` or ``auto``. If ``auto`` and a :class:`mne_bids.BIDSPath`
        instance is passed as ``root_or_path`` which has a ``datatype``
        attribute set, then this data type will be used. Otherwise, only
        one data type should be present in the dataset to avoid any
        ambiguity.

    Returns
    -------
    counts : pandas.DataFrame
        The pandas dataframe containing all the counts of trial_type
        in all matching events.tsv files.

    Raises
    ------
    ValueError
        If ``datatype`` is ``auto`` and the dataset contains either none or
        more than one of the supported data types, if the resolved data type
        is not supported, or if no events files are found.

    Notes
    -----
    .. versionchanged:: 0.15
       Table values were changed from floats (with NaN for missing values)
       to Pandas nullable integer arrays.
    """
    import pandas as pd

    if isinstance(root_or_path, BIDSPath):
        # Work on a copy: the path is updated several times below.
        bids_path = root_or_path.copy()
    else:
        bids_path = BIDSPath(root=root_or_path)

    bids_path.update(suffix="events", extension=".tsv")

    datatypes = get_datatypes(bids_path.root)
    this_datatypes = list(set(datatypes).intersection(EPHY_ALLOWED_DATATYPES))

    # A datatype set on the passed BIDSPath takes precedence over "auto".
    if (datatype == "auto") and (bids_path.datatype is not None):
        datatype = bids_path.datatype

    if datatype == "auto":
        # Auto-detection only works if exactly one supported datatype exists.
        if len(this_datatypes) > 1:
            raise ValueError(
                f"Multiple datatypes present ({this_datatypes})."
                f" You need to specify datatype (got: {datatype})"
            )
        elif len(this_datatypes) == 0:
            raise ValueError("No valid datatype present.")

        datatype = this_datatypes[0]

    if datatype not in EPHY_ALLOWED_DATATYPES:
        raise ValueError(
            f"datatype ({datatype}) is not supported. "
            f"It must be one of: {EPHY_ALLOWED_DATATYPES}"
        )

    bids_path.update(datatype=datatype)

    # Every distinct task among the matching events files gets its own
    # group of columns in the output.
    tasks = sorted({bp.task for bp in bids_path.match()})

    all_counts = []

    for task in tasks:
        bids_path.update(task=task)
        matches = list(bids_path.match())

        all_df = []
        for bp in matches:
            df = pd.read_csv(str(bp), delimiter="\t")
            # Tag every event row with the entities of the file it came from
            # so that rows can be grouped after concatenation.
            df["subject"] = bp.subject
            if bp.session is not None:
                df["session"] = bp.session
            if bp.run is not None:
                df["run"] = bp.run
            all_df.append(df)

        if not all_df:
            continue

        df = pd.concat(all_df)

        # NOTE(review): whether "session" / "run" are used as grouping keys
        # is decided from the last matched file only; this presumably assumes
        # all files of a task carry the same entities -- confirm.
        last_bp = matches[-1]
        groups = ["subject"]
        if last_bp.session is not None:
            groups.append("session")
        if last_bp.run is not None:
            groups.append("run")

        if "stim_type" in df.columns:
            # Deal with some old files that use stim_type rather than
            # trial_type
            df = df.rename(columns={"stim_type": "trial_type"})

        # There are datasets out there without a `trial_type` or `stim_type`
        # column.
        if "trial_type" in df.columns:
            groups.append("trial_type")

        # Number of rows per group; the innermost grouping key is then
        # pivoted into columns.
        counts = df.groupby(groups).size()
        # Combinations that never occur are first filled with the sentinel
        # -1 and then replaced by pd.NA.
        counts = counts.unstack(fill_value=-1)
        counts.replace(-1, pd.NA, inplace=True)

        if "BAD_ACQ_SKIP" in counts.columns:
            counts = counts.drop("BAD_ACQ_SKIP", axis=1)

        # Two-level columns: (task, original column label).
        counts.columns = pd.MultiIndex.from_arrays(
            [[task] * counts.shape[1], counts.columns]
        )

        all_counts.append(counts)

    if not all_counts:
        raise ValueError("No events files found.")

    # Place the per-task tables side by side.
    counts = pd.concat(all_counts, axis=1)

    return counts