Source code for mne_bids.stats
"""Some functions to extract stats from a BIDS dataset."""
# Authors: Alex Gramfort <alexandre.gramfort@inria.fr>
#
# License: BSD-3-Clause
from mne_bids import BIDSPath, get_datatypes
from mne_bids.config import EPHY_ALLOWED_DATATYPES
[docs]
def count_events(root_or_path, datatype="auto"):
    """Count events present in dataset.

    Parameters
    ----------
    root_or_path : path-like | mne_bids.BIDSPath
        If str or Path it is the root folder of the BIDS dataset.
        If a BIDSPath is passed it allows to limit the count
        to a subject, a session or a run by only considering
        the event files that match this BIDSPath.
    datatype : str
        Type of the data recording. Can be ``meg``, ``eeg``,
        ``ieeg`` or ``auto``. If ``auto`` and a :class:`mne_bids.BIDSPath`
        instance is passed as ``root_or_path`` which has a ``datatype``
        attribute set, then this data type will be used. Otherwise, only
        one data type should be present in the dataset to avoid any
        ambiguity.

    Returns
    -------
    counts : pandas.DataFrame
        The pandas dataframe containing all the counts of trial_type
        in all matching events.tsv files.

    Raises
    ------
    ValueError
        If ``datatype`` is ``auto`` and the dataset contains either none or
        more than one of the supported data types, if the resolved
        ``datatype`` is not supported, or if no events files are found.

    Notes
    -----
    .. versionchanged:: 0.15
       Table values were changed from floats (with NaN for missing values)
       to Pandas nullable integer arrays.
    """
    import pandas as pd

    if isinstance(root_or_path, BIDSPath):
        # Work on a copy: the path is updated several times below.
        bids_path = root_or_path.copy()
    else:
        bids_path = BIDSPath(root=root_or_path)
    bids_path.update(suffix="events", extension=".tsv")

    datatypes = get_datatypes(bids_path.root)
    # Sorted so that the error message below is deterministic.
    this_datatypes = sorted(set(datatypes).intersection(EPHY_ALLOWED_DATATYPES))

    # A datatype set on the passed BIDSPath takes precedence over guessing.
    if datatype == "auto" and bids_path.datatype is not None:
        datatype = bids_path.datatype

    if datatype == "auto":
        if len(this_datatypes) > 1:
            raise ValueError(
                f"Multiple datatypes present ({this_datatypes})."
                f" You need to specify datatype (got: {datatype})"
            )
        if not this_datatypes:
            raise ValueError("No valid datatype present.")
        datatype = this_datatypes[0]

    if datatype not in EPHY_ALLOWED_DATATYPES:
        raise ValueError(
            f"datatype ({datatype}) is not supported. "
            f"It must be one of: {EPHY_ALLOWED_DATATYPES}"
        )
    bids_path.update(datatype=datatype)

    # Events are counted per task; each task becomes one top-level column
    # group of the returned table.
    tasks = sorted({bp.task for bp in bids_path.match()})

    all_counts = []
    for task in tasks:
        bids_path.update(task=task)

        all_df = []
        last_bp = None
        for bp in bids_path.match():
            df = pd.read_csv(str(bp), delimiter="\t")
            # Tag every event row with the entities of the file it came from.
            df["subject"] = bp.subject
            if bp.session is not None:
                df["session"] = bp.session
            if bp.run is not None:
                df["run"] = bp.run
            all_df.append(df)
            last_bp = bp

        if not all_df:
            continue

        df = pd.concat(all_df)

        # Whether session/run are used as grouping keys is decided from the
        # last matched file of this task.
        groups = ["subject"]
        if last_bp.session is not None:
            groups.append("session")
        if last_bp.run is not None:
            groups.append("run")

        if "stim_type" in df.columns:
            # Deal with some old files that use stim_type rather than
            # trial_type
            df = df.rename(columns={"stim_type": "trial_type"})

        # There are datasets out there without a `trial_type` or `stim_type`
        # column.
        if "trial_type" in df.columns:
            groups.append("trial_type")

        # NOTE(review): if "subject" ends up being the only grouping key, the
        # grouped result appears to have a single-level index, which
        # ``unstack`` cannot pivot -- confirm how such datasets should be
        # handled.
        counts = df.groupby(groups).size()
        # Pivot the innermost grouping key into columns. Combinations that
        # never occur are first marked with -1 (an impossible count) and then
        # turned into missing values.
        counts = counts.unstack(fill_value=-1)
        counts = counts.replace(-1, pd.NA)

        if "BAD_ACQ_SKIP" in counts.columns:
            counts = counts.drop("BAD_ACQ_SKIP", axis=1)

        # Prefix the columns with the task name so that the per-task tables
        # can be concatenated side by side.
        counts.columns = pd.MultiIndex.from_arrays(
            [[task] * counts.shape[1], counts.columns]
        )
        all_counts.append(counts)

    if not all_counts:
        raise ValueError("No events files found.")

    return pd.concat(all_counts, axis=1)