Source code for mne_bids.stats
"""Some functions to extract stats from a BIDS dataset."""
# Authors: Alex Gramfort <alexandre.gramfort@inria.fr>
#
# License: BSD-3-Clause
from mne_bids import BIDSPath, get_datatypes
from mne_bids.config import EPHY_ALLOWED_DATATYPES
[docs]
def count_events(root_or_path, datatype="auto"):
    """Count events present in dataset.

    Parameters
    ----------
    root_or_path : path-like | mne_bids.BIDSPath
        If str or Path it is the root folder of the BIDS dataset.
        If a BIDSPath is passed it allows to limit the count
        to a subject, a session or a run by only considering
        the event files that match this BIDSPath.
    datatype : str
        Type of the data recording. Can be ``meg``, ``eeg``,
        ``ieeg`` or ``auto``. If ``auto`` and a :class:`mne_bids.BIDSPath`
        instance is passed as ``root_or_path`` which has a ``datatype``
        attribute set, then this data type will be used. Otherwise, only
        one data type should be present in the dataset to avoid any
        ambiguity.

    Returns
    -------
    counts : pandas.DataFrame
        The pandas dataframe containing all the counts of trial_type
        in all matching events.tsv files. Columns are a two-level index
        of (task, trial_type); missing combinations are ``pandas.NA``.

    Raises
    ------
    ValueError
        If ``datatype`` is ``"auto"`` and zero or several supported
        datatypes are present, if ``datatype`` is not supported, or if
        no events file matches.

    Notes
    -----
    .. versionchanged:: 0.15
       Table values were changed from floats (with NaN for missing values)
       to Pandas nullable integer arrays.
    """
    # pandas is imported lazily so that importing this module does not
    # require it.
    import pandas as pd

    if isinstance(root_or_path, BIDSPath):
        # Work on a copy: the path is updated repeatedly below.
        bids_path = root_or_path.copy()
    else:
        bids_path = BIDSPath(root=root_or_path)

    bids_path.update(suffix="events", extension=".tsv")

    datatypes = get_datatypes(bids_path.root)
    # Sorted so that the error message below is deterministic.
    this_datatypes = sorted(set(datatypes).intersection(EPHY_ALLOWED_DATATYPES))

    if datatype == "auto" and bids_path.datatype is not None:
        datatype = bids_path.datatype

    if datatype == "auto":
        if len(this_datatypes) > 1:
            raise ValueError(
                f"Multiple datatypes present ({this_datatypes})."
                f" You need to specify datatype (got: {datatype})"
            )
        if not this_datatypes:
            raise ValueError("No valid datatype present.")
        datatype = this_datatypes[0]

    if datatype not in EPHY_ALLOWED_DATATYPES:
        raise ValueError(
            f"datatype ({datatype}) is not supported. "
            f"It must be one of: {EPHY_ALLOWED_DATATYPES}"
        )

    bids_path.update(datatype=datatype)

    tasks = sorted({bp.task for bp in bids_path.match()})

    all_counts = []
    for task in tasks:
        bids_path.update(task=task)

        all_df = []
        # The grouping keys follow the entities of the last matched file;
        # the flags make that explicit instead of reading the loop variable
        # after the loop has finished.
        has_session = False
        has_run = False
        for bp in bids_path.match():
            df = pd.read_csv(str(bp), delimiter="\t")
            df["subject"] = bp.subject
            has_session = bp.session is not None
            if has_session:
                df["session"] = bp.session
            has_run = bp.run is not None
            if has_run:
                df["run"] = bp.run
            all_df.append(df)

        if not all_df:
            continue

        df = pd.concat(all_df)

        groups = ["subject"]
        if has_session:
            groups.append("session")
        if has_run:
            groups.append("run")

        if "stim_type" in df.columns:
            # Deal with some old files that use stim_type rather than
            # trial_type
            df = df.rename(columns={"stim_type": "trial_type"})

        # There are datasets out there without a `trial_type` or `stim_type`
        # column.
        if "trial_type" in df.columns:
            groups.append("trial_type")

        counts = df.groupby(groups).size()
        # A real count is never negative, so -1 is a safe placeholder for
        # "no such event in this row"; it is turned into a missing value
        # right away.
        counts = counts.unstack(fill_value=-1)
        counts = counts.replace(-1, pd.NA)

        if "BAD_ACQ_SKIP" in counts.columns:
            counts = counts.drop("BAD_ACQ_SKIP", axis=1)

        # Prefix every column with the task name -> (task, trial_type).
        counts.columns = pd.MultiIndex.from_arrays(
            [[task] * counts.shape[1], counts.columns]
        )

        all_counts.append(counts)

    if not all_counts:
        raise ValueError("No events files found.")

    counts = pd.concat(all_counts, axis=1)
    # Concatenating tasks aligns rows and leaves missing cells for rows that
    # are absent from a task. Casting once here gives the documented nullable
    # integer dtype for those cells as well as for the per-task gaps.
    return counts.astype("Int64")