"""Make BIDS report from dataset and sidecar files."""
# Authors: The MNE-BIDS developers
# SPDX-License-Identifier: BSD-3-Clause
import json
import os.path as op
import textwrap
from pathlib import Path
import jinja2
import numpy as np
from mne.utils import logger, verbose
from mne_bids.config import ALLOWED_DATATYPES, DOI
from mne_bids.path import (
BIDSPath,
_find_matching_sidecar,
_parse_ext,
get_bids_path_from_fname,
get_datatypes,
get_entity_vals,
)
from mne_bids.tsv_handler import _from_tsv
from mne_bids.utils import warn
# Module-wide Jinja2 environment. Templates are looked up by name in the
# ``templates`` directory of the ``mne_bids.report`` package.
jinja_env = jinja2.Environment(
loader=jinja2.PackageLoader(
package_name="mne_bids.report", package_path="templates"
)
)
def _pretty_str(listed):
    """Join strings into an enumeration such as ``"a, b, and c"``.

    Parameters
    ----------
    listed : iterable of str
        The strings to enumerate. Anything that is not already a list is
        converted to one first.

    Returns
    -------
    str
        An empty string for no items, the item itself for a single item,
        otherwise the items separated by ``", "`` with ``", and "`` before
        the last one.
    """
    items = listed if isinstance(listed, list) else list(listed)
    if len(items) > 1:
        *leading, final = items
        return f"{', '.join(leading)}, and {final}"
    # zero or one item: nothing to enumerate
    return ",".join(items)
def _range_str(minval, maxval, meanval, stdval, n_unknown, typ):
    """Build the phrase describing the range of participant ages.

    Parameters
    ----------
    minval, maxval, meanval, stdval : float | str
        Summary statistics. ``minval == "n/a"`` marks that no statistics
        are available.
    n_unknown : int
        Number of participants without a known value.
    typ : str
        Name of the quantity, used only in the "unknown" remark.

    Returns
    -------
    str
        The phrase, with all statistics rounded to two decimals.
    """
    if minval == "n/a":
        return "ages all unknown"

    remark = f"; {n_unknown} with unknown {typ}" if n_unknown > 0 else ""
    low, high, mean, std = (
        round(stat, 2) for stat in (minval, maxval, meanval, stdval)
    )
    return (
        f"ages ranged from {low} to {high} "
        f"(mean = {mean}, std = {std}{remark})"
    )
def _summarize_participant_hand(hands):
    """Build the phrase counting participants per handedness.

    Parameters
    ----------
    hands : list of str
        Handedness entries; ``"n/a"`` marks an unknown value. ``R``, ``L``
        and ``A`` are matched case-insensitively.

    Returns
    -------
    str
        The counts phrase, or a fixed "all unknown" phrase when every
        entry (including the case of no entries) is ``"n/a"``.
    """
    if all(hand == "n/a" for hand in hands):
        return "handedness were all unknown"

    normalized = [hand.upper() for hand in hands]
    right = normalized.count("R")
    left = normalized.count("L")
    ambidextrous = normalized.count("A")
    return (
        f"comprised of {right} right hand, {left} left hand "
        f"and {ambidextrous} ambidextrous"
    )
def _summarize_participant_sex(sexs):
    """Build the phrase counting male and female participants.

    Parameters
    ----------
    sexs : list of str
        Sex entries; ``"n/a"`` marks an unknown value. ``M`` and ``F`` are
        matched case-insensitively.

    Returns
    -------
    str
        The counts phrase, or a fixed "all unknown" phrase when every
        entry (including the case of no entries) is ``"n/a"``.
    """
    if all(sex == "n/a" for sex in sexs):
        return "sex were all unknown"

    normalized = [sex.upper() for sex in sexs]
    males = normalized.count("M")
    females = normalized.count("F")
    return f"comprised of {males} male and {females} female participants"
def _length_recording_str(length_recordings):
    """Describe the distribution of recording durations in one sentence.

    Parameters
    ----------
    length_recordings : list of float | None
        One recording duration per scan. ``None`` means that no summary
        should be produced.

    Returns
    -------
    str
        A sentence reporting min, max, mean, standard deviation and total
        of the durations (each rounded to two decimals), or an empty
        string when there is nothing to summarize.
    """
    # ``None`` is the explicit "no summary" signal. An empty sequence has no
    # min/max either, and ``np.min([])`` would raise a ValueError.
    if length_recordings is None or len(length_recordings) == 0:
        return ""

    min_record_length = round(np.min(length_recordings), 2)
    max_record_length = round(np.max(length_recordings), 2)
    mean_record_length = round(np.mean(length_recordings), 2)
    std_record_length = round(np.std(length_recordings), 2)
    total_record_length = round(sum(length_recordings), 2)

    return (
        f"Recording durations ranged from {min_record_length} to "
        f"{max_record_length} seconds "
        f"(mean = {mean_record_length}, std = {std_record_length}), "
        f"for a total of {total_record_length} seconds of data recorded "
        f"over all scans."
    )
def _summarize_software_filters(software_filters):
    """Describe the software filters of a recording as text.

    Parameters
    ----------
    software_filters : dict | str | None
        Mapping of filter name to a dict of its parameters. Any value that
        is not a non-empty dict (e.g. ``"n/a"``, ``{}`` or ``None``) is
        treated as "no filter information".

    Returns
    -------
    str
        One description per filter, ``"<name>"`` or
        ``"<name> with parameters <value> <param>, ..."``, joined by
        ``"; "``. Empty string when there is nothing to describe.
    """
    if not isinstance(software_filters, dict) or not software_filters:
        return ""

    descriptions = []
    for filter_name, filter_params in software_filters.items():
        description = f"{filter_name}"
        if isinstance(filter_params, dict) and filter_params:
            # parameters with an empty name or a falsy value are left out
            parameters = [
                f"{param_value} {param_name}"
                for param_name, param_value in filter_params.items()
                if param_name and param_value
            ]
            if parameters:
                description += " with parameters " + ", ".join(parameters)
        descriptions.append(description)

    # Separate the filters explicitly; plain concatenation would glue the
    # name of one filter onto the parameters of the previous one.
    return "; ".join(descriptions)
def _pretty_dict(template_dict):
    """Replace single-blank values by ``"n/a"``, modifying the dict in place.

    Parameters
    ----------
    template_dict : dict
        Template values. Every value equal to ``" "`` is overwritten.
    """
    blank_keys = [key for key, val in template_dict.items() if val == " "]
    for key in blank_keys:
        template_dict[key] = "n/a"
def _summarize_dataset(root):
    """Summarize the dataset_description.json file.

    Required dataset descriptors include:
        - Name
        - BIDSVersion

    Added descriptors include:
        - Authors
        - DOI

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings. Empty if
        ``dataset_description.json`` does not exist in ``root``.
    """
    dataset_descrip_fpath = op.join(root, "dataset_description.json")
    if not op.exists(dataset_descrip_fpath):
        return dict()

    # read file and 'REQUIRED' components of it
    with open(dataset_descrip_fpath, encoding="utf-8-sig") as fin:
        dataset_description = json.load(fin)

    name = dataset_description["Name"]
    bids_version = dataset_description["BIDSVersion"]

    # "Authors" is not a required field: tolerate a missing or null entry
    # instead of raising, and wrap a bare string so that it is not
    # enumerated character by character.
    authors = dataset_description.get("Authors") or []
    if isinstance(authors, str):
        authors = [authors]

    # create dictionary to pass into template string
    template_dict = {
        "name": name,
        "bids_version": bids_version,
        "mne_bids_doi": DOI,
        "authors": _pretty_str(authors),
    }
    _pretty_dict(template_dict)
    return template_dict
def _parse_ages(p_ages):
    """Convert the known ages to floats; None if any is not a valid age."""
    ages = []
    for age in p_ages:
        if age == "n/a":
            continue
        try:
            value = float(age)
        except (TypeError, ValueError):
            return None
        if not np.isfinite(value) or value < 0:
            return None
        ages.append(value)
    return ages


def _summarize_participants_tsv(root):
    """Summarize `participants.tsv` file in BIDS root directory.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings. Empty if
        ``participants.tsv`` does not exist in ``root``.
    """
    participants_tsv_fpath = op.join(root, "participants.tsv")
    if not op.exists(participants_tsv_fpath):
        return dict()

    participants_tsv = _from_tsv(str(participants_tsv_fpath))
    p_ids = participants_tsv["participant_id"]
    logger.info(f"Summarizing participants.tsv {participants_tsv_fpath}...")

    # summarize sex count statistics
    keys = ["M", "F", "n/a"]
    p_sex = participants_tsv.get("sex")
    # phrasing works for both sex and gender
    p_gender = participants_tsv.get("gender")
    sexs = ["n/a"]
    if p_sex or p_gender:
        # only summarize sex if it conforms to `keys` referenced above
        p_sex = p_gender if p_sex is None else p_sex
        if all(sex.upper() in keys for sex in p_sex if sex != "n/a"):
            sexs = p_sex

    # summarize hand count statistics
    keys = ["R", "L", "A", "n/a"]
    p_hands = participants_tsv.get("hand")
    hands = ["n/a"]
    if p_hands:
        # only summarize handedness if it conforms to
        # mne-bids handedness
        if all(hand.upper() in keys for hand in p_hands if hand != "n/a"):
            hands = p_hands

    # summarize age statistics: mean, std, min, max
    p_ages = participants_tsv.get("age")
    min_age, max_age = "n/a", "n/a"
    mean_age, std_age = "n/a", "n/a"
    n_age_unknown = len(p_ages) if p_ages else len(p_ids)
    if p_ages:
        # Only summarize ages if every known entry is a number. Parsing
        # with float() accepts decimal ages such as "25.5" and rejects
        # anything that cannot actually be converted.
        age_list = _parse_ages(p_ages)
        if age_list is not None:
            n_age_unknown = len(p_ids) - len(age_list)
            if age_list:
                min_age, max_age = np.min(age_list), np.max(age_list)
                mean_age, std_age = np.mean(age_list), np.std(age_list)

    template_dict = {
        "sexs": _summarize_participant_sex(sexs),
        "hands": _summarize_participant_hand(hands),
        "ages": _range_str(
            min_age, max_age, mean_age, std_age, n_age_unknown, "age"
        ),
    }
    return template_dict
def _summarize_scans(root, session=None):
    """Summarize the scans found below the BIDS root directory.

    A summary is produced only if at least one ``*_scans.tsv`` file is
    found; otherwise a warning is emitted.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    session : str, optional
        The session for a item. Corresponds to "ses". If given, only
        scans files whose name contains ``ses-<session>`` are used.

    Returns
    -------
    template_dict : dict
        The sidecar summary merged with the channels summary. Empty if no
        scans file was found.
    """
    root = Path(root)
    pattern = (
        "*_scans.tsv" if session is None else f"*ses-{session}" f"*_scans.tsv"
    )
    scans_fpaths = list(root.rglob(pattern))

    if not scans_fpaths:
        warn(
            "No *scans.tsv files found. Currently, "
            "we do not generate a report without the scans.tsv files."
        )
        return dict()

    logger.info(f"Summarizing scans.tsv files {scans_fpaths}...")

    # sidecar.json values first, then channels.tsv values on top
    sidecar_dict = _summarize_sidecar_json(root, scans_fpaths)
    channels_dict = _summarize_channels_tsv(root, scans_fpaths)
    return {**sidecar_dict, **channels_dict}
def _summarize_sidecar_json(root, scans_fpaths):
    """Summarize the sidecar JSON files of all scans in the BIDS root.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    scans_fpaths : list
        A list of all *_scans.tsv files in ``root``. The summary
        will occur for all scans listed in the *_scans.tsv files.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.
    """
    n_scans = 0
    powerlinefreqs, sfreqs = set(), set()
    manufacturers = set()
    length_recordings = []
    # Fallback for the case that no scan contributes a sidecar (all scans
    # skipped below); without it the name would be unbound further down.
    software_filters = "n/a"

    # loop through each scans.tsv file and each scan listed in it
    for scan_fpath in scans_fpaths:
        scans_tsv = _from_tsv(scan_fpath)
        for scan in scans_tsv["filename"]:
            # summarize metadata of recordings
            bids_path, _ = _parse_ext(scan)
            datatype = op.dirname(scan)
            if datatype not in ALLOWED_DATATYPES:
                continue

            n_scans += 1

            # convert to BIDSPath
            if not isinstance(bids_path, BIDSPath):
                bids_path = get_bids_path_from_fname(bids_path)
            bids_path.root = root

            # XXX: improve to allow emptyroom
            if bids_path.subject == "emptyroom":
                continue

            sidecar_fname = _find_matching_sidecar(
                bids_path=bids_path, suffix=datatype, extension=".json"
            )
            with open(sidecar_fname, encoding="utf-8-sig") as fin:
                sidecar_json = json.load(fin)

            # aggregate metadata from each scan
            # REQUIRED kwargs
            sfreq = sidecar_json["SamplingFrequency"]
            powerlinefreq = str(sidecar_json["PowerLineFrequency"])
            # NOTE: only the filters of the last processed scan end up in
            # the summary.
            software_filters = sidecar_json.get("SoftwareFilters")
            if not software_filters:
                software_filters = "n/a"

            # RECOMMENDED kwargs
            manufacturer = sidecar_json.get("Manufacturer", "n/a")
            record_duration = sidecar_json.get("RecordingDuration", "n/a")

            sfreqs.add(str(np.round(sfreq, 2)))
            powerlinefreqs.add(powerlinefreq)
            if manufacturer != "n/a":
                manufacturers.add(manufacturer)
            length_recordings.append(record_duration)

    # XXX: length summary is only allowed if at least one duration was
    # collected and no 'n/a' was found
    if not length_recordings or any(
        dur == "n/a" for dur in length_recordings
    ):
        length_recordings = None

    # Sets have no stable iteration order; sort them so that the generated
    # text is the same on every run.
    template_dict = {
        "n_scans": n_scans,
        "manufacturer": _pretty_str(sorted(manufacturers)),
        "sfreq": _pretty_str(sorted(sfreqs, key=float)),
        "powerlinefreq": _pretty_str(sorted(powerlinefreqs)),
        "software_filters": _summarize_software_filters(software_filters),
        "length_recordings": _length_recording_str(length_recordings),
    }
    return template_dict
def _summarize_channels_tsv(root, scans_fpaths):
    """Summarize the channels.tsv files of all scans in the BIDS root.

    For every scan of datatype ``meg``, ``eeg`` or ``ieeg`` (except those
    of subject ``emptyroom``) the matching ``channels.tsv`` is read and
    the number of channels, good channels and bad channels is collected.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    scans_fpaths : list
        A list of all *_scans.tsv files in ``root``. The summary
        will occur for all scans listed in the *_scans.tsv files.

    Returns
    -------
    template_dict : dict
        Mean and standard deviation across scans of the total, good and
        bad channel counts, each rounded to two decimals.
    """
    root = Path(root)
    supported_datatypes = ["meg", "eeg", "ieeg"]

    # per-scan counts: all channels, and channels per status label
    n_channels = []
    status_counts = {"bad": [], "good": []}

    for scans_file in scans_fpaths:
        scans_tsv = _from_tsv(scans_file)
        for scan in scans_tsv["filename"]:
            bids_path, _ = _parse_ext(scan)
            if op.dirname(scan) not in supported_datatypes:
                continue

            # convert to BIDSPath
            if not isinstance(bids_path, BIDSPath):
                bids_path = get_bids_path_from_fname(bids_path)
            bids_path.root = root

            # XXX: improve to allow emptyroom
            if bids_path.subject == "emptyroom":
                continue

            channels_fname = _find_matching_sidecar(
                bids_path=bids_path, suffix="channels", extension=".tsv"
            )
            channels_tsv = _from_tsv(channels_fname)

            for label, counts in status_counts.items():
                n_with_label = sum(
                    1 for entry in channels_tsv["status"] if entry == label
                )
                counts.append(n_with_label)
            n_channels.append(len(channels_tsv["name"]))

    good, bad = status_counts["good"], status_counts["bad"]
    return {
        "mean_chs": round(np.mean(n_channels), 2),
        "std_chs": round(np.std(n_channels), 2),
        "mean_good_chs": round(np.mean(good), 2),
        "std_good_chs": round(np.std(good), 2),
        "mean_bad_chs": round(np.mean(bad), 2),
        "std_bad_chs": round(np.std(bad), 2),
    }
# Public API
@verbose
def make_report(root, session=None, verbose=None):
    """Create a methods paragraph string from BIDS dataset.

    Summarizes the REQUIRED components in the BIDS specification
    and also some RECOMMENDED components. Currently, the methods
    paragraph summarizes the:

      - dataset_description.json file
      - (optional) participants.tsv file
      - (optional) datatype-agnostic files for (M/I)EEG data,
        which reads files from the ``*_scans.tsv`` file.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    session : str | None
        The (optional) session for a item. Corresponds to "ses".
    %(verbose)s

    Returns
    -------
    paragraph : str
        The paragraph wrapped with 80 characters per line
        describing the summary of the subjects.
    """
    # high level summary
    subjects = get_entity_vals(root, entity_key="subject")
    sessions = get_entity_vals(root, entity_key="session")
    modalities = get_datatypes(root)

    # only summarize allowed modalities (MEG/EEG/iEEG) data
    # map them to a pretty looking string
    datatype_map = {
        "meg": "MEG",
        "eeg": "EEG",
        "ieeg": "iEEG",
    }
    modalities = [
        datatype_map[datatype]
        for datatype in modalities
        if datatype in datatype_map
    ]

    # REQUIRED: dataset_description.json summary
    dataset_summary = _summarize_dataset(root)

    # RECOMMENDED: participants summary
    participant_summary = _summarize_participants_tsv(root)

    # RECOMMENDED: scans summary
    scans_summary = _summarize_scans(root, session=session)

    dataset_agnostic_summary = scans_summary.copy()
    dataset_agnostic_summary["system"] = _pretty_str(modalities)

    # turn off 'recommended' report summary
    # if files are not available to summarize
    if not participant_summary:
        participants_info = ""
    else:
        participants_info_template = jinja_env.get_template(
            "participants.jinja"
        )
        participants_info = participants_info_template.render(
            **participant_summary
        )
        logger.info(f"The participant template found: {participants_info}")

    if not scans_summary:
        datatype_agnostic_info = ""
    else:
        datatype_agnostic_template = jinja_env.get_template(
            "datatype_agnostic.jinja"
        )
        datatype_agnostic_info = datatype_agnostic_template.render(
            **dataset_agnostic_summary
        )

    dataset_summary.update(
        {
            "n_subjects": len(subjects),
            "participants_info": participants_info,
            "n_sessions": len(sessions),
            "sessions": _pretty_str(sessions),
        }
    )

    # XXX: add channel summary for modalities (ieeg, meg, eeg)
    # create the content and mne Template
    # lower-case templates are "Recommended",
    # while upper-case templates are "Required".
    dataset_summary_template = jinja_env.get_template("dataset_summary.jinja")
    dataset_summary_info = dataset_summary_template.render(**dataset_summary)

    # Concatenate info and clean the paragraph
    paragraph = f"{dataset_summary_info}\n{datatype_agnostic_info}"
    paragraph = paragraph.replace("\n", " ")

    # Collapse runs of blanks into a single blank. The search string must be
    # TWO blanks: looking for (and replacing) a single blank changes nothing
    # and would therefore loop forever.
    double_blank = " " * 2
    while double_blank in paragraph:
        paragraph = paragraph.replace(double_blank, " ")

    return "\n".join(textwrap.wrap(paragraph, width=80))