"""Make BIDS report from dataset and sidecar files."""
# Authors: Adam Li <adam2392@gmail.com>
#
# License: BSD-3-Clause
import json
import os.path as op
import textwrap
from pathlib import Path
import numpy as np
from mne.externals.tempita import Template
from mne.utils import warn, logger, verbose
from mne_bids.config import DOI, ALLOWED_DATATYPES
from mne_bids.tsv_handler import _from_tsv
from mne_bids.path import (get_bids_path_from_fname, get_datatypes,
get_entity_vals, BIDSPath,
_parse_ext, _find_matching_sidecar)
# functions to be used inside Template strings
FUNCTION_TEMPLATE = """{{py:
def _pretty_str(listed):
# make strings a sequence of ',' and 'and'
if not isinstance(listed, list):
listed = list(listed)
if len(listed) <= 1:
return ','.join(listed)
return '{}, and {}'.format(', '.join(listed[:-1]), listed[-1])
def _range_str(minval, maxval, meanval, stdval, n_unknown, type):
if minval == 'n/a':
return 'ages all unknown'
if n_unknown > 0:
unknown_str = f'; {n_unknown} with unknown {type}'
else:
unknown_str = ''
return f'ages ranged from {round(minval, 2)} to {round(maxval, 2)} '\
f'(mean = {round(meanval, 2)}, std = {round(stdval, 2)}{unknown_str})'
def _summarize_participant_hand(hands):
n_unknown = len([hand for hand in hands if hand == 'n/a'])
if n_unknown == len(hands):
return f'handedness were all unknown'
n_rhand = len([hand for hand in hands if hand.upper() == 'R'])
n_lhand = len([hand for hand in hands if hand.upper() == 'L'])
n_ambidex = len([hand for hand in hands if hand.upper() == 'A'])
return f'comprised of {n_rhand} right hand, {n_lhand} left hand ' \
f'and {n_ambidex} ambidextrous'
def _summarize_participant_sex(sexs):
n_unknown = len([sex for sex in sexs if sex == 'n/a'])
if n_unknown == len(sexs):
return f'sex were all unknown'
n_males = len([sex for sex in sexs if sex.upper() == 'M'])
n_females = len([sex for sex in sexs if sex.upper() == 'F'])
return f'comprised of {n_males} male and {n_females} female participants'
def _length_recording_str(length_recordings):
import numpy as np
if length_recordings is None:
return ''
min_record_length = round(np.min(length_recordings), 2)
max_record_length = round(np.max(length_recordings), 2)
mean_record_length = round(np.mean(length_recordings), 2)
std_record_length = round(np.std(length_recordings), 2)
total_record_length = round(sum(length_recordings), 2)
return f' Recording durations ranged from {min_record_length} to {max_record_length} seconds '\
f'(mean = {mean_record_length}, std = {std_record_length}), '\
f'for a total of {total_record_length} seconds of data recorded '\
f'over all scans.'
def _summarize_software_filters(software_filters):
if software_filters in [{}, 'n/a']:
return ''
msg = ''
for key, value in software_filters.items():
msg += f'{key}'
if isinstance(value, dict) and value:
parameters = []
for param_name, param_value in value.items():
if param_name and param_value:
parameters.append(f'{param_value} {param_name}')
if parameters:
msg += ' with parameters '
msg += ', '.join(parameters)
return msg
}}""" # noqa
BIDS_DATASET_TEMPLATE = \
"""{{if name == 'n/a'}}This{{else}}The {{name}}{{endif}}
dataset was created by {{_pretty_str(authors)}} and conforms to BIDS version
{{bids_version}}. This report was generated with
MNE-BIDS ({{mne_bids_doi}}). """
BIDS_DATASET_TEMPLATE += \
"""The dataset consists of
{{n_subjects}} participants ({{PARTICIPANTS_TEMPLATE}}) {{if n_sessions}}and {{n_sessions}} recording sessions: {{(_pretty_str(sessions))}}.{{else}}.{{endif}} """ # noqa
PARTICIPANTS_TEMPLATE = \
"""{{_summarize_participant_sex(sexs)}};
{{_summarize_participant_hand(hands)}}; {{_range_str(min_age, max_age, mean_age, std_age, n_age_unknown, 'age')}}""" # noqa
DATATYPE_AGNOSTIC_TEMPLATE = \
"""Data was recorded using a {{_pretty_str(system)}} system
{{if manufacturer}}({{_pretty_str(manufacturer)}} manufacturer){{endif}}
sampled at {{_pretty_str(sfreq)}} Hz
with line noise at {{_pretty_str(powerlinefreq)}} Hz.
{{if _summarize_software_filters(software_filters)}}
The following software filters were applied during recording:
{{_summarize_software_filters(software_filters)}}.
{{endif}}
{{if n_scans > 1}}
There were {{n_scans}} scans in total.
{{else}}There was {{n_scans}} scan in total.
{{endif}}
{{_length_recording_str(length_recordings)}}
For each dataset, there were on average {{mean_chs}} (std = {{std_chs}}) recording channels per scan,
out of which {{mean_good_chs}} (std = {{std_good_chs}}) were used in analysis
({{mean_bad_chs}} +/- {{std_bad_chs}} were removed from analysis). """ # noqa
def _pretty_dict(template_dict):
"""Remove problematic blank spaces."""
for key, val in template_dict.items():
if val == ' ':
template_dict[key] = 'n/a'
def _summarize_dataset(root):
"""Summarize the dataset_desecription.json file.
Required dataset descriptors include:
- Name
- BIDSVersion
Added descriptors include:
- Authors
- DOI
Parameters
----------
root : path-like
The path of the root of the BIDS compatible folder.
Returns
-------
template_dict : dict
A dictionary of values for various template strings.
"""
dataset_descrip_fpath = op.join(root,
'dataset_description.json')
if not op.exists(dataset_descrip_fpath):
return dict()
# read file and 'REQUIRED' components of it
with open(dataset_descrip_fpath, 'r', encoding='utf-8-sig') as fin:
dataset_description = json.load(fin)
# create dictionary to pass into template string
name = dataset_description['Name']
bids_version = dataset_description['BIDSVersion']
authors = dataset_description['Authors']
template_dict = {
'name': name,
'bids_version': bids_version,
'mne_bids_doi': DOI,
'authors': authors,
}
_pretty_dict(template_dict)
return template_dict
def _summarize_participants_tsv(root):
"""Summarize `participants.tsv` file in BIDS root directory.
Parameters
----------
root : path-like
The path of the root of the BIDS compatible folder.
Returns
-------
template_dict : dict
A dictionary of values for various template strings.
"""
participants_tsv_fpath = op.join(root, 'participants.tsv')
if not op.exists(participants_tsv_fpath):
return dict()
participants_tsv = _from_tsv(str(participants_tsv_fpath))
p_ids = participants_tsv['participant_id']
logger.info(f'Summarizing participants.tsv {participants_tsv_fpath}...')
# summarize sex count statistics
keys = ['M', 'F', 'n/a']
p_sex = participants_tsv.get('sex')
# phrasing works for both sex and gender
p_gender = participants_tsv.get('gender')
sexs = ['n/a']
if p_sex or p_gender:
# only summarize sex if it conforms to `keys` referenced above
p_sex = p_gender if p_sex is None else p_sex
if all([sex.upper() in keys
for sex in p_sex if sex != 'n/a']):
sexs = p_sex
# summarize hand count statistics
keys = ['R', 'L', 'A', 'n/a']
p_hands = participants_tsv.get('hand')
hands = ['n/a']
if p_hands:
# only summarize handedness if it conforms to
# mne-bids handedness
if all([hand.upper() in keys
for hand in p_hands if hand != 'n/a']):
hands = p_hands
# summarize age statistics: mean, std, min, max
p_ages = participants_tsv.get('age')
min_age, max_age = 'n/a', 'n/a'
mean_age, std_age = 'n/a', 'n/a'
n_age_unknown = len(p_ages)
if p_ages:
# only summarize age if they are numerics
if all([age.isnumeric() for age in p_ages if age != 'n/a']):
age_list = [float(age) for age in p_ages if age != 'n/a']
n_age_unknown = len(p_ids) - len(age_list)
if age_list:
min_age, max_age = np.min(age_list), np.max(age_list)
mean_age, std_age = np.mean(age_list), np.std(age_list)
template_dict = {
'sexs': sexs,
'hands': hands,
'n_age_unknown': n_age_unknown,
'mean_age': mean_age,
'std_age': std_age,
'min_age': min_age,
'max_age': max_age,
}
return template_dict
def _summarize_scans(root, session=None):
"""Summarize scans in BIDS root directory.
Summarizes scans only if there is a *_scans.tsv file.
Parameters
----------
root : path-like
The path of the root of the BIDS compatible folder.
session : str, optional
The session for a item. Corresponds to "ses".
Returns
-------
template_dict : dict
A dictionary of values for various template strings.
"""
root = Path(root)
if session is None:
search_str = '*_scans.tsv'
else:
search_str = f'*ses-{session}' \
f'*_scans.tsv'
scans_fpaths = list(root.rglob(search_str))
if len(scans_fpaths) == 0:
warn('No *scans.tsv files found. Currently, '
'we do not generate a report without the scans.tsv files.')
return dict()
logger.info(f'Summarizing scans.tsv files {scans_fpaths}...')
# summarize sidecar.json, channels.tsv template
sidecar_dict = _summarize_sidecar_json(root, scans_fpaths)
channels_dict = _summarize_channels_tsv(root, scans_fpaths)
template_dict = dict()
template_dict.update(**sidecar_dict)
template_dict.update(**channels_dict)
return template_dict
def _summarize_sidecar_json(root, scans_fpaths):
"""Summarize scans in BIDS root directory.
Parameters
----------
root : path-like
The path of the root of the BIDS compatible folder.
scans_fpaths : list
A list of all *_scans.tsv files in ``root``. The summary
will occur for all scans listed in the *_scans.tsv files.
Returns
-------
template_dict : dict
A dictionary of values for various template strings.
"""
n_scans = 0
powerlinefreqs, sfreqs = set(), set()
manufacturers = set()
length_recordings = []
# loop through each scan
for scan_fpath in scans_fpaths:
# load in the scans.tsv file
# and read metadata for each scan
scans_tsv = _from_tsv(scan_fpath)
scans = scans_tsv['filename']
for scan in scans:
# summarize metadata of recordings
bids_path, ext = _parse_ext(scan)
datatype = op.dirname(scan)
if datatype not in ALLOWED_DATATYPES:
continue
n_scans += 1
# convert to BIDS Path
if not isinstance(bids_path, BIDSPath):
bids_path = get_bids_path_from_fname(bids_path)
bids_path.root = root
# XXX: improve to allow emptyroom
if bids_path.subject == 'emptyroom':
continue
sidecar_fname = _find_matching_sidecar(bids_path=bids_path,
suffix=datatype,
extension='.json')
with open(sidecar_fname, 'r', encoding='utf-8-sig') as fin:
sidecar_json = json.load(fin)
# aggregate metadata from each scan
# REQUIRED kwargs
sfreq = sidecar_json['SamplingFrequency']
powerlinefreq = str(sidecar_json['PowerLineFrequency'])
software_filters = sidecar_json.get('SoftwareFilters')
if not software_filters:
software_filters = 'n/a'
# RECOMMENDED kwargs
manufacturer = sidecar_json.get('Manufacturer', 'n/a')
record_duration = sidecar_json.get('RecordingDuration', 'n/a')
sfreqs.add(str(np.round(sfreq, 2)))
powerlinefreqs.add(str(powerlinefreq))
if manufacturer != 'n/a':
manufacturers.add(manufacturer)
length_recordings.append(record_duration)
# XXX: length summary is only allowed, if no 'n/a' was found
if any([dur == 'n/a' for dur in length_recordings]):
length_recordings = None
template_dict = {
'n_scans': n_scans,
'manufacturer': list(manufacturers),
'sfreq': sfreqs,
'powerlinefreq': powerlinefreqs,
'software_filters': software_filters,
'length_recordings': length_recordings,
}
return template_dict
def _summarize_channels_tsv(root, scans_fpaths):
"""Summarize channels.tsv data in BIDS root directory.
Currently, summarizes all REQUIRED components of channels
data, and some RECOMMENDED and OPTIONAL components.
Parameters
----------
root : path-like
The path of the root of the BIDS compatible folder.
scans_fpaths : list
A list of all *_scans.tsv files in ``root``. The summary
will occur for all scans listed in the *_scans.tsv files.
Returns
-------
template_dict : dict
A dictionary of values for various template strings.
"""
root = Path(root)
# keep track of channel type, status
ch_status_count = {'bad': [], 'good': []}
ch_count = []
# loop through each scan
for scan_fpath in scans_fpaths:
# load in the scans.tsv file
# and read metadata for each scan
scans_tsv = _from_tsv(scan_fpath)
scans = scans_tsv['filename']
for scan in scans:
# summarize metadata of recordings
bids_path, _ = _parse_ext(scan)
datatype = op.dirname(scan)
if datatype not in ['meg', 'eeg', 'ieeg']:
continue
# convert to BIDS Path
if not isinstance(bids_path, BIDSPath):
bids_path = get_bids_path_from_fname(bids_path)
bids_path.root = root
# XXX: improve to allow emptyroom
if bids_path.subject == 'emptyroom':
continue
channels_fname = _find_matching_sidecar(bids_path=bids_path,
suffix='channels',
extension='.tsv')
# summarize channels.tsv
channels_tsv = _from_tsv(channels_fname)
for status in ch_status_count.keys():
ch_status = [ch for ch in channels_tsv['status']
if ch == status]
ch_status_count[status].append(len(ch_status))
ch_count.append(len(channels_tsv['name']))
# create summary template strings for status
template_dict = {
'mean_chs': np.mean(ch_count),
'std_chs': np.std(ch_count),
'mean_good_chs': np.mean(ch_status_count['good']),
'std_good_chs': np.std(ch_status_count['good']),
'mean_bad_chs': np.mean(ch_status_count['bad']),
'std_bad_chs': np.std(ch_status_count['bad']),
}
for key, val in template_dict.items():
template_dict[key] = round(val, 2)
return template_dict
[docs]@verbose
def make_report(root, session=None, verbose=None):
"""Create a methods paragraph string from BIDS dataset.
Summarizes the REQUIRED components in the BIDS specification
and also some RECOMMENDED components. Currently, the methods
paragraph summarize the:
- dataset_description.json file
- (optional) participants.tsv file
- (optional) datatype-agnostic files for (M/I)EEG data,
which reads files from the ``*_scans.tsv`` file.
Parameters
----------
root : path-like
The path of the root of the BIDS compatible folder.
session : str | None
The (optional) session for a item. Corresponds to "ses".
%(verbose)s
Returns
-------
paragraph : str
The paragraph wrapped with 80 characters per line
describing the summary of the subjects.
"""
# high level summary
subjects = get_entity_vals(root, entity_key='subject')
sessions = get_entity_vals(root, entity_key='session')
modalities = get_datatypes(root)
# only summarize allowed modalities (MEG/EEG/iEEG) data
# map them to a pretty looking string
datatype_map = {
'meg': 'MEG',
'eeg': 'EEG',
'ieeg': 'iEEG',
}
modalities = [datatype_map[datatype] for datatype in modalities
if datatype in datatype_map.keys()]
# REQUIRED: dataset_description.json summary
dataset_summary = _summarize_dataset(root)
# RECOMMENDED: participants summary
participant_summary = _summarize_participants_tsv(root)
# RECOMMENDED: scans summary
scans_summary = _summarize_scans(root, session=session)
# turn off 'recommended' report summary
# if files are not available to summarize
if not participant_summary:
participant_template = ''
else:
content = f'{FUNCTION_TEMPLATE}{PARTICIPANTS_TEMPLATE}'
participant_template = Template(content=content)
participant_template = participant_template.substitute(
**participant_summary)
logger.info(f'The participant template found: {participant_template}')
dataset_summary['PARTICIPANTS_TEMPLATE'] = str(participant_template)
if not scans_summary:
datatype_agnostic_template = ''
else:
datatype_agnostic_template = DATATYPE_AGNOSTIC_TEMPLATE
dataset_summary.update({
'system': modalities,
'n_subjects': len(subjects),
'n_sessions': len(sessions),
'sessions': sessions,
})
# XXX: add channel summary for modalities (ieeg, meg, eeg)
# create the content and mne Template
# lower-case templates are "Recommended",
# while upper-case templates are "Required".
content = f'{FUNCTION_TEMPLATE}{BIDS_DATASET_TEMPLATE}' \
f'{datatype_agnostic_template}'
paragraph = Template(content=content)
paragraph = paragraph.substitute(**dataset_summary,
**scans_summary)
# Clean paragraph
paragraph = paragraph.replace('\n', ' ')
paragraph = paragraph.replace(' ', ' ')
return '\n'.join(textwrap.wrap(paragraph, width=80))