Source code for mne_bids.report._report

"""Make BIDS report from dataset and sidecar files."""
# Authors: Adam Li <adam2392@gmail.com>
#
# License: BSD-3-Clause
import json
import os.path as op
import textwrap
from pathlib import Path

import numpy as np
import jinja2
from mne.utils import warn, logger, verbose

from mne_bids.config import DOI, ALLOWED_DATATYPES
from mne_bids.tsv_handler import _from_tsv
from mne_bids.path import (get_bids_path_from_fname, get_datatypes,
                           get_entity_vals, BIDSPath,
                           _parse_ext, _find_matching_sidecar)


# Module-level Jinja2 environment shared by the report functions; templates
# are looked up in the ``templates`` directory of the ``mne_bids.report``
# package.
jinja_env = jinja2.Environment(
    loader=jinja2.PackageLoader(
        package_name='mne_bids.report',
        package_path='templates'
    )
)


def _pretty_str(listed):
    # make strings a sequence of ',' and 'and'
    if not isinstance(listed, list):
        listed = list(listed)

    if len(listed) <= 1:
        return ','.join(listed)
    return '{}, and {}'.format(', '.join(listed[:-1]), listed[-1])


def _range_str(minval, maxval, meanval, stdval, n_unknown, type):
    if minval == 'n/a':
        return 'ages all unknown'

    if n_unknown > 0:
        unknown_str = f'; {n_unknown} with unknown {type}'
    else:
        unknown_str = ''
    return (
        f'ages ranged from {round(minval, 2)} to {round(maxval, 2)} '
        f'(mean = {round(meanval, 2)}, std = {round(stdval, 2)}{unknown_str})'
    )


def _summarize_participant_hand(hands):
    n_unknown = len([hand for hand in hands if hand == 'n/a'])

    if n_unknown == len(hands):
        return 'handedness were all unknown'

    n_rhand = len([hand for hand in hands if hand.upper() == 'R'])
    n_lhand = len([hand for hand in hands if hand.upper() == 'L'])
    n_ambidex = len([hand for hand in hands if hand.upper() == 'A'])

    return f'comprised of {n_rhand} right hand, {n_lhand} left hand ' \
           f'and {n_ambidex} ambidextrous'


def _summarize_participant_sex(sexs):
    n_unknown = len([sex for sex in sexs if sex == 'n/a'])

    if n_unknown == len(sexs):
        return 'sex were all unknown'

    n_males = len([sex for sex in sexs if sex.upper() == 'M'])
    n_females = len([sex for sex in sexs if sex.upper() == 'F'])

    return f'comprised of {n_males} male and {n_females} female participants'


def _length_recording_str(length_recordings):
    import numpy as np
    if length_recordings is None:
        return ''

    min_record_length = round(np.min(length_recordings), 2)
    max_record_length = round(np.max(length_recordings), 2)
    mean_record_length = round(np.mean(length_recordings), 2)
    std_record_length = round(np.std(length_recordings), 2)
    total_record_length = round(sum(length_recordings), 2)

    return (
        f'Recording durations ranged from {min_record_length} to '
        f'{max_record_length} seconds '
        f'(mean = {mean_record_length}, std = {std_record_length}), '
        f'for a total of {total_record_length} seconds of data recorded '
        f'over all scans.'
    )


def _summarize_software_filters(software_filters):
    if software_filters in [{}, 'n/a']:
        return ''

    msg = ''
    for key, value in software_filters.items():
        msg += f'{key}'

        if isinstance(value, dict) and value:
            parameters = []
            for param_name, param_value in value.items():
                if param_name and param_value:
                    parameters.append(f'{param_value} {param_name}')
            if parameters:
                msg += ' with parameters '
                msg += ', '.join(parameters)
    return msg


def _pretty_dict(template_dict):
    """Remove problematic blank spaces."""
    for key, val in template_dict.items():
        if val == ' ':
            template_dict[key] = 'n/a'


def _summarize_dataset(root):
    """Summarize the dataset_desecription.json file.

    Required dataset descriptors include:
        - Name
        - BIDSVersion

    Added descriptors include:
        - Authors
        - DOI

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.
    """
    dataset_descrip_fpath = op.join(root,
                                    'dataset_description.json')
    if not op.exists(dataset_descrip_fpath):
        return dict()

    # read file and 'REQUIRED' components of it
    with open(dataset_descrip_fpath, 'r', encoding='utf-8-sig') as fin:
        dataset_description = json.load(fin)

    # create dictionary to pass into template string
    name = dataset_description['Name']
    bids_version = dataset_description['BIDSVersion']
    authors = dataset_description['Authors']
    template_dict = {
        'name': name,
        'bids_version': bids_version,
        'mne_bids_doi': DOI,
        'authors': _pretty_str(authors),
    }
    _pretty_dict(template_dict)
    return template_dict


def _summarize_participants_tsv(root):
    """Summarize `participants.tsv` file in BIDS root directory.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.
    """
    participants_tsv_fpath = op.join(root, 'participants.tsv')
    if not op.exists(participants_tsv_fpath):
        return dict()

    participants_tsv = _from_tsv(str(participants_tsv_fpath))
    p_ids = participants_tsv['participant_id']
    logger.info(f'Summarizing participants.tsv {participants_tsv_fpath}...')

    # summarize sex count statistics
    keys = ['M', 'F', 'n/a']
    p_sex = participants_tsv.get('sex')
    # phrasing works for both sex and gender
    p_gender = participants_tsv.get('gender')
    sexs = ['n/a']
    if p_sex or p_gender:
        # only summarize sex if it conforms to `keys` referenced above
        p_sex = p_gender if p_sex is None else p_sex
        if all([sex.upper() in keys
                for sex in p_sex if sex != 'n/a']):
            sexs = p_sex

    # summarize hand count statistics
    keys = ['R', 'L', 'A', 'n/a']
    p_hands = participants_tsv.get('hand')
    hands = ['n/a']
    if p_hands:
        # only summarize handedness if it conforms to
        # mne-bids handedness
        if all([hand.upper() in keys
                for hand in p_hands if hand != 'n/a']):
            hands = p_hands

    # summarize age statistics: mean, std, min, max
    p_ages = participants_tsv.get('age')
    min_age, max_age = 'n/a', 'n/a'
    mean_age, std_age = 'n/a', 'n/a'
    n_age_unknown = len(p_ages) if p_ages else len(p_ids)
    if p_ages:
        # only summarize age if they are numerics
        if all([age.isnumeric() for age in p_ages if age != 'n/a']):
            age_list = [float(age) for age in p_ages if age != 'n/a']
            n_age_unknown = len(p_ids) - len(age_list)
            if age_list:
                min_age, max_age = np.min(age_list), np.max(age_list)
                mean_age, std_age = np.mean(age_list), np.std(age_list)

    template_dict = {
        'sexs': _summarize_participant_sex(sexs),
        'hands': _summarize_participant_hand(hands),
        'ages': _range_str(
            min_age, max_age, mean_age, std_age, n_age_unknown,
            'age'
        )
    }
    return template_dict


def _summarize_scans(root, session=None):
    """Summarize scans in BIDS root directory.

    All ``*_scans.tsv`` files below ``root`` are collected and the
    sidecar JSON and channels.tsv summaries of the scans they list are
    merged into one dictionary. Without any ``*_scans.tsv`` file a
    warning is issued and nothing is summarized.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    session : str, optional
        The session for a item. Corresponds to "ses". If given, only
        scans files whose name contains ``ses-<session>`` are used.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.

    """
    root = Path(root)
    pattern = ('*_scans.tsv' if session is None
               else f'*ses-{session}*_scans.tsv')
    scans_fpaths = list(root.rglob(pattern))

    if not scans_fpaths:
        warn('No *scans.tsv files found. Currently, '
             'we do not generate a report without the scans.tsv files.')
        return dict()

    logger.info(f'Summarizing scans.tsv files {scans_fpaths}...')

    # sidecar.json summary first, then channels.tsv; on a key clash the
    # channels value wins
    return {
        **_summarize_sidecar_json(root, scans_fpaths),
        **_summarize_channels_tsv(root, scans_fpaths),
    }


def _summarize_sidecar_json(root, scans_fpaths):
    """Summarize the sidecar JSON files of all scans in BIDS root directory.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    scans_fpaths : list
        A list of all *_scans.tsv files in ``root``. The summary
        will occur for all scans listed in the *_scans.tsv files.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.

    Notes
    -----
    Scans whose datatype is not in ``ALLOWED_DATATYPES`` are ignored.
    Empty-room scans are counted in ``n_scans`` but their sidecar is not
    read. Only the software filters of the last summarized scan are
    reported.
    """
    n_scans = 0
    powerlinefreqs, sfreqs = set(), set()
    manufacturers = set()
    length_recordings = []
    # Default for the case that no scan is summarized at all; without it
    # the name would be unbound when building the template dict below.
    software_filters = 'n/a'

    # loop through each scan
    for scan_fpath in scans_fpaths:
        # load in the scans.tsv file
        # and read metadata for each scan
        scans_tsv = _from_tsv(scan_fpath)
        scans = scans_tsv['filename']
        for scan in scans:
            # summarize metadata of recordings
            bids_path, _ = _parse_ext(scan)
            # the directory part of the listed filename is the datatype
            datatype = op.dirname(scan)
            if datatype not in ALLOWED_DATATYPES:
                continue

            n_scans += 1

            # convert to BIDS Path
            if not isinstance(bids_path, BIDSPath):
                bids_path = get_bids_path_from_fname(bids_path)
            bids_path.root = root

            # XXX: improve to allow emptyroom
            if bids_path.subject == 'emptyroom':
                continue

            sidecar_fname = _find_matching_sidecar(bids_path=bids_path,
                                                   suffix=datatype,
                                                   extension='.json')
            with open(sidecar_fname, 'r', encoding='utf-8-sig') as fin:
                sidecar_json = json.load(fin)

            # aggregate metadata from each scan
            # REQUIRED kwargs
            sfreq = sidecar_json['SamplingFrequency']
            powerlinefreq = str(sidecar_json['PowerLineFrequency'])
            software_filters = sidecar_json.get('SoftwareFilters')
            if not software_filters:
                software_filters = 'n/a'

            # RECOMMENDED kwargs
            manufacturer = sidecar_json.get('Manufacturer', 'n/a')
            record_duration = sidecar_json.get('RecordingDuration', 'n/a')

            sfreqs.add(str(np.round(sfreq, 2)))
            powerlinefreqs.add(powerlinefreq)
            if manufacturer != 'n/a':
                manufacturers.add(manufacturer)
            length_recordings.append(record_duration)

    # XXX: length summary is only allowed, if no 'n/a' was found.
    # An empty list cannot be summarized either.
    if not length_recordings or any(
            dur == 'n/a' for dur in length_recordings):
        length_recordings = None

    template_dict = {
        'n_scans': n_scans,
        'manufacturer': _pretty_str(manufacturers),
        'sfreq': _pretty_str(sfreqs),
        'powerlinefreq': _pretty_str(powerlinefreqs),
        'software_filters': _summarize_software_filters(software_filters),
        'length_recordings': _length_recording_str(length_recordings),
    }
    return template_dict


def _summarize_channels_tsv(root, scans_fpaths):
    """Summarize channels.tsv data in BIDS root directory.

    Currently, summarizes all REQUIRED components of channels
    data, and some RECOMMENDED and OPTIONAL components.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    scans_fpaths : list
        A list of all *_scans.tsv files in ``root``. The summary
        will occur for all scans listed in the *_scans.tsv files.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.
    """
    root = Path(root)

    # keep track of channel type, status
    ch_status_count = {'bad': [], 'good': []}
    ch_count = []

    # loop through each scan
    for scan_fpath in scans_fpaths:
        # load in the scans.tsv file
        # and read metadata for each scan
        scans_tsv = _from_tsv(scan_fpath)
        scans = scans_tsv['filename']
        for scan in scans:
            # summarize metadata of recordings
            bids_path, _ = _parse_ext(scan)
            datatype = op.dirname(scan)
            if datatype not in ['meg', 'eeg', 'ieeg']:
                continue

            # convert to BIDS Path
            if not isinstance(bids_path, BIDSPath):
                bids_path = get_bids_path_from_fname(bids_path)
            bids_path.root = root

            # XXX: improve to allow emptyroom
            if bids_path.subject == 'emptyroom':
                continue

            channels_fname = _find_matching_sidecar(bids_path=bids_path,
                                                    suffix='channels',
                                                    extension='.tsv')

            # summarize channels.tsv
            channels_tsv = _from_tsv(channels_fname)
            for status in ch_status_count.keys():
                ch_status = [ch for ch in channels_tsv['status']
                             if ch == status]
                ch_status_count[status].append(len(ch_status))
            ch_count.append(len(channels_tsv['name']))

    # create summary template strings for status
    template_dict = {
        'mean_chs': np.mean(ch_count),
        'std_chs': np.std(ch_count),
        'mean_good_chs': np.mean(ch_status_count['good']),
        'std_good_chs': np.std(ch_status_count['good']),
        'mean_bad_chs': np.mean(ch_status_count['bad']),
        'std_bad_chs': np.std(ch_status_count['bad']),
    }
    for key, val in template_dict.items():
        template_dict[key] = round(val, 2)
    return template_dict


@verbose
def make_report(root, session=None, verbose=None):
    """Create a methods paragraph string from BIDS dataset.

    Summarizes the REQUIRED components in the BIDS specification
    and also some RECOMMENDED components. Currently, the methods
    paragraph summarizes the:

      - dataset_description.json file
      - (optional) participants.tsv file
      - (optional) datatype-agnostic files for (M/I)EEG data,
        which reads files from the ``*_scans.tsv`` file.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    session : str | None
        The (optional) session for a item. Corresponds to "ses".
    %(verbose)s

    Returns
    -------
    paragraph : str
        The paragraph wrapped with 80 characters per line
        describing the summary of the subjects.
    """
    # high level summary
    subjects = get_entity_vals(root, entity_key='subject')
    sessions = get_entity_vals(root, entity_key='session')
    modalities = get_datatypes(root)

    # only summarize allowed modalities (MEG/EEG/iEEG) data
    # map them to a pretty looking string
    datatype_map = {
        'meg': 'MEG',
        'eeg': 'EEG',
        'ieeg': 'iEEG',
    }
    modalities = [datatype_map[datatype] for datatype in modalities
                  if datatype in datatype_map]

    # REQUIRED: dataset_description.json summary
    dataset_summary = _summarize_dataset(root)

    # RECOMMENDED: participants summary
    participant_summary = _summarize_participants_tsv(root)

    # RECOMMENDED: scans summary
    scans_summary = _summarize_scans(root, session=session)

    dataset_agnostic_summary = scans_summary.copy()
    dataset_agnostic_summary['system'] = _pretty_str(modalities)

    # turn off 'recommended' report summary
    # if files are not available to summarize
    if not participant_summary:
        participants_info = ''
    else:
        participants_info_template = jinja_env.get_template(
            'participants.jinja'
        )
        participants_info = participants_info_template.render(
            **participant_summary
        )
        logger.info(f'The participant template found: {participants_info}')

    if not scans_summary:
        datatype_agnostic_info = ''
    else:
        datatype_agnostic_template = jinja_env.get_template(
            'datatype_agnostic.jinja'
        )
        datatype_agnostic_info = datatype_agnostic_template.render(
            **dataset_agnostic_summary
        )

    dataset_summary.update({
        'n_subjects': len(subjects),
        'participants_info': participants_info,
        'n_sessions': len(sessions),
        'sessions': _pretty_str(sessions),
    })

    # XXX: add channel summary for modalities (ieeg, meg, eeg)
    # create the content and mne Template
    # lower-case templates are "Recommended",
    # while upper-case templates are "Required".
    dataset_summary_template = jinja_env.get_template('dataset_summary.jinja')
    dataset_summary_info = dataset_summary_template.render(**dataset_summary)

    # Concatenate info and clean the paragraph
    paragraph = f'{dataset_summary_info}\n{datatype_agnostic_info}'
    paragraph = paragraph.replace('\n', ' ')
    # Collapse runs of blanks into a single blank. The search string must
    # be TWO spaces: with a single space the loop would never terminate.
    while '  ' in paragraph:
        paragraph = paragraph.replace('  ', ' ')

    return '\n'.join(textwrap.wrap(paragraph, width=80))