# Source code for mne_bids.report._report

"""Make BIDS report from dataset and sidecar files."""

# Authors: The MNE-BIDS developers
# SPDX-License-Identifier: BSD-3-Clause

import json
import textwrap
from pathlib import Path

import jinja2
import numpy as np
from mne.utils import logger, verbose

from mne_bids.config import ALLOWED_DATATYPES, DOI
from mne_bids.path import (
    BIDSPath,
    _find_matching_sidecar,
    _parse_ext,
    get_bids_path_from_fname,
    get_datatypes,
    get_entity_vals,
)
from mne_bids.tsv_handler import _from_tsv
from mne_bids.utils import warn

# Module-wide Jinja2 environment. Templates are looked up in the
# "templates" directory of the ``mne_bids.report`` package.
jinja_env = jinja2.Environment(
    loader=jinja2.PackageLoader(
        package_name="mne_bids.report", package_path="templates"
    )
)


def _pretty_str(listed):
    # make strings a sequence of ',' and 'and'
    if not isinstance(listed, list):
        listed = list(listed)

    if len(listed) <= 1:
        return ",".join(listed)
    return "{}, and {}".format(", ".join(listed[:-1]), listed[-1])


def _range_str(minval, maxval, meanval, stdval, n_unknown, typ):
    if minval == "n/a":
        return "ages all unknown"

    if n_unknown > 0:
        unknown_str = f"; {n_unknown} with unknown {typ}"
    else:
        unknown_str = ""
    return (
        f"ages ranged from {round(minval, 2)} to {round(maxval, 2)} "
        f"(mean = {round(meanval, 2)}, std = {round(stdval, 2)}{unknown_str})"
    )


def _summarize_participant_hand(hands):
    n_unknown = len([hand for hand in hands if hand == "n/a"])

    if n_unknown == len(hands):
        return "handedness were all unknown"

    n_rhand = len([hand for hand in hands if hand.upper() == "R"])
    n_lhand = len([hand for hand in hands if hand.upper() == "L"])
    n_ambidex = len([hand for hand in hands if hand.upper() == "A"])

    return (
        f"comprised of {n_rhand} right hand, {n_lhand} left hand "
        f"and {n_ambidex} ambidextrous"
    )


def _summarize_participant_sex(sexs):
    n_unknown = len([sex for sex in sexs if sex == "n/a"])

    if n_unknown == len(sexs):
        return "sex were all unknown"

    n_males = len([sex for sex in sexs if sex.upper() == "M"])
    n_females = len([sex for sex in sexs if sex.upper() == "F"])

    return f"comprised of {n_males} male and {n_females} female participants"


def _length_recording_str(length_recordings):
    import numpy as np

    if length_recordings is None:
        return ""

    min_record_length = round(np.min(length_recordings), 2)
    max_record_length = round(np.max(length_recordings), 2)
    mean_record_length = round(np.mean(length_recordings), 2)
    std_record_length = round(np.std(length_recordings), 2)
    total_record_length = round(sum(length_recordings), 2)

    return (
        f"Recording durations ranged from {min_record_length} to "
        f"{max_record_length} seconds "
        f"(mean = {mean_record_length}, std = {std_record_length}), "
        f"for a total of {total_record_length} seconds of data recorded "
        f"over all scans."
    )


def _summarize_software_filters(software_filters):
    if software_filters in [{}, "n/a"]:
        return ""

    msg = ""
    for key, value in software_filters.items():
        msg += f"{key}"

        if isinstance(value, dict) and value:
            parameters = []
            for param_name, param_value in value.items():
                if param_name and param_value:
                    parameters.append(f"{param_value} {param_name}")
            if parameters:
                msg += " with parameters "
                msg += ", ".join(parameters)
    return msg


def _pretty_dict(template_dict):
    """Remove problematic blank spaces."""
    for key, val in template_dict.items():
        if val == " ":
            template_dict[key] = "n/a"


def _summarize_dataset(root):
    """Summarize the dataset_desecription.json file.

    Required dataset descriptors include:
        - Name
        - BIDSVersion

    Added descriptors include:
        - Authors
        - DOI

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.
    """
    dataset_descrip_fpath = root / "dataset_description.json"
    if not dataset_descrip_fpath.exists():
        return dict()

    # read file and 'REQUIRED' components of it
    with open(dataset_descrip_fpath, encoding="utf-8-sig") as fin:
        dataset_description = json.load(fin)

    # create dictionary to pass into template string
    name = dataset_description["Name"]
    bids_version = dataset_description["BIDSVersion"]
    authors = dataset_description["Authors"]
    template_dict = {
        "name": name,
        "bids_version": bids_version,
        "mne_bids_doi": DOI,
        "authors": _pretty_str(authors),
    }
    _pretty_dict(template_dict)
    return template_dict


def _summarize_participants_tsv(root):
    """Summarize `participants.tsv` file in BIDS root directory.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.
    """
    participants_tsv_fpath = root / "participants.tsv"
    if not participants_tsv_fpath.exists():
        return dict()

    participants_tsv = _from_tsv(str(participants_tsv_fpath))
    p_ids = participants_tsv["participant_id"]
    logger.info(f"Summarizing participants.tsv {participants_tsv_fpath}...")

    # summarize sex count statistics
    keys = ["M", "F", "n/a"]
    p_sex = participants_tsv.get("sex")
    # phrasing works for both sex and gender
    p_gender = participants_tsv.get("gender")
    sexs = ["n/a"]
    if p_sex or p_gender:
        # only summarize sex if it conforms to `keys` referenced above
        p_sex = p_gender if p_sex is None else p_sex
        if all([sex.upper() in keys for sex in p_sex if sex != "n/a"]):
            sexs = p_sex

    # summarize hand count statistics
    keys = ["R", "L", "A", "n/a"]
    p_hands = participants_tsv.get("hand")
    hands = ["n/a"]
    if p_hands:
        # only summarize handedness if it conforms to
        # mne-bids handedness
        if all([hand.upper() in keys for hand in p_hands if hand != "n/a"]):
            hands = p_hands

    # summarize age statistics: mean, std, min, max
    p_ages = participants_tsv.get("age")
    min_age, max_age = "n/a", "n/a"
    mean_age, std_age = "n/a", "n/a"
    n_age_unknown = len(p_ages) if p_ages else len(p_ids)
    if p_ages:
        # only summarize age if they are numerics
        if all([age.isnumeric() for age in p_ages if age != "n/a"]):
            age_list = [float(age) for age in p_ages if age != "n/a"]
            n_age_unknown = len(p_ids) - len(age_list)
            if age_list:
                min_age, max_age = np.min(age_list), np.max(age_list)
                mean_age, std_age = np.mean(age_list), np.std(age_list)

    template_dict = {
        "sexs": _summarize_participant_sex(sexs),
        "hands": _summarize_participant_hand(hands),
        "ages": _range_str(min_age, max_age, mean_age, std_age, n_age_unknown, "age"),
    }
    return template_dict


def _summarize_scans(root, session=None):
    """Summarize the recordings listed in the ``*_scans.tsv`` files.

    Nothing is summarized unless at least one ``*_scans.tsv`` file is
    found below ``root``; a warning is emitted in that case.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    session : str, optional
        The session for a item. Corresponds to "ses". If given, only
        scans files whose name contains ``ses-<session>`` are used.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings: the sidecar
        JSON summary merged with the channels.tsv summary. Empty if no
        scans file was found.

    """
    root = Path(root)

    # recursive glob pattern, optionally narrowed down to one session
    pattern = "*_scans.tsv" if session is None else f"*ses-{session}*_scans.tsv"
    scans_fpaths = list(root.rglob(pattern))

    if not scans_fpaths:
        warn(
            "No *scans.tsv files found. "
            "Currently, we do not generate a report without the scans.tsv files."
        )
        return dict()

    logger.info(f"Summarizing scans.tsv files {scans_fpaths}...")

    # on a key clash the channels summary takes precedence
    sidecar_dict = _summarize_sidecar_json(root, scans_fpaths)
    channels_dict = _summarize_channels_tsv(root, scans_fpaths)
    return {**sidecar_dict, **channels_dict}


def _summarize_sidecar_json(root, scans_fpaths):
    """Summarize the sidecar JSON files of all scans.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    scans_fpaths : list
        A list of all *_scans.tsv files in ``root``. The summary
        will occur for all scans listed in the *_scans.tsv files.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings
        (``n_scans``, ``manufacturer``, ``sfreq``, ``powerlinefreq``,
        ``software_filters`` and ``length_recordings``).

    """
    n_scans = 0
    powerlinefreqs, sfreqs = set(), set()
    manufacturers = set()
    length_recordings = []
    # Fallback for the case that no scan is processed at all; without it
    # the summary below would fail on an unbound local variable.
    software_filters = "n/a"

    # loop through each scan
    for scan_fpath in scans_fpaths:
        # load in the scans.tsv file
        # and read metadata for each scan
        scans_tsv = _from_tsv(scan_fpath)
        for scan in scans_tsv["filename"]:
            # summarize metadata of recordings
            bids_path, _ = _parse_ext(scan)
            datatype = str(Path(scan).parent)
            if datatype not in ALLOWED_DATATYPES:
                continue

            # NOTE: counted before the emptyroom check below, so
            # emptyroom recordings are included in the scan count
            n_scans += 1

            # convert to BIDSPath
            if not isinstance(bids_path, BIDSPath):
                bids_path = get_bids_path_from_fname(bids_path)
            bids_path.root = root

            # XXX: improve to allow emptyroom
            if bids_path.subject == "emptyroom":
                continue

            sidecar_fname = _find_matching_sidecar(
                bids_path=bids_path, suffix=datatype, extension=".json"
            )
            with open(sidecar_fname, encoding="utf-8-sig") as fin:
                sidecar_json = json.load(fin)

            # aggregate metadata from each scan
            # REQUIRED kwargs
            sfreq = sidecar_json["SamplingFrequency"]
            powerlinefreq = str(sidecar_json["PowerLineFrequency"])
            # NOTE: only the filters of the last processed scan end up in
            # the summary
            software_filters = sidecar_json.get("SoftwareFilters") or "n/a"

            # RECOMMENDED kwargs
            manufacturer = sidecar_json.get("Manufacturer", "n/a")
            record_duration = sidecar_json.get("RecordingDuration", "n/a")

            sfreqs.add(str(np.round(sfreq, 2)))
            powerlinefreqs.add(powerlinefreq)
            if manufacturer != "n/a":
                manufacturers.add(manufacturer)
            length_recordings.append(record_duration)

    # XXX: length summary is only allowed, if no 'n/a' was found.
    # An empty list is handled the same way: there is nothing to summarize.
    if not length_recordings or any(dur == "n/a" for dur in length_recordings):
        length_recordings = None

    # Sets have no stable iteration order; sort them so that the generated
    # text is the same on every run.
    template_dict = {
        "n_scans": n_scans,
        "manufacturer": _pretty_str(sorted(manufacturers)),
        "sfreq": _pretty_str(sorted(sfreqs, key=float)),
        "powerlinefreq": _pretty_str(sorted(powerlinefreqs)),
        "software_filters": _summarize_software_filters(software_filters),
        "length_recordings": _length_recording_str(length_recordings),
    }
    return template_dict


def _summarize_channels_tsv(root, scans_fpaths):
    """Summarize channels.tsv data in BIDS root directory.

    Currently, summarizes all REQUIRED components of channels
    data, and some RECOMMENDED and OPTIONAL components.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    scans_fpaths : list
        A list of all *_scans.tsv files in ``root``. The summary
        will occur for all scans listed in the *_scans.tsv files.

    Returns
    -------
    template_dict : dict
        A dictionary of values for various template strings.
    """
    root = Path(root)

    # keep track of channel type, status
    ch_status_count = {"bad": [], "good": []}
    ch_count = []

    # loop through each scan
    for scan_fpath in scans_fpaths:
        # load in the scans.tsv file
        # and read metadata for each scan
        scans_tsv = _from_tsv(scan_fpath)
        scans = scans_tsv["filename"]
        for scan in scans:
            # summarize metadata of recordings
            bids_path, _ = _parse_ext(scan)
            datatype = str(Path(scan).parent)
            if datatype not in ["meg", "eeg", "ieeg"]:
                continue

            # convert to BIDSPath
            if not isinstance(bids_path, BIDSPath):
                bids_path = get_bids_path_from_fname(bids_path)
            bids_path.root = root

            # XXX: improve to allow emptyroom
            if bids_path.subject == "emptyroom":
                continue

            channels_fname = _find_matching_sidecar(
                bids_path=bids_path, suffix="channels", extension=".tsv"
            )

            # summarize channels.tsv
            channels_tsv = _from_tsv(channels_fname)
            for status in ch_status_count.keys():
                ch_status = [ch for ch in channels_tsv["status"] if ch == status]
                ch_status_count[status].append(len(ch_status))
            ch_count.append(len(channels_tsv["name"]))

    # create summary template strings for status
    template_dict = {
        "mean_chs": np.mean(ch_count),
        "std_chs": np.std(ch_count),
        "mean_good_chs": np.mean(ch_status_count["good"]),
        "std_good_chs": np.std(ch_status_count["good"]),
        "mean_bad_chs": np.mean(ch_status_count["bad"]),
        "std_bad_chs": np.std(ch_status_count["bad"]),
    }
    for key, val in template_dict.items():
        template_dict[key] = round(val, 2)
    return template_dict



# [docs]
@verbose
def make_report(root, session=None, verbose=None):
    """Generate a methods paragraph that describes a BIDS dataset.

    The text covers the REQUIRED components of the BIDS specification
    and some RECOMMENDED ones. At the moment the following files are
    summarized:

      - dataset_description.json file
      - (optional) participants.tsv file
      - (optional) datatype-agnostic files for (M/I)EEG data,
        which reads files from the ``*_scans.tsv`` file.

    Parameters
    ----------
    root : path-like
        The path of the root of the BIDS compatible folder.
    session : str | None
        The (optional) session for a item. Corresponds to "ses".
    %(verbose)s

    Returns
    -------
    paragraph : str
        The paragraph wrapped with 80 characters per line
        describing the summary of the subjects.
    """
    root = Path(root)

    # entities and datatypes present in the dataset
    subjects = get_entity_vals(root, entity_key="subject")
    sessions = get_entity_vals(root, entity_key="session")
    datatypes = get_datatypes(root)

    # only MEG/EEG/iEEG are reported, using their conventional spelling
    pretty_names = {"meg": "MEG", "eeg": "EEG", "ieeg": "iEEG"}
    modalities = [pretty_names[dtype] for dtype in datatypes if dtype in pretty_names]

    # REQUIRED: dataset_description.json summary
    dataset_summary = _summarize_dataset(root)

    # RECOMMENDED: participants summary
    participant_summary = _summarize_participants_tsv(root)

    # RECOMMENDED: scans summary
    scans_summary = _summarize_scans(root, session=session)

    datatype_agnostic_summary = {**scans_summary, "system": _pretty_str(modalities)}

    # the 'recommended' parts are left out of the report
    # if there were no files to summarize
    if participant_summary:
        participants_template = jinja_env.get_template("participants.jinja")
        participants_info = participants_template.render(**participant_summary)
        logger.info(f"The participant template found: {participants_info}")
    else:
        participants_info = ""

    if scans_summary:
        datatype_agnostic_info = jinja_env.get_template(
            "datatype_agnostic.jinja"
        ).render(**datatype_agnostic_summary)
    else:
        datatype_agnostic_info = ""

    dataset_summary.update(
        n_subjects=len(subjects),
        participants_info=participants_info,
        n_sessions=len(sessions),
        sessions=_pretty_str(sessions),
    )

    # XXX: add channel summary for modalities (ieeg, meg, eeg)
    dataset_summary_info = jinja_env.get_template("dataset_summary.jinja").render(
        **dataset_summary
    )

    # join both parts on a single line and collapse runs of spaces
    paragraph = f"{dataset_summary_info}\n{datatype_agnostic_info}".replace("\n", " ")
    while "  " in paragraph:
        paragraph = paragraph.replace("  ", " ")

    wrapped_lines = textwrap.wrap(paragraph, width=80)
    return "\n".join(wrapped_lines)