# Source code for mne_bids.copyfiles

"""Utility functions to copy raw data files.

When writing BIDS datasets, we often move and/or rename raw data files. Several
original data formats have properties that restrict such operations. That is,
moving/renaming raw data files naively might lead to broken files, for example
due to internal pointers that are not being updated.

"""
# Authors: Mainak Jas <mainak.jas@telecom-paristech.fr>
#          Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
#          Teon Brooks <teon.brooks@gmail.com>
#          Chris Holdgraf <choldgraf@berkeley.edu>
#          Stefan Appelhoff <stefan.appelhoff@mailbox.org>
#          Matt Sanderson <matt.sanderson@mq.edu.au>
#
# License: BSD (3-clause)
import os
import os.path as op
import re
import shutil as sh

from scipy.io import loadmat, savemat

from mne_bids.read import _parse_ext
from mne_bids.utils import _get_mrk_meas_date


def _copytree(src, dst, **kwargs):
    """Copy a directory tree, tolerating a spurious NFS copy error.

    All arguments are forwarded unchanged to `shutil.copytree`.
    See: https://github.com/jupyterlab/jupyterlab/pull/5150.

    Parameters
    ----------
    src : str
        Path to the directory to copy.
    dst : str
        Path of the directory to create.
    **kwargs
        Extra keyword arguments passed on to `shutil.copytree`.

    """
    try:
        sh.copytree(src, dst, **kwargs)
    except sh.Error as copy_error:
        # `copytree` throws an error if copying to + from NFS even though
        # the copy is successful (see https://bugs.python.org/issue24564).
        # Only swallow the error when it is that specific one AND the
        # destination actually exists afterwards.
        harmless = '[Errno 22]' in str(copy_error) and op.exists(dst)
        if not harmless:
            raise


def _get_brainvision_encoding(vhdr_file, verbose=False):
    """Get the encoding of .vhdr and .vmrk files.

    The header is scanned for the first ``Codepage=`` entry. If the entry is
    missing, or present without a value, 'UTF-8' is returned.

    Parameters
    ----------
    vhdr_file : str
        Path to the header file.
    verbose : bool
        Determine whether results should be logged (default False).

    Returns
    -------
    enc : str
        Encoding of the .vhdr file to pass it on to open() function
        either 'UTF-8' (default) or whatever encoding scheme is specified
        in the header.

    """
    key = b'Codepage='
    with open(vhdr_file, 'rb') as ef:
        content = ef.read()

    enc, src = 'UTF-8', '(default)'
    start = content.find(key)
    if start != -1:
        # Only inspect the remainder of the line holding the key. Otherwise
        # an empty value would either raise an IndexError (key at end of
        # file) or pick up the first token of the following line.
        remainder = content[start + len(key):].splitlines()
        tokens = remainder[0].split() if remainder else []
        if tokens:
            enc = tokens[0].decode()
            src = '(read from header)'

    if verbose is True:
        print('Detected file encoding: %s %s.' % (enc, src))
    return enc


def _get_brainvision_paths(vhdr_path):
    """Locate the .eeg and .vmrk files referenced by a BrainVision header.

    Parameters
    ----------
    vhdr_path : str
        Path to the header file.

    Returns
    -------
    paths : tuple
        Paths to the .eeg file at index 0 and the .vmrk file at index 1 of
        the returned tuple.

    Raises
    ------
    ValueError
        If `vhdr_path` does not end in ".vhdr", or if the header contains no
        link to a .eeg or .vmrk file.

    """
    _, ext = _parse_ext(vhdr_path)
    if ext != '.vhdr':
        raise ValueError('Expecting file ending in ".vhdr",'
                         ' but got {}'.format(ext))

    # Read the header with the encoding it declares (or utf-8 by default)
    enc = _get_brainvision_encoding(vhdr_path)
    with open(vhdr_path, 'r', encoding=enc) as f:
        header_text = ' '.join(f)

    # Link to the binary data file (.eeg)
    eeg_file_match = re.search(r'DataFile=(.*\.eeg)', header_text)
    if not eeg_file_match:
        raise ValueError('Could not find a .eeg file link in'
                         ' {}'.format(vhdr_path))
    eeg_file = eeg_file_match.group(1)

    # Link to the marker file (.vmrk)
    vmrk_file_match = re.search(r'MarkerFile=(.*\.vmrk)', header_text)
    if not vmrk_file_match:
        raise ValueError('Could not find a .vmrk file link in'
                         ' {}'.format(vhdr_path))
    vmrk_file = vmrk_file_match.group(1)

    # The links must be bare file names, not paths: paths break as soon as
    # the files are copied to another system, so the file triplet is expected
    # to live in a single directory.
    assert os.sep not in eeg_file
    assert os.sep not in vmrk_file

    # Resolve both links relative to the header's directory and make sure
    # the files are really there
    header_dir = op.dirname(vhdr_path)
    eeg_file_path = op.join(header_dir, eeg_file)
    vmrk_file_path = op.join(header_dir, vmrk_file)
    assert op.exists(eeg_file_path)
    assert op.exists(vmrk_file_path)

    return (eeg_file_path, vmrk_file_path)


def copyfile_ctf(src, dest):
    """Copy and rename CTF files to a new location.

    The whole `src` folder is copied to `dest`. Files inside `dest` whose
    name ends in one of the known CTF extensions are then renamed so that
    their stem matches the name of the `dest` folder.

    Parameters
    ----------
    src : str
        Path to the source raw .ds folder.
    dest : str
        Path to the destination of the new bids folder.

    """
    _copytree(src, dest)

    # list of file types to rename
    file_types = ('.acq', '.eeg', '.hc', '.hist', '.infods', '.bak',
                  '.meg4', '.newds', '.res4')

    # Rename files in dest with the name of the dest directory.
    # normpath drops a trailing path separator first: without it, a `dest`
    # such as "sub-01_meg.ds/" has an empty final component and every file
    # would be renamed to just its extension.
    bids_folder_name = op.splitext(op.basename(op.normpath(dest)))[0]
    for fname in os.listdir(dest):
        if not fname.endswith(file_types):
            continue
        ext = op.splitext(fname)[-1]
        os.rename(op.join(dest, fname),
                  op.join(dest, bids_folder_name + ext))
def copyfile_kit(src, dest, subject_id, session_id,
                 task, run, _init_kwargs):
    """Copy and rename KIT files to a new location.

    Besides the raw data file itself, any marker file(s) and digitizer
    position files listed in `_init_kwargs` are copied next to `dest` under
    BIDS-style names. Entries that are missing or set to None are skipped.

    Parameters
    ----------
    src : str
        Path to the source raw .con or .sqd folder.
    dest : str
        Path to the destination of the new bids folder.
    subject_id : str | None
        The subject ID. Corresponds to "sub".
    session_id : str | None
        The session identifier. Corresponds to "ses".
    task : str | None
        The task identifier. Corresponds to "task".
    run : int | None
        The run number. Corresponds to "run".
    _init_kwargs : dict
        Extract information of marker and headpoints

    Raises
    ------
    ValueError
        If two marker files are given and the first one was measured after
        the second one.

    """
    from mne_bids.write import make_bids_basename

    # KIT data requires the marker file to be copied over too
    sh.copyfile(src, dest)
    data_path = op.split(dest)[0]

    # A key that is present but None means "no such file": skip it instead
    # of handing None to the path helpers / shutil.copyfile.
    hpi = _init_kwargs.get('mrk')
    if hpi is not None:
        if isinstance(hpi, list):
            if _get_mrk_meas_date(hpi[0]) > _get_mrk_meas_date(hpi[1]):
                raise ValueError('Markers provided in incorrect order.')
            _, marker_ext = _parse_ext(hpi[0])
            acq_map = dict(zip(['pre', 'post'], hpi))
        else:
            _, marker_ext = _parse_ext(hpi)
            acq_map = {None: hpi}

        for acq_label, marker_file in acq_map.items():
            marker_fname = make_bids_basename(
                subject=subject_id, session=session_id, task=task, run=run,
                acquisition=acq_label, suffix='markers%s' % marker_ext,
                prefix=data_path)
            sh.copyfile(marker_file, marker_fname)

    # Position files are named without task and run
    position_ext = '.pos'
    for kind in ['elp', 'hsp']:
        position_file = _init_kwargs.get(kind)
        if position_file is None:
            continue
        position_fname = make_bids_basename(
            subject=subject_id, session=session_id, task=None, run=None,
            acquisition=kind.upper(), suffix='headshape%s' % position_ext,
            prefix=data_path)
        sh.copyfile(position_file, position_fname)
def copyfile_brainvision(vhdr_src, vhdr_dest, verbose=False):
    """Copy a BrainVision file triplet to a new location and repair links.

    The BrainVision file format consists of three files: .vhdr, .eeg, and .vmrk
    The .eeg and .vmrk files associated with the .vhdr file will be given names
    as in `vhdr_dest` with adjusted extensions. Internal file pointers will be
    fixed.

    Parameters
    ----------
    vhdr_src : str
        The src path of the .vhdr file to be copied.
    vhdr_dest : str
        The destination path of the .vhdr file.
    verbose : bool
        If truthy, print the path of every file that was created
        (default False). Also forwarded to the encoding detection.

    Raises
    ------
    ValueError
        If `vhdr_src` and `vhdr_dest` have different extensions.

    """
    # Get extension of the brainvision file
    fname_src, ext_src = _parse_ext(vhdr_src)
    fname_dest, ext_dest = _parse_ext(vhdr_dest)
    if ext_src != ext_dest:
        raise ValueError('Need to move data with same extension'
                         ' but got "{}", "{}"'.format(ext_src, ext_dest))

    eeg_file_path, vmrk_file_path = _get_brainvision_paths(vhdr_src)

    # extract encoding from brainvision header file, or default to utf-8
    enc = _get_brainvision_encoding(vhdr_src, verbose)

    # Copy data .eeg ... no links to repair
    sh.copyfile(eeg_file_path, fname_dest + '.eeg')

    # Write new header and marker files, fixing the file pointer links
    # For that, we need to replace an old "basename" with a new one
    # assuming that all .eeg, .vhdr, .vmrk share one basename
    basename_src = op.basename(fname_src)
    assert basename_src + '.eeg' == op.split(eeg_file_path)[-1]
    assert basename_src + '.vmrk' == op.split(vmrk_file_path)[-1]
    basename_dest = op.basename(fname_dest)

    # Map each complete link line to its replacement. Swapping whole lines
    # (rather than substituting the basename inside the line) keeps the key
    # and the extension intact even if the source basename happens to be a
    # substring of them, e.g. "Data" or "eeg".
    link_fixes = {
        'DataFile=' + basename_src + '.eeg':
            'DataFile=' + basename_dest + '.eeg',
        'MarkerFile=' + basename_src + '.vmrk':
            'MarkerFile=' + basename_dest + '.vmrk',
    }
    _copy_brainvision_text(vhdr_src, vhdr_dest, enc, link_fixes)
    _copy_brainvision_text(vmrk_file_path, fname_dest + '.vmrk', enc,
                           link_fixes)

    if verbose:
        for ext in ['.eeg', '.vhdr', '.vmrk']:
            print('Created "{}" in "{}"'
                  .format(fname_dest + ext,
                          op.dirname(op.realpath(vhdr_dest))))


def _copy_brainvision_text(path_in, path_out, enc, link_fixes):
    """Copy a text file line by line, replacing known file link lines."""
    with open(path_in, 'r', encoding=enc) as fin, \
            open(path_out, 'w', encoding=enc) as fout:
        for line in fin:
            stripped = line.strip()
            if stripped in link_fixes:
                # keep the whitespace / line ending surrounding the link
                line = line.replace(stripped, link_fixes[stripped])
            fout.write(line)
def copyfile_eeglab(src, dest):
    """Copy an EEGLAB file to a new location and adjust its '.fdt' pointer.

    Some EEGLAB .set files come with a .fdt binary file that contains the
    data. When moving a .set file, we need to check for an associated .fdt
    file and move it to an appropriate location as well as update an internal
    pointer within the .set file.

    Parameters
    ----------
    src : str
        Path to the source raw .set file.
    dest : str
        Path to the destination of the new .set file.

    Raises
    ------
    ValueError
        If `src` and `dest` have different extensions, or if `src` contains
        no "EEG" field.
    IOError
        If the linked data file does not have the extension '.fdt'.

    """
    # Source and destination must share their extension
    _, ext_src = _parse_ext(src)
    fname_dest, ext_dest = _parse_ext(dest)
    if ext_src != ext_dest:
        raise ValueError('Need to move data with same extension'
                         ' but got {}, {}'.format(ext_src, ext_dest))

    # Extract matlab struct "EEG" from EEGLAB file
    mat = loadmat(src, squeeze_me=False, chars_as_strings=False,
                  mat_dtype=False, struct_as_record=True)
    if 'EEG' not in mat:
        raise ValueError('Could not find "EEG" field in {}'.format(src))
    eeg = mat['EEG']

    # If the data field is a string, it points to a .fdt file in src dir
    data = eeg[0][0]['data']
    last_four = data[0, -4:]
    links_fdt = all([char in last_four for char in '.fdt'])

    # If no .fdt file, simply copy the .set file, no modifications necessary
    if not links_fdt:
        sh.copyfile(src, dest)
        return

    src_dir = op.dirname(src)
    fdt_pointer = ''.join(data.tolist()[0])
    fdt_path = op.join(src_dir, fdt_pointer)
    _, fdt_ext = _parse_ext(fdt_path)
    if fdt_ext != '.fdt':
        raise IOError('Expected extension {} for linked data but found'
                      ' {}'.format('.fdt', fdt_ext))

    # Copy the fdt file and give it a new name
    new_fdt_path = fname_dest + '.fdt'
    sh.copyfile(fdt_path, new_fdt_path)

    # Now adjust the pointer in the set file: it stores the bare file name
    mat['EEG'][0][0]['data'] = op.basename(new_fdt_path)
    savemat(dest, mat, appendmat=False)
def copyfile_bti(raw, dest):
    """Copy BTi data.

    Three files are copied into `dest`, taken from ``raw._init_kwargs``:
    the pdf file (stored as 'c,rfDC', or 'c,rf<highpass>Hz' when
    ``raw.info['highpass']`` is not None), the config file (stored as
    'config') and the head shape file (stored as 'hs_file').

    Parameters
    ----------
    raw : instance of Raw
        An MNE-Python raw object of BTi data.
    dest : str
        Destination to copy the BTi data to.

    """
    highpass = raw.info['highpass']
    pdf_fname = 'c,rfDC' if highpass is None else 'c,rf%0.1fHz' % highpass

    # (key in raw._init_kwargs, file name inside dest), in copy order
    targets = (('pdf_fname', pdf_fname),
               ('config_fname', 'config'),
               ('head_shape_fname', 'hs_file'))
    for init_key, target_name in targets:
        sh.copyfile(raw._init_kwargs[init_key], op.join(dest, target_name))