Source code for mne_connectivity.vector_ar.model_selection

from collections import defaultdict

import numpy as np
from scipy import linalg

from .var import _estimate_var


[docs] def select_order(X, maxlags=None): """Compute lag order selections based on information criterion. Selects a lag order based on each of the available information criteria. Parameters ---------- X : np.ndarray, shape (n_times, n_channels) Endogenous variable, that predicts the exogenous. maxlags : int The maximum number of lags to check. Will then check from ``1`` to ``maxlags``. If None, defaults to ``12 * (n_times / 100.)**(1./4)``. Returns ------- selected_orders : dict The selected orders based on the following information criterion. * aic : Akaike * fpe : Final prediction error * hqic : Hannan-Quinn * bic : Bayesian a.k.a. Schwarz The selected order is then stored as the value. """ # get the number of observations n_total_obs, n_equations = X.shape ntrend = 0 max_estimable = (n_total_obs - n_equations - ntrend) // (1 + n_equations) if maxlags is None: maxlags = int(round(12 * (n_total_obs / 100.0) ** (1 / 4.0))) # TODO: This expression shows up in a bunch of places, but # in some it is `int` and in others `np.ceil`. Also in some # it multiplies by 4 instead of 12. Let's put these all in # one place and document when to use which variant. # Ensure enough obs to estimate model with maxlags maxlags = min(maxlags, max_estimable) else: if maxlags > max_estimable: raise ValueError( "maxlags is too large for the number of observations and " "the number of equations. The largest model cannot be " "estimated." ) # define dictionary of information criterions ics = defaultdict(list) p_min = 1 for p in range(p_min, maxlags + 1): # exclude some periods to same amount of data used for each lag # order params, _, sigma_u = _estimate_var(X, lags=p, offset=maxlags - p) info_criteria = _info_criteria(params, X, sigma_u=sigma_u, lags=p) for k, v in info_criteria.items(): ics[k].append(v) selected_orders = dict((k, np.argmin(v) + p_min) for k, v in ics.items()) return selected_orders
def _logdet_symm(m): """Return log(det(m)) asserting positive definiteness of m. Parameters ---------- m : np.ndarray, shape (N, N) 2d array that is positive-definite (and symmetric) Returns ------- logdet : float The log-determinant of m. """ c, _ = linalg.cho_factor(m, lower=True) return 2 * np.sum(np.log(c.diagonal())) def _sigma_u_mle(df_resid, nobs, sigma_u): """(Biased) maximum likelihood estimate of noise process covariance. Parameters ---------- df_resid : int Number of observations minus number of estimated parameters. nobs : int Number of observations/samples in the dataset. sigma_u : np.ndarray, shape (n_channels, n_channels) Estimate of white noise process variance Returns ------- sigma_u_mle : float The biased MLE of noise process covariance. """ return sigma_u * df_resid / nobs def _info_criteria(params, X, sigma_u, lags): """Compute information criteria for lagorder selection. Parameters ---------- params : np.ndarray, shape (lags, n_channels, n_channels) The coefficient state matrix that governs the linear system (VAR). X : np.ndarray (n_times, n_channels) Endogenous variable, that predicts the exogenous. sigma_u : np.ndarray, shape (n_channels, n_channels) Estimate of white noise process variance lags : int Lags of the endogenous variable. Returns ------- result : dict The AIC, BIC, HQIC and FPE. """ n_totobs, neqs = X.shape nobs = n_totobs - lags lag_order = lags k_trend = 0 k_ar = lags endog_start = k_trend # compute the number of free parameters for degrees of freedom coefs_exog = params[:endog_start].T k_exog = coefs_exog.shape[1] free_params = lag_order * neqs**2 + neqs * k_exog # compute the df_model = neqs * k_ar + k_exog df_resid = nobs - df_model ld = _logdet_symm(_sigma_u_mle(df_resid, nobs, sigma_u)) # See Lütkepohl pp. 146-150 aic = ld + (2.0 / nobs) * free_params bic = ld + (np.log(nobs) / nobs) * free_params hqic = ld + (2.0 * np.log(np.log(nobs)) / nobs) * free_params fpe = ((nobs + df_model) / df_resid) ** neqs * np.exp(ld) return {"aic": aic, "bic": bic, "hqic": hqic, "fpe": fpe}