Source code for mne_nirs.statistics._statsmodels

# Authors: Robert Luke <mail@robertluke.net>
#
# License: BSD (3-clause)

from io import StringIO

import numpy as np
import pandas as pd


def summary_to_dataframe(summary):
    """Convert statsmodels summary to pandas dataframe.

    .. warning:: The summary has precision issues, use the numerical values
                 from it with caution.
    """
    results = summary.tables[1]
    if type(results) is not pd.core.frame.DataFrame:
        results = StringIO(summary.tables[1].as_html())
        results = pd.read_html(results, header=0, index_col=0)[0]
    return results


def expand_summary_dataframe(summary):
    """Expand dataframe index column in to individual columns."""
    # Determine new columns
    new_cols = summary.index[0].split(":")
    col_names = []
    for col in new_cols:
        col_name = col.split("[")[0]
        summary[col_name] = "NaN"
        col_names.append(col_name)

    # Fill in values
    if "Group Var" in summary.index:
        summary = summary[:-1]
    summary = summary.copy(deep=True)
    indices = summary.index
    for row_idx, row in enumerate(indices):
        col_vals = row.split(":")
        for col_idx, col in enumerate(col_names):
            if "]" in col_vals[col_idx]:
                val = col_vals[col_idx].split("[")[1].split("]")[0]
            else:
                val = col
            summary.at[row, col] = val

    summary = summary.copy()  # Copies required to suppress .loc warnings
    sum_copy = summary.copy(deep=True)
    key = "P>|t|" if "P>|t|" in summary.columns else "P>|z|"
    float_p = [float(p) for p in sum_copy[key]]
    summary.loc[:, key] = float_p
    summary.loc[:, "Significant"] = False
    summary.loc[summary[key] < 0.05, "Significant"] = True

    # Standardise returned column name, it seems to vary per test
    if "Coef." in summary.columns:
        summary.loc[:, "Coef."] = [float(c) for c in summary["Coef."]]
    elif "coef" in summary.columns:
        summary = summary.rename(columns={"coef": "Coef."})

    return summary


_REPLACEMENTS = (
    ("P>|z|", "pvalues"),
    ("Coef.", "fe_params"),
    ("z", "tvalues"),
    ("P>|t|", "pvalues"),
)


[docs] def statsmodels_to_results(model, order=None): """ Convert statsmodels summary to a dataframe. Parameters ---------- model : statsmodels model output The output of a statsmodels analysis. For example rlm or mixedlm. order : array of str Requested order of the channels. Returns ------- df : Pandas dataframe. Data frame with the results from the stats model. """ from scipy.stats.distributions import norm from statsmodels.regression.mixed_linear_model import MixedLMResultsWrapper df = summary_to_dataframe(model.summary()) # deal with numerical precision loss in at least some of the values for col, attr in _REPLACEMENTS: if col in df.columns: df[col] = getattr(model, attr, df[col]) # This one messes up the standard error and quartiles, too if isinstance(model, MixedLMResultsWrapper): sl = slice(model.k_fe) mu = np.asarray(df.iloc[sl, df.columns == "Coef."])[:, 0] # Adapted from statsmodels, see # https://github.com/statsmodels/statsmodels/blob/master/statsmodels/regression/mixed_linear_model.py#L2710-L2736 # noqa: E501 stderr = np.sqrt(np.diag(model.cov_params()[sl])) df.iloc[sl, df.columns == "Std.Err."] = stderr # Confidence intervals qm = -norm.ppf(0.05 / 2) df.iloc[sl, df.columns == "[0.025"] = mu - qm * stderr df.iloc[sl, df.columns == "0.975]"] = mu + qm * stderr # All random effects variances and covariances sdf = np.zeros((model.k_re2 + model.k_vc, 2)) jj = 0 for i in range(model.k_re): for j in range(i + 1): sdf[jj, 0] = np.asarray(model.cov_re)[i, j] sdf[jj, 1] = np.sqrt(model.scale) * model.bse.iloc[model.k_fe + jj] jj += 1 # Variance components for i in range(model.k_vc): sdf[jj, 0] = model.vcomp[i] sdf[jj, 1] = np.sqrt(model.scale) * model.bse[model.k_fe + jj] jj += 1 df.iloc[model.k_fe :, df.columns == "Coef."] = sdf[:, 0] df.iloc[model.k_fe :, df.columns == "Std.Err."] = sdf[:, 1] df = expand_summary_dataframe(df) if order is not None: df["old_index"] = df.index df = df.set_index("ch_name") df = df.loc[order, :] df["ch_name"] = df.index df.index = df["old_index"] df.drop(columns="old_index", inplace=True) df.rename_axis(None, inplace=True) return df