Source code for spiketimes.df.correlate

import pandas as pd
import numpy as np
import multiprocessing
from itertools import combinations, product
import spiketimes.correlate
from ..utils import p_adjust


def spike_count_correlation(
    df: pd.core.frame.DataFrame,
    binsize: float,
    spiketimes_col: str = "spiketimes",
    spiketrain_col: str = "spiketrain",
    min_firing_rate: float = None,
    t_start: float = None,
    t_stop: float = None,
    use_multiprocessing: bool = False,
    max_cores: int = None,
):
    """
    Calculate Pearson's correlation coefficient of spike counts between all
    pairs of spiketrains in a dataframe.

    Args:
        df: A pandas DataFrame containing spiketimes indexed by spiketrain
        binsize: The size of the time bin in seconds
        spiketimes_col: The label of the column containing spiketimes
        spiketrain_col: The label of the column containing spiketrain identifiers
        min_firing_rate: If selected, selects only bins where the geometric mean
            firing rate of the two spiketrains exceeds this value
        t_start: Start point for the first time bin.
        t_stop: End point for the last time bin.
        use_multiprocessing: Whether to use multiple cores to compute the
            correlations. Useful for large numbers of spiketrains
        max_cores: If using multiprocessing, specifies the maximum number of
            cores to use. Defaults to max available
    Returns:
        A pandas DataFrame with columns {spiketrain_1, spiketrain_2, R_spike_count}
    """
    if t_start is None:
        t_start = np.min(df[spiketimes_col])
    if t_stop is None:
        t_stop = np.max(df[spiketimes_col])
    spiketrain_ids = df[spiketrain_col].unique()
    spiketrain_combs = list(combinations(spiketrain_ids, r=2))
    args = [
        [
            df[df[spiketrain_col] == spiketrain_1][spiketimes_col].values,
            df[df[spiketrain_col] == spiketrain_2][spiketimes_col].values,
            binsize,
            min_firing_rate,
            t_start,
            t_stop,
        ]
        for spiketrain_1, spiketrain_2 in spiketrain_combs
    ]
    if not use_multiprocessing:
        res = []
        for i, arg_set in enumerate(args):
            spiketrain_1, spiketrain_2 = spiketrain_combs[i]
            res.append(
                pd.DataFrame(
                    {
                        "spiketrain_1": spiketrain_1,
                        "spiketrain_2": spiketrain_2,
                        "R_spike_count": spiketimes.correlate.spike_count_correlation(
                            *arg_set
                        ),
                    },
                    index=[0],
                )
            )
        return pd.concat(res, axis=0).reset_index(drop=True)
    if max_cores:
        with multiprocessing.Pool(max_cores) as p:
            res = p.starmap(spiketimes.correlate.spike_count_correlation, args)
    else:
        with multiprocessing.Pool() as p:
            res = p.starmap(spiketimes.correlate.spike_count_correlation, args)
    spiketrain_1 = [st1 for st1, _ in spiketrain_combs]
    spiketrain_2 = [st2 for _, st2 in spiketrain_combs]
    return pd.DataFrame(
        {
            "spiketrain_1": spiketrain_1,
            "spiketrain_2": spiketrain_2,
            "R_spike_count": res,
        }
    )
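A minimal usage sketch, not part of the module: it builds a synthetic DataFrame in the expected long format (one row per spike, labelled by spiketrain) and computes pairwise spike-count correlations in 1-second bins. The import path assumes the module layout shown above.

import numpy as np
import pandas as pd
from spiketimes.df.correlate import spike_count_correlation

# Four synthetic spiketrains: 500 uniformly placed spikes each over 100 s (~5 Hz).
rng = np.random.default_rng(0)
df = pd.concat(
    [
        pd.DataFrame({"spiketrain": i, "spiketimes": np.sort(rng.uniform(0, 100, 500))})
        for i in range(4)
    ],
    ignore_index=True,
)

# One row per pair of spiketrains; R_spike_count is Pearson's r of binned counts.
result = spike_count_correlation(df, binsize=1.0, min_firing_rate=1.0)
print(result.head())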
def spike_count_correlation_between(
    df: pd.core.frame.DataFrame,
    binsize: float,
    spiketimes_col: str = "spiketimes",
    spiketrain_col: str = "spiketrain",
    group_col: str = "group",
    min_firing_rate: float = None,
    t_start: float = None,
    t_stop: float = None,
    use_multiprocessing: bool = False,
    max_cores: int = None,
):
    """
    Calculate spike count correlation between all pairs of spiketrains from
    different groups. For example: correlate all pairs of fast-spiking and
    slow-spiking neurons.

    Args:
        df: A pandas DataFrame containing spiketimes indexed by spiketrain
        binsize: The size of the time bin in seconds
        spiketimes_col: The label of the column containing spiketimes
        spiketrain_col: The label of the column containing spiketrain identifiers
        group_col: The label of the column containing group identifiers
        min_firing_rate: If selected, selects only bins where the geometric mean
            firing rate of the two spiketrains exceeds this value
        t_start: Start point for the first time bin.
        t_stop: End point for the last time bin.
        use_multiprocessing: Whether to use multiple cores to compute the
            correlations. Useful for large numbers of spiketrains
        max_cores: If using multiprocessing, specifies the maximum number of
            cores to use. Defaults to max available
    Returns:
        A pandas DataFrame with columns
        {spiketrain_1, spiketrain_2, group_1, group_2, R_spike_count}
    """
    if t_start is None:
        t_start = np.min(df[spiketimes_col])
    if t_stop is None:
        t_stop = np.max(df[spiketimes_col])
    groups = df[group_col].unique()
    frames: list = []
    for group_1, group_2 in combinations(groups, r=2):
        # get spiketrains
        spiketrains_group_1 = df.loc[df[group_col] == group_1][spiketrain_col].unique()
        spiketrains_group_2 = df.loc[df[group_col] == group_2][spiketrain_col].unique()
        spiketrain_combs = list(product(spiketrains_group_1, spiketrains_group_2))
        # get args
        args = [
            [
                df[df[spiketrain_col] == spiketrain_1][spiketimes_col].values,
                df[df[spiketrain_col] == spiketrain_2][spiketimes_col].values,
                binsize,
                min_firing_rate,
                t_start,
                t_stop,
            ]
            for spiketrain_1, spiketrain_2 in spiketrain_combs
        ]
        # get results for this group combination
        if not use_multiprocessing:
            res = []
            for i, arg_set in enumerate(args):
                spiketrain_1, spiketrain_2 = spiketrain_combs[i]
                res.append(
                    pd.DataFrame(
                        {
                            "spiketrain_1": spiketrain_1,
                            "spiketrain_2": spiketrain_2,
                            "R_spike_count": spiketimes.correlate.spike_count_correlation(
                                *arg_set
                            ),
                        },
                        index=[0],
                    )
                )
            frames.append(
                pd.concat(res, axis=0)
                .assign(**{"group_1": group_1, "group_2": group_2})
                .reset_index(drop=True)
            )
        else:
            if max_cores:
                with multiprocessing.Pool(max_cores) as p:
                    res = p.starmap(spiketimes.correlate.spike_count_correlation, args)
            else:
                with multiprocessing.Pool() as p:
                    res = p.starmap(spiketimes.correlate.spike_count_correlation, args)
            spiketrain_1 = [st1 for st1, _ in spiketrain_combs]
            spiketrain_2 = [st2 for _, st2 in spiketrain_combs]
            frames.append(
                pd.DataFrame(
                    {
                        "spiketrain_1": spiketrain_1,
                        "spiketrain_2": spiketrain_2,
                        "group_1": group_1,
                        "group_2": group_2,
                        "R_spike_count": res,
                    }
                )
            )
    return pd.concat(frames, axis=0)
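A sketch of the grouped variant on the same kind of synthetic data; the group labels here are hypothetical and only serve to split the trains into two populations so that only across-group pairs are correlated.

import numpy as np
import pandas as pd
from spiketimes.df.correlate import spike_count_correlation_between

rng = np.random.default_rng(0)
df = pd.concat(
    [
        pd.DataFrame({"spiketrain": i, "spiketimes": np.sort(rng.uniform(0, 100, 500))})
        for i in range(4)
    ],
    ignore_index=True,
)
# Hypothetical labels: trains 0-1 "fast_spiking", trains 2-3 "slow_spiking".
df["group"] = np.where(df["spiketrain"] < 2, "fast_spiking", "slow_spiking")

# Only pairs spanning the two groups are correlated (itertools.product,
# not combinations, inside the function).
result = spike_count_correlation_between(df, binsize=1.0, group_col="group")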
def spike_count_correlation_test(
    df: pd.core.frame.DataFrame,
    binsize: float,
    n_boot: int = 500,
    spiketimes_col: str = "spiketimes",
    spiketrain_col: str = "spiketrain",
    min_firing_rate: float = None,
    t_start: float = None,
    t_stop: float = None,
    tail: str = "two_tailed",
    adjust_p: bool = True,
    p_adjust_method: str = "Benjamini-Hochberg",
    use_multiprocessing: bool = False,
    max_cores: int = None,
):
    """
    Calculate spike count correlation between all pairs of spiketrains and test
    its significance using a bootstrap procedure. Multiprocessing is recommended
    when computing on large datasets.

    Args:
        df: A pandas DataFrame containing spiketimes indexed by spiketrain
        binsize: The size of the time bin in seconds
        n_boot: The number of bootstrap replicates to create.
        spiketimes_col: The label of the column containing spiketimes
        spiketrain_col: The label of the column containing spiketrain identifiers
        min_firing_rate: If selected, selects only bins where the geometric mean
            firing rate of the two spiketrains exceeds this value
        t_start: The start point for the first time bin.
        t_stop: The end point for the last time bin.
        tail: Tail for hypothesis test {"two_tailed", "upper", "lower"}.
            Two tailed recommended
        adjust_p: Whether to adjust p-values for multiple comparisons.
        p_adjust_method: If adjusting p-values, specifies which method to use
            {'Benjamini-Hochberg', 'Bonferroni', 'Bonferroni-Holm'}
        use_multiprocessing: Whether to use multiple cores to compute the
            correlations. Useful for large numbers of spiketrains
        max_cores: If using multiprocessing, specifies the maximum number of
            cores to use. Defaults to max available
    Returns:
        A pandas DataFrame with columns {spiketrain_1, spiketrain_2, R_spike_count, p}
    """
    if t_start is None:
        t_start = np.min(df[spiketimes_col])
    if t_stop is None:
        t_stop = np.max(df[spiketimes_col])
    spiketrain_ids = df[spiketrain_col].unique()
    spiketrain_combs = list(combinations(spiketrain_ids, r=2))
    args = [
        [
            df[df[spiketrain_col] == spiketrain_1][spiketimes_col].values,
            df[df[spiketrain_col] == spiketrain_2][spiketimes_col].values,
            binsize,
            n_boot,
            min_firing_rate,
            t_start,
            t_stop,
            tail,
        ]
        for spiketrain_1, spiketrain_2 in spiketrain_combs
    ]
    if not use_multiprocessing:
        res = []
        for i, arg_set in enumerate(args):
            spiketrain_1, spiketrain_2 = spiketrain_combs[i]
            r, p = spiketimes.correlate.spike_count_correlation_test(*arg_set)
            res.append(
                pd.DataFrame(
                    {
                        "spiketrain_1": spiketrain_1,
                        "spiketrain_2": spiketrain_2,
                        "R_spike_count": r,
                        "p": p,
                    },
                    index=[0],
                )
            )
        df = pd.concat(res, axis=0).reset_index(drop=True)
    else:
        if max_cores:
            with multiprocessing.Pool(max_cores) as p:
                res = p.starmap(
                    spiketimes.correlate.spike_count_correlation_test, args
                )
        else:
            with multiprocessing.Pool() as p:
                res = p.starmap(
                    spiketimes.correlate.spike_count_correlation_test, args
                )
        spiketrain_1 = [st1 for st1, _ in spiketrain_combs]
        spiketrain_2 = [st2 for _, st2 in spiketrain_combs]
        r = [r for r, _ in res]
        p = [p for _, p in res]
        df = pd.DataFrame(
            {
                "spiketrain_1": spiketrain_1,
                "spiketrain_2": spiketrain_2,
                "R_spike_count": r,
                "p": p,
            }
        )
    if adjust_p:
        df["p"] = p_adjust(df["p"].values, method=p_adjust_method)
    return df
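A sketch of the bootstrap-test variant, again on synthetic data: n_boot controls the number of bootstrap replicates, and with adjust_p=True the returned p column is already corrected for multiple comparisons.

import numpy as np
import pandas as pd
from spiketimes.df.correlate import spike_count_correlation_test

rng = np.random.default_rng(0)
df = pd.concat(
    [
        pd.DataFrame({"spiketrain": i, "spiketimes": np.sort(rng.uniform(0, 100, 500))})
        for i in range(4)
    ],
    ignore_index=True,
)

result = spike_count_correlation_test(df, binsize=1.0, n_boot=1000, adjust_p=True)
significant_pairs = result[result["p"] < 0.05]  # pairs surviving correction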
def spike_count_correlation_between_test(
    df: pd.core.frame.DataFrame,
    binsize: float,
    n_boot: int = 500,
    spiketimes_col: str = "spiketimes",
    spiketrain_col: str = "spiketrain",
    group_col: str = "group",
    min_firing_rate: float = None,
    t_start: float = None,
    t_stop: float = None,
    tail: str = "two_tailed",
    adjust_p: bool = True,
    p_adjust_method: str = "Benjamini-Hochberg",
    use_multiprocessing: bool = False,
    max_cores: int = None,
):
    """
    Calculate spike count correlation between all pairs of spiketrains from
    different groups and test significance using a bootstrap procedure.
    For example: correlate all pairs of fast-spiking and slow-spiking neurons.

    Args:
        df: A pandas DataFrame containing spiketimes indexed by spiketrain
        binsize: The size of the time bin in seconds
        n_boot: The number of bootstrap replicates to create.
        spiketimes_col: The label of the column containing spiketimes
        spiketrain_col: The label of the column containing spiketrain identifiers
        group_col: The label of the column containing group identifiers
        min_firing_rate: If selected, selects only bins where the geometric mean
            firing rate of the two spiketrains exceeds this value
        t_start: The start point for the first time bin.
        t_stop: The end point for the last time bin.
        tail: Tail for hypothesis test {"two_tailed", "upper", "lower"}.
            Two tailed recommended
        adjust_p: Whether to adjust p-values for multiple comparisons.
        p_adjust_method: If adjusting p-values, specifies which method to use
            {'Benjamini-Hochberg', 'Bonferroni', 'Bonferroni-Holm'}
        use_multiprocessing: Whether to use multiple cores to compute the
            correlations. Useful for large numbers of spiketrains
        max_cores: If using multiprocessing, specifies the maximum number of
            cores to use. Defaults to max available
    Returns:
        A pandas DataFrame with columns
        {spiketrain_1, spiketrain_2, group_1, group_2, R_spike_count, p}
    """
    if t_start is None:
        t_start = np.min(df[spiketimes_col])
    if t_stop is None:
        t_stop = np.max(df[spiketimes_col])
    groups = df[group_col].unique()
    frames: list = []
    for group_1, group_2 in combinations(groups, r=2):
        # get spiketrains
        spiketrains_group_1 = df.loc[df[group_col] == group_1][spiketrain_col].unique()
        spiketrains_group_2 = df.loc[df[group_col] == group_2][spiketrain_col].unique()
        spiketrain_combs = list(product(spiketrains_group_1, spiketrains_group_2))
        # get args
        args = [
            [
                df[df[spiketrain_col] == spiketrain_1][spiketimes_col].values,
                df[df[spiketrain_col] == spiketrain_2][spiketimes_col].values,
                binsize,
                n_boot,
                min_firing_rate,
                t_start,
                t_stop,
                tail,
            ]
            for spiketrain_1, spiketrain_2 in spiketrain_combs
        ]
        # get results for this group combination
        if not use_multiprocessing:
            res = []
            for i, arg_set in enumerate(args):
                spiketrain_1, spiketrain_2 = spiketrain_combs[i]
                r, p = spiketimes.correlate.spike_count_correlation_test(*arg_set)
                res.append(
                    pd.DataFrame(
                        {
                            "spiketrain_1": spiketrain_1,
                            "spiketrain_2": spiketrain_2,
                            "R_spike_count": r,
                            "p": p,
                        },
                        index=[0],
                    )
                )
            frames.append(
                pd.concat(res, axis=0)
                .assign(**{"group_1": group_1, "group_2": group_2})
                .reset_index(drop=True)
            )
        else:
            if max_cores:
                with multiprocessing.Pool(max_cores) as p:
                    res = p.starmap(
                        spiketimes.correlate.spike_count_correlation_test, args
                    )
            else:
                with multiprocessing.Pool() as p:
                    res = p.starmap(
                        spiketimes.correlate.spike_count_correlation_test, args
                    )
            spiketrain_1 = [st1 for st1, _ in spiketrain_combs]
            spiketrain_2 = [st2 for _, st2 in spiketrain_combs]
            r = [r for r, _ in res]
            p = [p for _, p in res]
            frames.append(
                pd.DataFrame(
                    {
                        "spiketrain_1": spiketrain_1,
                        "spiketrain_2": spiketrain_2,
                        "group_1": group_1,
                        "group_2": group_2,
                        "R_spike_count": r,
                        "p": p,
                    }
                )
            )
    df = pd.concat(frames, axis=0)
    if adjust_p:
        df["p"] = p_adjust(df["p"].values, method=p_adjust_method)
    return df
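Because this variant runs one bootstrap test per across-group pair, it is a natural candidate for use_multiprocessing. On platforms that spawn worker processes, the call must sit under a __main__ guard; a sketch under those assumptions (synthetic data, hypothetical group labels):

import numpy as np
import pandas as pd
from spiketimes.df.correlate import spike_count_correlation_between_test

if __name__ == "__main__":  # required when worker processes are spawned
    rng = np.random.default_rng(0)
    df = pd.concat(
        [
            pd.DataFrame({"spiketrain": i, "spiketimes": np.sort(rng.uniform(0, 100, 500))})
            for i in range(4)
        ],
        ignore_index=True,
    )
    df["group"] = np.where(df["spiketrain"] < 2, "a", "b")
    result = spike_count_correlation_between_test(
        df, binsize=1.0, n_boot=500, use_multiprocessing=True, max_cores=4
    )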
def auto_corr(
    df: pd.core.frame.DataFrame,
    binsize: float = 0.01,
    num_lags: int = 100,
    spiketimes_col: str = "spiketimes",
    spiketrain_col: str = "spiketrain",
    t_start: float = None,
    t_stop: float = None,
):
    """
    Calculate the autocorrelation function for each spiketrain in a DataFrame.

    Args:
        df: A pandas DataFrame containing spiketimes indexed by spiketrain
        binsize: The size of the time bin in seconds
        num_lags: The number of lags forward and backwards around lag 0 to return
        spiketimes_col: The label of the column containing spiketimes
        spiketrain_col: The label of the column containing spiketrain identifiers
        t_start: Minimum timepoint (currently not passed through)
        t_stop: Maximum timepoint (currently not passed through)
    Returns:
        A pandas DataFrame with columns {spiketrain, time_bin, autocorrelation}
    """
    return (
        df.groupby(spiketrain_col)
        .apply(
            lambda x: spiketimes.correlate.auto_corr(
                x[spiketimes_col], binsize=binsize, num_lags=num_lags, as_df=True
            )
        )
        .reset_index()
        .drop("level_1", axis=1)
    )
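A short sketch of the autocorrelation helper: one call returns a tidy autocorrelation function for every train in the DataFrame, ready for faceted plotting.

import numpy as np
import pandas as pd
from spiketimes.df.correlate import auto_corr

rng = np.random.default_rng(0)
df = pd.concat(
    [
        pd.DataFrame({"spiketrain": i, "spiketimes": np.sort(rng.uniform(0, 100, 500))})
        for i in range(2)
    ],
    ignore_index=True,
)

# 10 ms bins, 100 lags either side of zero ->
# columns {spiketrain, time_bin, autocorrelation}
acf = auto_corr(df, binsize=0.01, num_lags=100)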
def cross_corr(
    df: pd.core.frame.DataFrame,
    binsize: float = 0.01,
    num_lags: int = 100,
    spiketimes_col: str = "spiketimes",
    spiketrain_col: str = "spiketrain",
    t_start: float = None,
    t_stop: float = None,
    use_multiprocessing: bool = False,
    max_cores: int = None,
):
    """
    Calculate cross correlation between each combination of spiketrains in a DataFrame.

    Args:
        df: A pandas DataFrame containing spiketimes indexed by spiketrain
        binsize: The size of the time bin in seconds
        num_lags: The number of lags forward and backwards around lag 0 to return
        spiketimes_col: The label of the column containing spiketimes
        spiketrain_col: The label of the column containing spiketrain identifiers
        t_start: Minimum timepoint
        t_stop: Maximum timepoint
        use_multiprocessing: Whether to use multiple cores to compute cross
            correlation. Useful for large numbers of spiketrains
        max_cores: If using multiprocessing, specifies the maximum number of
            cores to use. Defaults to max available
    Returns:
        A pandas DataFrame with columns
        {spiketrain_1, spiketrain_2, time_bin, crosscorrelation}
    """
    spiketrain_ids = df[spiketrain_col].unique()
    spiketrain_combs = list(combinations(spiketrain_ids, r=2))
    args = [
        [
            df[df[spiketrain_col] == spiketrain_1][spiketimes_col].values,
            df[df[spiketrain_col] == spiketrain_2][spiketimes_col].values,
            binsize,
            num_lags,
            True,  # as_df
            t_start,
            t_stop,
        ]
        for spiketrain_1, spiketrain_2 in spiketrain_combs
    ]
    if not use_multiprocessing:
        res = []
        for i, arg_set in enumerate(args):
            spiketrain_1, spiketrain_2 = spiketrain_combs[i]
            res.append(
                spiketimes.correlate.cross_corr(*arg_set).assign(
                    **{"spiketrain_1": spiketrain_1, "spiketrain_2": spiketrain_2}
                )
            )
    else:
        if max_cores:
            with multiprocessing.Pool(max_cores) as p:
                res = p.starmap(spiketimes.correlate.cross_corr, args)
        else:
            with multiprocessing.Pool() as p:
                res = p.starmap(spiketimes.correlate.cross_corr, args)
        for spiketrain_comb, r in zip(spiketrain_combs, res):
            r["spiketrain_1"] = spiketrain_comb[0]
            r["spiketrain_2"] = spiketrain_comb[1]
    df = pd.concat(res)
    return df[["spiketrain_1", "spiketrain_2", "time_bin", "crosscorrelation"]]
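A sketch for the pairwise cross correlation: the result is long-format, so a single pair can be pulled out by filtering on the two identifier columns.

import numpy as np
import pandas as pd
from spiketimes.df.correlate import cross_corr

rng = np.random.default_rng(0)
df = pd.concat(
    [
        pd.DataFrame({"spiketrain": i, "spiketimes": np.sort(rng.uniform(0, 100, 500))})
        for i in range(3)
    ],
    ignore_index=True,
)

cc = cross_corr(df, binsize=0.01, num_lags=50)
# Extract the cross correlogram of one specific pair of trains.
pair_01 = cc[(cc["spiketrain_1"] == 0) & (cc["spiketrain_2"] == 1)]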
def cross_corr_test(
    df: pd.core.frame.DataFrame,
    binsize: float = 0.01,
    num_lags: int = 100,
    spiketimes_col: str = "spiketimes",
    spiketrain_col: str = "spiketrain",
    t_start: float = None,
    t_stop: float = None,
    tail: str = "two_tailed",
    adjust_p: bool = True,
    p_adjust_method: str = "Benjamini-Hochberg",
    use_multiprocessing: bool = False,
    max_cores: int = None,
):
    """
    Calculate cross correlation between all pairs of spiketrains and test its
    significance. The significance test compares the observed cross correlation
    to the cross correlation expected of Poisson spiketrains.

    Args:
        df: A pandas DataFrame containing spiketimes indexed by spiketrain
        binsize: The size of the time bin in seconds
        num_lags: The number of lags forward and backwards around lag 0 to return
        spiketimes_col: The label of the column containing spiketimes
        spiketrain_col: The label of the column containing spiketrain identifiers
        t_start: Minimum timepoint
        t_stop: Maximum timepoint
        tail: Tail for hypothesis test {"two_tailed", "upper", "lower"}.
            Two tailed recommended
        adjust_p: Whether to adjust p-values for multiple comparisons.
        p_adjust_method: If adjusting p-values, specifies which method to use
            {'Benjamini-Hochberg', 'Bonferroni', 'Bonferroni-Holm'}
        use_multiprocessing: Whether to use multiple cores to compute cross
            correlation. Useful for large numbers of spiketrains
        max_cores: If using multiprocessing, specifies the maximum number of
            cores to use. Defaults to max available
    Returns:
        A pandas DataFrame with columns
        {spiketrain_1, spiketrain_2, time_bin, crosscorrelation, p}
    """
    spiketrain_ids = df[spiketrain_col].unique()
    spiketrain_combs = list(combinations(spiketrain_ids, r=2))
    args = [
        [
            df[df[spiketrain_col] == spiketrain_1][spiketimes_col].values,
            df[df[spiketrain_col] == spiketrain_2][spiketimes_col].values,
            binsize,
            num_lags,
            True,  # as_df
            t_start,
            t_stop,
            tail,  # tail
            False,  # adjust_p
            None,  # p_adjust_method
        ]
        for spiketrain_1, spiketrain_2 in spiketrain_combs
    ]
    if not use_multiprocessing:
        res = []
        for i, arg_set in enumerate(args):
            spiketrain_1, spiketrain_2 = spiketrain_combs[i]
            res.append(
                spiketimes.correlate.cross_corr_test(*arg_set).assign(
                    **{"spiketrain_1": spiketrain_1, "spiketrain_2": spiketrain_2}
                )
            )
    else:
        if max_cores:
            with multiprocessing.Pool(max_cores) as p:
                res = p.starmap(spiketimes.correlate.cross_corr_test, args)
        else:
            with multiprocessing.Pool() as p:
                res = p.starmap(spiketimes.correlate.cross_corr_test, args)
        for spiketrain_comb, r in zip(spiketrain_combs, res):
            r["spiketrain_1"] = spiketrain_comb[0]
            r["spiketrain_2"] = spiketrain_comb[1]
    df = pd.concat(res, axis=0)
    if adjust_p:
        df["p"] = p_adjust(df["p"].values, method=p_adjust_method)
    return df[["spiketrain_1", "spiketrain_2", "time_bin", "crosscorrelation", "p"]]
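A sketch of the significance-tested variant: lags whose cross correlation deviates from the Poisson expectation get small p-values, and with adjust_p=True the p column is corrected across all lags and pairs.

import numpy as np
import pandas as pd
from spiketimes.df.correlate import cross_corr_test

rng = np.random.default_rng(0)
df = pd.concat(
    [
        pd.DataFrame({"spiketrain": i, "spiketimes": np.sort(rng.uniform(0, 100, 500))})
        for i in range(3)
    ],
    ignore_index=True,
)

cc = cross_corr_test(df, binsize=0.01, num_lags=50, tail="two_tailed", adjust_p=True)
significant_lags = cc[cc["p"] < 0.05]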
def cross_corr_between_test(
    df: pd.core.frame.DataFrame,
    binsize: float = 0.01,
    num_lags: int = 100,
    spiketimes_col: str = "spiketimes",
    spiketrain_col: str = "spiketrain",
    group_col: str = "group",
    t_start: float = None,
    t_stop: float = None,
    tail: str = "two_tailed",
    adjust_p: bool = True,
    p_adjust_method: str = "Benjamini-Hochberg",
    use_multiprocessing: bool = False,
    max_cores: int = None,
):
    """
    Calculate cross correlation between all pairs of spiketrains from different
    groups and test its significance. For example: correlate all pairs of
    fast-spiking and slow-spiking neurons.

    Args:
        df: A pandas DataFrame containing spiketimes indexed by spiketrain
        binsize: The size of the time bin in seconds
        num_lags: The number of lags forward and backwards around lag 0 to return
        spiketimes_col: The label of the column containing spiketimes
        spiketrain_col: The label of the column containing spiketrain identifiers
        group_col: The label of the column containing group identifiers
        t_start: Minimum timepoint
        t_stop: Maximum timepoint
        tail: Tail for hypothesis test {"two_tailed", "upper", "lower"}.
            Two tailed recommended
        adjust_p: Whether to adjust p-values for multiple comparisons.
        p_adjust_method: If adjusting p-values, specifies which method to use
            {'Benjamini-Hochberg', 'Bonferroni', 'Bonferroni-Holm'}
        use_multiprocessing: Whether to use multiple cores to compute cross
            correlation. Useful for large numbers of spiketrains
        max_cores: If using multiprocessing, specifies the maximum number of
            cores to use. Defaults to max available
    Returns:
        A pandas DataFrame with columns
        {spiketrain_1, spiketrain_2, group_1, group_2, time_bin, crosscorrelation, p}
    """
    if t_start is None:
        t_start = np.min(df[spiketimes_col])
    if t_stop is None:
        t_stop = np.max(df[spiketimes_col])
    groups = df[group_col].unique()
    frames: list = []
    for group_1, group_2 in combinations(groups, r=2):
        # get spiketrains
        spiketrains_group_1 = df.loc[df[group_col] == group_1][spiketrain_col].unique()
        spiketrains_group_2 = df.loc[df[group_col] == group_2][spiketrain_col].unique()
        spiketrain_combs = list(product(spiketrains_group_1, spiketrains_group_2))
        # get args
        args = [
            [
                df[df[spiketrain_col] == spiketrain_1][spiketimes_col].values,
                df[df[spiketrain_col] == spiketrain_2][spiketimes_col].values,
                binsize,
                num_lags,
                True,  # as_df
                t_start,
                t_stop,
                tail,  # tail
                False,  # adjust_p
                None,  # p_adjust_method
            ]
            for spiketrain_1, spiketrain_2 in spiketrain_combs
        ]
        # get results for this group combination
        if not use_multiprocessing:
            res = []
            for i, arg_set in enumerate(args):
                spiketrain_1, spiketrain_2 = spiketrain_combs[i]
                res.append(
                    spiketimes.correlate.cross_corr_test(*arg_set).assign(
                        **{"spiketrain_1": spiketrain_1, "spiketrain_2": spiketrain_2}
                    )
                )
        else:
            if max_cores:
                with multiprocessing.Pool(max_cores) as p:
                    res = p.starmap(spiketimes.correlate.cross_corr_test, args)
            else:
                with multiprocessing.Pool() as p:
                    res = p.starmap(spiketimes.correlate.cross_corr_test, args)
            for spiketrain_comb, r in zip(spiketrain_combs, res):
                r["spiketrain_1"] = spiketrain_comb[0]
                r["spiketrain_2"] = spiketrain_comb[1]
        frames.append(
            pd.concat(res, axis=0)
            .assign(**{"group_1": group_1, "group_2": group_2})
            .reset_index(drop=True)
        )
    df = pd.concat(frames, axis=0)
    if adjust_p:
        df["p"] = p_adjust(df["p"].values, method=p_adjust_method)
    return df
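Finally, a sketch of the grouped cross-correlation test: the group labels are hypothetical, and the Bonferroni option simply swaps the correction method passed through to p_adjust.

import numpy as np
import pandas as pd
from spiketimes.df.correlate import cross_corr_between_test

rng = np.random.default_rng(0)
df = pd.concat(
    [
        pd.DataFrame({"spiketrain": i, "spiketimes": np.sort(rng.uniform(0, 100, 500))})
        for i in range(4)
    ],
    ignore_index=True,
)
df["group"] = np.where(df["spiketrain"] < 2, "fast_spiking", "slow_spiking")

cc = cross_corr_between_test(
    df, binsize=0.01, num_lags=50, group_col="group", p_adjust_method="Bonferroni"
)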