Source code for spiketimes.df.statistics

import pandas as pd
import numpy as np
import spiketimes
import spiketimes.statistics
from .binning import binned_spiketrain


[docs]def mean_firing_rate_by( df: pd.core.frame.DataFrame, spiketimes_col: str = "spiketimes", spiketrain_col: str = "spiketrain", t_start: float = None, t_stop: float = None, ): """ Estimate the mean firing rate of each spiketrain. Firing rate caluclated by summing spikes and dividing by total time. Args: df: A pandas DataFrame containing spiketimes indexed by spiketrain spiketimes_col: The label of the column containing spiketimes spiketrain_col: The label of the column identifying the spiketrain responsible for the spike t_start: Time point at which to start. Defaults to time of first spike in df. t_stop: Maximum timepoint. Defaults to last spike in df. Returns: A DataFrame containing mean firing rate by neuron """ if t_start is None: t_start = df[spiketimes_col].min() if not t_stop: t_stop = df[spiketimes_col].max() return ( df.groupby(spiketrain_col) .apply( lambda x: spiketimes.statistics.mean_firing_rate( x[spiketimes_col].values, t_start=t_start, t_stop=t_stop, ) ) .reset_index() .rename(columns={0: "mean_firing_rate"}) )
[docs]def ifr_by( df: pd.core.frame.DataFrame, fs: float = 1, sigma: float = None, spiketimes_col: str = "spiketimes", spiketrain_col: str = "spiketrain", t_start: float = None, t_stop: float = None, ): """ Estimate firing rate for each spiketrain at a regular sampling rate. Args: df: A pandas DataFrame containing the spikes data fs: The sampling rate at which to estimate firing rate sigma: Hypterparameter controlling smoothing for firing rate estimates spiketimes_col: The label of the column in df containing spiketimes spiketrain_col: The label of the column in df containing spiketrain idendifiers (which spiketrain was responsible for the spike) t_start: Time point at which to start firing rate estimates. Defaults to time of first spike in df. t_stop: Time point of maximum firing rate estimate. Defaults to last spike in df. Returns: A pandas DataFrame with one row per timepoint per spiketrain with column `ifr` identifying firing rate estimates. """ if t_start is None: t_start = df[spiketimes_col].min() if not t_stop: t_stop = df[spiketimes_col].max() return ( df.groupby(spiketrain_col) .apply( lambda x: spiketimes.statistics.ifr( x[spiketimes_col].values, fs=fs, sigma=sigma, t_start=t_start, t_stop=t_stop, as_df=True, ) ) .reset_index() .drop("level_1", axis=1) )
[docs]def mean_firing_rate_ifr_by( df: pd.core.frame.DataFrame, fs: float = 1, sigma: float = None, spiketimes_col: str = "spiketimes", spiketrain_col: str = "spiketrain", exclude_below: float = None, t_start: float = None, t_stop: float = None, ): """ Estimate mean firing rate of each neuron by first estimating firing rate at a regular interval and then taking the median. Args: df: A pandas Dataframe containing the spike data fs: The sampling rate at which to estimate firing rate sigma: Parameter contolling smoothing level of firing rate estiamtes. exclude_below: If specified, firing rates below this value will not be included in the median calculation. spiketimes_col: The label of the column containing the spiketimes spiketrain_col: The label of the column in df containing spiketrain idendifiers (which spiketrain was responsible for the spike) t_start: Time point at which to start firing rate estimates. Defaults to time of first spike in df. t_stop: Time point of maximum firing rate estimate. Defaults to last spike in df. Returns: A pandas DataFrame containing one row per spiketrain as well as its firing rate estimate. """ if t_start is None: t_start = df[spiketimes_col].min() if not t_stop: t_stop = df[spiketimes_col].max() return ( df.groupby(spiketrain_col) .apply( lambda x: spiketimes.statistics.mean_firing_rate_ifr( x[spiketimes_col].values, fs=fs, sigma=sigma, exclude_below=exclude_below, ) ) .reset_index() .rename(columns={0: "mean_firing_rate_ifr"}) )
[docs]def cv_isi_by( df: pd.core.frame.DataFrame, spiketimes_col: str = "spiketimes", spiketrain_col: str = "spiketrain", ): """ Calculate the coefficient of variation of interspike intervals for each spiketrain in a DataFrame. The cv_isi is a metric of spike regularity. Values near 1 are typical of poisson processes. Values near 0 indicate very regular processes. Args: df: A pandas DataFrame containing spiketimes indexed by spiketrain spiketimes_col: The label of the column containing spiketimes spiketrain_col: The label of the column identifying the spiketrain responsible for the spike Returns: A DataFrame containing cv_isi by neuron """ return ( df.groupby(spiketrain_col) .apply(lambda x: spiketimes.statistics.cv_isi(x[spiketimes_col].values)) .reset_index() .rename(columns={0: "cv_isi"}) )
[docs]def cv2_isi_by( df: pd.core.frame.DataFrame, spiketimes_col: str = "spiketimes", spiketrain_col: str = "spiketrain", ): """ Calculate cv2 of interspike intervals of each spiketrain. cv2 is a metric related to the coefficient of variation. It is adapted to be suitable long-period spiketrains. Args: df: A pandas DataFrame containing spiketimes indexed by spiketrain spiketimes_col: The label of the column containing spiketimes spiketrain_col: The label of the column identifying the spiketrain responsible for the spike Returns: A DataFrame containing cv2_isi by neuron """ return ( df.groupby(spiketrain_col) .apply(lambda x: spiketimes.statistics.cv2_isi(x[spiketimes_col].values)) .reset_index() .rename(columns={0: "cv2_isi"}) )
[docs]def fraction_silent_by( df: pd.core.frame.DataFrame, binsize: float = 1, silent_threshold: float = 0.5, spiketimes_col: str = "spiketimes", spiketrain_col: str = "spiketrain", t_start: float = None, t_stop: float = None, ): """ Estimate the fraction of time a spiketrain was inactivate. Estimate calculated by binning spikes into time bins and calculating the proportion of spikes falling below a specified threshold. Args: df: A pandas DataFrame containing spiketimes indexed by spiketrain binsize: The time period in seconds to use when binning spikes. spiketimes_col: The label of the column containing spiketimes spiketrain_col: The label of the column identifying the spiketrain responsible for the spike t_start: Time point at which to start. Defaults to time of first spike in df. t_stop: Maximum timepoint. Defaults to last spike in df. Returns: A pandas DataFrame containing fraction silent estimates by neuron. """ if t_start is None: t_start = df[spiketimes_col].min() if not t_stop: t_stop = df[spiketimes_col].max() fs = 1 / binsize return ( binned_spiketrain( df, spiketimes_col=spiketimes_col, spiketrain_col=spiketrain_col, fs=fs, t_start=t_start, t_stop=t_stop, ) .groupby(spiketrain_col) .apply(lambda x: np.mean(x["spike_count"] > silent_threshold)) .reset_index() .rename(columns={0: "fraction_silent"}) )
[docs]def auc_roc_test_by( df: pd.core.frame.DataFrame, n_boot: int = 1000, return_distance_from_chance: bool = False, spikecount_col: str = "spike_count", spiketrain_col: str = "spiketrain", condition_col: str = "cond", ): """ Calculates the Area Under the Receiver Operating Characteristic Curve of spike counts for each spiketrain. The AUCROC can be used as a metric of the separability of two distrobutions. Each spiketrain must have been recorded in both conditions during multiple trials. Significance tested using a permutation test. Args: df: A pandas DataFrame containing spiketimes indexed by spiketrain n_boot: The number of permutation replicates to draw. spikecount_col: The label of the column containing spikecounts spiketrain_col: The label of the column identifying the spiketrain responsible for the spike condition_col: A categorical column containing 0 for the baseline condition and 1 for the experimental condition return_distance_from_chance: If True, returns distance from 0.5 Returns: A pandas DataFrame containing one row per spiketrain with columns {'spiketrain', 'AUCROC', 'p'} """ return ( df.groupby(spiketrain_col) .apply( lambda x: pd.Series( spiketimes.statistics.auc_roc_test( x[spikecount_col].values, x[condition_col].values, n_boot=n_boot, return_distance_from_chance=return_distance_from_chance, ) ) ) .reset_index() .rename(columns={0: "AUCROC", 1: "p"}) )
[docs]def diffmeans_test_by( df: pd.core.frame.DataFrame, n_boot: int = 1000, spikecount_col: str = "spike_count", spiketrain_col: str = "spiketrain", condition_col: str = "cond", ): """ Calculates the difference between means of spike counts for each spike in a data frame and also tests significance using a permutation test. Args: df: A pandas DataFrame containing spiketimes indexed by spiketrain n_boot: The number of permutation replicates to draw. spikecount_col: The label of the column containing spikecounts spiketrain_col: The label of the column identifying the spiketrain responsible for the spike condition_col: A categorical column containing 0 for the baseline condition and 1 for the experimental condition Returns: A pandas DataFrame containing one row per spiketrain with columns {'spiketrain', 'diff_of_means', 'p'} """ return ( df.groupby(spiketrain_col) .apply( lambda x: pd.Series( spiketimes.statistics.diffmeans_test( x[spikecount_col].values, x[condition_col].values, n_boot=n_boot, ) ) ) .reset_index() .rename(columns={0: "diff_of_means", 1: "p"}) )