Source code for spiketimes.statistics

import pandas as pd
import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.stats import variation, zmap
from sklearn.metrics import roc_auc_score
from .binning import binned_spiketrain
import mlxtend.evaluate


[docs]def ifr( spiketrain: np.ndarray, fs: float, t_start: float = None, t_stop: float = None, sigma: float = None, as_df: float = False, ): """ Estimate instantaneous firing rate at a regular sampling rate. Args: spiketrain: A numpy array of spiketimes in seconds fs: The sampling rate at which to estimate firing rate t_start: If specified, only returns times after this point t_stop: If specified, only returns times before this point sigma: Parameter controling degree of smooting of firing rate estimates. Set to 0 for no smoothing. as_df: Whether to return results as pandas DataFrame Returns: time_bins, ifr """ if len(spiketrain) <= 3: if as_df: return pd.DataFrame({"time": [], "ifr": []}) return np.nan if t_start is None: t_start = np.min(spiketrain) if t_stop is None: t_stop = np.max(spiketrain) df = binned_spiketrain(spiketrain, fs, t_stop=t_stop, t_start=t_start, as_df=True) df["spike_count"] = df["spike_count"].divide(1 / fs) # smoothing if sigma is None: # infer sigma sigma = _optimal_sigma(spike_counts=df["spike_count"].values) if sigma != 0: smoothed = gaussian_filter1d(df["spike_count"], sigma) else: # no smoothing smoothed = df["spike_count"].values if not as_df: return df["time"].values, smoothed else: return pd.DataFrame({"time": df["time"], "ifr": smoothed})
def _optimal_sigma(spike_counts): return 3 * np.std(spike_counts)
[docs]def mean_firing_rate( spiketrain: np.ndarray, t_start: float = None, t_stop: float = None ): """ Calculate the mean firing rate of a spiketrain by summing total spikes and dividing by time. Args: spiketrain: A numpy array of spiketimes in seconds t_start: The start of the time over which mean firing rate will be calculated. Defaults to the timepoint of the first spike t_end: The end of the time over which mean firing rate will be calculated defaults to the timepoint of the last spike Returns: The mean firing rate of the spiketrain """ if len(spiketrain) <= 3: return np.nan if t_start is None: t_start = np.min(spiketrain) if t_stop is None: t_stop = np.max(spiketrain) total_spikes = len(spiketrain) total_time = t_stop - t_start return total_spikes / total_time
[docs]def mean_firing_rate_ifr( spiketrain: np.ndarray, fs: float, sigma: float = None, t_start: float = None, t_stop: float = None, exclude_below: float = None, ): """ Calculate the mean firing rate of a spiketrain by first estimating the instantaneous firing rate at some sampling interval and then taking the median. Usefull when firing rate during active periods only is desired. Args: spiketrain: A numpy array of spiketimes in seconds fs: The sampling rate at which instantaneous rate is calculated sigma: A oarameter controlling the degree of smoothing level of firing rate estimates. t_start: The start of the time over which mean firing rate will be calculated. Defaults to the timepoint of the first spike t_end: The end of the time over which mean firing rate will be calculated defaults to the timepoint of the last spike min_fr: If specified, calculates mean over time bins where mean firing rate is greater than this threshold Returns: A firing rate estiamte """ if len(spiketrain) <= 3: return np.nan if t_start is None: t_start = np.min(spiketrain) if t_stop is None: t_stop = np.max(spiketrain) _, ifr_ = ifr( spiketrain, fs=fs, sigma=sigma, t_start=t_start, t_stop=t_stop, as_df=False ) if exclude_below: ifr_ = ifr_[ifr_ >= exclude_below] return np.nanmedian(ifr_)
[docs]def inter_spike_intervals(spiketrain: np.ndarray): """ Get the inter-spike-intervals of a spiketrain Args: spiketrain: a numpy array spike times Returns: A numpy array of inter spike intervals """ return np.diff(np.sort(spiketrain))
[docs]def cov(arr: np.ndarray, axis: int = 0): """ Computes the coefficient of variation. Simply wraps the scipy.stats variation function Args: arr: A numpy array axis: The axis over which to calculate cov Returns: The coefficient of variation """ return variation(arr, axis=axis)
[docs]def cv2(arr: np.ndarray): """ Compute the cv2 of an array. The Cv2 is a metric similar to the coefficient of variation but which includes a correction for signals which slowly fluctuate over time. [Suitable for long term neuronal recordings.] Args: arr: A numpy array on which to calculate cv2 Returns: The cv2 of the array """ return 2 * np.mean(np.absolute(np.diff(arr)) / (arr[:-1] + arr[1:]))
[docs]def cv2_isi(spiketrain: np.ndarray): """ Calculate the cv2 of inter-spike-intervals. The Cv2 is a metric similar to the coefficient of variation but which includes a correction for signals which slowly fluctuate over time. [Suitable for long term neuronal recordings.] Args: spiketrain: a numpy array of spiketimes in seconds Returns: The cv2 of inter-spike-intervaks value """ return cv2(inter_spike_intervals(spiketrain))
[docs]def cv_isi(spiketrain: np.ndarray): """ Caluclate the coefficient of variation of inter-spike-intervals. Args: spiketrain: A numpy array of spiketimes Returns: The coeffient of variation of inter-spike-intervals """ return cov(inter_spike_intervals(spiketrain))
[docs]def auc_roc( spike_counts: np.ndarray, which_condition: np.ndarray, return_distance_from_chance: bool = False, ): """ Calculates the Area Under the Receiver Operating Characteristic Curve of spike counts from two conditions. The AUCROC can be used as a metric of the separability of two distrobutions. Args: spike_counts: A numpy array containing spike counts from both conditions which_condition: A numpy array indicating the condition of each spike count entry. 0s for the first condition, and 1s for the second condition. For example, if the first two elements in spike_counts were from the first condition and the third element from the second condition, which_condition would contain [0, 0, 1] return_distance_from_chance: If True, returns distance from 0.5 Returns: The AUCROC score """ score = roc_auc_score(which_condition, spike_counts) if return_distance_from_chance: return abs(0.5 - score) else: return score
[docs]def zscore_standardise(to_standardise: np.ndarray, baseline: np.ndarray): """ Convert an array to zscores calculated on a baseline period. Args: to_normalise: A numpy array to be converted to zscores. baseline: A numpy array containing data used to calculate the mean and standard deviation for zscore conversions. This is usually (but not necessarily) a subsection of to_standardise Returns: A numpy array of zscores """ return zmap(to_standardise, baseline)
[docs]def diffmeans_test( spike_counts: np.ndarray, which_condition: np.ndarray, n_boot: int = 1000, ): """ Calculates the difference between means of spike counts and tests significance using a permutation test. Args: spike_counts: A numpy array containing spike counts from both conditions which_condition: A numpy array indicating the condition of each spike count entry. 0s for the first condition, and 1s for the second condition. For example, if the first two elements in spike_counts were from the first condition and the third element from the second condition, which_condition would contain [0, 0, 1] 1 is subtracted from 2 in the difference calculation. n_boot: The number of bootstrap replicates to draw Returns: Difference of means, p """ c1 = spike_counts[which_condition == 0] c2 = spike_counts[which_condition == 1] observed = c2.mean() - c1.mean() p = mlxtend.evaluate.permutation_test( c1, c2, func=_diffmeans_1d, method="approximate", num_rounds=n_boot, ) return observed, p
def _diffmeans_1d(arr1, arr2): """ Calculate the absolute values of the difference between means of two arrays. """ return np.abs(arr2.mean() - arr1.mean())
[docs]def auc_roc_test( spike_counts: np.ndarray, which_condition: np.ndarray, n_boot: int = 1000, return_distance_from_chance: bool = False, ): """ Calculates the Area Under the Receiver Operating Characteristic Curve of spike counts from two conditions. Also Test significance. The AUCROC can be used as a metric of the separability of two distrobutions. Significance tested using a permutation test. Args: spike_counts: A numpy array containing spike counts from both conditions which_condition: A numpy array indicating the condition of each spike count entry. 0s for the first condition, and 1s for the second condition. For example, if the first two elements in spike_counts were from the first condition and the third element from the second condition, which_condition would contain [0, 0, 1] n_boot: The number of bootstrap replicates to draw return_distance_from_chance: If True, returns distance from 0.5 Returns: The AUCROC score, p """ score = roc_auc_score(which_condition, spike_counts) if return_distance_from_chance: score = abs(0.5 - score) c1 = spike_counts[which_condition == 0] c2 = spike_counts[which_condition == 1] p = mlxtend.evaluate.permutation_test( c1, c2, func=_aucroc_1d, method="approximate", num_rounds=n_boot, ) return score, p
def _aucroc_1d(arr1, arr2, return_distance_from_chance=True): """ Calculate AUC ROC on two arrays. """ data = np.concatenate((arr1, arr2)) y_true = np.concatenate((np.zeros(len(arr1)), np.ones(len(arr2)))) score = roc_auc_score(y_true, data) if return_distance_from_chance: return abs(0.5 - score) else: return score