Source code for spiketimes.df.baseline

import pandas as pd
import numpy as np
import spiketimes.df.binning


def zscore_standardise_by(
    df: pd.core.frame.DataFrame,
    baseline_start_stop: np.ndarray,
    spiketrain_col: str = "spiketrain",
    time_col: str = "time",
    data_col: str = "spike_count",
    returned_colname: str = "zscore",
):
    """
    For each spiketrain, convert a data column to zscores using only data from
    the baseline period.

    The mean and standard deviation of `data_col` are computed per spiketrain
    from the baseline rows only, and are then applied to every row belonging
    to that spiketrain.

    Args:
        df: A pandas DataFrame containing multiple data points per spiketrain
        baseline_start_stop: A numpy array containing the starting and ending time
                             of the baseline period.
        spiketrain_col: The column containing spiketrain identifiers
        time_col: The column containing time points
        data_col: The column containing data to be zscore standardised
        returned_colname: The name of the column in the returned DataFrame
                          which holds the zscores
    Returns:
        A copy of the passed DataFrame with an additional column containing zscores.
        The baseline statistics are joined on with the default (inner) merge, so
        rows of spiketrains that have no data in the baseline period are not
        present in the result, and the result has a fresh integer index.
    """
    # Forward the caller's data_col rather than a hard-coded column name.
    baseline = _create_baseline_df(
        df, baseline_start_stop, data_col=data_col, time_col=time_col
    )

    # Private names for the temporary statistics columns so that they cannot
    # clash with columns called "mean"/"std" already present in `df`, nor
    # with `returned_colname`.
    mean_col = "_baseline_mean"
    std_col = "_baseline_std"

    # One row per spiketrain holding the baseline mean and standard deviation
    # (pandas' default sample standard deviation).
    baseline_stats = (
        baseline.groupby(spiketrain_col)[data_col]
        .agg(["mean", "std"])
        .rename(columns={"mean": mean_col, "std": std_col})
        .reset_index()
    )

    merged = pd.merge(df, baseline_stats, on=spiketrain_col)
    zscores = merged[data_col].subtract(merged[mean_col]).divide(merged[std_col])
    return merged.assign(**{returned_colname: zscores}).drop(
        [mean_col, std_col], axis=1
    )
def _create_baseline_df(df, baseline_start_stop: np.ndarray, data_col, time_col="time"):
    """
    Return only the rows of a pandas DataFrame that fall in the baseline period.

    Rows are assigned to bins with `spiketimes.df.binning.which_bin`, using
    `baseline_start_stop` as the bin edges and `time_col` as the values to
    bin. Only rows whose resulting bin index is 0 are kept.

    Args:
        df: The DataFrame to subset
        baseline_start_stop: Passed to `which_bin` as the bin edges
        data_col: Not used by this function
        time_col: The column containing time points
    Returns:
        The baseline rows of the binned DataFrame, without the helper columns
        produced during binning
    """
    # NOTE(review): relies on which_bin returning a frame that carries
    # "bin_idx" and "bin_values" columns -- confirm against its implementation.
    binned = spiketimes.df.binning.which_bin(
        df=df, spiketimes_col=time_col, bin_edges=baseline_start_stop
    )
    flagged = binned.drop("bin_values", axis=1)
    flagged = flagged.rename(columns={"bin_idx": "is_baseline"})

    # Turn the bin index into a flag: index 0 becomes 1 (baseline), index 1
    # becomes 0, and any other value becomes NaN and is filtered out below.
    idx_to_flag = {0: 1, 1: 0}
    flagged["is_baseline"] = flagged["is_baseline"].map(idx_to_flag)

    in_baseline = flagged["is_baseline"] == 1
    baseline_rows = flagged[in_baseline]
    return baseline_rows.drop("is_baseline", axis=1)