From aa4340d97f554c7434a73a58a02fc4ee994f33dc Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 10:59:52 -0800 Subject: [PATCH 001/150] Moved readtsq to tdt_step2.py. --- src/guppy/saveStoresList.py | 21 ++------------------- src/guppy/tdt_step2.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 19 deletions(-) create mode 100644 src/guppy/tdt_step2.py diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index ed3a7cf..a837aa9 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -21,6 +21,8 @@ import panel as pn from numpy import float32, float64, int32, int64, uint16 +from guppy.tdt_step2 import readtsq + # hv.extension() pn.extension() @@ -86,25 +88,6 @@ def check_header(df): return arr, check_float -# function to read 'tsq' file -def readtsq(filepath): - names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") - formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) - offsets = 0, 4, 8, 12, 14, 16, 24, 24, 32, 36 - tsq_dtype = np.dtype({"names": names, "formats": formats, "offsets": offsets}, align=True) - path = glob.glob(os.path.join(filepath, "*.tsq")) - if len(path) > 1: - logger.error("Two tsq files are present at the location.") - raise Exception("Two tsq files are present at the location.") - elif len(path) == 0: - return 0 - else: - path = path[0] - tsq = np.fromfile(path, dtype=tsq_dtype) - df = pd.DataFrame(tsq) - return df - - # function to show GUI and save def saveStorenames(inputParameters, data, event_name, flag, filepath): diff --git a/src/guppy/tdt_step2.py b/src/guppy/tdt_step2.py new file mode 100644 index 0000000..09456a7 --- /dev/null +++ b/src/guppy/tdt_step2.py @@ -0,0 +1,26 @@ +import glob +import logging +import os +import numpy as np +from numpy import float32, float64, int32, int64, uint16 +import pandas as pd + +logger = logging.getLogger(__name__) + +# function to read 'tsq' file +def readtsq(filepath): + names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") + formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) + offsets = 0, 4, 8, 12, 14, 16, 24, 24, 32, 36 + tsq_dtype = np.dtype({"names": names, "formats": formats, "offsets": offsets}, align=True) + path = glob.glob(os.path.join(filepath, "*.tsq")) + if len(path) > 1: + logger.error("Two tsq files are present at the location.") + raise Exception("Two tsq files are present at the location.") + elif len(path) == 0: + return 0 + else: + path = path[0] + tsq = np.fromfile(path, dtype=tsq_dtype) + df = pd.DataFrame(tsq) + return df \ No newline at end of file From c868823138945399df1fb2b043f1026b2099e3b6 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 11:19:34 -0800 Subject: [PATCH 002/150] Moved import_np_doric_csv to np_doric_csv_step2.py. 
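
This commit continues the step-2 extraction started in PATCH 001 (readtsq ->
tdt_step2.py) by moving import_np_doric_csv into np_doric_csv_step2.py. A
minimal usage sketch of the relocated helpers follows; the data folder and the
parameter values are illustrative placeholders, not paths or settings taken
from the repository.

    # Sketch: calling the extracted step-2 readers after this refactor.
    # "/path/to/session_folder" and the parameter values below are hypothetical.
    from guppy.tdt_step2 import readtsq
    from guppy.np_doric_csv_step2 import import_np_doric_csv

    filepath = "/path/to/session_folder"

    # readtsq returns a DataFrame of TDT *.tsq headers, or 0 if no *.tsq file is found.
    data = readtsq(filepath)

    # import_np_doric_csv scans the folder for NPM/Doric/custom csv files and
    # returns the recognized event/store names plus a per-file flag describing its type.
    event_name, flag = import_np_doric_csv(
        filepath,
        isosbestic_control=False,  # illustrative value
        num_ch=2,                  # illustrative value
        inputParameters=None,
    )
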
--- src/guppy/np_doric_csv_step2.py | 523 ++++++++++++++++++++++++++++++++ src/guppy/saveStoresList.py | 498 +----------------------------- 2 files changed, 524 insertions(+), 497 deletions(-) create mode 100644 src/guppy/np_doric_csv_step2.py diff --git a/src/guppy/np_doric_csv_step2.py b/src/guppy/np_doric_csv_step2.py new file mode 100644 index 0000000..d06dcc1 --- /dev/null +++ b/src/guppy/np_doric_csv_step2.py @@ -0,0 +1,523 @@ +import glob +import logging +import os +import tkinter as tk +from tkinter import StringVar, messagebox, ttk + +import h5py +import numpy as np +import pandas as pd +import panel as pn + +pn.extension() + +logger = logging.getLogger(__name__) + +# function to see if there are 'csv' files present +# and recognize type of 'csv' files either from +# Neurophotometrics, Doric systems or custom made 'csv' files +# and read data accordingly +def import_np_doric_csv(filepath, isosbestic_control, num_ch, inputParameters=None): + + logger.debug("If it exists, importing either NPM or Doric or csv file based on the structure of file") + # Headless configuration (used to avoid any UI prompts when running tests) + headless = bool(os.environ.get("GUPPY_BASE_DIR")) + npm_timestamp_column_name = None + npm_time_unit = None + npm_split_events = None + if isinstance(inputParameters, dict): + npm_timestamp_column_name = inputParameters.get("npm_timestamp_column_name") + npm_time_unit = inputParameters.get("npm_time_unit", "seconds") + npm_split_events = inputParameters.get("npm_split_events", True) + path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) + sorted(glob.glob(os.path.join(filepath, "*.doric"))) + path_chev = glob.glob(os.path.join(filepath, "*chev*")) + path_chod = glob.glob(os.path.join(filepath, "*chod*")) + path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) + path_event = glob.glob(os.path.join(filepath, "event*")) + # path_sig = glob.glob(os.path.join(filepath, 'sig*')) + path_chev_chod_event = path_chev + path_chod + path_event + path_chpr + + path = sorted(list(set(path) - set(path_chev_chod_event))) + flag = "None" + event_from_filename = [] + flag_arr = [] + for i in range(len(path)): + dirname = os.path.dirname(path[i]) + ext = os.path.basename(path[i]).split(".")[-1] + if ext == "doric": + key_names = read_doric(path[i]) + event_from_filename.extend(key_names) + flag = "doric_doric" + else: + df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) + df = df.dropna(axis=1, how="all") + df_arr = np.array(df).flatten() + check_all_str = [] + for element in df_arr: + try: + float(element) + except: + check_all_str.append(i) + if len(check_all_str) == len(df_arr): + df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) + df = df.drop(["Time(s)"], axis=1) + event_from_filename.extend(list(df.columns)) + flag = "doric_csv" + logger.info(flag) + else: + df = pd.read_csv(path[i], index_col=False) + # with warnings.catch_warnings(): + # warnings.simplefilter("error") + # try: + # df = pd.read_csv(path[i], index_col=False, dtype=float) + # except: + # df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) # to make process faster reading just first 10 rows + # df = df.drop(['Time(s)'], axis=1) + # event_from_filename.extend(list(df.columns)) + # flag = 'doric_csv' + if flag == "doric_csv" or flag == "doric_doric": + continue + else: + colnames, value = check_header(df) + # logger.info(len(colnames), len(value)) + + # check dataframe structure and read data accordingly + if len(value) > 0: + columns_isstr = False + df = 
pd.read_csv(path[i], header=None) + cols = np.array(list(df.columns), dtype=str) + else: + df = df + columns_isstr = True + cols = np.array(list(df.columns), dtype=str) + # check the structure of dataframe and assign flag to the type of file + if len(cols) == 1: + if cols[0].lower() != "timestamps": + logger.error("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") + raise Exception("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") + else: + flag = "event_csv" + elif len(cols) == 3: + arr1 = np.array(["timestamps", "data", "sampling_rate"]) + arr2 = np.char.lower(np.array(cols)) + if (np.sort(arr1) == np.sort(arr2)).all() == False: + logger.error( + "\033[1m" + + "Column names should be timestamps, data and sampling_rate (all lower-cases)" + + "\033[0m" + ) + raise Exception( + "\033[1m" + + "Column names should be timestamps, data and sampling_rate (all lower-cases)" + + "\033[0m" + ) + else: + flag = "data_csv" + elif len(cols) == 2: + flag = "event_or_data_np" + elif len(cols) >= 2: + flag = "data_np" + else: + logger.error("Number of columns in csv file does not make sense.") + raise Exception("Number of columns in csv file does not make sense.") + + if columns_isstr == True and ( + "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) + ): + flag = flag + "_v2" + else: + flag = flag + + # used assigned flags to process the files and read the data + if flag == "event_or_data_np": + arr = list(df.iloc[:, 1]) + check_float = [True for i in arr if isinstance(i, float)] + if len(arr) == len(check_float) and columns_isstr == False: + flag = "data_np" + elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): + flag = "event_np" + else: + flag = "event_np" + else: + pass + + flag_arr.append(flag) + logger.info(flag) + if flag == "event_csv" or flag == "data_csv": + name = os.path.basename(path[i]).split(".")[0] + event_from_filename.append(name) + elif flag == "data_np": + file = f"file{str(i)}_" + df, indices_dict, num_channels = decide_indices(file, df, flag, num_ch) + keys = list(indices_dict.keys()) + for k in range(len(keys)): + for j in range(df.shape[1]): + if j == 0: + timestamps = df.iloc[:, j][indices_dict[keys[k]]] + # timestamps_odd = df.iloc[:,j][odd_indices] + else: + d = dict() + d["timestamps"] = timestamps + d["data"] = df.iloc[:, j][indices_dict[keys[k]]] + + df_ch = pd.DataFrame(d) + df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) + event_from_filename.append(keys[k] + str(j)) + + elif flag == "event_np": + type_val = np.array(df.iloc[:, 1]) + type_val_unique = np.unique(type_val) + if headless: + response = 1 if bool(npm_split_events) else 0 + else: + window = tk.Tk() + if len(type_val_unique) > 1: + response = messagebox.askyesno( + "Multiple event TTLs", + "Based on the TTL file,\ + it looks like TTLs \ + belongs to multiple behavior type. 
\ + Do you want to create multiple files for each \ + behavior type ?", + ) + else: + response = 0 + window.destroy() + if response == 1: + timestamps = np.array(df.iloc[:, 0]) + for j in range(len(type_val_unique)): + idx = np.where(type_val == type_val_unique[j]) + d = dict() + d["timestamps"] = timestamps[idx] + df_new = pd.DataFrame(d) + df_new.to_csv(os.path.join(dirname, "event" + str(type_val_unique[j]) + ".csv"), index=False) + event_from_filename.append("event" + str(type_val_unique[j])) + else: + timestamps = np.array(df.iloc[:, 0]) + d = dict() + d["timestamps"] = timestamps + df_new = pd.DataFrame(d) + df_new.to_csv(os.path.join(dirname, "event" + str(0) + ".csv"), index=False) + event_from_filename.append("event" + str(0)) + else: + file = f"file{str(i)}_" + df, ts_unit = decide_ts_unit_for_npm( + df, timestamp_column_name=npm_timestamp_column_name, time_unit=npm_time_unit, headless=headless + ) + df, indices_dict, num_channels = decide_indices(file, df, flag) + keys = list(indices_dict.keys()) + for k in range(len(keys)): + for j in range(df.shape[1]): + if j == 0: + timestamps = df.iloc[:, j][indices_dict[keys[k]]] + # timestamps_odd = df.iloc[:,j][odd_indices] + else: + d = dict() + d["timestamps"] = timestamps + d["data"] = df.iloc[:, j][indices_dict[keys[k]]] + + df_ch = pd.DataFrame(d) + df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) + event_from_filename.append(keys[k] + str(j)) + + path_chev = glob.glob(os.path.join(filepath, "*chev*")) + path_chod = glob.glob(os.path.join(filepath, "*chod*")) + path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) + path_event = glob.glob(os.path.join(filepath, "event*")) + # path_sig = glob.glob(os.path.join(filepath, 'sig*')) + path_chev_chod_chpr = [path_chev, path_chod, path_chpr] + if ( + ("data_np_v2" in flag_arr or "data_np" in flag_arr) + and ("event_np" in flag_arr) + and (i == len(path) - 1) + ) or ( + ("data_np_v2" in flag_arr or "data_np" in flag_arr) and (i == len(path) - 1) + ): # i==len(path)-1 and or 'event_np' in flag + num_path_chev, num_path_chod, num_path_chpr = len(path_chev), len(path_chod), len(path_chpr) + arr_len, no_ch = [], [] + for i in range(len(path_chev_chod_chpr)): + if len(path_chev_chod_chpr[i]) > 0: + arr_len.append(len(path_chev_chod_chpr[i])) + else: + continue + + unique_arr_len = np.unique(np.array(arr_len)) + if "data_np_v2" in flag_arr: + if ts_unit == "seconds": + divisor = 1 + elif ts_unit == "milliseconds": + divisor = 1e3 + else: + divisor = 1e6 + else: + divisor = 1000 + + for j in range(len(path_event)): + df_event = pd.read_csv(path_event[j]) + df_chev = pd.read_csv(path_chev[0]) + df_event["timestamps"] = (df_event["timestamps"] - df_chev["timestamps"][0]) / divisor + df_event.to_csv(path_event[j], index=False) + if unique_arr_len.shape[0] == 1: + for j in range(len(path_chev)): + if file + "chev" in indices_dict.keys(): + df_chev = pd.read_csv(path_chev[j]) + df_chev["timestamps"] = (df_chev["timestamps"] - df_chev["timestamps"][0]) / divisor + df_chev["sampling_rate"] = np.full(df_chev.shape[0], np.nan) + df_chev.at[0, "sampling_rate"] = df_chev.shape[0] / ( + df_chev["timestamps"].iloc[-1] - df_chev["timestamps"].iloc[0] + ) + df_chev.to_csv(path_chev[j], index=False) + + if file + "chod" in indices_dict.keys(): + df_chod = pd.read_csv(path_chod[j]) + df_chod["timestamps"] = df_chev["timestamps"] + df_chod["sampling_rate"] = np.full(df_chod.shape[0], np.nan) + df_chod.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] + df_chod.to_csv(path_chod[j], 
index=False) + + if file + "chpr" in indices_dict.keys(): + df_chpr = pd.read_csv(path_chpr[j]) + df_chpr["timestamps"] = df_chev["timestamps"] + df_chpr["sampling_rate"] = np.full(df_chpr.shape[0], np.nan) + df_chpr.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] + df_chpr.to_csv(path_chpr[j], index=False) + else: + logger.error("Number of channels should be same for all regions.") + raise Exception("Number of channels should be same for all regions.") + else: + pass + logger.info("Importing of either NPM or Doric or csv file is done.") + return event_from_filename, flag_arr + +# ---------------------------------------------------------------------------------------------------------------------- +# Functions that import_np_doric_csv uses +# ---------------------------------------------------------------------------------------------------------------------- + +def read_doric(filepath): + with h5py.File(filepath, "r") as f: + if "Traces" in list(f.keys()): + keys = access_keys_doricV1(f) + elif list(f.keys()) == ["Configurations", "DataAcquisition"]: + keys = access_keys_doricV6(f) + + return keys + + +def check_header(df): + arr = list(df.columns) + check_float = [] + for i in arr: + try: + check_float.append(float(i)) + except: + pass + + return arr, check_float + +# function to decide indices of interleaved channels +# in neurophotometrics data +def decide_indices(file, df, flag, num_ch=2): + ch_name = [file + "chev", file + "chod", file + "chpr"] + if len(ch_name) < num_ch: + logger.error( + "Number of channels parameters in Input Parameters GUI is more than 3. \ + Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." + ) + raise Exception( + "Number of channels parameters in Input Parameters GUI is more than 3. \ + Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." 
+ ) + if flag == "data_np": + indices_dict = dict() + for i in range(num_ch): + indices_dict[ch_name[i]] = np.arange(i, df.shape[0], num_ch) + + else: + cols = np.array(list(df.columns)) + if "flags" in np.char.lower(np.array(cols)): + arr = ["FrameCounter", "Flags"] + state = np.array(df["Flags"]) + elif "ledstate" in np.char.lower(np.array(cols)): + arr = ["FrameCounter", "LedState"] + state = np.array(df["LedState"]) + else: + logger.error( + "File type shows Neurophotometrics newer version \ + data but column names does not have Flags or LedState" + ) + raise Exception( + "File type shows Neurophotometrics newer version \ + data but column names does not have Flags or LedState" + ) + + num_ch, ch = check_channels(state) + indices_dict = dict() + for i in range(num_ch): + first_occurrence = np.where(state == ch[i])[0] + indices_dict[ch_name[i]] = np.arange(first_occurrence[0], df.shape[0], num_ch) + + df = df.drop(arr, axis=1) + + return df, indices_dict, num_ch + + +# function to decide NPM timestamps unit (seconds, ms or us) +def decide_ts_unit_for_npm(df, timestamp_column_name=None, time_unit=None, headless=False): + col_names = np.array(list(df.columns)) + col_names_ts = [""] + for name in col_names: + if "timestamp" in name.lower(): + col_names_ts.append(name) + + ts_unit = "seconds" + if len(col_names_ts) > 2: + # Headless path: auto-select column/unit without any UI + if headless: + if timestamp_column_name is not None: + assert ( + timestamp_column_name in col_names_ts + ), f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" + chosen = timestamp_column_name + else: + chosen = col_names_ts[1] + df.insert(1, "Timestamp", df[chosen]) + df = df.drop(col_names_ts[1:], axis=1) + valid_units = {"seconds", "milliseconds", "microseconds"} + ts_unit = time_unit if (isinstance(time_unit, str) and time_unit in valid_units) else "seconds" + return df, ts_unit + # def comboBoxSelected(event): + # logger.info(event.widget.get()) + + window = tk.Tk() + window.title("Select appropriate options for timestamps") + window.geometry("500x200") + holdComboboxValues = dict() + + timestamps_label = ttk.Label(window, text="Select which timestamps to use : ").grid( + row=0, column=1, pady=25, padx=25 + ) + holdComboboxValues["timestamps"] = StringVar() + timestamps_combo = ttk.Combobox(window, values=col_names_ts, textvariable=holdComboboxValues["timestamps"]) + timestamps_combo.grid(row=0, column=2, pady=25, padx=25) + timestamps_combo.current(0) + # timestamps_combo.bind("<>", comboBoxSelected) + + time_unit_label = ttk.Label(window, text="Select timestamps unit : ").grid(row=1, column=1, pady=25, padx=25) + holdComboboxValues["time_unit"] = StringVar() + time_unit_combo = ttk.Combobox( + window, values=["", "seconds", "milliseconds", "microseconds"], textvariable=holdComboboxValues["time_unit"] + ) + time_unit_combo.grid(row=1, column=2, pady=25, padx=25) + time_unit_combo.current(0) + # time_unit_combo.bind("<>", comboBoxSelected) + window.lift() + window.after(500, lambda: window.lift()) + window.mainloop() + + if holdComboboxValues["timestamps"].get(): + df.insert(1, "Timestamp", df[holdComboboxValues["timestamps"].get()]) + df = df.drop(col_names_ts[1:], axis=1) + else: + messagebox.showerror( + "All options not selected", + "All the options for timestamps \ + were not selected. Please select appropriate options", + ) + logger.error( + "All the options for timestamps \ + were not selected. 
Please select appropriate options" + ) + raise Exception( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + if holdComboboxValues["time_unit"].get(): + if holdComboboxValues["time_unit"].get() == "seconds": + ts_unit = holdComboboxValues["time_unit"].get() + elif holdComboboxValues["time_unit"].get() == "milliseconds": + ts_unit = holdComboboxValues["time_unit"].get() + else: + ts_unit = holdComboboxValues["time_unit"].get() + else: + messagebox.showerror( + "All options not selected", + "All the options for timestamps \ + were not selected. Please select appropriate options", + ) + logger.error( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + raise Exception( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + else: + pass + + return df, ts_unit + + +# ---------------------------------------------------------------------------------------------------------------------- +# Functions that read_doric uses +# ---------------------------------------------------------------------------------------------------------------------- + +def access_keys_doricV6(doric_file): + data = [doric_file["DataAcquisition"]] + res = [] + while len(data) != 0: + members = len(data) + while members != 0: + members -= 1 + data, last_element = separate_last_element(data) + if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): + res.append(last_element.name) + elif isinstance(last_element, h5py.Group): + data.extend(reversed([last_element[k] for k in last_element.keys()])) + + keys = [] + for element in res: + sep_values = element.split("/") + if sep_values[-1] == "Values": + keys.append(f"{sep_values[-3]}/{sep_values[-2]}") + else: + keys.append(f"{sep_values[-2]}/{sep_values[-1]}") + + return keys + + +def access_keys_doricV1(doric_file): + keys = list(doric_file["Traces"]["Console"].keys()) + keys.remove("Time(s)") + + return keys + +# ---------------------------------------------------------------------------------------------------------------------- +# Functions that decide_indices uses +# ---------------------------------------------------------------------------------------------------------------------- + +# check flag consistency in neurophotometrics data +def check_channels(state): + state = state.astype(int) + unique_state = np.unique(state[2:12]) + if unique_state.shape[0] > 3: + logger.error( + "Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." + ) + raise Exception( + "Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." 
+ ) + + return unique_state.shape[0], unique_state + + +# ---------------------------------------------------------------------------------------------------------------------- +# Functions that access_keys_doricV6 uses +# ---------------------------------------------------------------------------------------------------------------------- +def separate_last_element(arr): + l = arr[-1] + return arr[:-1], l \ No newline at end of file diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index a837aa9..d7380ec 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -22,6 +22,7 @@ from numpy import float32, float64, int32, int64, uint16 from guppy.tdt_step2 import readtsq +from guppy.np_doric_csv_step2 import import_np_doric_csv # hv.extension() pn.extension() @@ -76,18 +77,6 @@ def make_dir(filepath): return op -def check_header(df): - arr = list(df.columns) - check_float = [] - for i in arr: - try: - check_float.append(float(i)) - except: - pass - - return arr, check_float - - # function to show GUI and save def saveStorenames(inputParameters, data, event_name, flag, filepath): @@ -582,491 +571,6 @@ def save_button(event=None): template.show(port=number) -# check flag consistency in neurophotometrics data -def check_channels(state): - state = state.astype(int) - unique_state = np.unique(state[2:12]) - if unique_state.shape[0] > 3: - logger.error( - "Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - raise Exception( - "Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - - return unique_state.shape[0], unique_state - - -# function to decide NPM timestamps unit (seconds, ms or us) -def decide_ts_unit_for_npm(df, timestamp_column_name=None, time_unit=None, headless=False): - col_names = np.array(list(df.columns)) - col_names_ts = [""] - for name in col_names: - if "timestamp" in name.lower(): - col_names_ts.append(name) - - ts_unit = "seconds" - if len(col_names_ts) > 2: - # Headless path: auto-select column/unit without any UI - if headless: - if timestamp_column_name is not None: - assert ( - timestamp_column_name in col_names_ts - ), f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" - chosen = timestamp_column_name - else: - chosen = col_names_ts[1] - df.insert(1, "Timestamp", df[chosen]) - df = df.drop(col_names_ts[1:], axis=1) - valid_units = {"seconds", "milliseconds", "microseconds"} - ts_unit = time_unit if (isinstance(time_unit, str) and time_unit in valid_units) else "seconds" - return df, ts_unit - # def comboBoxSelected(event): - # logger.info(event.widget.get()) - - window = tk.Tk() - window.title("Select appropriate options for timestamps") - window.geometry("500x200") - holdComboboxValues = dict() - - timestamps_label = ttk.Label(window, text="Select which timestamps to use : ").grid( - row=0, column=1, pady=25, padx=25 - ) - holdComboboxValues["timestamps"] = StringVar() - timestamps_combo = ttk.Combobox(window, values=col_names_ts, textvariable=holdComboboxValues["timestamps"]) - timestamps_combo.grid(row=0, column=2, pady=25, padx=25) - timestamps_combo.current(0) - # timestamps_combo.bind("<>", comboBoxSelected) - - time_unit_label = ttk.Label(window, text="Select timestamps unit : ").grid(row=1, column=1, pady=25, padx=25) - holdComboboxValues["time_unit"] = StringVar() - 
time_unit_combo = ttk.Combobox( - window, values=["", "seconds", "milliseconds", "microseconds"], textvariable=holdComboboxValues["time_unit"] - ) - time_unit_combo.grid(row=1, column=2, pady=25, padx=25) - time_unit_combo.current(0) - # time_unit_combo.bind("<>", comboBoxSelected) - window.lift() - window.after(500, lambda: window.lift()) - window.mainloop() - - if holdComboboxValues["timestamps"].get(): - df.insert(1, "Timestamp", df[holdComboboxValues["timestamps"].get()]) - df = df.drop(col_names_ts[1:], axis=1) - else: - messagebox.showerror( - "All options not selected", - "All the options for timestamps \ - were not selected. Please select appropriate options", - ) - logger.error( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - raise Exception( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - if holdComboboxValues["time_unit"].get(): - if holdComboboxValues["time_unit"].get() == "seconds": - ts_unit = holdComboboxValues["time_unit"].get() - elif holdComboboxValues["time_unit"].get() == "milliseconds": - ts_unit = holdComboboxValues["time_unit"].get() - else: - ts_unit = holdComboboxValues["time_unit"].get() - else: - messagebox.showerror( - "All options not selected", - "All the options for timestamps \ - were not selected. Please select appropriate options", - ) - logger.error( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - raise Exception( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - else: - pass - - return df, ts_unit - - -# function to decide indices of interleaved channels -# in neurophotometrics data -def decide_indices(file, df, flag, num_ch=2): - ch_name = [file + "chev", file + "chod", file + "chpr"] - if len(ch_name) < num_ch: - logger.error( - "Number of channels parameters in Input Parameters GUI is more than 3. \ - Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - raise Exception( - "Number of channels parameters in Input Parameters GUI is more than 3. \ - Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." 
- ) - if flag == "data_np": - indices_dict = dict() - for i in range(num_ch): - indices_dict[ch_name[i]] = np.arange(i, df.shape[0], num_ch) - - else: - cols = np.array(list(df.columns)) - if "flags" in np.char.lower(np.array(cols)): - arr = ["FrameCounter", "Flags"] - state = np.array(df["Flags"]) - elif "ledstate" in np.char.lower(np.array(cols)): - arr = ["FrameCounter", "LedState"] - state = np.array(df["LedState"]) - else: - logger.error( - "File type shows Neurophotometrics newer version \ - data but column names does not have Flags or LedState" - ) - raise Exception( - "File type shows Neurophotometrics newer version \ - data but column names does not have Flags or LedState" - ) - - num_ch, ch = check_channels(state) - indices_dict = dict() - for i in range(num_ch): - first_occurrence = np.where(state == ch[i])[0] - indices_dict[ch_name[i]] = np.arange(first_occurrence[0], df.shape[0], num_ch) - - df = df.drop(arr, axis=1) - - return df, indices_dict, num_ch - - -def separate_last_element(arr): - l = arr[-1] - return arr[:-1], l - - -def access_keys_doricV6(doric_file): - data = [doric_file["DataAcquisition"]] - res = [] - while len(data) != 0: - members = len(data) - while members != 0: - members -= 1 - data, last_element = separate_last_element(data) - if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): - res.append(last_element.name) - elif isinstance(last_element, h5py.Group): - data.extend(reversed([last_element[k] for k in last_element.keys()])) - - keys = [] - for element in res: - sep_values = element.split("/") - if sep_values[-1] == "Values": - keys.append(f"{sep_values[-3]}/{sep_values[-2]}") - else: - keys.append(f"{sep_values[-2]}/{sep_values[-1]}") - - return keys - - -def access_keys_doricV1(doric_file): - keys = list(doric_file["Traces"]["Console"].keys()) - keys.remove("Time(s)") - - return keys - - -def read_doric(filepath): - with h5py.File(filepath, "r") as f: - if "Traces" in list(f.keys()): - keys = access_keys_doricV1(f) - elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - keys = access_keys_doricV6(f) - - return keys - - -# function to see if there are 'csv' files present -# and recognize type of 'csv' files either from -# Neurophotometrics, Doric systems or custom made 'csv' files -# and read data accordingly -def import_np_doric_csv(filepath, isosbestic_control, num_ch, inputParameters=None): - - logger.debug("If it exists, importing either NPM or Doric or csv file based on the structure of file") - # Headless configuration (used to avoid any UI prompts when running tests) - headless = bool(os.environ.get("GUPPY_BASE_DIR")) - npm_timestamp_column_name = None - npm_time_unit = None - npm_split_events = None - if isinstance(inputParameters, dict): - npm_timestamp_column_name = inputParameters.get("npm_timestamp_column_name") - npm_time_unit = inputParameters.get("npm_time_unit", "seconds") - npm_split_events = inputParameters.get("npm_split_events", True) - path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) + sorted(glob.glob(os.path.join(filepath, "*.doric"))) - path_chev = glob.glob(os.path.join(filepath, "*chev*")) - path_chod = glob.glob(os.path.join(filepath, "*chod*")) - path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) - path_event = glob.glob(os.path.join(filepath, "event*")) - # path_sig = glob.glob(os.path.join(filepath, 'sig*')) - path_chev_chod_event = path_chev + path_chod + path_event + path_chpr - - path = sorted(list(set(path) - set(path_chev_chod_event))) - flag = "None" - 
event_from_filename = [] - flag_arr = [] - for i in range(len(path)): - dirname = os.path.dirname(path[i]) - ext = os.path.basename(path[i]).split(".")[-1] - if ext == "doric": - key_names = read_doric(path[i]) - event_from_filename.extend(key_names) - flag = "doric_doric" - else: - df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) - df = df.dropna(axis=1, how="all") - df_arr = np.array(df).flatten() - check_all_str = [] - for element in df_arr: - try: - float(element) - except: - check_all_str.append(i) - if len(check_all_str) == len(df_arr): - df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) - df = df.drop(["Time(s)"], axis=1) - event_from_filename.extend(list(df.columns)) - flag = "doric_csv" - logger.info(flag) - else: - df = pd.read_csv(path[i], index_col=False) - # with warnings.catch_warnings(): - # warnings.simplefilter("error") - # try: - # df = pd.read_csv(path[i], index_col=False, dtype=float) - # except: - # df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) # to make process faster reading just first 10 rows - # df = df.drop(['Time(s)'], axis=1) - # event_from_filename.extend(list(df.columns)) - # flag = 'doric_csv' - if flag == "doric_csv" or flag == "doric_doric": - continue - else: - colnames, value = check_header(df) - # logger.info(len(colnames), len(value)) - - # check dataframe structure and read data accordingly - if len(value) > 0: - columns_isstr = False - df = pd.read_csv(path[i], header=None) - cols = np.array(list(df.columns), dtype=str) - else: - df = df - columns_isstr = True - cols = np.array(list(df.columns), dtype=str) - # check the structure of dataframe and assign flag to the type of file - if len(cols) == 1: - if cols[0].lower() != "timestamps": - logger.error("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") - raise Exception("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") - else: - flag = "event_csv" - elif len(cols) == 3: - arr1 = np.array(["timestamps", "data", "sampling_rate"]) - arr2 = np.char.lower(np.array(cols)) - if (np.sort(arr1) == np.sort(arr2)).all() == False: - logger.error( - "\033[1m" - + "Column names should be timestamps, data and sampling_rate (all lower-cases)" - + "\033[0m" - ) - raise Exception( - "\033[1m" - + "Column names should be timestamps, data and sampling_rate (all lower-cases)" - + "\033[0m" - ) - else: - flag = "data_csv" - elif len(cols) == 2: - flag = "event_or_data_np" - elif len(cols) >= 2: - flag = "data_np" - else: - logger.error("Number of columns in csv file does not make sense.") - raise Exception("Number of columns in csv file does not make sense.") - - if columns_isstr == True and ( - "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) - ): - flag = flag + "_v2" - else: - flag = flag - - # used assigned flags to process the files and read the data - if flag == "event_or_data_np": - arr = list(df.iloc[:, 1]) - check_float = [True for i in arr if isinstance(i, float)] - if len(arr) == len(check_float) and columns_isstr == False: - flag = "data_np" - elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): - flag = "event_np" - else: - flag = "event_np" - else: - pass - - flag_arr.append(flag) - logger.info(flag) - if flag == "event_csv" or flag == "data_csv": - name = os.path.basename(path[i]).split(".")[0] - event_from_filename.append(name) - elif flag == "data_np": - file = f"file{str(i)}_" - df, indices_dict, num_channels = decide_indices(file, df, 
flag, num_ch) - keys = list(indices_dict.keys()) - for k in range(len(keys)): - for j in range(df.shape[1]): - if j == 0: - timestamps = df.iloc[:, j][indices_dict[keys[k]]] - # timestamps_odd = df.iloc[:,j][odd_indices] - else: - d = dict() - d["timestamps"] = timestamps - d["data"] = df.iloc[:, j][indices_dict[keys[k]]] - - df_ch = pd.DataFrame(d) - df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) - event_from_filename.append(keys[k] + str(j)) - - elif flag == "event_np": - type_val = np.array(df.iloc[:, 1]) - type_val_unique = np.unique(type_val) - if headless: - response = 1 if bool(npm_split_events) else 0 - else: - window = tk.Tk() - if len(type_val_unique) > 1: - response = messagebox.askyesno( - "Multiple event TTLs", - "Based on the TTL file,\ - it looks like TTLs \ - belongs to multiple behavior type. \ - Do you want to create multiple files for each \ - behavior type ?", - ) - else: - response = 0 - window.destroy() - if response == 1: - timestamps = np.array(df.iloc[:, 0]) - for j in range(len(type_val_unique)): - idx = np.where(type_val == type_val_unique[j]) - d = dict() - d["timestamps"] = timestamps[idx] - df_new = pd.DataFrame(d) - df_new.to_csv(os.path.join(dirname, "event" + str(type_val_unique[j]) + ".csv"), index=False) - event_from_filename.append("event" + str(type_val_unique[j])) - else: - timestamps = np.array(df.iloc[:, 0]) - d = dict() - d["timestamps"] = timestamps - df_new = pd.DataFrame(d) - df_new.to_csv(os.path.join(dirname, "event" + str(0) + ".csv"), index=False) - event_from_filename.append("event" + str(0)) - else: - file = f"file{str(i)}_" - df, ts_unit = decide_ts_unit_for_npm( - df, timestamp_column_name=npm_timestamp_column_name, time_unit=npm_time_unit, headless=headless - ) - df, indices_dict, num_channels = decide_indices(file, df, flag) - keys = list(indices_dict.keys()) - for k in range(len(keys)): - for j in range(df.shape[1]): - if j == 0: - timestamps = df.iloc[:, j][indices_dict[keys[k]]] - # timestamps_odd = df.iloc[:,j][odd_indices] - else: - d = dict() - d["timestamps"] = timestamps - d["data"] = df.iloc[:, j][indices_dict[keys[k]]] - - df_ch = pd.DataFrame(d) - df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) - event_from_filename.append(keys[k] + str(j)) - - path_chev = glob.glob(os.path.join(filepath, "*chev*")) - path_chod = glob.glob(os.path.join(filepath, "*chod*")) - path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) - path_event = glob.glob(os.path.join(filepath, "event*")) - # path_sig = glob.glob(os.path.join(filepath, 'sig*')) - path_chev_chod_chpr = [path_chev, path_chod, path_chpr] - if ( - ("data_np_v2" in flag_arr or "data_np" in flag_arr) - and ("event_np" in flag_arr) - and (i == len(path) - 1) - ) or ( - ("data_np_v2" in flag_arr or "data_np" in flag_arr) and (i == len(path) - 1) - ): # i==len(path)-1 and or 'event_np' in flag - num_path_chev, num_path_chod, num_path_chpr = len(path_chev), len(path_chod), len(path_chpr) - arr_len, no_ch = [], [] - for i in range(len(path_chev_chod_chpr)): - if len(path_chev_chod_chpr[i]) > 0: - arr_len.append(len(path_chev_chod_chpr[i])) - else: - continue - - unique_arr_len = np.unique(np.array(arr_len)) - if "data_np_v2" in flag_arr: - if ts_unit == "seconds": - divisor = 1 - elif ts_unit == "milliseconds": - divisor = 1e3 - else: - divisor = 1e6 - else: - divisor = 1000 - - for j in range(len(path_event)): - df_event = pd.read_csv(path_event[j]) - df_chev = pd.read_csv(path_chev[0]) - df_event["timestamps"] = 
(df_event["timestamps"] - df_chev["timestamps"][0]) / divisor - df_event.to_csv(path_event[j], index=False) - if unique_arr_len.shape[0] == 1: - for j in range(len(path_chev)): - if file + "chev" in indices_dict.keys(): - df_chev = pd.read_csv(path_chev[j]) - df_chev["timestamps"] = (df_chev["timestamps"] - df_chev["timestamps"][0]) / divisor - df_chev["sampling_rate"] = np.full(df_chev.shape[0], np.nan) - df_chev.at[0, "sampling_rate"] = df_chev.shape[0] / ( - df_chev["timestamps"].iloc[-1] - df_chev["timestamps"].iloc[0] - ) - df_chev.to_csv(path_chev[j], index=False) - - if file + "chod" in indices_dict.keys(): - df_chod = pd.read_csv(path_chod[j]) - df_chod["timestamps"] = df_chev["timestamps"] - df_chod["sampling_rate"] = np.full(df_chod.shape[0], np.nan) - df_chod.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] - df_chod.to_csv(path_chod[j], index=False) - - if file + "chpr" in indices_dict.keys(): - df_chpr = pd.read_csv(path_chpr[j]) - df_chpr["timestamps"] = df_chev["timestamps"] - df_chpr["sampling_rate"] = np.full(df_chpr.shape[0], np.nan) - df_chpr.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] - df_chpr.to_csv(path_chpr[j], index=False) - else: - logger.error("Number of channels should be same for all regions.") - raise Exception("Number of channels should be same for all regions.") - else: - pass - logger.info("Importing of either NPM or Doric or csv file is done.") - return event_from_filename, flag_arr - # function to read input parameters and run the saveStorenames function def execute(inputParameters): From a06cae4233657f52a77d5935928bcab9bceb6de7 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 12:04:16 -0800 Subject: [PATCH 003/150] Split import_csv out from import_np_doric_csv --- src/guppy/csv_step2.py | 99 +++++++++++++++++++++++++++++++++++++ src/guppy/saveStoresList.py | 16 ++++-- 2 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 src/guppy/csv_step2.py diff --git a/src/guppy/csv_step2.py b/src/guppy/csv_step2.py new file mode 100644 index 0000000..4d9b800 --- /dev/null +++ b/src/guppy/csv_step2.py @@ -0,0 +1,99 @@ +import glob +import logging +import os +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +def check_header(df): + arr = list(df.columns) + check_float = [] + for i in arr: + try: + check_float.append(float(i)) + except: + pass + + return arr, check_float + +def import_csv_step2(filepath): + logger.debug("If it exists, importing either NPM or Doric or csv file based on the structure of file") + path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) + + path = sorted(list(set(path))) + flag = "None" + event_from_filename = [] + flag_arr = [] + for i in range(len(path)): + ext = os.path.basename(path[i]).split(".")[-1] + assert ext == "csv", "Only .csv files are supported by import_csv function." + df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) + df = df.dropna(axis=1, how="all") + df_arr = np.array(df).flatten() + check_all_str = [] + for element in df_arr: + try: + float(element) + except: + check_all_str.append(i) + assert len(check_all_str) != len(df_arr), "This file appears to be doric .csv. This function only supports standard .csv files." 
+ df = pd.read_csv(path[i], index_col=False) + + _, value = check_header(df) + + # check dataframe structure and read data accordingly + if len(value) > 0: + columns_isstr = False + df = pd.read_csv(path[i], header=None) + cols = np.array(list(df.columns), dtype=str) + else: + df = df + columns_isstr = True + cols = np.array(list(df.columns), dtype=str) + # check the structure of dataframe and assign flag to the type of file + if len(cols) == 1: + if cols[0].lower() != "timestamps": + logger.error("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") + raise Exception("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") + else: + flag = "event_csv" + elif len(cols) == 3: + arr1 = np.array(["timestamps", "data", "sampling_rate"]) + arr2 = np.char.lower(np.array(cols)) + if (np.sort(arr1) == np.sort(arr2)).all() == False: + logger.error( + "\033[1m" + + "Column names should be timestamps, data and sampling_rate (all lower-cases)" + + "\033[0m" + ) + raise Exception( + "\033[1m" + + "Column names should be timestamps, data and sampling_rate (all lower-cases)" + + "\033[0m" + ) + else: + flag = "data_csv" + elif len(cols) == 2: + raise ValueError("Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data.") + elif len(cols) >= 2: + raise ValueError("Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data.") + else: + logger.error("Number of columns in csv file does not make sense.") + raise Exception("Number of columns in csv file does not make sense.") + + if columns_isstr == True and ( + "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) + ): + flag = flag + "_v2" + else: + flag = flag + + flag_arr.append(flag) + logger.info(flag) + assert flag == "event_csv" or flag == "data_csv", "This function only supports standard event_csv and data_csv files." + name = os.path.basename(path[i]).split(".")[0] + event_from_filename.append(name) + + logger.info("Importing of csv file is done.") + return event_from_filename, flag_arr \ No newline at end of file diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index d7380ec..1f6bae7 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -21,8 +21,10 @@ import panel as pn from numpy import float32, float64, int32, int64, uint16 +from guppy.readTevTsq import import_csv from guppy.tdt_step2 import readtsq from guppy.np_doric_csv_step2 import import_np_doric_csv +from guppy.csv_step2 import import_csv_step2 # hv.extension() pn.extension() @@ -585,10 +587,16 @@ def execute(inputParameters): try: for i in folderNames: filepath = os.path.join(inputParameters["abspath"], i) - data = readtsq(filepath) - event_name, flag = import_np_doric_csv( - filepath, isosbestic_control, num_ch, inputParameters=inputParameters - ) + modality = "csv" # TODO: ask for modality from the user + if modality == "tdt": + data = readtsq(filepath) + event_name, flag = None, None + elif modality == "csv": + data = 0 + event_name, flag = import_csv_step2(filepath) + else: + raise ValueError("Modality not recognized. 
Please use 'tdt' or 'csv'.") + saveStorenames(inputParameters, data, event_name, flag, filepath) logger.info("#" * 400) except Exception as e: From 66d60e2aabf95eac48556d747dd8bbf2a26b0dd6 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 12:58:33 -0800 Subject: [PATCH 004/150] Fixed TDT --- src/guppy/saveStoresList.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 1f6bae7..392c04e 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -587,10 +587,10 @@ def execute(inputParameters): try: for i in folderNames: filepath = os.path.join(inputParameters["abspath"], i) - modality = "csv" # TODO: ask for modality from the user + modality = "tdt" # TODO: ask for modality from the user if modality == "tdt": data = readtsq(filepath) - event_name, flag = None, None + event_name, flag = [], [] elif modality == "csv": data = 0 event_name, flag = import_csv_step2(filepath) From 4f4e1c921da919e28d5d595827f8bf397c74c5e4 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 13:15:47 -0800 Subject: [PATCH 005/150] Split import_doric out from import_np_doric_csv --- src/guppy/doric_step2.py | 92 +++++++++++++++++++++++++++++++++++++ src/guppy/saveStoresList.py | 6 ++- 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 src/guppy/doric_step2.py diff --git a/src/guppy/doric_step2.py b/src/guppy/doric_step2.py new file mode 100644 index 0000000..69022aa --- /dev/null +++ b/src/guppy/doric_step2.py @@ -0,0 +1,92 @@ +import glob +import logging +import os +import tkinter as tk +from tkinter import StringVar, messagebox, ttk + +import h5py +import numpy as np +import pandas as pd +import panel as pn + +pn.extension() + +logger = logging.getLogger(__name__) + +def import_doric(filepath): + + logger.debug("If it exists, importing Doric file based on the structure of file") + path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) + sorted(glob.glob(os.path.join(filepath, "*.doric"))) + + path = sorted(list(set(path))) + flag = "None" + event_from_filename = [] + flag_arr = [] + for i in range(len(path)): + ext = os.path.basename(path[i]).split(".")[-1] + if ext == "doric": + key_names = read_doric(path[i]) + event_from_filename.extend(key_names) + flag = "doric_doric" + else: + df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) + df = df.dropna(axis=1, how="all") + df_arr = np.array(df).flatten() + check_all_str = [] + for element in df_arr: + try: + float(element) + except: + check_all_str.append(i) + assert len(check_all_str) == len(df_arr), "This file appears to be standard .csv. This function only supports doric .csv files." 
+ df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) + df = df.drop(["Time(s)"], axis=1) + event_from_filename.extend(list(df.columns)) + flag = "doric_csv" + logger.info(flag) + logger.info("Importing of Doric file is done.") + return event_from_filename, flag_arr + + +def read_doric(filepath): + with h5py.File(filepath, "r") as f: + if "Traces" in list(f.keys()): + keys = access_keys_doricV1(f) + elif list(f.keys()) == ["Configurations", "DataAcquisition"]: + keys = access_keys_doricV6(f) + + return keys + +def access_keys_doricV6(doric_file): + data = [doric_file["DataAcquisition"]] + res = [] + while len(data) != 0: + members = len(data) + while members != 0: + members -= 1 + data, last_element = separate_last_element(data) + if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): + res.append(last_element.name) + elif isinstance(last_element, h5py.Group): + data.extend(reversed([last_element[k] for k in last_element.keys()])) + + keys = [] + for element in res: + sep_values = element.split("/") + if sep_values[-1] == "Values": + keys.append(f"{sep_values[-3]}/{sep_values[-2]}") + else: + keys.append(f"{sep_values[-2]}/{sep_values[-1]}") + + return keys + + +def access_keys_doricV1(doric_file): + keys = list(doric_file["Traces"]["Console"].keys()) + keys.remove("Time(s)") + + return keys + +def separate_last_element(arr): + l = arr[-1] + return arr[:-1], l \ No newline at end of file diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 392c04e..26065e4 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -25,6 +25,7 @@ from guppy.tdt_step2 import readtsq from guppy.np_doric_csv_step2 import import_np_doric_csv from guppy.csv_step2 import import_csv_step2 +from guppy.doric_step2 import import_doric # hv.extension() pn.extension() @@ -587,13 +588,16 @@ def execute(inputParameters): try: for i in folderNames: filepath = os.path.join(inputParameters["abspath"], i) - modality = "tdt" # TODO: ask for modality from the user + modality = "doric" # TODO: ask for modality from the user if modality == "tdt": data = readtsq(filepath) event_name, flag = [], [] elif modality == "csv": data = 0 event_name, flag = import_csv_step2(filepath) + elif modality == "doric": + data = 0 + event_name, flag = import_doric(filepath) else: raise ValueError("Modality not recognized. 
Please use 'tdt' or 'csv'.") From 341d77d722844c63fdbfd4c189e446adf390c3f0 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 13:18:35 -0800 Subject: [PATCH 006/150] Removed unnecessary imports --- src/guppy/doric_step2.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/guppy/doric_step2.py b/src/guppy/doric_step2.py index 69022aa..bf402d1 100644 --- a/src/guppy/doric_step2.py +++ b/src/guppy/doric_step2.py @@ -1,15 +1,10 @@ import glob import logging import os -import tkinter as tk -from tkinter import StringVar, messagebox, ttk import h5py import numpy as np import pandas as pd -import panel as pn - -pn.extension() logger = logging.getLogger(__name__) From 0bcd4fee319ba485519bd71f75a7c756bea36157 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 13:40:01 -0800 Subject: [PATCH 007/150] Split import_npm out from import_np_doric_csv --- src/guppy/npm_step2.py | 408 ++++++++++++++++++++++++++++++++++++ src/guppy/saveStoresList.py | 6 +- 2 files changed, 413 insertions(+), 1 deletion(-) create mode 100644 src/guppy/npm_step2.py diff --git a/src/guppy/npm_step2.py b/src/guppy/npm_step2.py new file mode 100644 index 0000000..f0fafec --- /dev/null +++ b/src/guppy/npm_step2.py @@ -0,0 +1,408 @@ +import glob +import logging +import os +import tkinter as tk +from tkinter import StringVar, messagebox, ttk + +import numpy as np +import pandas as pd +import panel as pn + +pn.extension() + +logger = logging.getLogger(__name__) + +def import_npm(filepath, num_ch, inputParameters=None): + + logger.debug("If it exists, importing NPM file based on the structure of file") + # Headless configuration (used to avoid any UI prompts when running tests) + headless = bool(os.environ.get("GUPPY_BASE_DIR")) + npm_timestamp_column_name = None + npm_time_unit = None + npm_split_events = None + if isinstance(inputParameters, dict): + npm_timestamp_column_name = inputParameters.get("npm_timestamp_column_name") + npm_time_unit = inputParameters.get("npm_time_unit", "seconds") + npm_split_events = inputParameters.get("npm_split_events", True) + path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) + sorted(glob.glob(os.path.join(filepath, "*.doric"))) + path_chev = glob.glob(os.path.join(filepath, "*chev*")) + path_chod = glob.glob(os.path.join(filepath, "*chod*")) + path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) + path_event = glob.glob(os.path.join(filepath, "event*")) + # path_sig = glob.glob(os.path.join(filepath, 'sig*')) # TODO: what is this for? + path_chev_chod_event = path_chev + path_chod + path_event + path_chpr + + path = sorted(list(set(path) - set(path_chev_chod_event))) + flag = "None" + event_from_filename = [] + flag_arr = [] + for i in range(len(path)): + dirname = os.path.dirname(path[i]) + ext = os.path.basename(path[i]).split(".")[-1] + assert ext != "doric", "Doric files are not supported by import_npm function." + df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) + df = df.dropna(axis=1, how="all") + df_arr = np.array(df).flatten() + check_all_str = [] + for element in df_arr: + try: + float(element) + except: + check_all_str.append(i) + assert len(check_all_str) != len(df_arr), "This file appears to be doric .csv. This function only supports NPM .csv files." 
+ df = pd.read_csv(path[i], index_col=False) + _, value = check_header(df) + + # check dataframe structure and read data accordingly + if len(value) > 0: + columns_isstr = False + df = pd.read_csv(path[i], header=None) + cols = np.array(list(df.columns), dtype=str) + else: + df = df + columns_isstr = True + cols = np.array(list(df.columns), dtype=str) + # check the structure of dataframe and assign flag to the type of file + assert len(cols) != 1, "File appears to be event .csv. This function only supports NPM .csv files." + assert len(cols) != 3, "File appears to be data .csv. This function only supports NPM .csv files." + if len(cols) == 2: + flag = "event_or_data_np" + elif len(cols) >= 2: + flag = "data_np" + else: + logger.error("Number of columns in csv file does not make sense.") + raise Exception("Number of columns in csv file does not make sense.") + + if columns_isstr == True and ( + "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) + ): + flag = flag + "_v2" + else: + flag = flag + + # used assigned flags to process the files and read the data + if flag == "event_or_data_np": + arr = list(df.iloc[:, 1]) + check_float = [True for i in arr if isinstance(i, float)] + if len(arr) == len(check_float) and columns_isstr == False: + flag = "data_np" + elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): + flag = "event_np" + else: + flag = "event_np" + + flag_arr.append(flag) + logger.info(flag) + if flag == "data_np": + file = f"file{str(i)}_" + df, indices_dict, _ = decide_indices(file, df, flag, num_ch) + keys = list(indices_dict.keys()) + for k in range(len(keys)): + for j in range(df.shape[1]): + if j == 0: + timestamps = df.iloc[:, j][indices_dict[keys[k]]] + # timestamps_odd = df.iloc[:,j][odd_indices] + else: + d = dict() + d["timestamps"] = timestamps + d["data"] = df.iloc[:, j][indices_dict[keys[k]]] + + df_ch = pd.DataFrame(d) + df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) + event_from_filename.append(keys[k] + str(j)) + + elif flag == "event_np": + type_val = np.array(df.iloc[:, 1]) + type_val_unique = np.unique(type_val) + if headless: + response = 1 if bool(npm_split_events) else 0 + else: + window = tk.Tk() + if len(type_val_unique) > 1: + response = messagebox.askyesno( + "Multiple event TTLs", + "Based on the TTL file,\ + it looks like TTLs \ + belongs to multiple behavior type. 
\ + Do you want to create multiple files for each \ + behavior type ?", + ) + else: + response = 0 + window.destroy() + if response == 1: + timestamps = np.array(df.iloc[:, 0]) + for j in range(len(type_val_unique)): + idx = np.where(type_val == type_val_unique[j]) + d = dict() + d["timestamps"] = timestamps[idx] + df_new = pd.DataFrame(d) + df_new.to_csv(os.path.join(dirname, "event" + str(type_val_unique[j]) + ".csv"), index=False) + event_from_filename.append("event" + str(type_val_unique[j])) + else: + timestamps = np.array(df.iloc[:, 0]) + d = dict() + d["timestamps"] = timestamps + df_new = pd.DataFrame(d) + df_new.to_csv(os.path.join(dirname, "event" + str(0) + ".csv"), index=False) + event_from_filename.append("event" + str(0)) + else: + file = f"file{str(i)}_" + df, ts_unit = decide_ts_unit_for_npm( + df, timestamp_column_name=npm_timestamp_column_name, time_unit=npm_time_unit, headless=headless + ) + df, indices_dict, _ = decide_indices(file, df, flag) + keys = list(indices_dict.keys()) + for k in range(len(keys)): + for j in range(df.shape[1]): + if j == 0: + timestamps = df.iloc[:, j][indices_dict[keys[k]]] + # timestamps_odd = df.iloc[:,j][odd_indices] + else: + d = dict() + d["timestamps"] = timestamps + d["data"] = df.iloc[:, j][indices_dict[keys[k]]] + + df_ch = pd.DataFrame(d) + df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) + event_from_filename.append(keys[k] + str(j)) + + path_chev = glob.glob(os.path.join(filepath, "*chev*")) + path_chod = glob.glob(os.path.join(filepath, "*chod*")) + path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) + path_event = glob.glob(os.path.join(filepath, "event*")) + # path_sig = glob.glob(os.path.join(filepath, 'sig*')) + path_chev_chod_chpr = [path_chev, path_chod, path_chpr] + if ( + ("data_np_v2" in flag_arr or "data_np" in flag_arr) + and ("event_np" in flag_arr) + and (i == len(path) - 1) + ) or ( + ("data_np_v2" in flag_arr or "data_np" in flag_arr) and (i == len(path) - 1) + ): # i==len(path)-1 and or 'event_np' in flag + num_path_chev, num_path_chod, num_path_chpr = len(path_chev), len(path_chod), len(path_chpr) + arr_len, no_ch = [], [] + for i in range(len(path_chev_chod_chpr)): + if len(path_chev_chod_chpr[i]) > 0: + arr_len.append(len(path_chev_chod_chpr[i])) + else: + continue + + unique_arr_len = np.unique(np.array(arr_len)) + if "data_np_v2" in flag_arr: + if ts_unit == "seconds": + divisor = 1 + elif ts_unit == "milliseconds": + divisor = 1e3 + else: + divisor = 1e6 + else: + divisor = 1000 + + for j in range(len(path_event)): + df_event = pd.read_csv(path_event[j]) + df_chev = pd.read_csv(path_chev[0]) + df_event["timestamps"] = (df_event["timestamps"] - df_chev["timestamps"][0]) / divisor + df_event.to_csv(path_event[j], index=False) + if unique_arr_len.shape[0] == 1: + for j in range(len(path_chev)): + if file + "chev" in indices_dict.keys(): + df_chev = pd.read_csv(path_chev[j]) + df_chev["timestamps"] = (df_chev["timestamps"] - df_chev["timestamps"][0]) / divisor + df_chev["sampling_rate"] = np.full(df_chev.shape[0], np.nan) + df_chev.at[0, "sampling_rate"] = df_chev.shape[0] / ( + df_chev["timestamps"].iloc[-1] - df_chev["timestamps"].iloc[0] + ) + df_chev.to_csv(path_chev[j], index=False) + + if file + "chod" in indices_dict.keys(): + df_chod = pd.read_csv(path_chod[j]) + df_chod["timestamps"] = df_chev["timestamps"] + df_chod["sampling_rate"] = np.full(df_chod.shape[0], np.nan) + df_chod.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] + df_chod.to_csv(path_chod[j], index=False) 
+ + if file + "chpr" in indices_dict.keys(): + df_chpr = pd.read_csv(path_chpr[j]) + df_chpr["timestamps"] = df_chev["timestamps"] + df_chpr["sampling_rate"] = np.full(df_chpr.shape[0], np.nan) + df_chpr.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] + df_chpr.to_csv(path_chpr[j], index=False) + else: + logger.error("Number of channels should be same for all regions.") + raise Exception("Number of channels should be same for all regions.") + logger.info("Importing of NPM file is done.") + return event_from_filename, flag_arr + +def check_header(df): + arr = list(df.columns) + check_float = [] + for i in arr: + try: + check_float.append(float(i)) + except: + pass + + return arr, check_float + + +# function to decide indices of interleaved channels +# in neurophotometrics data +def decide_indices(file, df, flag, num_ch=2): + ch_name = [file + "chev", file + "chod", file + "chpr"] + if len(ch_name) < num_ch: + logger.error( + "Number of channels parameters in Input Parameters GUI is more than 3. \ + Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." + ) + raise Exception( + "Number of channels parameters in Input Parameters GUI is more than 3. \ + Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." + ) + if flag == "data_np": + indices_dict = dict() + for i in range(num_ch): + indices_dict[ch_name[i]] = np.arange(i, df.shape[0], num_ch) + + else: + cols = np.array(list(df.columns)) + if "flags" in np.char.lower(np.array(cols)): + arr = ["FrameCounter", "Flags"] + state = np.array(df["Flags"]) + elif "ledstate" in np.char.lower(np.array(cols)): + arr = ["FrameCounter", "LedState"] + state = np.array(df["LedState"]) + else: + logger.error( + "File type shows Neurophotometrics newer version \ + data but column names does not have Flags or LedState" + ) + raise Exception( + "File type shows Neurophotometrics newer version \ + data but column names does not have Flags or LedState" + ) + + num_ch, ch = check_channels(state) + indices_dict = dict() + for i in range(num_ch): + first_occurrence = np.where(state == ch[i])[0] + indices_dict[ch_name[i]] = np.arange(first_occurrence[0], df.shape[0], num_ch) + + df = df.drop(arr, axis=1) + + return df, indices_dict, num_ch + +# check flag consistency in neurophotometrics data +def check_channels(state): + state = state.astype(int) + unique_state = np.unique(state[2:12]) + if unique_state.shape[0] > 3: + logger.error( + "Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." + ) + raise Exception( + "Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." 
+ ) + + return unique_state.shape[0], unique_state + + +# function to decide NPM timestamps unit (seconds, ms or us) +def decide_ts_unit_for_npm(df, timestamp_column_name=None, time_unit=None, headless=False): + col_names = np.array(list(df.columns)) + col_names_ts = [""] + for name in col_names: + if "timestamp" in name.lower(): + col_names_ts.append(name) + + ts_unit = "seconds" + if len(col_names_ts) > 2: + # Headless path: auto-select column/unit without any UI + if headless: + if timestamp_column_name is not None: + assert ( + timestamp_column_name in col_names_ts + ), f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" + chosen = timestamp_column_name + else: + chosen = col_names_ts[1] + df.insert(1, "Timestamp", df[chosen]) + df = df.drop(col_names_ts[1:], axis=1) + valid_units = {"seconds", "milliseconds", "microseconds"} + ts_unit = time_unit if (isinstance(time_unit, str) and time_unit in valid_units) else "seconds" + return df, ts_unit + # def comboBoxSelected(event): + # logger.info(event.widget.get()) + + window = tk.Tk() + window.title("Select appropriate options for timestamps") + window.geometry("500x200") + holdComboboxValues = dict() + + timestamps_label = ttk.Label(window, text="Select which timestamps to use : ").grid( + row=0, column=1, pady=25, padx=25 + ) + holdComboboxValues["timestamps"] = StringVar() + timestamps_combo = ttk.Combobox(window, values=col_names_ts, textvariable=holdComboboxValues["timestamps"]) + timestamps_combo.grid(row=0, column=2, pady=25, padx=25) + timestamps_combo.current(0) + # timestamps_combo.bind("<>", comboBoxSelected) + + time_unit_label = ttk.Label(window, text="Select timestamps unit : ").grid(row=1, column=1, pady=25, padx=25) + holdComboboxValues["time_unit"] = StringVar() + time_unit_combo = ttk.Combobox( + window, values=["", "seconds", "milliseconds", "microseconds"], textvariable=holdComboboxValues["time_unit"] + ) + time_unit_combo.grid(row=1, column=2, pady=25, padx=25) + time_unit_combo.current(0) + # time_unit_combo.bind("<>", comboBoxSelected) + window.lift() + window.after(500, lambda: window.lift()) + window.mainloop() + + if holdComboboxValues["timestamps"].get(): + df.insert(1, "Timestamp", df[holdComboboxValues["timestamps"].get()]) + df = df.drop(col_names_ts[1:], axis=1) + else: + messagebox.showerror( + "All options not selected", + "All the options for timestamps \ + were not selected. Please select appropriate options", + ) + logger.error( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + raise Exception( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + if holdComboboxValues["time_unit"].get(): + if holdComboboxValues["time_unit"].get() == "seconds": + ts_unit = holdComboboxValues["time_unit"].get() + elif holdComboboxValues["time_unit"].get() == "milliseconds": + ts_unit = holdComboboxValues["time_unit"].get() + else: + ts_unit = holdComboboxValues["time_unit"].get() + else: + messagebox.showerror( + "All options not selected", + "All the options for timestamps \ + were not selected. Please select appropriate options", + ) + logger.error( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + raise Exception( + "All the options for timestamps \ + were not selected. 
Please select appropriate options" + ) + else: + pass + + return df, ts_unit \ No newline at end of file diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 26065e4..db9a4fc 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -26,6 +26,7 @@ from guppy.np_doric_csv_step2 import import_np_doric_csv from guppy.csv_step2 import import_csv_step2 from guppy.doric_step2 import import_doric +from guppy.npm_step2 import import_npm # hv.extension() pn.extension() @@ -588,7 +589,7 @@ def execute(inputParameters): try: for i in folderNames: filepath = os.path.join(inputParameters["abspath"], i) - modality = "doric" # TODO: ask for modality from the user + modality = "npm" # TODO: ask for modality from the user if modality == "tdt": data = readtsq(filepath) event_name, flag = [], [] @@ -598,6 +599,9 @@ def execute(inputParameters): elif modality == "doric": data = 0 event_name, flag = import_doric(filepath) + elif modality == "npm": + data = 0 + event_name, flag = import_npm(filepath, num_ch) else: raise ValueError("Modality not recognized. Please use 'tdt' or 'csv'.") From 7b36f64266a7b5c35b78310272f65fcecd6a6d3b Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 13:50:35 -0800 Subject: [PATCH 008/150] Added modality selector to the GUI. --- src/guppy/saveStoresList.py | 4 ++-- src/guppy/savingInputParameters.py | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index db9a4fc..f9921f9 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -589,7 +589,7 @@ def execute(inputParameters): try: for i in folderNames: filepath = os.path.join(inputParameters["abspath"], i) - modality = "npm" # TODO: ask for modality from the user + modality = inputParameters.get("modality", "tdt") if modality == "tdt": data = readtsq(filepath) event_name, flag = [], [] @@ -603,7 +603,7 @@ def execute(inputParameters): data = 0 event_name, flag = import_npm(filepath, num_ch) else: - raise ValueError("Modality not recognized. Please use 'tdt' or 'csv'.") + raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") saveStorenames(inputParameters, data, event_name, flag, filepath) logger.info("#" * 400) diff --git a/src/guppy/savingInputParameters.py b/src/guppy/savingInputParameters.py index cd515ab..b0a5feb 100644 --- a/src/guppy/savingInputParameters.py +++ b/src/guppy/savingInputParameters.py @@ -119,6 +119,21 @@ def readPBIncrementValues(progressBar): files_1 = pn.widgets.FileSelector(folder_path, name="folderNames", width=950) + explain_modality = pn.pane.Markdown( + """ + **Data Modality:** Select the type of data acquisition system used for your recordings: + - **tdt**: Tucker-Davis Technologies system + - **csv**: Generic CSV format + - **doric**: Doric Photometry system + - **npm**: Neurophotometrics system + """, + width=600, + ) + + modality_selector = pn.widgets.Select( + name="Data Modality", value="tdt", options=["tdt", "csv", "doric", "npm"], width=320 + ) + explain_time_artifacts = pn.pane.Markdown( """ - ***Number of cores :*** Number of cores used for analysis. 
Try to @@ -357,6 +372,7 @@ def getInputParameters(): inputParameters = { "abspath": abspath[0], "folderNames": files_1.value, + "modality": modality_selector.value, "numberOfCores": numberOfCores.value, "combine_data": combine_data.value, "isosbestic_control": isosbestic_control.value, @@ -538,7 +554,7 @@ def onclickpsth(event=None): psth_baseline_param = pn.Column(zscore_param_wd, psth_param_wd, baseline_param_wd, peak_param_wd) - widget = pn.Column(mark_down_1, files_1, pn.Row(individual_analysis_wd_2, psth_baseline_param)) + widget = pn.Column(mark_down_1, files_1, explain_modality, modality_selector, pn.Row(individual_analysis_wd_2, psth_baseline_param)) # file_selector = pn.WidgetBox(files_1) styles = dict(background="WhiteSmoke") From 100ad14058e8f07a48aee74083e6f04d46a027fa Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 13:52:00 -0800 Subject: [PATCH 009/150] Added modality selector to the GUI. --- src/guppy/saveStoresList.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index f9921f9..72dc604 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -583,13 +583,13 @@ def execute(inputParameters): folderNames = inputParameters["folderNames"] isosbestic_control = inputParameters["isosbestic_control"] num_ch = inputParameters["noChannels"] + modality = inputParameters.get("modality", "tdt") logger.info(folderNames) try: for i in folderNames: filepath = os.path.join(inputParameters["abspath"], i) - modality = inputParameters.get("modality", "tdt") if modality == "tdt": data = readtsq(filepath) event_name, flag = [], [] From ef978ec2cb8f7e51b9eacb8ce2d6f88bf73e01ea Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 17 Nov 2025 14:17:33 -0800 Subject: [PATCH 010/150] Added modality option to the api and tests --- src/guppy/testing/api.py | 7 +++++++ tests/test_step2.py | 17 +++++++++++++++-- tests/test_step3.py | 18 ++++++++++++++++-- tests/test_step4.py | 19 +++++++++++++++++-- tests/test_step5.py | 20 ++++++++++++++++++-- 5 files changed, 73 insertions(+), 8 deletions(-) diff --git a/src/guppy/testing/api.py b/src/guppy/testing/api.py index 587a022..0e16f23 100644 --- a/src/guppy/testing/api.py +++ b/src/guppy/testing/api.py @@ -68,6 +68,7 @@ def step2( base_dir: str, selected_folders: Iterable[str], storenames_map: dict[str, str], + modality: str = "tdt", npm_timestamp_column_name: str | None = None, npm_time_unit: str = "seconds", npm_split_events: bool = True, @@ -150,6 +151,9 @@ def step2( # Inject storenames mapping for headless execution input_params["storenames_map"] = dict(storenames_map) + # Inject modality + input_params["modality"] = modality + # Add npm parameters input_params["npm_timestamp_column_name"] = npm_timestamp_column_name input_params["npm_time_unit"] = npm_time_unit @@ -163,6 +167,7 @@ def step3( *, base_dir: str, selected_folders: Iterable[str], + modality: str = "tdt", npm_timestamp_column_name: str | None = None, npm_time_unit: str = "seconds", npm_split_events: bool = True, @@ -240,6 +245,7 @@ def step4( *, base_dir: str, selected_folders: Iterable[str], + modality: str = "tdt", npm_timestamp_column_name: str | None = None, npm_time_unit: str = "seconds", npm_split_events: bool = True, @@ -317,6 +323,7 @@ def step5( *, base_dir: str, selected_folders: Iterable[str], + modality: str = "tdt", npm_timestamp_column_name: str | None = None, npm_time_unit: str = "seconds", npm_split_events: bool = True, diff --git a/tests/test_step2.py 
b/tests/test_step2.py index 55181ab..34777be 100644 --- a/tests/test_step2.py +++ b/tests/test_step2.py @@ -10,7 +10,7 @@ @pytest.mark.parametrize( - "session_subdir, storenames_map", + "session_subdir, storenames_map, modality", [ ( "SampleData_csv/sample_data_csv_1", @@ -19,6 +19,7 @@ "Sample_Signal_Channel": "signal_region", "Sample_TTL": "ttl", }, + "csv", ), ( "SampleData_Doric/sample_doric_1", @@ -27,6 +28,7 @@ "AIn-2 - Raw": "signal_region", "DI--O-1": "ttl", }, + "doric", ), ( "SampleData_Doric/sample_doric_2", @@ -35,6 +37,7 @@ "AIn-1 - Dem (da)": "signal_region", "DI/O-1": "ttl", }, + "doric", ), ( "SampleData_Doric/sample_doric_3", @@ -43,6 +46,7 @@ "CAM1_EXC2/ROI01": "signal_region", "DigitalIO/CAM1": "ttl", }, + "doric", ), ( "SampleData_Doric/sample_doric_4", @@ -50,6 +54,7 @@ "Series0001/AIN01xAOUT01-LockIn": "control_region", "Series0001/AIN01xAOUT02-LockIn": "signal_region", }, + "doric", ), ( "SampleData_Doric/sample_doric_5", @@ -57,6 +62,7 @@ "Series0001/AIN01xAOUT01-LockIn": "control_region", "Series0001/AIN01xAOUT02-LockIn": "signal_region", }, + "doric", ), ( "SampleData_Clean/Photo_63_207-181030-103332", @@ -65,6 +71,7 @@ "Dv2A": "signal_dms", "PrtN": "port_entries_dms", }, + "tdt", ), ( "SampleData_with_artifacts/Photo_048_392-200728-121222", @@ -73,6 +80,7 @@ "Dv2A": "signal_dms", "PrtN": "port_entries_dms", }, + "tdt", ), # TODO: Add sampleData_NPM_1 after fixing Doric vs. NPM determination bug. ( @@ -81,6 +89,7 @@ "file0_chev6": "control_region", "file1_chev6": "signal_region", }, + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_3", @@ -89,6 +98,7 @@ "file0_chod3": "signal_region3", "event3": "ttl_region3", }, + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_4", @@ -97,6 +107,7 @@ "file0_chod1": "signal_region1", "eventTrue": "ttl_true_region1", }, + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_5", @@ -105,6 +116,7 @@ "file0_chod1": "signal_region1", "event0": "ttl_region1", }, + "npm", ), ], ids=[ @@ -122,7 +134,7 @@ "sample_npm_5", ], ) -def test_step2(tmp_path, session_subdir, storenames_map): +def test_step2(tmp_path, session_subdir, storenames_map, modality): """ Step 2 integration test (Save Storenames) using real sample data, isolated to a temporary workspace. 
For each dataset: @@ -170,6 +182,7 @@ def test_step2(tmp_path, session_subdir, storenames_map): base_dir=str(tmp_base), selected_folders=[str(session_copy)], storenames_map=storenames_map, + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, diff --git a/tests/test_step3.py b/tests/test_step3.py index 655fb10..d167585 100644 --- a/tests/test_step3.py +++ b/tests/test_step3.py @@ -20,7 +20,7 @@ def storenames_map(): @pytest.mark.parametrize( - "session_subdir, storenames_map", + "session_subdir, storenames_map, modality", [ ( "SampleData_csv/sample_data_csv_1", @@ -29,6 +29,7 @@ def storenames_map(): "Sample_Signal_Channel": "signal_region", "Sample_TTL": "ttl", }, + "csv", ), ( "SampleData_Doric/sample_doric_1", @@ -37,6 +38,7 @@ def storenames_map(): "AIn-2 - Raw": "signal_region", "DI--O-1": "ttl", }, + "doric", ), ( "SampleData_Doric/sample_doric_2", @@ -45,6 +47,7 @@ def storenames_map(): "AIn-1 - Dem (da)": "signal_region", "DI/O-1": "ttl", }, + "doric", ), ( "SampleData_Doric/sample_doric_3", @@ -53,6 +56,7 @@ def storenames_map(): "CAM1_EXC2/ROI01": "signal_region", "DigitalIO/CAM1": "ttl", }, + "doric", ), ( "SampleData_Doric/sample_doric_4", @@ -60,6 +64,7 @@ def storenames_map(): "Series0001/AIN01xAOUT01-LockIn": "control_region", "Series0001/AIN01xAOUT02-LockIn": "signal_region", }, + "doric", ), ( "SampleData_Doric/sample_doric_5", @@ -67,6 +72,7 @@ def storenames_map(): "Series0001/AIN01xAOUT01-LockIn": "control_region", "Series0001/AIN01xAOUT02-LockIn": "signal_region", }, + "doric", ), ( "SampleData_Clean/Photo_63_207-181030-103332", @@ -75,6 +81,7 @@ def storenames_map(): "Dv2A": "signal_dms", "PrtN": "port_entries_dms", }, + "tdt", ), ( "SampleData_with_artifacts/Photo_048_392-200728-121222", @@ -83,6 +90,7 @@ def storenames_map(): "Dv2A": "signal_dms", "PrtN": "port_entries_dms", }, + "tdt", ), ( "SampleData_Neurophotometrics/sampleData_NPM_2", @@ -90,6 +98,7 @@ def storenames_map(): "file0_chev6": "control_region", "file1_chev6": "signal_region", }, + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_3", @@ -98,6 +107,7 @@ def storenames_map(): "file0_chod3": "signal_region3", "event3": "ttl_region3", }, + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_4", @@ -106,6 +116,7 @@ def storenames_map(): "file0_chod1": "signal_region1", "eventTrue": "ttl_true_region1", }, + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_5", @@ -114,6 +125,7 @@ def storenames_map(): "file0_chod1": "signal_region1", "event0": "ttl_region1", }, + "npm", ), ], ids=[ @@ -131,7 +143,7 @@ def storenames_map(): "sample_npm_5", ], ) -def test_step3(tmp_path, storenames_map, session_subdir): +def test_step3(tmp_path, storenames_map, session_subdir, modality): """ Full integration test for Step 3 (Read Raw Data) using real CSV sample data, isolated to a temporary workspace to avoid mutating shared sample data. 
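# With the new `modality` keyword threaded through guppy.testing.api, a headless
# Step 2 -> Step 3 run can be sketched as below. This is a minimal sketch assuming
# the keyword-only signatures added in this patch; the workspace root, session
# folder, and store names are placeholders rather than paths from the sample data.
from guppy.testing.api import step2, step3

base_dir = "/data/guppy_workspace"                 # assumed workspace root
session = "/data/guppy_workspace/Photo_session_1"  # assumed session folder

step2(
    base_dir=base_dir,
    selected_folders=[session],
    storenames_map={"Dv1A": "control_dms", "Dv2A": "signal_dms", "PrtN": "port_entries_dms"},
    modality="tdt",
)
step3(
    base_dir=base_dir,
    selected_folders=[session],
    modality="tdt",
)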
@@ -182,6 +194,7 @@ def test_step3(tmp_path, storenames_map, session_subdir): base_dir=str(tmp_base), selected_folders=[str(session_copy)], storenames_map=storenames_map, + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, @@ -191,6 +204,7 @@ def test_step3(tmp_path, storenames_map, session_subdir): step3( base_dir=str(tmp_base), selected_folders=[str(session_copy)], + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, diff --git a/tests/test_step4.py b/tests/test_step4.py index 9a2e9bb..80c2d3f 100644 --- a/tests/test_step4.py +++ b/tests/test_step4.py @@ -10,7 +10,7 @@ @pytest.mark.parametrize( - "session_subdir, storenames_map, expected_region, expected_ttl", + "session_subdir, storenames_map, expected_region, expected_ttl, modality", [ ( "SampleData_csv/sample_data_csv_1", @@ -21,6 +21,7 @@ }, "region", "ttl", + "csv", ), ( "SampleData_Doric/sample_doric_1", @@ -31,6 +32,7 @@ }, "region", "ttl", + "doric", ), ( "SampleData_Doric/sample_doric_2", @@ -41,6 +43,7 @@ }, "region", "ttl", + "doric", ), ( "SampleData_Doric/sample_doric_3", @@ -51,6 +54,7 @@ }, "region", "ttl", + "doric", ), ( "SampleData_Doric/sample_doric_4", @@ -60,6 +64,7 @@ }, "region", None, + "doric", ), ( "SampleData_Doric/sample_doric_5", @@ -69,6 +74,7 @@ }, "region", None, + "doric", ), ( "SampleData_Clean/Photo_63_207-181030-103332", @@ -79,6 +85,7 @@ }, "dms", "port_entries_dms", + "tdt", ), ( "SampleData_with_artifacts/Photo_048_392-200728-121222", @@ -89,6 +96,7 @@ }, "dms", "port_entries_dms", + "tdt", ), ( "SampleData_Neurophotometrics/sampleData_NPM_2", @@ -98,6 +106,7 @@ }, "region", None, + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_3", @@ -108,6 +117,7 @@ }, "region3", "ttl_region3", + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_4", @@ -118,6 +128,7 @@ }, "region1", "ttl_true_region1", + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_5", @@ -128,6 +139,7 @@ }, "region1", "ttl_region1", + "npm", ), ], ids=[ @@ -146,7 +158,7 @@ ], ) @pytest.mark.filterwarnings("ignore::UserWarning") -def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_region, expected_ttl): +def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_region, expected_ttl, modality): """ Full integration test for Step 4 (Extract timestamps and signal) using real CSV sample data, isolated to a temporary workspace to avoid mutating shared sample data. 
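# Each parametrized case now carries the modality as its final element, and the
# pytest `ids` list must stay the same length as the case list. A minimal
# self-contained sketch of the same pattern; the session folder, store names, and
# expected values below are invented for illustration, not real sample data.
import pytest

@pytest.mark.parametrize(
    "session_subdir, storenames_map, expected_region, expected_ttl, modality",
    [
        (
            "SampleData_TDT/hypothetical_session",
            {"Dv1A": "control_nac", "Dv2A": "signal_nac", "PrtN": "port_entries_nac"},
            "nac",
            "port_entries_nac",
            "tdt",
        ),
    ],
    ids=["hypothetical_tdt_session"],
)
def test_modality_is_threaded_through(session_subdir, storenames_map, expected_region, expected_ttl, modality):
    assert modality in {"tdt", "csv", "doric", "npm"}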
@@ -202,6 +214,7 @@ def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r base_dir=str(tmp_base), selected_folders=[str(session_copy)], storenames_map=storenames_map, + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, @@ -211,6 +224,7 @@ def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r step3( base_dir=str(tmp_base), selected_folders=[str(session_copy)], + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, @@ -220,6 +234,7 @@ def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r step4( base_dir=str(tmp_base), selected_folders=[str(session_copy)], + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, diff --git a/tests/test_step5.py b/tests/test_step5.py index 5593ee0..d2de1f5 100644 --- a/tests/test_step5.py +++ b/tests/test_step5.py @@ -10,7 +10,7 @@ @pytest.mark.parametrize( - "session_subdir, storenames_map, expected_region, expected_ttl", + "session_subdir, storenames_map, expected_region, expected_ttl, modality", [ ( "SampleData_csv/sample_data_csv_1", @@ -21,6 +21,7 @@ }, "region", "ttl", + "csv", ), ( "SampleData_Doric/sample_doric_1", @@ -31,6 +32,7 @@ }, "region", "ttl", + "doric", ), ( "SampleData_Doric/sample_doric_2", @@ -41,6 +43,7 @@ }, "region", "ttl", + "doric", ), ( "SampleData_Doric/sample_doric_3", @@ -51,6 +54,7 @@ }, "region", "ttl", + "doric", ), ( "SampleData_Doric/sample_doric_4", @@ -60,6 +64,7 @@ }, "region", None, + "doric", ), ( "SampleData_Doric/sample_doric_5", @@ -69,6 +74,7 @@ }, "region", None, + "doric", ), ( "SampleData_Clean/Photo_63_207-181030-103332", @@ -79,6 +85,7 @@ }, "dms", "port_entries_dms", + "tdt", ), ( "SampleData_with_artifacts/Photo_048_392-200728-121222", @@ -89,6 +96,7 @@ }, "dms", "port_entries_dms", + "tdt", ), ( "SampleData_Neurophotometrics/sampleData_NPM_2", @@ -98,6 +106,7 @@ }, "region", None, + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_3", @@ -108,6 +117,7 @@ }, "region3", "ttl_region3", + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_4", @@ -118,6 +128,7 @@ }, "region1", "ttl_true_region1", + "npm", ), ( "SampleData_Neurophotometrics/sampleData_NPM_5", @@ -128,6 +139,7 @@ }, "region1", "ttl_region1", + "npm", ), ], ids=[ @@ -146,7 +158,7 @@ ], ) @pytest.mark.filterwarnings("ignore::UserWarning") -def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_region, expected_ttl): +def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_region, expected_ttl, modality): """ Full integration test for Step 5 (PSTH Computation) using real CSV sample data, isolated to a temporary workspace to avoid mutating shared sample data. 
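# For a Neurophotometrics session the same headless chain runs Steps 2 through 5
# and threads the NPM-specific options alongside the modality. A sketch under the
# keyword-only signatures visible in this patch; the workspace path is a placeholder
# and the store names mirror the sampleData_NPM_3 case above.
from guppy.testing.api import step2, step3, step4, step5

common = dict(
    base_dir="/data/npm_workspace",                             # assumed workspace root
    selected_folders=["/data/npm_workspace/sampleData_NPM_3"],  # assumed session copy
    modality="npm",
    npm_timestamp_column_name=None,  # headless path then falls back to the first Timestamp column
    npm_time_unit="seconds",
    npm_split_events=True,
)

step2(
    storenames_map={"file0_chev3": "control_region3", "file0_chod3": "signal_region3", "event3": "ttl_region3"},
    **common,
)
step3(**common)
step4(**common)
step5(**common)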
@@ -204,6 +216,7 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r base_dir=str(tmp_base), selected_folders=[str(session_copy)], storenames_map=storenames_map, + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, @@ -213,6 +226,7 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r step3( base_dir=str(tmp_base), selected_folders=[str(session_copy)], + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, @@ -222,6 +236,7 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r step4( base_dir=str(tmp_base), selected_folders=[str(session_copy)], + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, @@ -231,6 +246,7 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r step5( base_dir=str(tmp_base), selected_folders=[str(session_copy)], + modality=modality, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events, From 6589139f61a55ac673c5867e165c3a5a4cb3d657 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 18 Nov 2025 11:15:21 -0800 Subject: [PATCH 011/150] Removed intermediate np_doric_csv_step2 module. --- src/guppy/np_doric_csv_step2.py | 523 -------------------------------- 1 file changed, 523 deletions(-) delete mode 100644 src/guppy/np_doric_csv_step2.py diff --git a/src/guppy/np_doric_csv_step2.py b/src/guppy/np_doric_csv_step2.py deleted file mode 100644 index d06dcc1..0000000 --- a/src/guppy/np_doric_csv_step2.py +++ /dev/null @@ -1,523 +0,0 @@ -import glob -import logging -import os -import tkinter as tk -from tkinter import StringVar, messagebox, ttk - -import h5py -import numpy as np -import pandas as pd -import panel as pn - -pn.extension() - -logger = logging.getLogger(__name__) - -# function to see if there are 'csv' files present -# and recognize type of 'csv' files either from -# Neurophotometrics, Doric systems or custom made 'csv' files -# and read data accordingly -def import_np_doric_csv(filepath, isosbestic_control, num_ch, inputParameters=None): - - logger.debug("If it exists, importing either NPM or Doric or csv file based on the structure of file") - # Headless configuration (used to avoid any UI prompts when running tests) - headless = bool(os.environ.get("GUPPY_BASE_DIR")) - npm_timestamp_column_name = None - npm_time_unit = None - npm_split_events = None - if isinstance(inputParameters, dict): - npm_timestamp_column_name = inputParameters.get("npm_timestamp_column_name") - npm_time_unit = inputParameters.get("npm_time_unit", "seconds") - npm_split_events = inputParameters.get("npm_split_events", True) - path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) + sorted(glob.glob(os.path.join(filepath, "*.doric"))) - path_chev = glob.glob(os.path.join(filepath, "*chev*")) - path_chod = glob.glob(os.path.join(filepath, "*chod*")) - path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) - path_event = glob.glob(os.path.join(filepath, "event*")) - # path_sig = glob.glob(os.path.join(filepath, 'sig*')) - path_chev_chod_event = path_chev + path_chod + path_event + path_chpr - - path = sorted(list(set(path) - set(path_chev_chod_event))) - flag = "None" - event_from_filename = [] - flag_arr = [] - for i in range(len(path)): - dirname = 
os.path.dirname(path[i]) - ext = os.path.basename(path[i]).split(".")[-1] - if ext == "doric": - key_names = read_doric(path[i]) - event_from_filename.extend(key_names) - flag = "doric_doric" - else: - df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) - df = df.dropna(axis=1, how="all") - df_arr = np.array(df).flatten() - check_all_str = [] - for element in df_arr: - try: - float(element) - except: - check_all_str.append(i) - if len(check_all_str) == len(df_arr): - df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) - df = df.drop(["Time(s)"], axis=1) - event_from_filename.extend(list(df.columns)) - flag = "doric_csv" - logger.info(flag) - else: - df = pd.read_csv(path[i], index_col=False) - # with warnings.catch_warnings(): - # warnings.simplefilter("error") - # try: - # df = pd.read_csv(path[i], index_col=False, dtype=float) - # except: - # df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) # to make process faster reading just first 10 rows - # df = df.drop(['Time(s)'], axis=1) - # event_from_filename.extend(list(df.columns)) - # flag = 'doric_csv' - if flag == "doric_csv" or flag == "doric_doric": - continue - else: - colnames, value = check_header(df) - # logger.info(len(colnames), len(value)) - - # check dataframe structure and read data accordingly - if len(value) > 0: - columns_isstr = False - df = pd.read_csv(path[i], header=None) - cols = np.array(list(df.columns), dtype=str) - else: - df = df - columns_isstr = True - cols = np.array(list(df.columns), dtype=str) - # check the structure of dataframe and assign flag to the type of file - if len(cols) == 1: - if cols[0].lower() != "timestamps": - logger.error("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") - raise Exception("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") - else: - flag = "event_csv" - elif len(cols) == 3: - arr1 = np.array(["timestamps", "data", "sampling_rate"]) - arr2 = np.char.lower(np.array(cols)) - if (np.sort(arr1) == np.sort(arr2)).all() == False: - logger.error( - "\033[1m" - + "Column names should be timestamps, data and sampling_rate (all lower-cases)" - + "\033[0m" - ) - raise Exception( - "\033[1m" - + "Column names should be timestamps, data and sampling_rate (all lower-cases)" - + "\033[0m" - ) - else: - flag = "data_csv" - elif len(cols) == 2: - flag = "event_or_data_np" - elif len(cols) >= 2: - flag = "data_np" - else: - logger.error("Number of columns in csv file does not make sense.") - raise Exception("Number of columns in csv file does not make sense.") - - if columns_isstr == True and ( - "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) - ): - flag = flag + "_v2" - else: - flag = flag - - # used assigned flags to process the files and read the data - if flag == "event_or_data_np": - arr = list(df.iloc[:, 1]) - check_float = [True for i in arr if isinstance(i, float)] - if len(arr) == len(check_float) and columns_isstr == False: - flag = "data_np" - elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): - flag = "event_np" - else: - flag = "event_np" - else: - pass - - flag_arr.append(flag) - logger.info(flag) - if flag == "event_csv" or flag == "data_csv": - name = os.path.basename(path[i]).split(".")[0] - event_from_filename.append(name) - elif flag == "data_np": - file = f"file{str(i)}_" - df, indices_dict, num_channels = decide_indices(file, df, flag, num_ch) - keys = list(indices_dict.keys()) - for k in range(len(keys)): - for 
j in range(df.shape[1]): - if j == 0: - timestamps = df.iloc[:, j][indices_dict[keys[k]]] - # timestamps_odd = df.iloc[:,j][odd_indices] - else: - d = dict() - d["timestamps"] = timestamps - d["data"] = df.iloc[:, j][indices_dict[keys[k]]] - - df_ch = pd.DataFrame(d) - df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) - event_from_filename.append(keys[k] + str(j)) - - elif flag == "event_np": - type_val = np.array(df.iloc[:, 1]) - type_val_unique = np.unique(type_val) - if headless: - response = 1 if bool(npm_split_events) else 0 - else: - window = tk.Tk() - if len(type_val_unique) > 1: - response = messagebox.askyesno( - "Multiple event TTLs", - "Based on the TTL file,\ - it looks like TTLs \ - belongs to multiple behavior type. \ - Do you want to create multiple files for each \ - behavior type ?", - ) - else: - response = 0 - window.destroy() - if response == 1: - timestamps = np.array(df.iloc[:, 0]) - for j in range(len(type_val_unique)): - idx = np.where(type_val == type_val_unique[j]) - d = dict() - d["timestamps"] = timestamps[idx] - df_new = pd.DataFrame(d) - df_new.to_csv(os.path.join(dirname, "event" + str(type_val_unique[j]) + ".csv"), index=False) - event_from_filename.append("event" + str(type_val_unique[j])) - else: - timestamps = np.array(df.iloc[:, 0]) - d = dict() - d["timestamps"] = timestamps - df_new = pd.DataFrame(d) - df_new.to_csv(os.path.join(dirname, "event" + str(0) + ".csv"), index=False) - event_from_filename.append("event" + str(0)) - else: - file = f"file{str(i)}_" - df, ts_unit = decide_ts_unit_for_npm( - df, timestamp_column_name=npm_timestamp_column_name, time_unit=npm_time_unit, headless=headless - ) - df, indices_dict, num_channels = decide_indices(file, df, flag) - keys = list(indices_dict.keys()) - for k in range(len(keys)): - for j in range(df.shape[1]): - if j == 0: - timestamps = df.iloc[:, j][indices_dict[keys[k]]] - # timestamps_odd = df.iloc[:,j][odd_indices] - else: - d = dict() - d["timestamps"] = timestamps - d["data"] = df.iloc[:, j][indices_dict[keys[k]]] - - df_ch = pd.DataFrame(d) - df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) - event_from_filename.append(keys[k] + str(j)) - - path_chev = glob.glob(os.path.join(filepath, "*chev*")) - path_chod = glob.glob(os.path.join(filepath, "*chod*")) - path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) - path_event = glob.glob(os.path.join(filepath, "event*")) - # path_sig = glob.glob(os.path.join(filepath, 'sig*')) - path_chev_chod_chpr = [path_chev, path_chod, path_chpr] - if ( - ("data_np_v2" in flag_arr or "data_np" in flag_arr) - and ("event_np" in flag_arr) - and (i == len(path) - 1) - ) or ( - ("data_np_v2" in flag_arr or "data_np" in flag_arr) and (i == len(path) - 1) - ): # i==len(path)-1 and or 'event_np' in flag - num_path_chev, num_path_chod, num_path_chpr = len(path_chev), len(path_chod), len(path_chpr) - arr_len, no_ch = [], [] - for i in range(len(path_chev_chod_chpr)): - if len(path_chev_chod_chpr[i]) > 0: - arr_len.append(len(path_chev_chod_chpr[i])) - else: - continue - - unique_arr_len = np.unique(np.array(arr_len)) - if "data_np_v2" in flag_arr: - if ts_unit == "seconds": - divisor = 1 - elif ts_unit == "milliseconds": - divisor = 1e3 - else: - divisor = 1e6 - else: - divisor = 1000 - - for j in range(len(path_event)): - df_event = pd.read_csv(path_event[j]) - df_chev = pd.read_csv(path_chev[0]) - df_event["timestamps"] = (df_event["timestamps"] - df_chev["timestamps"][0]) / divisor - df_event.to_csv(path_event[j], 
index=False) - if unique_arr_len.shape[0] == 1: - for j in range(len(path_chev)): - if file + "chev" in indices_dict.keys(): - df_chev = pd.read_csv(path_chev[j]) - df_chev["timestamps"] = (df_chev["timestamps"] - df_chev["timestamps"][0]) / divisor - df_chev["sampling_rate"] = np.full(df_chev.shape[0], np.nan) - df_chev.at[0, "sampling_rate"] = df_chev.shape[0] / ( - df_chev["timestamps"].iloc[-1] - df_chev["timestamps"].iloc[0] - ) - df_chev.to_csv(path_chev[j], index=False) - - if file + "chod" in indices_dict.keys(): - df_chod = pd.read_csv(path_chod[j]) - df_chod["timestamps"] = df_chev["timestamps"] - df_chod["sampling_rate"] = np.full(df_chod.shape[0], np.nan) - df_chod.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] - df_chod.to_csv(path_chod[j], index=False) - - if file + "chpr" in indices_dict.keys(): - df_chpr = pd.read_csv(path_chpr[j]) - df_chpr["timestamps"] = df_chev["timestamps"] - df_chpr["sampling_rate"] = np.full(df_chpr.shape[0], np.nan) - df_chpr.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] - df_chpr.to_csv(path_chpr[j], index=False) - else: - logger.error("Number of channels should be same for all regions.") - raise Exception("Number of channels should be same for all regions.") - else: - pass - logger.info("Importing of either NPM or Doric or csv file is done.") - return event_from_filename, flag_arr - -# ---------------------------------------------------------------------------------------------------------------------- -# Functions that import_np_doric_csv uses -# ---------------------------------------------------------------------------------------------------------------------- - -def read_doric(filepath): - with h5py.File(filepath, "r") as f: - if "Traces" in list(f.keys()): - keys = access_keys_doricV1(f) - elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - keys = access_keys_doricV6(f) - - return keys - - -def check_header(df): - arr = list(df.columns) - check_float = [] - for i in arr: - try: - check_float.append(float(i)) - except: - pass - - return arr, check_float - -# function to decide indices of interleaved channels -# in neurophotometrics data -def decide_indices(file, df, flag, num_ch=2): - ch_name = [file + "chev", file + "chod", file + "chpr"] - if len(ch_name) < num_ch: - logger.error( - "Number of channels parameters in Input Parameters GUI is more than 3. \ - Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - raise Exception( - "Number of channels parameters in Input Parameters GUI is more than 3. \ - Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." 
- ) - if flag == "data_np": - indices_dict = dict() - for i in range(num_ch): - indices_dict[ch_name[i]] = np.arange(i, df.shape[0], num_ch) - - else: - cols = np.array(list(df.columns)) - if "flags" in np.char.lower(np.array(cols)): - arr = ["FrameCounter", "Flags"] - state = np.array(df["Flags"]) - elif "ledstate" in np.char.lower(np.array(cols)): - arr = ["FrameCounter", "LedState"] - state = np.array(df["LedState"]) - else: - logger.error( - "File type shows Neurophotometrics newer version \ - data but column names does not have Flags or LedState" - ) - raise Exception( - "File type shows Neurophotometrics newer version \ - data but column names does not have Flags or LedState" - ) - - num_ch, ch = check_channels(state) - indices_dict = dict() - for i in range(num_ch): - first_occurrence = np.where(state == ch[i])[0] - indices_dict[ch_name[i]] = np.arange(first_occurrence[0], df.shape[0], num_ch) - - df = df.drop(arr, axis=1) - - return df, indices_dict, num_ch - - -# function to decide NPM timestamps unit (seconds, ms or us) -def decide_ts_unit_for_npm(df, timestamp_column_name=None, time_unit=None, headless=False): - col_names = np.array(list(df.columns)) - col_names_ts = [""] - for name in col_names: - if "timestamp" in name.lower(): - col_names_ts.append(name) - - ts_unit = "seconds" - if len(col_names_ts) > 2: - # Headless path: auto-select column/unit without any UI - if headless: - if timestamp_column_name is not None: - assert ( - timestamp_column_name in col_names_ts - ), f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" - chosen = timestamp_column_name - else: - chosen = col_names_ts[1] - df.insert(1, "Timestamp", df[chosen]) - df = df.drop(col_names_ts[1:], axis=1) - valid_units = {"seconds", "milliseconds", "microseconds"} - ts_unit = time_unit if (isinstance(time_unit, str) and time_unit in valid_units) else "seconds" - return df, ts_unit - # def comboBoxSelected(event): - # logger.info(event.widget.get()) - - window = tk.Tk() - window.title("Select appropriate options for timestamps") - window.geometry("500x200") - holdComboboxValues = dict() - - timestamps_label = ttk.Label(window, text="Select which timestamps to use : ").grid( - row=0, column=1, pady=25, padx=25 - ) - holdComboboxValues["timestamps"] = StringVar() - timestamps_combo = ttk.Combobox(window, values=col_names_ts, textvariable=holdComboboxValues["timestamps"]) - timestamps_combo.grid(row=0, column=2, pady=25, padx=25) - timestamps_combo.current(0) - # timestamps_combo.bind("<>", comboBoxSelected) - - time_unit_label = ttk.Label(window, text="Select timestamps unit : ").grid(row=1, column=1, pady=25, padx=25) - holdComboboxValues["time_unit"] = StringVar() - time_unit_combo = ttk.Combobox( - window, values=["", "seconds", "milliseconds", "microseconds"], textvariable=holdComboboxValues["time_unit"] - ) - time_unit_combo.grid(row=1, column=2, pady=25, padx=25) - time_unit_combo.current(0) - # time_unit_combo.bind("<>", comboBoxSelected) - window.lift() - window.after(500, lambda: window.lift()) - window.mainloop() - - if holdComboboxValues["timestamps"].get(): - df.insert(1, "Timestamp", df[holdComboboxValues["timestamps"].get()]) - df = df.drop(col_names_ts[1:], axis=1) - else: - messagebox.showerror( - "All options not selected", - "All the options for timestamps \ - were not selected. Please select appropriate options", - ) - logger.error( - "All the options for timestamps \ - were not selected. 
Please select appropriate options" - ) - raise Exception( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - if holdComboboxValues["time_unit"].get(): - if holdComboboxValues["time_unit"].get() == "seconds": - ts_unit = holdComboboxValues["time_unit"].get() - elif holdComboboxValues["time_unit"].get() == "milliseconds": - ts_unit = holdComboboxValues["time_unit"].get() - else: - ts_unit = holdComboboxValues["time_unit"].get() - else: - messagebox.showerror( - "All options not selected", - "All the options for timestamps \ - were not selected. Please select appropriate options", - ) - logger.error( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - raise Exception( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - else: - pass - - return df, ts_unit - - -# ---------------------------------------------------------------------------------------------------------------------- -# Functions that read_doric uses -# ---------------------------------------------------------------------------------------------------------------------- - -def access_keys_doricV6(doric_file): - data = [doric_file["DataAcquisition"]] - res = [] - while len(data) != 0: - members = len(data) - while members != 0: - members -= 1 - data, last_element = separate_last_element(data) - if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): - res.append(last_element.name) - elif isinstance(last_element, h5py.Group): - data.extend(reversed([last_element[k] for k in last_element.keys()])) - - keys = [] - for element in res: - sep_values = element.split("/") - if sep_values[-1] == "Values": - keys.append(f"{sep_values[-3]}/{sep_values[-2]}") - else: - keys.append(f"{sep_values[-2]}/{sep_values[-1]}") - - return keys - - -def access_keys_doricV1(doric_file): - keys = list(doric_file["Traces"]["Console"].keys()) - keys.remove("Time(s)") - - return keys - -# ---------------------------------------------------------------------------------------------------------------------- -# Functions that decide_indices uses -# ---------------------------------------------------------------------------------------------------------------------- - -# check flag consistency in neurophotometrics data -def check_channels(state): - state = state.astype(int) - unique_state = np.unique(state[2:12]) - if unique_state.shape[0] > 3: - logger.error( - "Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - raise Exception( - "Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - - return unique_state.shape[0], unique_state - - -# ---------------------------------------------------------------------------------------------------------------------- -# Functions that access_keys_doricV6 uses -# ---------------------------------------------------------------------------------------------------------------------- -def separate_last_element(arr): - l = arr[-1] - return arr[:-1], l \ No newline at end of file From e7ac4d8982da9383b1d50dccfde9c50f7171e90c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 18 Nov 2025 11:15:58 -0800 Subject: [PATCH 012/150] Split tdt_step3.py off from read_raw_data.py. 
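This commit moves the shared HDF5 writer into guppy.common_step3 and the TDT tev reader into guppy.tdt_step3, with readTevTsq.py importing both from their new homes, as the diff below shows. A minimal usage sketch of the relocated writer, assuming the signature shown in the diff; the store name and output directory are placeholders:

import numpy as np

from guppy.common_step3 import write_hdf5

# Writes a 'timestamps' dataset into <outputPath>/PrtN.hdf5, creating the file if
# needed and resizing/overwriting the dataset if it already exists; '/' and '\'
# in the store name are replaced with '_' when building the filename.
write_hdf5(np.array([12.5, 48.0, 96.2]), "PrtN", "/data/session1_output_1", "timestamps")

readTevTsq.py likewise imports the TDT-specific entry point, execute_readtev, from guppy.tdt_step3.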
--- src/guppy/common_step3.py | 51 +++++++++ src/guppy/readTevTsq.py | 204 +----------------------------------- src/guppy/saveStoresList.py | 1 - src/guppy/tdt_step3.py | 183 ++++++++++++++++++++++++++++++++ 4 files changed, 237 insertions(+), 202 deletions(-) create mode 100644 src/guppy/common_step3.py create mode 100644 src/guppy/tdt_step3.py diff --git a/src/guppy/common_step3.py b/src/guppy/common_step3.py new file mode 100644 index 0000000..4ea5c95 --- /dev/null +++ b/src/guppy/common_step3.py @@ -0,0 +1,51 @@ +import glob +import json +import logging +import multiprocessing as mp +import os +import re +import sys +import time +import warnings +from itertools import repeat + +import h5py +import numpy as np +import pandas as pd +from numpy import float32, float64, int32, int64, uint16 + +logger = logging.getLogger(__name__) + +# function to write data to a hdf5 file +def write_hdf5(data, event, filepath, key): + + # replacing \\ or / in storenames with _ (to avoid errors while saving data) + event = event.replace("\\", "_") + event = event.replace("/", "_") + + op = os.path.join(filepath, event + ".hdf5") + + # if file does not exist create a new file + if not os.path.exists(op): + with h5py.File(op, "w") as f: + if type(data) is np.ndarray: + f.create_dataset(key, data=data, maxshape=(None,), chunks=True) + else: + f.create_dataset(key, data=data) + + # if file already exists, append data to it or add a new key to it + else: + with h5py.File(op, "r+") as f: + if key in list(f.keys()): + if type(data) is np.ndarray: + f[key].resize(data.shape) + arr = f[key] + arr[:] = data + else: + arr = f[key] + arr = data + else: + if type(data) is np.ndarray: + f.create_dataset(key, data=data, maxshape=(None,), chunks=True) + else: + f.create_dataset(key, data=data) \ No newline at end of file diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index 6deb3b1..fe16add 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -14,6 +14,9 @@ import pandas as pd from numpy import float32, float64, int32, int64, uint16 +from guppy.common_step3 import write_hdf5 +from guppy.tdt_step3 import execute_readtev + logger = logging.getLogger(__name__) @@ -91,47 +94,6 @@ def check_doric(filepath): return flag_arr[0] -# check if a particular element is there in an array or not -def ismember(arr, element): - res = [1 if i == element else 0 for i in arr] - return np.asarray(res) - - -# function to write data to a hdf5 file -def write_hdf5(data, event, filepath, key): - - # replacing \\ or / in storenames with _ (to avoid errors while saving data) - event = event.replace("\\", "_") - event = event.replace("/", "_") - - op = os.path.join(filepath, event + ".hdf5") - - # if file does not exist create a new file - if not os.path.exists(op): - with h5py.File(op, "w") as f: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - - # if file already exists, append data to it or add a new key to it - else: - with h5py.File(op, "r+") as f: - if key in list(f.keys()): - if type(data) is np.ndarray: - f[key].resize(data.shape) - arr = f[key] - arr[:] = data - else: - arr = f[key] - arr = data - else: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - - # function to read event timestamps csv file. 
def import_csv(filepath, event, outputPath): logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") @@ -177,166 +139,6 @@ def import_csv(filepath, event, outputPath): return data, key -# function to save data read from tev file to hdf5 file -def save_dict_to_hdf5(S, event, outputPath): - write_hdf5(S["storename"], event, outputPath, "storename") - write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") - write_hdf5(S["timestamps"], event, outputPath, "timestamps") - - write_hdf5(S["data"], event, outputPath, "data") - write_hdf5(S["npoints"], event, outputPath, "npoints") - write_hdf5(S["channels"], event, outputPath, "channels") - - -# function to check event data (checking whether event timestamps belongs to same event or multiple events) -def check_data(S, filepath, event, outputPath): - # logger.info("Checking event storename data for creating multiple event names from single event storename...") - new_event = event.replace("\\", "") - new_event = event.replace("/", "") - diff = np.diff(S["data"]) - arr = np.full(diff.shape[0], 1) - - storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) - - if diff.shape[0] == 0: - return 0 - - if S["sampling_rate"] == 0 and np.all(diff == diff[0]) == False: - logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") - logger.debug( - "\033[1m" + "Create timestamp files for individual new event and change the stores list file." + "\033[0m" - ) - i_d = np.unique(S["data"]) - for i in range(i_d.shape[0]): - new_S = dict() - idx = np.where(S["data"] == i_d[i])[0] - new_S["timestamps"] = S["timestamps"][idx] - new_S["storename"] = new_event + str(int(i_d[i])) - new_S["sampling_rate"] = S["sampling_rate"] - new_S["data"] = S["data"] - new_S["npoints"] = S["npoints"] - new_S["channels"] = S["channels"] - storesList = np.concatenate( - (storesList, [[new_event + str(int(i_d[i]))], [new_event + "_" + str(int(i_d[i]))]]), axis=1 - ) - save_dict_to_hdf5(new_S, new_event + str(int(i_d[i])), outputPath) - - idx = np.where(storesList[0] == event)[0] - storesList = np.delete(storesList, idx, axis=1) - if not os.path.exists(os.path.join(outputPath, ".cache_storesList.csv")): - os.rename(os.path.join(outputPath, "storesList.csv"), os.path.join(outputPath, ".cache_storesList.csv")) - if idx.shape[0] == 0: - pass - else: - np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") - logger.info( - "\033[1m" - + "Timestamp files for individual new event are created \ - and the stores list file is changed." - + "\033[0m" - ) - - -# function to read tev file -def readtev(data, filepath, event, outputPath): - - logger.debug("Reading data for event {} ...".format(event)) - tevfilepath = glob.glob(os.path.join(filepath, "*.tev")) - if len(tevfilepath) > 1: - raise Exception("Two tev files are present at the location.") - else: - tevfilepath = tevfilepath[0] - - data["name"] = np.asarray(data["name"], dtype=str) - - allnames = np.unique(data["name"]) - - index = [] - for i in range(len(allnames)): - length = len(str(allnames[i])) - if length < 4: - index.append(i) - - allnames = np.delete(allnames, index, 0) - - eventNew = np.array(list(event)) - - # logger.info(allnames) - # logger.info(eventNew) - row = ismember(data["name"], event) - - if sum(row) == 0: - logger.info("\033[1m" + "Requested store name " + event + " not found (case-sensitive)." 
+ "\033[0m") - logger.info("\033[1m" + "File contains the following TDT store names:" + "\033[0m") - logger.info("\033[1m" + str(allnames) + "\033[0m") - logger.info("\033[1m" + "TDT store name " + str(event) + " not found." + "\033[0m") - import_csv(filepath, event, outputPath) - - return 0 - - allIndexesWhereEventIsPresent = np.where(row == 1) - first_row = allIndexesWhereEventIsPresent[0][0] - - formatNew = data["format"][first_row] + 1 - - table = np.array( - [ - [0, 0, 0, 0], - [0, "float", 1, np.float32], - [0, "long", 1, np.int32], - [0, "short", 2, np.int16], - [0, "byte", 4, np.int8], - ] - ) - - S = dict() - - S["storename"] = str(event) - S["sampling_rate"] = data["frequency"][first_row] - S["timestamps"] = np.asarray(data["timestamp"][allIndexesWhereEventIsPresent[0]]) - S["channels"] = np.asarray(data["chan"][allIndexesWhereEventIsPresent[0]]) - - fp_loc = np.asarray(data["fp_loc"][allIndexesWhereEventIsPresent[0]]) - data_size = np.asarray(data["size"]) - - if formatNew != 5: - nsample = (data_size[first_row,] - 10) * int(table[formatNew, 2]) - S["data"] = np.zeros((len(fp_loc), nsample)) - for i in range(0, len(fp_loc)): - with open(tevfilepath, "rb") as fp: - fp.seek(fp_loc[i], os.SEEK_SET) - S["data"][i, :] = np.fromfile(fp, dtype=table[formatNew, 3], count=nsample).reshape( - 1, nsample, order="F" - ) - # S['data'] = S['data'].swapaxes() - S["npoints"] = nsample - else: - S["data"] = np.asarray(data["strobe"][allIndexesWhereEventIsPresent[0]]) - S["npoints"] = 1 - S["channels"] = np.tile(1, (S["data"].shape[0],)) - - S["data"] = (S["data"].T).reshape(-1, order="F") - - save_dict_to_hdf5(S, event, outputPath) - - check_data(S, filepath, event, outputPath) - - logger.info("Data for event {} fetched and stored.".format(event)) - - -# function to execute readtev function using multiprocessing to make it faster -def execute_readtev(data, filepath, event, outputPath, numProcesses=mp.cpu_count()): - - start = time.time() - with mp.Pool(numProcesses) as p: - p.starmap(readtev, zip(repeat(data), repeat(filepath), event, repeat(outputPath))) - # p = mp.Pool(mp.cpu_count()) - # p.starmap(readtev, zip(repeat(data), repeat(filepath), event, repeat(outputPath))) - # p.close() - # p.join() - logger.info("Time taken = {0:.5f}".format(time.time() - start)) - - def execute_import_csv(filepath, event, outputPath, numProcesses=mp.cpu_count()): # logger.info("Reading data for event {} ...".format(event)) diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 72dc604..c2867ba 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -23,7 +23,6 @@ from guppy.readTevTsq import import_csv from guppy.tdt_step2 import readtsq -from guppy.np_doric_csv_step2 import import_np_doric_csv from guppy.csv_step2 import import_csv_step2 from guppy.doric_step2 import import_doric from guppy.npm_step2 import import_npm diff --git a/src/guppy/tdt_step3.py b/src/guppy/tdt_step3.py new file mode 100644 index 0000000..04ba0dd --- /dev/null +++ b/src/guppy/tdt_step3.py @@ -0,0 +1,183 @@ +import glob +import json +import logging +import multiprocessing as mp +import os +import re +import sys +import time +import warnings +from itertools import repeat + +import h5py +import numpy as np +import pandas as pd +from numpy import float32, float64, int32, int64, uint16 + +from guppy.common_step3 import write_hdf5 + +logger = logging.getLogger(__name__) + +# function to execute readtev function using multiprocessing to make it faster +def execute_readtev(data, filepath, event, 
outputPath, numProcesses=mp.cpu_count()): + + start = time.time() + with mp.Pool(numProcesses) as p: + p.starmap(readtev, zip(repeat(data), repeat(filepath), event, repeat(outputPath))) + # p = mp.Pool(mp.cpu_count()) + # p.starmap(readtev, zip(repeat(data), repeat(filepath), event, repeat(outputPath))) + # p.close() + # p.join() + logger.info("Time taken = {0:.5f}".format(time.time() - start)) + + +# function to read tev file +def readtev(data, filepath, event, outputPath): + + logger.debug("Reading data for event {} ...".format(event)) + tevfilepath = glob.glob(os.path.join(filepath, "*.tev")) + if len(tevfilepath) > 1: + raise Exception("Two tev files are present at the location.") + else: + tevfilepath = tevfilepath[0] + + data["name"] = np.asarray(data["name"], dtype=str) + + allnames = np.unique(data["name"]) + + index = [] + for i in range(len(allnames)): + length = len(str(allnames[i])) + if length < 4: + index.append(i) + + allnames = np.delete(allnames, index, 0) + + eventNew = np.array(list(event)) + + # logger.info(allnames) + # logger.info(eventNew) + row = ismember(data["name"], event) + + if sum(row) == 0: + logger.info("\033[1m" + "Requested store name " + event + " not found (case-sensitive)." + "\033[0m") + logger.info("\033[1m" + "File contains the following TDT store names:" + "\033[0m") + logger.info("\033[1m" + str(allnames) + "\033[0m") + logger.info("\033[1m" + "TDT store name " + str(event) + " not found." + "\033[0m") + import_csv(filepath, event, outputPath) + + return 0 + + allIndexesWhereEventIsPresent = np.where(row == 1) + first_row = allIndexesWhereEventIsPresent[0][0] + + formatNew = data["format"][first_row] + 1 + + table = np.array( + [ + [0, 0, 0, 0], + [0, "float", 1, np.float32], + [0, "long", 1, np.int32], + [0, "short", 2, np.int16], + [0, "byte", 4, np.int8], + ] + ) + + S = dict() + + S["storename"] = str(event) + S["sampling_rate"] = data["frequency"][first_row] + S["timestamps"] = np.asarray(data["timestamp"][allIndexesWhereEventIsPresent[0]]) + S["channels"] = np.asarray(data["chan"][allIndexesWhereEventIsPresent[0]]) + + fp_loc = np.asarray(data["fp_loc"][allIndexesWhereEventIsPresent[0]]) + data_size = np.asarray(data["size"]) + + if formatNew != 5: + nsample = (data_size[first_row,] - 10) * int(table[formatNew, 2]) + S["data"] = np.zeros((len(fp_loc), nsample)) + for i in range(0, len(fp_loc)): + with open(tevfilepath, "rb") as fp: + fp.seek(fp_loc[i], os.SEEK_SET) + S["data"][i, :] = np.fromfile(fp, dtype=table[formatNew, 3], count=nsample).reshape( + 1, nsample, order="F" + ) + # S['data'] = S['data'].swapaxes() + S["npoints"] = nsample + else: + S["data"] = np.asarray(data["strobe"][allIndexesWhereEventIsPresent[0]]) + S["npoints"] = 1 + S["channels"] = np.tile(1, (S["data"].shape[0],)) + + S["data"] = (S["data"].T).reshape(-1, order="F") + + save_dict_to_hdf5(S, event, outputPath) + + check_data(S, filepath, event, outputPath) + + logger.info("Data for event {} fetched and stored.".format(event)) + +# check if a particular element is there in an array or not +def ismember(arr, element): + res = [1 if i == element else 0 for i in arr] + return np.asarray(res) + + +# function to save data read from tev file to hdf5 file +def save_dict_to_hdf5(S, event, outputPath): + write_hdf5(S["storename"], event, outputPath, "storename") + write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") + write_hdf5(S["timestamps"], event, outputPath, "timestamps") + + write_hdf5(S["data"], event, outputPath, "data") + write_hdf5(S["npoints"], event, 
outputPath, "npoints") + write_hdf5(S["channels"], event, outputPath, "channels") + + +# function to check event data (checking whether event timestamps belongs to same event or multiple events) +def check_data(S, filepath, event, outputPath): + # logger.info("Checking event storename data for creating multiple event names from single event storename...") + new_event = event.replace("\\", "") + new_event = event.replace("/", "") + diff = np.diff(S["data"]) + arr = np.full(diff.shape[0], 1) + + storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) + + if diff.shape[0] == 0: + return 0 + + if S["sampling_rate"] == 0 and np.all(diff == diff[0]) == False: + logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") + logger.debug( + "\033[1m" + "Create timestamp files for individual new event and change the stores list file." + "\033[0m" + ) + i_d = np.unique(S["data"]) + for i in range(i_d.shape[0]): + new_S = dict() + idx = np.where(S["data"] == i_d[i])[0] + new_S["timestamps"] = S["timestamps"][idx] + new_S["storename"] = new_event + str(int(i_d[i])) + new_S["sampling_rate"] = S["sampling_rate"] + new_S["data"] = S["data"] + new_S["npoints"] = S["npoints"] + new_S["channels"] = S["channels"] + storesList = np.concatenate( + (storesList, [[new_event + str(int(i_d[i]))], [new_event + "_" + str(int(i_d[i]))]]), axis=1 + ) + save_dict_to_hdf5(new_S, new_event + str(int(i_d[i])), outputPath) + + idx = np.where(storesList[0] == event)[0] + storesList = np.delete(storesList, idx, axis=1) + if not os.path.exists(os.path.join(outputPath, ".cache_storesList.csv")): + os.rename(os.path.join(outputPath, "storesList.csv"), os.path.join(outputPath, ".cache_storesList.csv")) + if idx.shape[0] == 0: + pass + else: + np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") + logger.info( + "\033[1m" + + "Timestamp files for individual new event are created \ + and the stores list file is changed." + + "\033[0m" + ) \ No newline at end of file From 2f57867030294aae62a6e931864b30c0e341c8d2 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 18 Nov 2025 11:41:03 -0800 Subject: [PATCH 013/150] Hard-coded modality to simplify read. --- src/guppy/readTevTsq.py | 50 +++++++---------------------------------- src/guppy/tdt_step3.py | 42 ++++++++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 49 deletions(-) diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index fe16add..96fd59e 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -33,34 +33,6 @@ def writeToFile(value: str): file.write(value) -# function to read tsq file -def readtsq(filepath): - logger.debug("Trying to read tsq file.") - names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") - formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) - offsets = 0, 4, 8, 12, 14, 16, 24, 24, 32, 36 - tsq_dtype = np.dtype({"names": names, "formats": formats, "offsets": offsets}, align=True) - path = glob.glob(os.path.join(filepath, "*.tsq")) - if len(path) > 1: - logger.error("Two tsq files are present at the location.") - raise Exception("Two tsq files are present at the location.") - elif len(path) == 0: - logger.info("\033[1m" + "tsq file not found." 
+ "\033[1m") - return 0, 0 - else: - path = path[0] - flag = "tsq" - - # reading tsq file - tsq = np.fromfile(path, dtype=tsq_dtype) - - # creating dataframe of the data - df = pd.DataFrame(tsq) - - logger.info("Data from tsq file fetched.") - return df, flag - - # function to check if doric file exists def check_doric(filepath): logger.debug("Checking if doric file exists") @@ -294,13 +266,7 @@ def readRawData(inputParameters): filepath = folderNames[i] logger.debug(f"### Reading raw data for folder {folderNames[i]}") storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - # reading tsq file - data, flag = readtsq(filepath) - # checking if doric file exists - if flag == "tsq": - pass - else: - flag = check_doric(filepath) + modality = "tdt" # read data corresponding to each storename selected by user while saving the storeslist file for j in range(len(storesListPath)): @@ -314,14 +280,14 @@ def readRawData(inputParameters): 2, -1 ) - if isinstance(data, pd.DataFrame) and flag == "tsq": - execute_readtev(data, filepath, np.unique(storesList[0, :]), op, numProcesses) - elif flag == "doric_csv": - execute_import_doric(filepath, storesList, flag, op) - elif flag == "doric_doric": - execute_import_doric(filepath, storesList, flag, op) - else: + if modality == "tdt": + execute_readtev(filepath, np.unique(storesList[0, :]), op, numProcesses) + elif modality == "doric": + execute_import_doric(filepath, storesList, modality, op) + elif modality == "csv" or modality == "npm": execute_import_csv(filepath, np.unique(storesList[0, :]), op, numProcesses) + else: + raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") writeToFile(str(10 + ((step + 1) * 10)) + "\n") step += 1 diff --git a/src/guppy/tdt_step3.py b/src/guppy/tdt_step3.py index 04ba0dd..bc629f0 100644 --- a/src/guppy/tdt_step3.py +++ b/src/guppy/tdt_step3.py @@ -18,8 +18,36 @@ logger = logging.getLogger(__name__) +# function to read tsq file +def readtsq(filepath): + logger.debug("Trying to read tsq file.") + names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") + formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) + offsets = 0, 4, 8, 12, 14, 16, 24, 24, 32, 36 + tsq_dtype = np.dtype({"names": names, "formats": formats, "offsets": offsets}, align=True) + path = glob.glob(os.path.join(filepath, "*.tsq")) + if len(path) > 1: + logger.error("Two tsq files are present at the location.") + raise Exception("Two tsq files are present at the location.") + elif len(path) == 0: + logger.info("\033[1m" + "tsq file not found." + "\033[1m") + return 0, 0 + else: + path = path[0] + flag = "tsq" + + # reading tsq file + tsq = np.fromfile(path, dtype=tsq_dtype) + + # creating dataframe of the data + df = pd.DataFrame(tsq) + + logger.info("Data from tsq file fetched.") + return df, flag + # function to execute readtev function using multiprocessing to make it faster -def execute_readtev(data, filepath, event, outputPath, numProcesses=mp.cpu_count()): +def execute_readtev(filepath, event, outputPath, numProcesses=mp.cpu_count()): + data, _ = readtsq(filepath) start = time.time() with mp.Pool(numProcesses) as p: @@ -60,13 +88,13 @@ def readtev(data, filepath, event, outputPath): row = ismember(data["name"], event) if sum(row) == 0: - logger.info("\033[1m" + "Requested store name " + event + " not found (case-sensitive)." 
+ "\033[0m") - logger.info("\033[1m" + "File contains the following TDT store names:" + "\033[0m") - logger.info("\033[1m" + str(allnames) + "\033[0m") - logger.info("\033[1m" + "TDT store name " + str(event) + " not found." + "\033[0m") - import_csv(filepath, event, outputPath) + logger.error("\033[1m" + "Requested store name " + event + " not found (case-sensitive)." + "\033[0m") + logger.error("\033[1m" + "File contains the following TDT store names:" + "\033[0m") + logger.error("\033[1m" + str(allnames) + "\033[0m") + logger.error("\033[1m" + "TDT store name " + str(event) + " not found." + "\033[0m") + raise ValueError("Requested store name not found.") + - return 0 allIndexesWhereEventIsPresent = np.where(row == 1) first_row = allIndexesWhereEventIsPresent[0][0] From 092e1b7f40934bae192fd428b77e0486c9f516c0 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 18 Nov 2025 13:23:54 -0800 Subject: [PATCH 014/150] Split doric_step3.py off from read_raw_data.py. --- src/guppy/doric_step3.py | 126 +++++++++++++++++++++++++++++++++ src/guppy/readTevTsq.py | 146 +-------------------------------------- 2 files changed, 128 insertions(+), 144 deletions(-) create mode 100644 src/guppy/doric_step3.py diff --git a/src/guppy/doric_step3.py b/src/guppy/doric_step3.py new file mode 100644 index 0000000..792c54e --- /dev/null +++ b/src/guppy/doric_step3.py @@ -0,0 +1,126 @@ +import glob +import logging +import os +import re + +import h5py +import numpy as np +import pandas as pd + +from guppy.common_step3 import write_hdf5 + +logger = logging.getLogger(__name__) + + +def execute_import_doric(filepath, storesList, flag, outputPath): + + if flag == "doric_csv": + path = glob.glob(os.path.join(filepath, "*.csv")) + if len(path) > 1: + logger.error("An error occurred : More than one Doric csv file present at the location") + raise Exception("More than one Doric csv file present at the location") + else: + df = pd.read_csv(path[0], header=1, index_col=False) + df = df.dropna(axis=1, how="all") + df = df.dropna(axis=0, how="any") + df["Time(s)"] = df["Time(s)"] - df["Time(s)"].to_numpy()[0] + for i in range(storesList.shape[1]): + if "control" in storesList[1, i] or "signal" in storesList[1, i]: + timestamps = np.array(df["Time(s)"]) + sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) + write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") + write_hdf5(df["Time(s)"].to_numpy(), storesList[0, i], outputPath, "timestamps") + write_hdf5(df[storesList[0, i]].to_numpy(), storesList[0, i], outputPath, "data") + else: + ttl = df[storesList[0, i]] + indices = np.where(ttl <= 0)[0] + diff_indices = np.where(np.diff(indices) > 1)[0] + write_hdf5( + df["Time(s)"][indices[diff_indices] + 1].to_numpy(), storesList[0, i], outputPath, "timestamps" + ) + else: + path = glob.glob(os.path.join(filepath, "*.doric")) + if len(path) > 1: + logger.error("An error occurred : More than one Doric file present at the location") + raise Exception("More than one Doric file present at the location") + else: + with h5py.File(path[0], "r") as f: + if "Traces" in list(f.keys()): + keys = access_data_doricV1(f, storesList, outputPath) + elif list(f.keys()) == ["Configurations", "DataAcquisition"]: + keys = access_data_doricV6(f, storesList, outputPath) + + + +def access_data_doricV6(doric_file, storesList, outputPath): + data = [doric_file["DataAcquisition"]] + res = [] + while len(data) != 0: + members = len(data) + while members != 0: + members -= 1 + data, last_element = separate_last_element(data) 
+ if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): + res.append(last_element.name) + elif isinstance(last_element, h5py.Group): + data.extend(reversed([last_element[k] for k in last_element.keys()])) + + decide_path = [] + for element in res: + sep_values = element.split("/") + if sep_values[-1] == "Values": + if f"{sep_values[-3]}/{sep_values[-2]}" in storesList[0, :]: + decide_path.append(element) + else: + if f"{sep_values[-2]}/{sep_values[-1]}" in storesList[0, :]: + decide_path.append(element) + + for i in range(storesList.shape[1]): + if "control" in storesList[1, i] or "signal" in storesList[1, i]: + regex = re.compile("(.*?)" + str(storesList[0, i]) + "(.*?)") + idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] + if len(idx) > 1: + logger.error("More than one string matched (which should not be the case)") + raise Exception("More than one string matched (which should not be the case)") + idx = idx[0] + data = np.array(doric_file[decide_path[idx]]) + timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) + sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) + write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") + write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") + write_hdf5(data, storesList[0, i], outputPath, "data") + else: + regex = re.compile("(.*?)" + storesList[0, i] + "$") + idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] + if len(idx) > 1: + logger.error("More than one string matched (which should not be the case)") + raise Exception("More than one string matched (which should not be the case)") + idx = idx[0] + ttl = np.array(doric_file[decide_path[idx]]) + timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) + indices = np.where(ttl <= 0)[0] + diff_indices = np.where(np.diff(indices) > 1)[0] + write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") + + +def access_data_doricV1(doric_file, storesList, outputPath): + keys = list(doric_file["Traces"]["Console"].keys()) + for i in range(storesList.shape[1]): + if "control" in storesList[1, i] or "signal" in storesList[1, i]: + timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) + sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) + data = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) + write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") + write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") + write_hdf5(data, storesList[0, i], outputPath, "data") + else: + timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) + ttl = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) + indices = np.where(ttl <= 0)[0] + diff_indices = np.where(np.diff(indices) > 1)[0] + write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") + + +def separate_last_element(arr): + l = arr[-1] + return arr[:-1], l \ No newline at end of file diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index 96fd59e..6fdee1e 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -16,6 +16,7 @@ from guppy.common_step3 import write_hdf5 from guppy.tdt_step3 import execute_readtev +from guppy.doric_step3 import execute_import_doric logger = logging.getLogger(__name__) @@ -33,39 +34,6 @@ def writeToFile(value: str): 
file.write(value) -# function to check if doric file exists -def check_doric(filepath): - logger.debug("Checking if doric file exists") - path = glob.glob(os.path.join(filepath, "*.csv")) + glob.glob(os.path.join(filepath, "*.doric")) - - flag_arr = [] - for i in range(len(path)): - ext = os.path.basename(path[i]).split(".")[-1] - if ext == "csv": - with warnings.catch_warnings(): - warnings.simplefilter("error") - try: - df = pd.read_csv(path[i], index_col=False, dtype=float) - except: - df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) - flag = "doric_csv" - flag_arr.append(flag) - elif ext == "doric": - flag = "doric_doric" - flag_arr.append(flag) - else: - pass - - if len(flag_arr) > 1: - logger.error("Two doric files are present at the same location") - raise Exception("Two doric files are present at the same location") - if len(flag_arr) == 0: - logger.error("\033[1m" + "Doric file not found." + "\033[1m") - return 0 - logger.info("Doric file found.") - return flag_arr[0] - - # function to read event timestamps csv file. def import_csv(filepath, event, outputPath): logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") @@ -120,27 +88,7 @@ def execute_import_csv(filepath, event, outputPath, numProcesses=mp.cpu_count()) logger.info("Time taken = {0:.5f}".format(time.time() - start)) -def access_data_doricV1(doric_file, storesList, outputPath): - keys = list(doric_file["Traces"]["Console"].keys()) - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: - timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) - sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - data = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") - write_hdf5(data, storesList[0, i], outputPath, "data") - else: - timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) - ttl = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) - indices = np.where(ttl <= 0)[0] - diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") - -def separate_last_element(arr): - l = arr[-1] - return arr[:-1], l def find_string(regex, arr): @@ -149,96 +97,6 @@ def find_string(regex, arr): return i -def access_data_doricV6(doric_file, storesList, outputPath): - data = [doric_file["DataAcquisition"]] - res = [] - while len(data) != 0: - members = len(data) - while members != 0: - members -= 1 - data, last_element = separate_last_element(data) - if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): - res.append(last_element.name) - elif isinstance(last_element, h5py.Group): - data.extend(reversed([last_element[k] for k in last_element.keys()])) - - decide_path = [] - for element in res: - sep_values = element.split("/") - if sep_values[-1] == "Values": - if f"{sep_values[-3]}/{sep_values[-2]}" in storesList[0, :]: - decide_path.append(element) - else: - if f"{sep_values[-2]}/{sep_values[-1]}" in storesList[0, :]: - decide_path.append(element) - - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: - regex = re.compile("(.*?)" + str(storesList[0, i]) + "(.*?)") - idx = [i for i in range(len(decide_path)) 
if regex.match(decide_path[i])] - if len(idx) > 1: - logger.error("More than one string matched (which should not be the case)") - raise Exception("More than one string matched (which should not be the case)") - idx = idx[0] - data = np.array(doric_file[decide_path[idx]]) - timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) - sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") - write_hdf5(data, storesList[0, i], outputPath, "data") - else: - regex = re.compile("(.*?)" + storesList[0, i] + "$") - idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] - if len(idx) > 1: - logger.error("More than one string matched (which should not be the case)") - raise Exception("More than one string matched (which should not be the case)") - idx = idx[0] - ttl = np.array(doric_file[decide_path[idx]]) - timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) - indices = np.where(ttl <= 0)[0] - diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") - - -def execute_import_doric(filepath, storesList, flag, outputPath): - - if flag == "doric_csv": - path = glob.glob(os.path.join(filepath, "*.csv")) - if len(path) > 1: - logger.error("An error occurred : More than one Doric csv file present at the location") - raise Exception("More than one Doric csv file present at the location") - else: - df = pd.read_csv(path[0], header=1, index_col=False) - df = df.dropna(axis=1, how="all") - df = df.dropna(axis=0, how="any") - df["Time(s)"] = df["Time(s)"] - df["Time(s)"].to_numpy()[0] - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: - timestamps = np.array(df["Time(s)"]) - sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(df["Time(s)"].to_numpy(), storesList[0, i], outputPath, "timestamps") - write_hdf5(df[storesList[0, i]].to_numpy(), storesList[0, i], outputPath, "data") - else: - ttl = df[storesList[0, i]] - indices = np.where(ttl <= 0)[0] - diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5( - df["Time(s)"][indices[diff_indices] + 1].to_numpy(), storesList[0, i], outputPath, "timestamps" - ) - else: - path = glob.glob(os.path.join(filepath, "*.doric")) - if len(path) > 1: - logger.error("An error occurred : More than one Doric file present at the location") - raise Exception("More than one Doric file present at the location") - else: - with h5py.File(path[0], "r") as f: - if "Traces" in list(f.keys()): - keys = access_data_doricV1(f, storesList, outputPath) - elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - keys = access_data_doricV6(f, storesList, outputPath) - - # function to read data from 'tsq' and 'tev' files def readRawData(inputParameters): @@ -266,7 +124,7 @@ def readRawData(inputParameters): filepath = folderNames[i] logger.debug(f"### Reading raw data for folder {folderNames[i]}") storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - modality = "tdt" + modality = "doric" # read data corresponding to each storename selected by user while saving the storeslist file for j in range(len(storesListPath)): From 7abb8e09dd475ffcc1fb15156393ec04ac2c5c94 Mon Sep 17 00:00:00 2001 From: 
pauladkisson Date: Tue, 18 Nov 2025 13:31:15 -0800 Subject: [PATCH 015/150] Added check_doric to doric_step3.py. --- src/guppy/doric_step3.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/guppy/doric_step3.py b/src/guppy/doric_step3.py index 792c54e..2c30887 100644 --- a/src/guppy/doric_step3.py +++ b/src/guppy/doric_step3.py @@ -2,6 +2,7 @@ import logging import os import re +import warnings import h5py import numpy as np @@ -11,8 +12,39 @@ logger = logging.getLogger(__name__) +def check_doric(filepath): + logger.debug("Checking if doric file exists") + path = glob.glob(os.path.join(filepath, "*.csv")) + glob.glob(os.path.join(filepath, "*.doric")) + + flag_arr = [] + for i in range(len(path)): + ext = os.path.basename(path[i]).split(".")[-1] + if ext == "csv": + with warnings.catch_warnings(): + warnings.simplefilter("error") + try: + df = pd.read_csv(path[i], index_col=False, dtype=float) + except: + df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) + flag = "doric_csv" + flag_arr.append(flag) + elif ext == "doric": + flag = "doric_doric" + flag_arr.append(flag) + else: + pass + + if len(flag_arr) > 1: + logger.error("Two doric files are present at the same location") + raise Exception("Two doric files are present at the same location") + if len(flag_arr) == 0: + logger.error("\033[1m" + "Doric file not found." + "\033[1m") + return 0 + logger.info("Doric file found.") + return flag_arr[0] def execute_import_doric(filepath, storesList, flag, outputPath): + flag = check_doric(filepath) if flag == "doric_csv": path = glob.glob(os.path.join(filepath, "*.csv")) From b653538fad3acf017085b8943d846a6b633d7d99 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 18 Nov 2025 13:45:55 -0800 Subject: [PATCH 016/150] Split csv_step3.py off from read_raw_data.py. --- src/guppy/csv_step3.py | 73 +++++++++++++++++++++++++++++++++++++ src/guppy/readTevTsq.py | 67 +--------------------------------- src/guppy/saveStoresList.py | 1 - 3 files changed, 75 insertions(+), 66 deletions(-) create mode 100644 src/guppy/csv_step3.py diff --git a/src/guppy/csv_step3.py b/src/guppy/csv_step3.py new file mode 100644 index 0000000..97d3eb5 --- /dev/null +++ b/src/guppy/csv_step3.py @@ -0,0 +1,73 @@ +import glob +import json +import logging +import multiprocessing as mp +import os +import re +import sys +import time +import warnings +from itertools import repeat + +import h5py +import numpy as np +import pandas as pd +from numpy import float32, float64, int32, int64, uint16 + +from guppy.common_step3 import write_hdf5 + +logger = logging.getLogger(__name__) + + +def execute_import_csv(filepath, event, outputPath, numProcesses=mp.cpu_count()): + # logger.info("Reading data for event {} ...".format(event)) + + start = time.time() + with mp.Pool(numProcesses) as p: + p.starmap(import_csv, zip(repeat(filepath), event, repeat(outputPath))) + logger.info("Time taken = {0:.5f}".format(time.time() - start)) + + +# function to read event timestamps csv file. 
+def import_csv(filepath, event, outputPath): + logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") + if not os.path.exists(os.path.join(filepath, event + ".csv")): + logger.error("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") + raise Exception("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") + + df = pd.read_csv(os.path.join(filepath, event + ".csv"), index_col=False) + data = df + key = list(df.columns) + + if len(key) == 3: + arr1 = np.array(["timestamps", "data", "sampling_rate"]) + arr2 = np.char.lower(np.array(key)) + if (np.sort(arr1) == np.sort(arr2)).all() == False: + logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + raise Exception("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + + if len(key) == 1: + if key[0].lower() != "timestamps": + logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + raise Exception("\033[1m" + "Column name should be timestamps" + "\033[0m") + + if len(key) != 3 and len(key) != 1: + logger.error( + "\033[1m" + + "Number of columns in csv file should be either three or one. Three columns if \ + the file is for control or signal data or one column if the file is for event TTLs." + + "\033[0m" + ) + raise Exception( + "\033[1m" + + "Number of columns in csv file should be either three or one. Three columns if \ + the file is for control or signal data or one column if the file is for event TTLs." + + "\033[0m" + ) + + for i in range(len(key)): + write_hdf5(data[key[i]].dropna(), event, outputPath, key[i].lower()) + + logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") + + return data, key \ No newline at end of file diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index 6fdee1e..c080b58 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -17,6 +17,7 @@ from guppy.common_step3 import write_hdf5 from guppy.tdt_step3 import execute_readtev from guppy.doric_step3 import execute_import_doric +from guppy.csv_step3 import execute_import_csv logger = logging.getLogger(__name__) @@ -33,70 +34,6 @@ def writeToFile(value: str): with open(os.path.join(os.path.expanduser("~"), "pbSteps.txt"), "a") as file: file.write(value) - -# function to read event timestamps csv file. 
-def import_csv(filepath, event, outputPath): - logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") - if not os.path.exists(os.path.join(filepath, event + ".csv")): - logger.error("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") - raise Exception("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") - - df = pd.read_csv(os.path.join(filepath, event + ".csv"), index_col=False) - data = df - key = list(df.columns) - - if len(key) == 3: - arr1 = np.array(["timestamps", "data", "sampling_rate"]) - arr2 = np.char.lower(np.array(key)) - if (np.sort(arr1) == np.sort(arr2)).all() == False: - logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - raise Exception("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - - if len(key) == 1: - if key[0].lower() != "timestamps": - logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - raise Exception("\033[1m" + "Column name should be timestamps" + "\033[0m") - - if len(key) != 3 and len(key) != 1: - logger.error( - "\033[1m" - + "Number of columns in csv file should be either three or one. Three columns if \ - the file is for control or signal data or one column if the file is for event TTLs." - + "\033[0m" - ) - raise Exception( - "\033[1m" - + "Number of columns in csv file should be either three or one. Three columns if \ - the file is for control or signal data or one column if the file is for event TTLs." - + "\033[0m" - ) - - for i in range(len(key)): - write_hdf5(data[key[i]].dropna(), event, outputPath, key[i].lower()) - - logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") - - return data, key - - -def execute_import_csv(filepath, event, outputPath, numProcesses=mp.cpu_count()): - # logger.info("Reading data for event {} ...".format(event)) - - start = time.time() - with mp.Pool(numProcesses) as p: - p.starmap(import_csv, zip(repeat(filepath), event, repeat(outputPath))) - logger.info("Time taken = {0:.5f}".format(time.time() - start)) - - - - - -def find_string(regex, arr): - for i in range(len(arr)): - if regex.match(arr[i]): - return i - - # function to read data from 'tsq' and 'tev' files def readRawData(inputParameters): @@ -124,7 +61,7 @@ def readRawData(inputParameters): filepath = folderNames[i] logger.debug(f"### Reading raw data for folder {folderNames[i]}") storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - modality = "doric" + modality = "csv" # read data corresponding to each storename selected by user while saving the storeslist file for j in range(len(storesListPath)): diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index c2867ba..a432546 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -21,7 +21,6 @@ import panel as pn from numpy import float32, float64, int32, int64, uint16 -from guppy.readTevTsq import import_csv from guppy.tdt_step2 import readtsq from guppy.csv_step2 import import_csv_step2 from guppy.doric_step2 import import_doric From 6d661c291a389f16d7e0c109b2510622c7892289 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 18 Nov 2025 15:11:32 -0800 Subject: [PATCH 017/150] Added modality to Step 3. 
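
Step 3 now takes the modality from the input parameters instead of sniffing the raw
files, mirroring what guppy.testing.api now does when it injects input_params["modality"].
A minimal sketch of driving the step with the new key (the folder path is hypothetical,
and any keys beyond the three shown are whatever the rest of readRawData already expects):

    from guppy.readTevTsq import readRawData

    input_params = {
        "folderNames": ["/data/photometry/session1"],  # hypothetical session folder
        "numberOfCores": 0,            # 0 falls back to mp.cpu_count()
        "modality": "tdt",             # one of "tdt", "doric", "csv", "npm"
    }
    readRawData(input_params)
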
--- src/guppy/readTevTsq.py | 2 +- src/guppy/testing/api.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index c080b58..e0bedfa 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -42,6 +42,7 @@ def readRawData(inputParameters): inputParameters = inputParameters folderNames = inputParameters["folderNames"] numProcesses = inputParameters["numberOfCores"] + modality = inputParameters["modality"] storesListPath = [] if numProcesses == 0: numProcesses = mp.cpu_count() @@ -61,7 +62,6 @@ def readRawData(inputParameters): filepath = folderNames[i] logger.debug(f"### Reading raw data for folder {folderNames[i]}") storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - modality = "csv" # read data corresponding to each storename selected by user while saving the storeslist file for j in range(len(storesListPath)): diff --git a/src/guppy/testing/api.py b/src/guppy/testing/api.py index 0e16f23..d7e390d 100644 --- a/src/guppy/testing/api.py +++ b/src/guppy/testing/api.py @@ -237,6 +237,9 @@ def step3( input_params["npm_time_unit"] = npm_time_unit input_params["npm_split_events"] = npm_split_events + # Inject modality + input_params["modality"] = modality + # Call the underlying Step 3 worker directly (no subprocess) readRawData(input_params) @@ -315,6 +318,9 @@ def step4( input_params["npm_time_unit"] = npm_time_unit input_params["npm_split_events"] = npm_split_events + # Inject modality + input_params["modality"] = modality + # Call the underlying Step 4 worker directly (no subprocess) extractTsAndSignal(input_params) @@ -393,6 +399,9 @@ def step5( input_params["npm_time_unit"] = npm_time_unit input_params["npm_split_events"] = npm_split_events + # Inject modality + input_params["modality"] = modality + # Call the underlying Step 5 worker directly (no subprocess) psthForEachStorename(input_params) From a4f6583ecbd3071929551ecf71df4a5716a49791 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 09:12:07 -0800 Subject: [PATCH 018/150] Added tdtRecordingExtractor --- src/guppy/extractors/__init__.py | 1 + .../extractors/tdt_recording_extractor.py | 197 ++++++++++++++++++ src/guppy/readTevTsq.py | 11 +- src/guppy/saveStoresList.py | 5 +- 4 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 src/guppy/extractors/__init__.py create mode 100644 src/guppy/extractors/tdt_recording_extractor.py diff --git a/src/guppy/extractors/__init__.py b/src/guppy/extractors/__init__.py new file mode 100644 index 0000000..249daf9 --- /dev/null +++ b/src/guppy/extractors/__init__.py @@ -0,0 +1 @@ +from .tdt_recording_extractor import TdtRecordingExtractor \ No newline at end of file diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py new file mode 100644 index 0000000..98ae3cd --- /dev/null +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -0,0 +1,197 @@ +import glob +import logging +import os +import numpy as np +from numpy import float32, float64, int32, int64, uint16 +import pandas as pd +import multiprocessing as mp +import time +from itertools import repeat + +from guppy.common_step3 import write_hdf5 + +logger = logging.getLogger(__name__) + +class TdtRecordingExtractor: + + def __init__(self, folder_path): + self.folder_path = folder_path + self.header_df, _ = self.readtsq(folder_path) + + def readtsq(self, folder_path): + logger.debug("Trying to read tsq file.") + names = ("size", "type", "name", 
"chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") + formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) + offsets = 0, 4, 8, 12, 14, 16, 24, 24, 32, 36 + tsq_dtype = np.dtype({"names": names, "formats": formats, "offsets": offsets}, align=True) + path = glob.glob(os.path.join(folder_path, "*.tsq")) + if len(path) > 1: + logger.error("Two tsq files are present at the location.") + raise Exception("Two tsq files are present at the location.") + elif len(path) == 0: + logger.info("\033[1m" + "tsq file not found." + "\033[1m") + return 0, 0 + else: + path = path[0] + flag = "tsq" + + # reading tsq file + tsq = np.fromfile(path, dtype=tsq_dtype) + + # creating dataframe of the data + df = pd.DataFrame(tsq) + + logger.info("Data from tsq file fetched.") + return df, flag + + # function to execute readtev function using multiprocessing to make it faster + def execute_readtev(self, filepath, event, outputPath, numProcesses=mp.cpu_count()): + start = time.time() + with mp.Pool(numProcesses) as p: + p.starmap(self.readtev, zip(repeat(self.header_df), repeat(filepath), event, repeat(outputPath))) + logger.info("Time taken = {0:.5f}".format(time.time() - start)) + + + # function to read tev file + def readtev(self, event): + data = self.header_df + filepath = self.folder_path + + logger.debug("Reading data for event {} ...".format(event)) + tevfilepath = glob.glob(os.path.join(filepath, "*.tev")) + if len(tevfilepath) > 1: + raise Exception("Two tev files are present at the location.") + else: + tevfilepath = tevfilepath[0] + + data["name"] = np.asarray(data["name"], dtype=str) + + allnames = np.unique(data["name"]) + + index = [] + for i in range(len(allnames)): + length = len(str(allnames[i])) + if length < 4: + index.append(i) + + allnames = np.delete(allnames, index, 0) + + eventNew = np.array(list(event)) + + # logger.info(allnames) + # logger.info(eventNew) + row = self.ismember(data["name"], event) + + if sum(row) == 0: + logger.error("\033[1m" + "Requested store name " + event + " not found (case-sensitive)." + "\033[0m") + logger.error("\033[1m" + "File contains the following TDT store names:" + "\033[0m") + logger.error("\033[1m" + str(allnames) + "\033[0m") + logger.error("\033[1m" + "TDT store name " + str(event) + " not found." 
+ "\033[0m") + raise ValueError("Requested store name not found.") + + + + allIndexesWhereEventIsPresent = np.where(row == 1) + first_row = allIndexesWhereEventIsPresent[0][0] + + formatNew = data["format"][first_row] + 1 + + table = np.array( + [ + [0, 0, 0, 0], + [0, "float", 1, np.float32], + [0, "long", 1, np.int32], + [0, "short", 2, np.int16], + [0, "byte", 4, np.int8], + ] + ) + + S = dict() + + S["storename"] = str(event) + S["sampling_rate"] = data["frequency"][first_row] + S["timestamps"] = np.asarray(data["timestamp"][allIndexesWhereEventIsPresent[0]]) + S["channels"] = np.asarray(data["chan"][allIndexesWhereEventIsPresent[0]]) + + fp_loc = np.asarray(data["fp_loc"][allIndexesWhereEventIsPresent[0]]) + data_size = np.asarray(data["size"]) + + if formatNew != 5: + nsample = (data_size[first_row,] - 10) * int(table[formatNew, 2]) + S["data"] = np.zeros((len(fp_loc), nsample)) + for i in range(0, len(fp_loc)): + with open(tevfilepath, "rb") as fp: + fp.seek(fp_loc[i], os.SEEK_SET) + S["data"][i, :] = np.fromfile(fp, dtype=table[formatNew, 3], count=nsample).reshape( + 1, nsample, order="F" + ) + # S['data'] = S['data'].swapaxes() + S["npoints"] = nsample + else: + S["data"] = np.asarray(data["strobe"][allIndexesWhereEventIsPresent[0]]) + S["npoints"] = 1 + S["channels"] = np.tile(1, (S["data"].shape[0],)) + + S["data"] = (S["data"].T).reshape(-1, order="F") + + return S + + # check if a particular element is there in an array or not + def ismember(self, arr, element): # TODO: replace this function with more standard usage + res = [1 if i == element else 0 for i in arr] + return np.asarray(res) + + + # function to save data read from tev file to hdf5 file + def save_dict_to_hdf5(self, S, event, outputPath): + write_hdf5(S["storename"], event, outputPath, "storename") + write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") + write_hdf5(S["timestamps"], event, outputPath, "timestamps") + + write_hdf5(S["data"], event, outputPath, "data") + write_hdf5(S["npoints"], event, outputPath, "npoints") + write_hdf5(S["channels"], event, outputPath, "channels") + + + # function to check event data (checking whether event timestamps belongs to same event or multiple events) + def check_data(self, S, event, outputPath): + # logger.info("Checking event storename data for creating multiple event names from single event storename...") + new_event = event.replace("\\", "") + new_event = event.replace("/", "") + diff = np.diff(S["data"]) + arr = np.full(diff.shape[0], 1) + + storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) + + if diff.shape[0] == 0: + return 0 + + if S["sampling_rate"] == 0 and np.all(diff == diff[0]) == False: + logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") + logger.debug( + "\033[1m" + "Create timestamp files for individual new event and change the stores list file." 
+ "\033[0m" + ) + i_d = np.unique(S["data"]) + for i in range(i_d.shape[0]): + new_S = dict() + idx = np.where(S["data"] == i_d[i])[0] + new_S["timestamps"] = S["timestamps"][idx] + new_S["storename"] = new_event + str(int(i_d[i])) + new_S["sampling_rate"] = S["sampling_rate"] + new_S["data"] = S["data"] + new_S["npoints"] = S["npoints"] + new_S["channels"] = S["channels"] + storesList = np.concatenate( + (storesList, [[new_event + str(int(i_d[i]))], [new_event + "_" + str(int(i_d[i]))]]), axis=1 + ) + self.save_dict_to_hdf5(new_S, new_event + str(int(i_d[i])), outputPath) + + idx = np.where(storesList[0] == event)[0] + storesList = np.delete(storesList, idx, axis=1) + if not os.path.exists(os.path.join(outputPath, ".cache_storesList.csv")): + os.rename(os.path.join(outputPath, "storesList.csv"), os.path.join(outputPath, ".cache_storesList.csv")) + if idx.shape[0] == 0: + pass + else: + np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") + logger.info("\033[1m Timestamp files for individual new event are created and the stores list file is changed.\033[0m") \ No newline at end of file diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index e0bedfa..d3c9147 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -16,6 +16,7 @@ from guppy.common_step3 import write_hdf5 from guppy.tdt_step3 import execute_readtev +from guppy.extractors import TdtRecordingExtractor from guppy.doric_step3 import execute_import_doric from guppy.csv_step3 import execute_import_csv @@ -76,7 +77,15 @@ def readRawData(inputParameters): ) if modality == "tdt": - execute_readtev(filepath, np.unique(storesList[0, :]), op, numProcesses) + # execute_readtev(filepath, np.unique(storesList[0, :]), op, numProcesses) + extractor = TdtRecordingExtractor(folder_path=filepath) + event = np.unique(storesList[0, :]) + for e in event: + S = extractor.readtev(event=e) + extractor.save_dict_to_hdf5(S=S, event=e, outputPath=op) + extractor.check_data(S=S, event=e, outputPath=op) + logger.info("Data for event {} fetched and stored.".format(e)) + elif modality == "doric": execute_import_doric(filepath, storesList, modality, op) elif modality == "csv" or modality == "npm": diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index a432546..79fa71a 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -21,7 +21,7 @@ import panel as pn from numpy import float32, float64, int32, int64, uint16 -from guppy.tdt_step2 import readtsq +from guppy.extractors import TdtRecordingExtractor from guppy.csv_step2 import import_csv_step2 from guppy.doric_step2 import import_doric from guppy.npm_step2 import import_npm @@ -589,7 +589,8 @@ def execute(inputParameters): for i in folderNames: filepath = os.path.join(inputParameters["abspath"], i) if modality == "tdt": - data = readtsq(filepath) + extractor = TdtRecordingExtractor(folder_path=filepath) + data = extractor.header_df event_name, flag = [], [] elif modality == "csv": data = 0 From 882556e8b72fca014f51f3cd71ec2b51b9368b4d Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 09:49:16 -0800 Subject: [PATCH 019/150] Adapted parallel execute function to use new extractor. 
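
With the pool helper now living at module level next to the extractor, the parallel TDT
read can be called directly. A small usage sketch (paths and store names are hypothetical;
the output path is the *_output_* folder created when the stores list was saved in Step 2):

    from guppy.extractors.tdt_recording_extractor import execute_readtev

    execute_readtev(
        folder_path="/data/tdt_session",              # folder holding the .tsq/.tev pair
        events=["Dv1A", "Dv2A", "PrtN"],              # store names chosen in Step 2
        outputPath="/data/tdt_session/sub1_output_1",
        numProcesses=4,
    )
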
--- .../extractors/tdt_recording_extractor.py | 23 +++++++++++-------- src/guppy/readTevTsq.py | 11 ++------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 98ae3cd..c0b01f9 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -12,6 +12,19 @@ logger = logging.getLogger(__name__) +# function to execute readtev function using multiprocessing to make it faster +def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count()): + extractor = TdtRecordingExtractor(folder_path=folder_path) + start = time.time() + with mp.Pool(numProcesses) as p: + p.starmap(read_tdt_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) + logger.info("Time taken = {0:.5f}".format(time.time() - start)) + +def read_tdt_and_save_hdf5(extractor, event, outputPath): + S = extractor.readtev(event=event) + extractor.save_dict_to_hdf5(S=S, event=event, outputPath=outputPath) + logger.info("Data for event {} fetched and stored.".format(event)) + class TdtRecordingExtractor: def __init__(self, folder_path): @@ -43,14 +56,6 @@ def readtsq(self, folder_path): logger.info("Data from tsq file fetched.") return df, flag - - # function to execute readtev function using multiprocessing to make it faster - def execute_readtev(self, filepath, event, outputPath, numProcesses=mp.cpu_count()): - start = time.time() - with mp.Pool(numProcesses) as p: - p.starmap(self.readtev, zip(repeat(self.header_df), repeat(filepath), event, repeat(outputPath))) - logger.info("Time taken = {0:.5f}".format(time.time() - start)) - # function to read tev file def readtev(self, event): @@ -154,7 +159,7 @@ def save_dict_to_hdf5(self, S, event, outputPath): # function to check event data (checking whether event timestamps belongs to same event or multiple events) - def check_data(self, S, event, outputPath): + def check_data(self, S, event, outputPath): # TODO: fold this function into the main read/get function # logger.info("Checking event storename data for creating multiple event names from single event storename...") new_event = event.replace("\\", "") new_event = event.replace("/", "") diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index d3c9147..47b7962 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -16,7 +16,6 @@ from guppy.common_step3 import write_hdf5 from guppy.tdt_step3 import execute_readtev -from guppy.extractors import TdtRecordingExtractor from guppy.doric_step3 import execute_import_doric from guppy.csv_step3 import execute_import_csv @@ -77,14 +76,8 @@ def readRawData(inputParameters): ) if modality == "tdt": - # execute_readtev(filepath, np.unique(storesList[0, :]), op, numProcesses) - extractor = TdtRecordingExtractor(folder_path=filepath) - event = np.unique(storesList[0, :]) - for e in event: - S = extractor.readtev(event=e) - extractor.save_dict_to_hdf5(S=S, event=e, outputPath=op) - extractor.check_data(S=S, event=e, outputPath=op) - logger.info("Data for event {} fetched and stored.".format(e)) + events = np.unique(storesList[0, :]) + execute_readtev(filepath, events, op, numProcesses) elif modality == "doric": execute_import_doric(filepath, storesList, modality, op) From df7b9e160a46723c12193946d2aebaa156fe336c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 11:09:51 -0800 Subject: [PATCH 020/150] Added CsvRecordingExtractor for step 2 --- 
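For reference, the two plain csv layouts this extractor classifies, and the events/flags
it derives from them. A self-contained sketch with throwaway files; the file names and
values are made up:

    import os
    import tempfile

    import pandas as pd

    from guppy.extractors import CsvRecordingExtractor

    folder = tempfile.mkdtemp()

    # "data_csv": exactly the columns timestamps, data, sampling_rate (lower-case)
    pd.DataFrame(
        {"timestamps": [0.0, 0.1], "data": [1.2, 1.3], "sampling_rate": [10.0, None]}
    ).to_csv(os.path.join(folder, "signal_A.csv"), index=False)

    # "event_csv": a single timestamps column
    pd.DataFrame({"timestamps": [2.5, 7.1]}).to_csv(os.path.join(folder, "lick.csv"), index=False)

    extractor = CsvRecordingExtractor(folder_path=folder)
    print(extractor.events)  # ['lick', 'signal_A']  (file basenames, sorted)
    print(extractor.flags)   # ['event_csv', 'data_csv']
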
src/guppy/extractors/__init__.py | 3 +- .../extractors/csv_recording_extractor.py | 115 ++++++++++++++++++ src/guppy/saveStoresList.py | 15 +-- 3 files changed, 123 insertions(+), 10 deletions(-) create mode 100644 src/guppy/extractors/csv_recording_extractor.py diff --git a/src/guppy/extractors/__init__.py b/src/guppy/extractors/__init__.py index 249daf9..812622b 100644 --- a/src/guppy/extractors/__init__.py +++ b/src/guppy/extractors/__init__.py @@ -1 +1,2 @@ -from .tdt_recording_extractor import TdtRecordingExtractor \ No newline at end of file +from .tdt_recording_extractor import TdtRecordingExtractor +from .csv_recording_extractor import CsvRecordingExtractor diff --git a/src/guppy/extractors/csv_recording_extractor.py b/src/guppy/extractors/csv_recording_extractor.py new file mode 100644 index 0000000..f5a73e9 --- /dev/null +++ b/src/guppy/extractors/csv_recording_extractor.py @@ -0,0 +1,115 @@ +import glob +import logging +import os + +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + + +class CsvRecordingExtractor: + + def __init__(self, folder_path): + self.folder_path = folder_path + + logger.debug("If it exists, importing either NPM or Doric or csv file based on the structure of file") + path = sorted(glob.glob(os.path.join(self.folder_path, "*.csv"))) + + path = sorted(list(set(path))) + flag = "None" + event_from_filename = [] + flag_arr = [] + for i in range(len(path)): + ext = os.path.basename(path[i]).split(".")[-1] + assert ext == "csv", "Only .csv files are supported by import_csv function." + df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) + df = df.dropna(axis=1, how="all") + df_arr = np.array(df).flatten() + check_all_str = [] + for element in df_arr: + try: + float(element) + except: + check_all_str.append(i) + assert len(check_all_str) != len( + df_arr + ), "This file appears to be doric .csv. This function only supports standard .csv files." + df = pd.read_csv(path[i], index_col=False) + + _, value = self.check_header(df) + + # check dataframe structure and read data accordingly + if len(value) > 0: + columns_isstr = False + df = pd.read_csv(path[i], header=None) + cols = np.array(list(df.columns), dtype=str) + else: + df = df + columns_isstr = True + cols = np.array(list(df.columns), dtype=str) + # check the structure of dataframe and assign flag to the type of file + if len(cols) == 1: + if cols[0].lower() != "timestamps": + logger.error("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") + raise Exception("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") + else: + flag = "event_csv" + elif len(cols) == 3: + arr1 = np.array(["timestamps", "data", "sampling_rate"]) + arr2 = np.char.lower(np.array(cols)) + if (np.sort(arr1) == np.sort(arr2)).all() == False: + logger.error( + "\033[1m" + + "Column names should be timestamps, data and sampling_rate (all lower-cases)" + + "\033[0m" + ) + raise Exception( + "\033[1m" + + "Column names should be timestamps, data and sampling_rate (all lower-cases)" + + "\033[0m" + ) + else: + flag = "data_csv" + elif len(cols) == 2: + raise ValueError( + "Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data." + ) + elif len(cols) >= 2: + raise ValueError( + "Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data." 
+ ) + else: + logger.error("Number of columns in csv file does not make sense.") + raise Exception("Number of columns in csv file does not make sense.") + + if columns_isstr == True and ( + "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) + ): + flag = flag + "_v2" + else: + flag = flag + + flag_arr.append(flag) + logger.info(flag) + assert ( + flag == "event_csv" or flag == "data_csv" + ), "This function only supports standard event_csv and data_csv files." + name = os.path.basename(path[i]).split(".")[0] + event_from_filename.append(name) + + logger.info("Importing of csv file is done.") + + self.events = event_from_filename + self.flags = flag_arr + + def check_header(self, df): + arr = list(df.columns) + check_float = [] + for i in arr: + try: + check_float.append(float(i)) + except: + pass + + return arr, check_float diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 79fa71a..e64be8c 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -9,21 +9,16 @@ import logging import os import socket -import tkinter as tk from pathlib import Path from random import randint -from tkinter import StringVar, messagebox, ttk -import h5py import holoviews as hv import numpy as np import pandas as pd import panel as pn -from numpy import float32, float64, int32, int64, uint16 -from guppy.extractors import TdtRecordingExtractor -from guppy.csv_step2 import import_csv_step2 from guppy.doric_step2 import import_doric +from guppy.extractors import CsvRecordingExtractor, TdtRecordingExtractor from guppy.npm_step2 import import_npm # hv.extension() @@ -573,7 +568,6 @@ def save_button(event=None): template.show(port=number) - # function to read input parameters and run the saveStorenames function def execute(inputParameters): @@ -594,7 +588,10 @@ def execute(inputParameters): event_name, flag = [], [] elif modality == "csv": data = 0 - event_name, flag = import_csv_step2(filepath) + extractor = CsvRecordingExtractor(folder_path=filepath) + event_name = extractor.events + flag = extractor.flags + elif modality == "doric": data = 0 event_name, flag = import_doric(filepath) @@ -603,7 +600,7 @@ def execute(inputParameters): event_name, flag = import_npm(filepath, num_ch) else: raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") - + saveStorenames(inputParameters, data, event_name, flag, filepath) logger.info("#" * 400) except Exception as e: From bcb78a51d52b54f3126d50260e735c4929da3a4e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 11:10:24 -0800 Subject: [PATCH 021/150] Installed pre-commit. 
--- src/guppy/common_step3.py | 13 ++------ src/guppy/csv_step2.py | 21 +++++++++--- src/guppy/csv_step3.py | 9 +----- src/guppy/doric_step2.py | 9 ++++-- src/guppy/doric_step3.py | 5 +-- .../extractors/tdt_recording_extractor.py | 32 +++++++++++-------- src/guppy/npm_step2.py | 13 +++++--- src/guppy/readTevTsq.py | 13 ++------ src/guppy/savingInputParameters.py | 4 ++- src/guppy/tdt_step2.py | 6 ++-- src/guppy/tdt_step3.py | 12 +++---- 11 files changed, 70 insertions(+), 67 deletions(-) diff --git a/src/guppy/common_step3.py b/src/guppy/common_step3.py index 4ea5c95..09e763f 100644 --- a/src/guppy/common_step3.py +++ b/src/guppy/common_step3.py @@ -1,21 +1,12 @@ -import glob -import json import logging -import multiprocessing as mp import os -import re -import sys -import time -import warnings -from itertools import repeat import h5py import numpy as np -import pandas as pd -from numpy import float32, float64, int32, int64, uint16 logger = logging.getLogger(__name__) + # function to write data to a hdf5 file def write_hdf5(data, event, filepath, key): @@ -48,4 +39,4 @@ def write_hdf5(data, event, filepath, key): if type(data) is np.ndarray: f.create_dataset(key, data=data, maxshape=(None,), chunks=True) else: - f.create_dataset(key, data=data) \ No newline at end of file + f.create_dataset(key, data=data) diff --git a/src/guppy/csv_step2.py b/src/guppy/csv_step2.py index 4d9b800..ba4b34f 100644 --- a/src/guppy/csv_step2.py +++ b/src/guppy/csv_step2.py @@ -1,11 +1,13 @@ import glob import logging import os + import numpy as np import pandas as pd logger = logging.getLogger(__name__) + def check_header(df): arr = list(df.columns) check_float = [] @@ -17,6 +19,7 @@ def check_header(df): return arr, check_float + def import_csv_step2(filepath): logger.debug("If it exists, importing either NPM or Doric or csv file based on the structure of file") path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) @@ -37,7 +40,9 @@ def import_csv_step2(filepath): float(element) except: check_all_str.append(i) - assert len(check_all_str) != len(df_arr), "This file appears to be doric .csv. This function only supports standard .csv files." + assert len(check_all_str) != len( + df_arr + ), "This file appears to be doric .csv. This function only supports standard .csv files." df = pd.read_csv(path[i], index_col=False) _, value = check_header(df) @@ -75,9 +80,13 @@ def import_csv_step2(filepath): else: flag = "data_csv" elif len(cols) == 2: - raise ValueError("Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data.") + raise ValueError( + "Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data." + ) elif len(cols) >= 2: - raise ValueError("Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data.") + raise ValueError( + "Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data." + ) else: logger.error("Number of columns in csv file does not make sense.") raise Exception("Number of columns in csv file does not make sense.") @@ -91,9 +100,11 @@ def import_csv_step2(filepath): flag_arr.append(flag) logger.info(flag) - assert flag == "event_csv" or flag == "data_csv", "This function only supports standard event_csv and data_csv files." + assert ( + flag == "event_csv" or flag == "data_csv" + ), "This function only supports standard event_csv and data_csv files." 
name = os.path.basename(path[i]).split(".")[0] event_from_filename.append(name) logger.info("Importing of csv file is done.") - return event_from_filename, flag_arr \ No newline at end of file + return event_from_filename, flag_arr diff --git a/src/guppy/csv_step3.py b/src/guppy/csv_step3.py index 97d3eb5..985959a 100644 --- a/src/guppy/csv_step3.py +++ b/src/guppy/csv_step3.py @@ -1,18 +1,11 @@ -import glob -import json import logging import multiprocessing as mp import os -import re -import sys import time -import warnings from itertools import repeat -import h5py import numpy as np import pandas as pd -from numpy import float32, float64, int32, int64, uint16 from guppy.common_step3 import write_hdf5 @@ -70,4 +63,4 @@ def import_csv(filepath, event, outputPath): logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") - return data, key \ No newline at end of file + return data, key diff --git a/src/guppy/doric_step2.py b/src/guppy/doric_step2.py index bf402d1..26ab22e 100644 --- a/src/guppy/doric_step2.py +++ b/src/guppy/doric_step2.py @@ -8,6 +8,7 @@ logger = logging.getLogger(__name__) + def import_doric(filepath): logger.debug("If it exists, importing Doric file based on the structure of file") @@ -33,7 +34,9 @@ def import_doric(filepath): float(element) except: check_all_str.append(i) - assert len(check_all_str) == len(df_arr), "This file appears to be standard .csv. This function only supports doric .csv files." + assert len(check_all_str) == len( + df_arr + ), "This file appears to be standard .csv. This function only supports doric .csv files." df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) df = df.drop(["Time(s)"], axis=1) event_from_filename.extend(list(df.columns)) @@ -52,6 +55,7 @@ def read_doric(filepath): return keys + def access_keys_doricV6(doric_file): data = [doric_file["DataAcquisition"]] res = [] @@ -82,6 +86,7 @@ def access_keys_doricV1(doric_file): return keys + def separate_last_element(arr): l = arr[-1] - return arr[:-1], l \ No newline at end of file + return arr[:-1], l diff --git a/src/guppy/doric_step3.py b/src/guppy/doric_step3.py index 2c30887..e9fd7cc 100644 --- a/src/guppy/doric_step3.py +++ b/src/guppy/doric_step3.py @@ -12,6 +12,7 @@ logger = logging.getLogger(__name__) + def check_doric(filepath): logger.debug("Checking if doric file exists") path = glob.glob(os.path.join(filepath, "*.csv")) + glob.glob(os.path.join(filepath, "*.doric")) @@ -43,6 +44,7 @@ def check_doric(filepath): logger.info("Doric file found.") return flag_arr[0] + def execute_import_doric(filepath, storesList, flag, outputPath): flag = check_doric(filepath) @@ -83,7 +85,6 @@ def execute_import_doric(filepath, storesList, flag, outputPath): keys = access_data_doricV6(f, storesList, outputPath) - def access_data_doricV6(doric_file, storesList, outputPath): data = [doric_file["DataAcquisition"]] res = [] @@ -155,4 +156,4 @@ def access_data_doricV1(doric_file, storesList, outputPath): def separate_last_element(arr): l = arr[-1] - return arr[:-1], l \ No newline at end of file + return arr[:-1], l diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index c0b01f9..1d46b1e 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -1,17 +1,19 @@ import glob import logging -import os -import numpy as np -from numpy import float32, float64, int32, int64, uint16 -import pandas as pd import multiprocessing as mp 
+import os import time from itertools import repeat +import numpy as np +import pandas as pd +from numpy import float32, float64, int32, int64, uint16 + from guppy.common_step3 import write_hdf5 logger = logging.getLogger(__name__) + # function to execute readtev function using multiprocessing to make it faster def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count()): extractor = TdtRecordingExtractor(folder_path=folder_path) @@ -20,11 +22,13 @@ def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count() p.starmap(read_tdt_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) logger.info("Time taken = {0:.5f}".format(time.time() - start)) + def read_tdt_and_save_hdf5(extractor, event, outputPath): S = extractor.readtev(event=event) extractor.save_dict_to_hdf5(S=S, event=event, outputPath=outputPath) logger.info("Data for event {} fetched and stored.".format(event)) + class TdtRecordingExtractor: def __init__(self, folder_path): @@ -94,8 +98,6 @@ def readtev(self, event): logger.error("\033[1m" + "TDT store name " + str(event) + " not found." + "\033[0m") raise ValueError("Requested store name not found.") - - allIndexesWhereEventIsPresent = np.where(row == 1) first_row = allIndexesWhereEventIsPresent[0][0] @@ -142,11 +144,10 @@ def readtev(self, event): return S # check if a particular element is there in an array or not - def ismember(self, arr, element): # TODO: replace this function with more standard usage + def ismember(self, arr, element): # TODO: replace this function with more standard usage res = [1 if i == element else 0 for i in arr] return np.asarray(res) - # function to save data read from tev file to hdf5 file def save_dict_to_hdf5(self, S, event, outputPath): write_hdf5(S["storename"], event, outputPath, "storename") @@ -157,16 +158,17 @@ def save_dict_to_hdf5(self, S, event, outputPath): write_hdf5(S["npoints"], event, outputPath, "npoints") write_hdf5(S["channels"], event, outputPath, "channels") - # function to check event data (checking whether event timestamps belongs to same event or multiple events) - def check_data(self, S, event, outputPath): # TODO: fold this function into the main read/get function + def check_data(self, S, event, outputPath): # TODO: fold this function into the main read/get function # logger.info("Checking event storename data for creating multiple event names from single event storename...") new_event = event.replace("\\", "") new_event = event.replace("/", "") diff = np.diff(S["data"]) arr = np.full(diff.shape[0], 1) - storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) + storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape( + 2, -1 + ) if diff.shape[0] == 0: return 0 @@ -174,7 +176,9 @@ def check_data(self, S, event, outputPath): # TODO: fold this function into the if S["sampling_rate"] == 0 and np.all(diff == diff[0]) == False: logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") logger.debug( - "\033[1m" + "Create timestamp files for individual new event and change the stores list file." + "\033[0m" + "\033[1m" + + "Create timestamp files for individual new event and change the stores list file." 
+ + "\033[0m" ) i_d = np.unique(S["data"]) for i in range(i_d.shape[0]): @@ -199,4 +203,6 @@ def check_data(self, S, event, outputPath): # TODO: fold this function into the pass else: np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") - logger.info("\033[1m Timestamp files for individual new event are created and the stores list file is changed.\033[0m") \ No newline at end of file + logger.info( + "\033[1m Timestamp files for individual new event are created and the stores list file is changed.\033[0m" + ) diff --git a/src/guppy/npm_step2.py b/src/guppy/npm_step2.py index f0fafec..14b776f 100644 --- a/src/guppy/npm_step2.py +++ b/src/guppy/npm_step2.py @@ -12,6 +12,7 @@ logger = logging.getLogger(__name__) + def import_npm(filepath, num_ch, inputParameters=None): logger.debug("If it exists, importing NPM file based on the structure of file") @@ -49,7 +50,9 @@ def import_npm(filepath, num_ch, inputParameters=None): float(element) except: check_all_str.append(i) - assert len(check_all_str) != len(df_arr), "This file appears to be doric .csv. This function only supports NPM .csv files." + assert len(check_all_str) != len( + df_arr + ), "This file appears to be doric .csv. This function only supports NPM .csv files." df = pd.read_csv(path[i], index_col=False) _, value = check_header(df) @@ -174,9 +177,7 @@ def import_npm(filepath, num_ch, inputParameters=None): # path_sig = glob.glob(os.path.join(filepath, 'sig*')) path_chev_chod_chpr = [path_chev, path_chod, path_chpr] if ( - ("data_np_v2" in flag_arr or "data_np" in flag_arr) - and ("event_np" in flag_arr) - and (i == len(path) - 1) + ("data_np_v2" in flag_arr or "data_np" in flag_arr) and ("event_np" in flag_arr) and (i == len(path) - 1) ) or ( ("data_np_v2" in flag_arr or "data_np" in flag_arr) and (i == len(path) - 1) ): # i==len(path)-1 and or 'event_np' in flag @@ -234,6 +235,7 @@ def import_npm(filepath, num_ch, inputParameters=None): logger.info("Importing of NPM file is done.") return event_from_filename, flag_arr + def check_header(df): arr = list(df.columns) check_float = [] @@ -294,6 +296,7 @@ def decide_indices(file, df, flag, num_ch=2): return df, indices_dict, num_ch + # check flag consistency in neurophotometrics data def check_channels(state): state = state.astype(int) @@ -405,4 +408,4 @@ def decide_ts_unit_for_npm(df, timestamp_column_name=None, time_unit=None, headl else: pass - return df, ts_unit \ No newline at end of file + return df, ts_unit diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index 47b7962..b86f6a2 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -3,21 +3,13 @@ import logging import multiprocessing as mp import os -import re import sys -import time -import warnings -from itertools import repeat -import h5py import numpy as np -import pandas as pd -from numpy import float32, float64, int32, int64, uint16 -from guppy.common_step3 import write_hdf5 -from guppy.tdt_step3 import execute_readtev -from guppy.doric_step3 import execute_import_doric from guppy.csv_step3 import execute_import_csv +from guppy.doric_step3 import execute_import_doric +from guppy.tdt_step3 import execute_readtev logger = logging.getLogger(__name__) @@ -34,6 +26,7 @@ def writeToFile(value: str): with open(os.path.join(os.path.expanduser("~"), "pbSteps.txt"), "a") as file: file.write(value) + # function to read data from 'tsq' and 'tev' files def readRawData(inputParameters): diff --git a/src/guppy/savingInputParameters.py b/src/guppy/savingInputParameters.py 
index b0a5feb..a1bd35e 100644 --- a/src/guppy/savingInputParameters.py +++ b/src/guppy/savingInputParameters.py @@ -554,7 +554,9 @@ def onclickpsth(event=None): psth_baseline_param = pn.Column(zscore_param_wd, psth_param_wd, baseline_param_wd, peak_param_wd) - widget = pn.Column(mark_down_1, files_1, explain_modality, modality_selector, pn.Row(individual_analysis_wd_2, psth_baseline_param)) + widget = pn.Column( + mark_down_1, files_1, explain_modality, modality_selector, pn.Row(individual_analysis_wd_2, psth_baseline_param) + ) # file_selector = pn.WidgetBox(files_1) styles = dict(background="WhiteSmoke") diff --git a/src/guppy/tdt_step2.py b/src/guppy/tdt_step2.py index 09456a7..130ace8 100644 --- a/src/guppy/tdt_step2.py +++ b/src/guppy/tdt_step2.py @@ -1,12 +1,14 @@ import glob import logging import os + import numpy as np -from numpy import float32, float64, int32, int64, uint16 import pandas as pd +from numpy import float32, float64, int32, int64, uint16 logger = logging.getLogger(__name__) + # function to read 'tsq' file def readtsq(filepath): names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") @@ -23,4 +25,4 @@ def readtsq(filepath): path = path[0] tsq = np.fromfile(path, dtype=tsq_dtype) df = pd.DataFrame(tsq) - return df \ No newline at end of file + return df diff --git a/src/guppy/tdt_step3.py b/src/guppy/tdt_step3.py index bc629f0..be92d4c 100644 --- a/src/guppy/tdt_step3.py +++ b/src/guppy/tdt_step3.py @@ -1,15 +1,10 @@ import glob -import json import logging import multiprocessing as mp import os -import re -import sys import time -import warnings from itertools import repeat -import h5py import numpy as np import pandas as pd from numpy import float32, float64, int32, int64, uint16 @@ -18,6 +13,7 @@ logger = logging.getLogger(__name__) + # function to read tsq file def readtsq(filepath): logger.debug("Trying to read tsq file.") @@ -45,6 +41,7 @@ def readtsq(filepath): logger.info("Data from tsq file fetched.") return df, flag + # function to execute readtev function using multiprocessing to make it faster def execute_readtev(filepath, event, outputPath, numProcesses=mp.cpu_count()): data, _ = readtsq(filepath) @@ -94,8 +91,6 @@ def readtev(data, filepath, event, outputPath): logger.error("\033[1m" + "TDT store name " + str(event) + " not found." + "\033[0m") raise ValueError("Requested store name not found.") - - allIndexesWhereEventIsPresent = np.where(row == 1) first_row = allIndexesWhereEventIsPresent[0][0] @@ -145,6 +140,7 @@ def readtev(data, filepath, event, outputPath): logger.info("Data for event {} fetched and stored.".format(event)) + # check if a particular element is there in an array or not def ismember(arr, element): res = [1 if i == element else 0 for i in arr] @@ -208,4 +204,4 @@ def check_data(S, filepath, event, outputPath): + "Timestamp files for individual new event are created \ and the stores list file is changed." 
+ "\033[0m" - ) \ No newline at end of file + ) From 1c8ee07e09d566578219e08e41bc87a54bb9854a Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 11:33:31 -0800 Subject: [PATCH 022/150] Added CsvRecordingExtractor for step 3 --- src/guppy/extractors/__init__.py | 4 +- .../extractors/csv_recording_extractor.py | 65 +++++++++++++++++++ src/guppy/readTevTsq.py | 7 +- 3 files changed, 71 insertions(+), 5 deletions(-) diff --git a/src/guppy/extractors/__init__.py b/src/guppy/extractors/__init__.py index 812622b..a421290 100644 --- a/src/guppy/extractors/__init__.py +++ b/src/guppy/extractors/__init__.py @@ -1,2 +1,2 @@ -from .tdt_recording_extractor import TdtRecordingExtractor -from .csv_recording_extractor import CsvRecordingExtractor +from .tdt_recording_extractor import TdtRecordingExtractor, execute_readtev +from .csv_recording_extractor import CsvRecordingExtractor, execute_import_csv diff --git a/src/guppy/extractors/csv_recording_extractor.py b/src/guppy/extractors/csv_recording_extractor.py index f5a73e9..3df76f6 100644 --- a/src/guppy/extractors/csv_recording_extractor.py +++ b/src/guppy/extractors/csv_recording_extractor.py @@ -1,13 +1,34 @@ import glob import logging +import multiprocessing as mp import os +import time +from itertools import repeat import numpy as np import pandas as pd +from guppy.common_step3 import write_hdf5 + logger = logging.getLogger(__name__) +def execute_import_csv(filepath, events, outputPath, numProcesses=mp.cpu_count()): + logger.info("Reading data for event {} ...".format(events)) + + extractor = CsvRecordingExtractor(folder_path=filepath) + start = time.time() + with mp.Pool(numProcesses) as p: + p.starmap(read_csv_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) + logger.info("Time taken = {0:.5f}".format(time.time() - start)) + + +def read_csv_and_save_hdf5(extractor, event, outputPath): + df = extractor.read_csv(event=event) + extractor.save_to_hdf5(df=df, event=event, outputPath=outputPath) + logger.info("Data for event {} fetched and stored.".format(event)) + + class CsvRecordingExtractor: def __init__(self, folder_path): @@ -113,3 +134,47 @@ def check_header(self, df): pass return arr, check_float + + def read_csv(self, event): + logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") + if not os.path.exists(os.path.join(self.folder_path, event + ".csv")): + logger.error("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") + raise Exception("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") + + df = pd.read_csv(os.path.join(self.folder_path, event + ".csv"), index_col=False) + return df + + def save_to_hdf5(self, df, event, outputPath): + key = list(df.columns) + + # TODO: clean up these if branches + if len(key) == 3: + arr1 = np.array(["timestamps", "data", "sampling_rate"]) + arr2 = np.char.lower(np.array(key)) + if (np.sort(arr1) == np.sort(arr2)).all() == False: + logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + raise Exception("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + + if len(key) == 1: + if key[0].lower() != "timestamps": + logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + raise Exception("\033[1m" + "Column name should be timestamps" + "\033[0m") + + if len(key) != 3 and len(key) != 1: + logger.error( + "\033[1m" + + "Number of columns in csv file should be either three or one. 
Three columns if \ + the file is for control or signal data or one column if the file is for event TTLs." + + "\033[0m" + ) + raise Exception( + "\033[1m" + + "Number of columns in csv file should be either three or one. Three columns if \ + the file is for control or signal data or one column if the file is for event TTLs." + + "\033[0m" + ) + + for i in range(len(key)): + write_hdf5(df[key[i]].dropna(), event, outputPath, key[i].lower()) + + logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index b86f6a2..c67f075 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -7,9 +7,8 @@ import numpy as np -from guppy.csv_step3 import execute_import_csv from guppy.doric_step3 import execute_import_doric -from guppy.tdt_step3 import execute_readtev +from guppy.extractors import execute_import_csv, execute_readtev logger = logging.getLogger(__name__) @@ -74,8 +73,10 @@ def readRawData(inputParameters): elif modality == "doric": execute_import_doric(filepath, storesList, modality, op) - elif modality == "csv" or modality == "npm": + elif modality == "csv": execute_import_csv(filepath, np.unique(storesList[0, :]), op, numProcesses) + elif modality == "npm": + raise NotImplementedError("NPM modality is not yet implemented.") else: raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") From 9262a5ad3cf21497a6a489183a7b093768cd15cb Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 11:51:24 -0800 Subject: [PATCH 023/150] Added DoricRecordingExtractor for step 2 --- src/guppy/extractors/__init__.py | 1 + src/guppy/saveStoresList.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/guppy/extractors/__init__.py b/src/guppy/extractors/__init__.py index a421290..ebb9fb0 100644 --- a/src/guppy/extractors/__init__.py +++ b/src/guppy/extractors/__init__.py @@ -1,2 +1,3 @@ from .tdt_recording_extractor import TdtRecordingExtractor, execute_readtev from .csv_recording_extractor import CsvRecordingExtractor, execute_import_csv +from .doric_recording_extractor import DoricRecordingExtractor diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index e64be8c..baec41e 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -17,8 +17,11 @@ import pandas as pd import panel as pn -from guppy.doric_step2 import import_doric -from guppy.extractors import CsvRecordingExtractor, TdtRecordingExtractor +from guppy.extractors import ( + CsvRecordingExtractor, + DoricRecordingExtractor, + TdtRecordingExtractor, +) from guppy.npm_step2 import import_npm # hv.extension() @@ -594,7 +597,10 @@ def execute(inputParameters): elif modality == "doric": data = 0 - event_name, flag = import_doric(filepath) + extractor = DoricRecordingExtractor(folder_path=filepath) + event_name = extractor.events + flag = extractor.flags + elif modality == "npm": data = 0 event_name, flag = import_npm(filepath, num_ch) From 9c5afced4ccabb31edc87eaafaa1b54df5d95eb9 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 11:51:49 -0800 Subject: [PATCH 024/150] Added DoricRecordingExtractor for step 2 --- .../extractors/doric_recording_extractor.py | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 src/guppy/extractors/doric_recording_extractor.py diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py new file 
mode 100644 index 0000000..f45df50 --- /dev/null +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -0,0 +1,94 @@ +import glob +import logging +import os + +import h5py +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + + +class DoricRecordingExtractor: + + def __init__(self, folder_path): + self.folder_path = folder_path + logger.debug("If it exists, importing Doric file based on the structure of file") + path = sorted(glob.glob(os.path.join(self.folder_path, "*.csv"))) + sorted( + glob.glob(os.path.join(self.folder_path, "*.doric")) + ) + + path = sorted(list(set(path))) + flag = "None" + event_from_filename = [] + flag_arr = [] + for i in range(len(path)): + ext = os.path.basename(path[i]).split(".")[-1] + if ext == "doric": + key_names = self.read_doric(path[i]) + event_from_filename.extend(key_names) + flag = "doric_doric" + else: + df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) + df = df.dropna(axis=1, how="all") + df_arr = np.array(df).flatten() + check_all_str = [] + for element in df_arr: + try: + float(element) + except: + check_all_str.append(i) + assert len(check_all_str) == len( + df_arr + ), "This file appears to be standard .csv. This function only supports doric .csv files." + df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) + df = df.drop(["Time(s)"], axis=1) + event_from_filename.extend(list(df.columns)) + flag = "doric_csv" + logger.info(flag) + logger.info("Importing of Doric file is done.") + + self.events = event_from_filename + self.flags = flag_arr + + def read_doric(self, filepath): + with h5py.File(filepath, "r") as f: + if "Traces" in list(f.keys()): + keys = self.access_keys_doricV1(f) + elif list(f.keys()) == ["Configurations", "DataAcquisition"]: + keys = self.access_keys_doricV6(f) + + return keys + + def access_keys_doricV6(self, doric_file): + data = [doric_file["DataAcquisition"]] + res = [] + while len(data) != 0: + members = len(data) + while members != 0: + members -= 1 + data, last_element = self.separate_last_element(data) + if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): + res.append(last_element.name) + elif isinstance(last_element, h5py.Group): + data.extend(reversed([last_element[k] for k in last_element.keys()])) + + keys = [] + for element in res: + sep_values = element.split("/") + if sep_values[-1] == "Values": + keys.append(f"{sep_values[-3]}/{sep_values[-2]}") + else: + keys.append(f"{sep_values[-2]}/{sep_values[-1]}") + + return keys + + def access_keys_doricV1(self, doric_file): + keys = list(doric_file["Traces"]["Console"].keys()) + keys.remove("Time(s)") + + return keys + + def separate_last_element(self, arr): + l = arr[-1] + return arr[:-1], l From 914f23f36b7a4adc9a4edeb9af1a316c146e9586 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 14:22:40 -0800 Subject: [PATCH 025/150] Added DoricRecordingExtractor for step 3 --- src/guppy/extractors/__init__.py | 2 +- .../extractors/doric_recording_extractor.py | 152 ++++++++++++++++++ 2 files changed, 153 insertions(+), 1 deletion(-) diff --git a/src/guppy/extractors/__init__.py b/src/guppy/extractors/__init__.py index ebb9fb0..b3c2c3a 100644 --- a/src/guppy/extractors/__init__.py +++ b/src/guppy/extractors/__init__.py @@ -1,3 +1,3 @@ from .tdt_recording_extractor import TdtRecordingExtractor, execute_readtev from .csv_recording_extractor import CsvRecordingExtractor, execute_import_csv -from .doric_recording_extractor import DoricRecordingExtractor +from 
.doric_recording_extractor import DoricRecordingExtractor, execute_import_doric diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index f45df50..cbade8b 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -1,14 +1,31 @@ import glob import logging import os +import re +import warnings import h5py import numpy as np import pandas as pd +from guppy.common_step3 import write_hdf5 + logger = logging.getLogger(__name__) +def execute_import_doric(folder_path, storesList, flag, outputPath): + extractor = DoricRecordingExtractor(folder_path=folder_path) + flag = extractor.check_doric(folder_path) + + if flag == "doric_csv": + extractor.read_doric_csv(folder_path, storesList, outputPath) + elif flag == "doric_doric": + extractor.read_doric_doric(folder_path, storesList, outputPath) + else: + logger.error("Doric file not found or not recognized.") + raise FileNotFoundError("Doric file not found or not recognized.") + + class DoricRecordingExtractor: def __init__(self, folder_path): @@ -92,3 +109,138 @@ def access_keys_doricV1(self, doric_file): def separate_last_element(self, arr): l = arr[-1] return arr[:-1], l + + def check_doric(self, filepath): + logger.debug("Checking if doric file exists") + path = glob.glob(os.path.join(filepath, "*.csv")) + glob.glob(os.path.join(filepath, "*.doric")) + + flag_arr = [] + for i in range(len(path)): + ext = os.path.basename(path[i]).split(".")[-1] + if ext == "csv": + with warnings.catch_warnings(): + warnings.simplefilter("error") + try: + df = pd.read_csv(path[i], index_col=False, dtype=float) + except: # TODO: fix this bare try-except + df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) + flag = "doric_csv" + flag_arr.append(flag) + elif ext == "doric": + flag = "doric_doric" + flag_arr.append(flag) + else: + pass + + if len(flag_arr) > 1: + logger.error("Two doric files are present at the same location") + raise Exception("Two doric files are present at the same location") + if len(flag_arr) == 0: + logger.error("\033[1m" + "Doric file not found." 
+ "\033[1m") + return 0 + logger.info("Doric file found.") + return flag_arr[0] + + def read_doric_csv(self, filepath, storesList, outputPath): + path = glob.glob(os.path.join(filepath, "*.csv")) + if len(path) > 1: + logger.error("An error occurred : More than one Doric csv file present at the location") + raise Exception("More than one Doric csv file present at the location") + else: + df = pd.read_csv(path[0], header=1, index_col=False) + df = df.dropna(axis=1, how="all") + df = df.dropna(axis=0, how="any") + df["Time(s)"] = df["Time(s)"] - df["Time(s)"].to_numpy()[0] + for i in range(storesList.shape[1]): + if "control" in storesList[1, i] or "signal" in storesList[1, i]: + timestamps = np.array(df["Time(s)"]) + sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) + write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") + write_hdf5(df["Time(s)"].to_numpy(), storesList[0, i], outputPath, "timestamps") + write_hdf5(df[storesList[0, i]].to_numpy(), storesList[0, i], outputPath, "data") + else: + ttl = df[storesList[0, i]] + indices = np.where(ttl <= 0)[0] + diff_indices = np.where(np.diff(indices) > 1)[0] + write_hdf5( + df["Time(s)"][indices[diff_indices] + 1].to_numpy(), storesList[0, i], outputPath, "timestamps" + ) + + def read_doric_doric(self, filepath, storesList, outputPath): + path = glob.glob(os.path.join(filepath, "*.doric")) + if len(path) > 1: + logger.error("An error occurred : More than one Doric file present at the location") + raise Exception("More than one Doric file present at the location") + else: + with h5py.File(path[0], "r") as f: + if "Traces" in list(f.keys()): + keys = self.access_data_doricV1(f, storesList, outputPath) + elif list(f.keys()) == ["Configurations", "DataAcquisition"]: + keys = self.access_data_doricV6(f, storesList, outputPath) + + def access_data_doricV6(self, doric_file, storesList, outputPath): + data = [doric_file["DataAcquisition"]] + res = [] + while len(data) != 0: + members = len(data) + while members != 0: + members -= 1 + data, last_element = self.separate_last_element(data) + if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): + res.append(last_element.name) + elif isinstance(last_element, h5py.Group): + data.extend(reversed([last_element[k] for k in last_element.keys()])) + + decide_path = [] + for element in res: + sep_values = element.split("/") + if sep_values[-1] == "Values": + if f"{sep_values[-3]}/{sep_values[-2]}" in storesList[0, :]: + decide_path.append(element) + else: + if f"{sep_values[-2]}/{sep_values[-1]}" in storesList[0, :]: + decide_path.append(element) + + for i in range(storesList.shape[1]): + if "control" in storesList[1, i] or "signal" in storesList[1, i]: + regex = re.compile("(.*?)" + str(storesList[0, i]) + "(.*?)") + idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] + if len(idx) > 1: + logger.error("More than one string matched (which should not be the case)") + raise Exception("More than one string matched (which should not be the case)") + idx = idx[0] + data = np.array(doric_file[decide_path[idx]]) + timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) + sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) + write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") + write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") + write_hdf5(data, storesList[0, i], outputPath, "data") + else: + regex = re.compile("(.*?)" + storesList[0, i] + "$") + idx = [i 
for i in range(len(decide_path)) if regex.match(decide_path[i])] + if len(idx) > 1: + logger.error("More than one string matched (which should not be the case)") + raise Exception("More than one string matched (which should not be the case)") + idx = idx[0] + ttl = np.array(doric_file[decide_path[idx]]) + timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) + indices = np.where(ttl <= 0)[0] + diff_indices = np.where(np.diff(indices) > 1)[0] + write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") + + def access_data_doricV1(self, doric_file, storesList, outputPath): + keys = list(doric_file["Traces"]["Console"].keys()) + for i in range(storesList.shape[1]): + if "control" in storesList[1, i] or "signal" in storesList[1, i]: + timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) + sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) + data = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) + write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") + write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") + write_hdf5(data, storesList[0, i], outputPath, "data") + else: + timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) + ttl = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) + indices = np.where(ttl <= 0)[0] + diff_indices = np.where(np.diff(indices) > 1)[0] + write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") From cd966ae4acf2c07bcae716ed69b403758d7e819f Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 14:34:52 -0800 Subject: [PATCH 026/150] streamlined inputs --- src/guppy/extractors/doric_recording_extractor.py | 2 +- src/guppy/readTevTsq.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index cbade8b..e5a97cb 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -def execute_import_doric(folder_path, storesList, flag, outputPath): +def execute_import_doric(folder_path, storesList, outputPath): extractor = DoricRecordingExtractor(folder_path=folder_path) flag = extractor.check_doric(folder_path) diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index c67f075..c5c52da 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -67,14 +67,13 @@ def readRawData(inputParameters): 2, -1 ) + events = np.unique(storesList[0, :]) if modality == "tdt": - events = np.unique(storesList[0, :]) execute_readtev(filepath, events, op, numProcesses) - elif modality == "doric": - execute_import_doric(filepath, storesList, modality, op) + execute_import_doric(filepath, storesList, op) elif modality == "csv": - execute_import_csv(filepath, np.unique(storesList[0, :]), op, numProcesses) + execute_import_csv(filepath, events, op, numProcesses) elif modality == "npm": raise NotImplementedError("NPM modality is not yet implemented.") else: From ac158de53025dbe370238a0080c71f1dbf9fb9d1 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 15:14:44 -0800 Subject: [PATCH 027/150] Added NpmRecordingExtractor for step 2 --- src/guppy/extractors/__init__.py | 1 + .../extractors/npm_recording_extractor.py | 429 ++++++++++++++++++ 
src/guppy/saveStoresList.py | 6 +- 3 files changed, 434 insertions(+), 2 deletions(-) create mode 100644 src/guppy/extractors/npm_recording_extractor.py diff --git a/src/guppy/extractors/__init__.py b/src/guppy/extractors/__init__.py index b3c2c3a..b876012 100644 --- a/src/guppy/extractors/__init__.py +++ b/src/guppy/extractors/__init__.py @@ -1,3 +1,4 @@ from .tdt_recording_extractor import TdtRecordingExtractor, execute_readtev from .csv_recording_extractor import CsvRecordingExtractor, execute_import_csv from .doric_recording_extractor import DoricRecordingExtractor, execute_import_doric +from .npm_recording_extractor import NpmRecordingExtractor, execute_import_npm diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py new file mode 100644 index 0000000..c15987f --- /dev/null +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -0,0 +1,429 @@ +import glob +import logging +import os +import tkinter as tk +from tkinter import StringVar, messagebox, ttk + +import numpy as np +import pandas as pd +import panel as pn + +pn.extension() + +logger = logging.getLogger(__name__) + + +def execute_import_npm(): + raise NotImplementedError("This function is a placeholder for execute_import_npm functionality.") + + +class NpmRecordingExtractor: + + def __init__(self, folder_path, num_ch, inputParameters=None): + self.folder_path = folder_path + self.num_ch = num_ch + self.inputParameters = inputParameters + self.events, self.flags = self.import_npm( + folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters + ) + + def import_npm(self, folder_path, num_ch, inputParameters=None): + + logger.debug("If it exists, importing NPM file based on the structure of file") + # Headless configuration (used to avoid any UI prompts when running tests) + headless = bool(os.environ.get("GUPPY_BASE_DIR")) + npm_timestamp_column_name = None + npm_time_unit = None + npm_split_events = None + if isinstance(inputParameters, dict): + npm_timestamp_column_name = inputParameters.get("npm_timestamp_column_name") + npm_time_unit = inputParameters.get("npm_time_unit", "seconds") + npm_split_events = inputParameters.get("npm_split_events", True) + path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) + sorted( + glob.glob(os.path.join(folder_path, "*.doric")) + ) + path_chev = glob.glob(os.path.join(folder_path, "*chev*")) + path_chod = glob.glob(os.path.join(folder_path, "*chod*")) + path_chpr = glob.glob(os.path.join(folder_path, "*chpr*")) + path_event = glob.glob(os.path.join(folder_path, "event*")) + # path_sig = glob.glob(os.path.join(filepath, 'sig*')) # TODO: what is this for? + path_chev_chod_event = path_chev + path_chod + path_event + path_chpr + + path = sorted(list(set(path) - set(path_chev_chod_event))) + flag = "None" + event_from_filename = [] + flag_arr = [] + for i in range(len(path)): + dirname = os.path.dirname(path[i]) + ext = os.path.basename(path[i]).split(".")[-1] + assert ext != "doric", "Doric files are not supported by import_npm function." + df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) + df = df.dropna(axis=1, how="all") + df_arr = np.array(df).flatten() + check_all_str = [] + for element in df_arr: + try: + float(element) + except: + check_all_str.append(i) + assert len(check_all_str) != len( + df_arr + ), "This file appears to be doric .csv. This function only supports NPM .csv files." 
+ df = pd.read_csv(path[i], index_col=False) + _, value = self.check_header(df) + + # check dataframe structure and read data accordingly + if len(value) > 0: + columns_isstr = False + df = pd.read_csv(path[i], header=None) + cols = np.array(list(df.columns), dtype=str) + else: + df = df + columns_isstr = True + cols = np.array(list(df.columns), dtype=str) + # check the structure of dataframe and assign flag to the type of file + assert len(cols) != 1, "File appears to be event .csv. This function only supports NPM .csv files." + assert len(cols) != 3, "File appears to be data .csv. This function only supports NPM .csv files." + if len(cols) == 2: + flag = "event_or_data_np" + elif len(cols) >= 2: + flag = "data_np" + else: + logger.error("Number of columns in csv file does not make sense.") + raise Exception("Number of columns in csv file does not make sense.") + + if columns_isstr == True and ( + "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) + ): + flag = flag + "_v2" + else: + flag = flag + + # used assigned flags to process the files and read the data + if flag == "event_or_data_np": + arr = list(df.iloc[:, 1]) + check_float = [True for i in arr if isinstance(i, float)] + if len(arr) == len(check_float) and columns_isstr == False: + flag = "data_np" + elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): + flag = "event_np" + else: + flag = "event_np" + + flag_arr.append(flag) + logger.info(flag) + if flag == "data_np": + file = f"file{str(i)}_" + df, indices_dict, _ = self.decide_indices(file, df, flag, num_ch) + keys = list(indices_dict.keys()) + for k in range(len(keys)): + for j in range(df.shape[1]): + if j == 0: + timestamps = df.iloc[:, j][indices_dict[keys[k]]] + # timestamps_odd = df.iloc[:,j][odd_indices] + else: + d = dict() + d["timestamps"] = timestamps + d["data"] = df.iloc[:, j][indices_dict[keys[k]]] + + df_ch = pd.DataFrame(d) + df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) + event_from_filename.append(keys[k] + str(j)) + + elif flag == "event_np": + type_val = np.array(df.iloc[:, 1]) + type_val_unique = np.unique(type_val) + if headless: + response = 1 if bool(npm_split_events) else 0 + else: + window = tk.Tk() + if len(type_val_unique) > 1: + response = messagebox.askyesno( + "Multiple event TTLs", + "Based on the TTL file,\ + it looks like TTLs \ + belongs to multiple behavior type. 
\ + Do you want to create multiple files for each \ + behavior type ?", + ) + else: + response = 0 + window.destroy() + if response == 1: + timestamps = np.array(df.iloc[:, 0]) + for j in range(len(type_val_unique)): + idx = np.where(type_val == type_val_unique[j]) + d = dict() + d["timestamps"] = timestamps[idx] + df_new = pd.DataFrame(d) + df_new.to_csv(os.path.join(dirname, "event" + str(type_val_unique[j]) + ".csv"), index=False) + event_from_filename.append("event" + str(type_val_unique[j])) + else: + timestamps = np.array(df.iloc[:, 0]) + d = dict() + d["timestamps"] = timestamps + df_new = pd.DataFrame(d) + df_new.to_csv(os.path.join(dirname, "event" + str(0) + ".csv"), index=False) + event_from_filename.append("event" + str(0)) + else: + file = f"file{str(i)}_" + df, ts_unit = self.decide_ts_unit_for_npm( + df, timestamp_column_name=npm_timestamp_column_name, time_unit=npm_time_unit, headless=headless + ) + df, indices_dict, _ = self.decide_indices(file, df, flag) + keys = list(indices_dict.keys()) + for k in range(len(keys)): + for j in range(df.shape[1]): + if j == 0: + timestamps = df.iloc[:, j][indices_dict[keys[k]]] + # timestamps_odd = df.iloc[:,j][odd_indices] + else: + d = dict() + d["timestamps"] = timestamps + d["data"] = df.iloc[:, j][indices_dict[keys[k]]] + + df_ch = pd.DataFrame(d) + df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) + event_from_filename.append(keys[k] + str(j)) + + path_chev = glob.glob(os.path.join(folder_path, "*chev*")) + path_chod = glob.glob(os.path.join(folder_path, "*chod*")) + path_chpr = glob.glob(os.path.join(folder_path, "*chpr*")) + path_event = glob.glob(os.path.join(folder_path, "event*")) + # path_sig = glob.glob(os.path.join(filepath, 'sig*')) + path_chev_chod_chpr = [path_chev, path_chod, path_chpr] + if ( + ("data_np_v2" in flag_arr or "data_np" in flag_arr) + and ("event_np" in flag_arr) + and (i == len(path) - 1) + ) or ( + ("data_np_v2" in flag_arr or "data_np" in flag_arr) and (i == len(path) - 1) + ): # i==len(path)-1 and or 'event_np' in flag + num_path_chev, num_path_chod, num_path_chpr = len(path_chev), len(path_chod), len(path_chpr) + arr_len, no_ch = [], [] + for i in range(len(path_chev_chod_chpr)): + if len(path_chev_chod_chpr[i]) > 0: + arr_len.append(len(path_chev_chod_chpr[i])) + else: + continue + + unique_arr_len = np.unique(np.array(arr_len)) + if "data_np_v2" in flag_arr: + if ts_unit == "seconds": + divisor = 1 + elif ts_unit == "milliseconds": + divisor = 1e3 + else: + divisor = 1e6 + else: + divisor = 1000 + + for j in range(len(path_event)): + df_event = pd.read_csv(path_event[j]) + df_chev = pd.read_csv(path_chev[0]) + df_event["timestamps"] = (df_event["timestamps"] - df_chev["timestamps"][0]) / divisor + df_event.to_csv(path_event[j], index=False) + if unique_arr_len.shape[0] == 1: + for j in range(len(path_chev)): + if file + "chev" in indices_dict.keys(): + df_chev = pd.read_csv(path_chev[j]) + df_chev["timestamps"] = (df_chev["timestamps"] - df_chev["timestamps"][0]) / divisor + df_chev["sampling_rate"] = np.full(df_chev.shape[0], np.nan) + df_chev.at[0, "sampling_rate"] = df_chev.shape[0] / ( + df_chev["timestamps"].iloc[-1] - df_chev["timestamps"].iloc[0] + ) + df_chev.to_csv(path_chev[j], index=False) + + if file + "chod" in indices_dict.keys(): + df_chod = pd.read_csv(path_chod[j]) + df_chod["timestamps"] = df_chev["timestamps"] + df_chod["sampling_rate"] = np.full(df_chod.shape[0], np.nan) + df_chod.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] + 
df_chod.to_csv(path_chod[j], index=False) + + if file + "chpr" in indices_dict.keys(): + df_chpr = pd.read_csv(path_chpr[j]) + df_chpr["timestamps"] = df_chev["timestamps"] + df_chpr["sampling_rate"] = np.full(df_chpr.shape[0], np.nan) + df_chpr.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] + df_chpr.to_csv(path_chpr[j], index=False) + else: + logger.error("Number of channels should be same for all regions.") + raise Exception("Number of channels should be same for all regions.") + logger.info("Importing of NPM file is done.") + return event_from_filename, flag_arr + + def check_header(self, df): + arr = list(df.columns) + check_float = [] + for i in arr: + try: + check_float.append(float(i)) + except: + pass + + return arr, check_float + + # function to decide indices of interleaved channels + # in neurophotometrics data + def decide_indices(self, file, df, flag, num_ch=2): + ch_name = [file + "chev", file + "chod", file + "chpr"] + if len(ch_name) < num_ch: + logger.error( + "Number of channels parameters in Input Parameters GUI is more than 3. \ + Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." + ) + raise Exception( + "Number of channels parameters in Input Parameters GUI is more than 3. \ + Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." + ) + if flag == "data_np": + indices_dict = dict() + for i in range(num_ch): + indices_dict[ch_name[i]] = np.arange(i, df.shape[0], num_ch) + + else: + cols = np.array(list(df.columns)) + if "flags" in np.char.lower(np.array(cols)): + arr = ["FrameCounter", "Flags"] + state = np.array(df["Flags"]) + elif "ledstate" in np.char.lower(np.array(cols)): + arr = ["FrameCounter", "LedState"] + state = np.array(df["LedState"]) + else: + logger.error( + "File type shows Neurophotometrics newer version \ + data but column names does not have Flags or LedState" + ) + raise Exception( + "File type shows Neurophotometrics newer version \ + data but column names does not have Flags or LedState" + ) + + num_ch, ch = self.check_channels(state) + indices_dict = dict() + for i in range(num_ch): + first_occurrence = np.where(state == ch[i])[0] + indices_dict[ch_name[i]] = np.arange(first_occurrence[0], df.shape[0], num_ch) + + df = df.drop(arr, axis=1) + + return df, indices_dict, num_ch + + # check flag consistency in neurophotometrics data + def check_channels(self, state): + state = state.astype(int) + unique_state = np.unique(state[2:12]) + if unique_state.shape[0] > 3: + logger.error( + "Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." + ) + raise Exception( + "Looks like there are more than 3 channels in the file. Reading of these files\ + are not supported. Reach out to us if you get this error message." 
+ ) + + return unique_state.shape[0], unique_state + + # function to decide NPM timestamps unit (seconds, ms or us) + def decide_ts_unit_for_npm(self, df, timestamp_column_name=None, time_unit=None, headless=False): + col_names = np.array(list(df.columns)) + col_names_ts = [""] + for name in col_names: + if "timestamp" in name.lower(): + col_names_ts.append(name) + + ts_unit = "seconds" + if len(col_names_ts) > 2: + # Headless path: auto-select column/unit without any UI + if headless: + if timestamp_column_name is not None: + assert ( + timestamp_column_name in col_names_ts + ), f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" + chosen = timestamp_column_name + else: + chosen = col_names_ts[1] + df.insert(1, "Timestamp", df[chosen]) + df = df.drop(col_names_ts[1:], axis=1) + valid_units = {"seconds", "milliseconds", "microseconds"} + ts_unit = time_unit if (isinstance(time_unit, str) and time_unit in valid_units) else "seconds" + return df, ts_unit + # def comboBoxSelected(event): + # logger.info(event.widget.get()) + + window = tk.Tk() + window.title("Select appropriate options for timestamps") + window.geometry("500x200") + holdComboboxValues = dict() + + timestamps_label = ttk.Label(window, text="Select which timestamps to use : ").grid( + row=0, column=1, pady=25, padx=25 + ) + holdComboboxValues["timestamps"] = StringVar() + timestamps_combo = ttk.Combobox(window, values=col_names_ts, textvariable=holdComboboxValues["timestamps"]) + timestamps_combo.grid(row=0, column=2, pady=25, padx=25) + timestamps_combo.current(0) + # timestamps_combo.bind("<>", comboBoxSelected) + + time_unit_label = ttk.Label(window, text="Select timestamps unit : ").grid( + row=1, column=1, pady=25, padx=25 + ) + holdComboboxValues["time_unit"] = StringVar() + time_unit_combo = ttk.Combobox( + window, + values=["", "seconds", "milliseconds", "microseconds"], + textvariable=holdComboboxValues["time_unit"], + ) + time_unit_combo.grid(row=1, column=2, pady=25, padx=25) + time_unit_combo.current(0) + # time_unit_combo.bind("<>", comboBoxSelected) + window.lift() + window.after(500, lambda: window.lift()) + window.mainloop() + + if holdComboboxValues["timestamps"].get(): + df.insert(1, "Timestamp", df[holdComboboxValues["timestamps"].get()]) + df = df.drop(col_names_ts[1:], axis=1) + else: + messagebox.showerror( + "All options not selected", + "All the options for timestamps \ + were not selected. Please select appropriate options", + ) + logger.error( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + raise Exception( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + if holdComboboxValues["time_unit"].get(): + if holdComboboxValues["time_unit"].get() == "seconds": + ts_unit = holdComboboxValues["time_unit"].get() + elif holdComboboxValues["time_unit"].get() == "milliseconds": + ts_unit = holdComboboxValues["time_unit"].get() + else: + ts_unit = holdComboboxValues["time_unit"].get() + else: + messagebox.showerror( + "All options not selected", + "All the options for timestamps \ + were not selected. Please select appropriate options", + ) + logger.error( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + raise Exception( + "All the options for timestamps \ + were not selected. 
Please select appropriate options" + ) + else: + pass + + return df, ts_unit diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index baec41e..daf7457 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -20,9 +20,9 @@ from guppy.extractors import ( CsvRecordingExtractor, DoricRecordingExtractor, + NpmRecordingExtractor, TdtRecordingExtractor, ) -from guppy.npm_step2 import import_npm # hv.extension() pn.extension() @@ -603,7 +603,9 @@ def execute(inputParameters): elif modality == "npm": data = 0 - event_name, flag = import_npm(filepath, num_ch) + extractor = NpmRecordingExtractor(folder_path=filepath, num_ch=num_ch, inputParameters=inputParameters) + event_name = extractor.events + flag = extractor.flags else: raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") From 6a470a1a9d11c8e1abd6a32de7eaf7390cad1472 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 19 Nov 2025 16:48:57 -0800 Subject: [PATCH 028/150] Added NpmRecordingExtractor for step 3 --- .../extractors/npm_recording_extractor.py | 65 ++++++++++++++++++- src/guppy/readTevTsq.py | 11 +++- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py index c15987f..a8cfd98 100644 --- a/src/guppy/extractors/npm_recording_extractor.py +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -1,20 +1,37 @@ import glob import logging +import multiprocessing as mp import os +import time import tkinter as tk +from itertools import repeat from tkinter import StringVar, messagebox, ttk import numpy as np import pandas as pd import panel as pn +from guppy.common_step3 import write_hdf5 + pn.extension() logger = logging.getLogger(__name__) -def execute_import_npm(): - raise NotImplementedError("This function is a placeholder for execute_import_npm functionality.") +def execute_import_npm(folder_path, num_ch, inputParameters, events, outputPath, numProcesses=mp.cpu_count()): + logger.info("Reading data for event {} ...".format(events)) + + extractor = NpmRecordingExtractor(folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters) + start = time.time() + with mp.Pool(numProcesses) as p: + p.starmap(read_npm_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) + logger.info("Time taken = {0:.5f}".format(time.time() - start)) + + +def read_npm_and_save_hdf5(extractor, event, outputPath): + df = extractor.read_npm(event=event) + extractor.save_to_hdf5(df=df, event=event, outputPath=outputPath) + logger.info("Data for event {} fetched and stored.".format(event)) class NpmRecordingExtractor: @@ -427,3 +444,47 @@ def decide_ts_unit_for_npm(self, df, timestamp_column_name=None, time_unit=None, pass return df, ts_unit + + def read_npm(self, event): + logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") + if not os.path.exists(os.path.join(self.folder_path, event + ".csv")): + logger.error("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") + raise Exception("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") + + df = pd.read_csv(os.path.join(self.folder_path, event + ".csv"), index_col=False) + return df + + def save_to_hdf5(self, df, event, outputPath): + key = list(df.columns) + + # TODO: clean up these if branches + if len(key) == 3: + arr1 = np.array(["timestamps", "data", "sampling_rate"]) + arr2 = np.char.lower(np.array(key)) + if (np.sort(arr1) == 
np.sort(arr2)).all() == False: + logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + raise Exception("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + + if len(key) == 1: + if key[0].lower() != "timestamps": + logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") + raise Exception("\033[1m" + "Column name should be timestamps" + "\033[0m") + + if len(key) != 3 and len(key) != 1: + logger.error( + "\033[1m" + + "Number of columns in csv file should be either three or one. Three columns if \ + the file is for control or signal data or one column if the file is for event TTLs." + + "\033[0m" + ) + raise Exception( + "\033[1m" + + "Number of columns in csv file should be either three or one. Three columns if \ + the file is for control or signal data or one column if the file is for event TTLs." + + "\033[0m" + ) + + for i in range(len(key)): + write_hdf5(df[key[i]].dropna(), event, outputPath, key[i].lower()) + + logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index c5c52da..f2c9419 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -7,8 +7,12 @@ import numpy as np -from guppy.doric_step3 import execute_import_doric -from guppy.extractors import execute_import_csv, execute_readtev +from guppy.extractors import ( + execute_import_csv, + execute_import_doric, + execute_import_npm, + execute_readtev, +) logger = logging.getLogger(__name__) @@ -35,6 +39,7 @@ def readRawData(inputParameters): folderNames = inputParameters["folderNames"] numProcesses = inputParameters["numberOfCores"] modality = inputParameters["modality"] + num_ch = inputParameters["noChannels"] storesListPath = [] if numProcesses == 0: numProcesses = mp.cpu_count() @@ -75,7 +80,7 @@ def readRawData(inputParameters): elif modality == "csv": execute_import_csv(filepath, events, op, numProcesses) elif modality == "npm": - raise NotImplementedError("NPM modality is not yet implemented.") + execute_import_npm(filepath, num_ch, inputParameters, events, op, numProcesses) else: raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") From 9b88cad73cbf64fa7648b4210682aeefff9d2782 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 20 Nov 2025 16:45:18 -0800 Subject: [PATCH 029/150] Add a tdt_check_data example session to the tests. --- tests/test_step2.py | 3 ++- tests/test_step3.py | 3 ++- tests/test_step4.py | 3 ++- tests/test_step5.py | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_step2.py b/tests/test_step2.py index 01d32e2..b34fe64 100644 --- a/tests/test_step2.py +++ b/tests/test_step2.py @@ -87,8 +87,9 @@ { "405R": "control_region", "490R": "signal_region", - "Tick": "ttl", + "PAB/": "ttl", }, + "tdt", ), # TODO: Add sampleData_NPM_1 after fixing Doric vs. NPM determination bug. 
( diff --git a/tests/test_step3.py b/tests/test_step3.py index cfe2294..330d017 100644 --- a/tests/test_step3.py +++ b/tests/test_step3.py @@ -88,8 +88,9 @@ def storenames_map(): { "405R": "control_region", "490R": "signal_region", - "Tick": "ttl", + "PAB/": "ttl", }, + "tdt", ), ( "SampleData_with_artifacts/Photo_048_392-200728-121222", diff --git a/tests/test_step4.py b/tests/test_step4.py index d691d06..cdaf0ec 100644 --- a/tests/test_step4.py +++ b/tests/test_step4.py @@ -92,10 +92,11 @@ { "405R": "control_region", "490R": "signal_region", - "Tick": "ttl", + "PAB/": "ttl", }, "region", "ttl", + "tdt", ), ( "SampleData_with_artifacts/Photo_048_392-200728-121222", diff --git a/tests/test_step5.py b/tests/test_step5.py index ddd6935..4bed772 100644 --- a/tests/test_step5.py +++ b/tests/test_step5.py @@ -92,10 +92,11 @@ { "405R": "control_region", "490R": "signal_region", - "Tick": "ttl", + "PAB/": "ttl", }, "region", "ttl", + "tdt", ), ( "SampleData_with_artifacts/Photo_048_392-200728-121222", From 73e6a1c3586ec361155bde7cec610729412d7041 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 09:28:59 -0800 Subject: [PATCH 030/150] Added event-splitting to tdt --- .../extractors/tdt_recording_extractor.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 1d46b1e..2cc2f15 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -26,6 +26,8 @@ def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count() def read_tdt_and_save_hdf5(extractor, event, outputPath): S = extractor.readtev(event=event) extractor.save_dict_to_hdf5(S=S, event=event, outputPath=outputPath) + if extractor.event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): + extractor.split_event_data(S, event, outputPath) logger.info("Data for event {} fetched and stored.".format(event)) @@ -148,6 +150,53 @@ def ismember(self, arr, element): # TODO: replace this function with more stand res = [1 if i == element else 0 for i in arr] return np.asarray(res) + # TODO: this is broken, and I need to fix it. + def event_needs_splitting(self, data, sampling_rate): + diff = np.diff(data) + if diff.shape[0] == 0: + return False + if sampling_rate == 0 and not (np.all(diff == diff[0])): + return True + return False + + def split_event_data(self, S, event, outputPath): + event = event.replace("\\", "") + event = event.replace("/", "") + logger.info("Checking event storename data for creating multiple event names from single event storename...") + storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape( + 2, -1 + ) + logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") + logger.debug( + "\033[1m" + "Create timestamp files for individual new event and change the stores list file." 
+ "\033[0m" + ) + i_d = np.unique(S["data"]) + for i in range(i_d.shape[0]): + new_S = dict() + idx = np.where(S["data"] == i_d[i])[0] + new_S["timestamps"] = S["timestamps"][idx] + new_S["storename"] = event + str(int(i_d[i])) + new_S["sampling_rate"] = S["sampling_rate"] + new_S["data"] = S["data"] + new_S["npoints"] = S["npoints"] + new_S["channels"] = S["channels"] + storesList = np.concatenate( + (storesList, [[event + str(int(i_d[i]))], [event + "_" + str(int(i_d[i]))]]), axis=1 + ) + self.save_dict_to_hdf5(new_S, event + str(int(i_d[i])), outputPath) + + idx = np.where(storesList[0] == event)[0] + storesList = np.delete(storesList, idx, axis=1) + if not os.path.exists(os.path.join(outputPath, ".cache_storesList.csv")): + os.rename(os.path.join(outputPath, "storesList.csv"), os.path.join(outputPath, ".cache_storesList.csv")) + if idx.shape[0] == 0: + pass + else: + np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") + logger.info( + "\033[1m Timestamp files for individual new event are created and the stores list file is changed.\033[0m" + ) + # function to save data read from tev file to hdf5 file def save_dict_to_hdf5(self, S, event, outputPath): write_hdf5(S["storename"], event, outputPath, "storename") From a036090c79e166a6d454e3997d2b984867e4d469 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 10:12:05 -0800 Subject: [PATCH 031/150] Fixed event vs. new event bug. --- src/guppy/extractors/tdt_recording_extractor.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 2cc2f15..527235f 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -160,8 +160,9 @@ def event_needs_splitting(self, data, sampling_rate): return False def split_event_data(self, S, event, outputPath): - event = event.replace("\\", "") - event = event.replace("/", "") + # Note that new_event is only used for the new storesList and event is still used for the old storesList + new_event = event.replace("\\", "") + new_event = event.replace("/", "") logger.info("Checking event storename data for creating multiple event names from single event storename...") storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape( 2, -1 @@ -175,15 +176,15 @@ def split_event_data(self, S, event, outputPath): new_S = dict() idx = np.where(S["data"] == i_d[i])[0] new_S["timestamps"] = S["timestamps"][idx] - new_S["storename"] = event + str(int(i_d[i])) + new_S["storename"] = new_event + str(int(i_d[i])) new_S["sampling_rate"] = S["sampling_rate"] new_S["data"] = S["data"] new_S["npoints"] = S["npoints"] new_S["channels"] = S["channels"] storesList = np.concatenate( - (storesList, [[event + str(int(i_d[i]))], [event + "_" + str(int(i_d[i]))]]), axis=1 + (storesList, [[new_event + str(int(i_d[i]))], [new_event + "_" + str(int(i_d[i]))]]), axis=1 ) - self.save_dict_to_hdf5(new_S, event + str(int(i_d[i])), outputPath) + self.save_dict_to_hdf5(new_S, new_event + str(int(i_d[i])), outputPath) idx = np.where(storesList[0] == event)[0] storesList = np.delete(storesList, idx, axis=1) From 7ecdf7809454bd5aee9b9b3a3a9164437784edd1 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 10:12:54 -0800 Subject: [PATCH 032/150] Fixed event vs. new event bug. 
--- src/guppy/extractors/tdt_recording_extractor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 527235f..71c8d29 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -150,7 +150,6 @@ def ismember(self, arr, element): # TODO: replace this function with more stand res = [1 if i == element else 0 for i in arr] return np.asarray(res) - # TODO: this is broken, and I need to fix it. def event_needs_splitting(self, data, sampling_rate): diff = np.diff(data) if diff.shape[0] == 0: From b87e79ff4409d889fcdb4536d328f4189043aec8 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 12:07:35 -0800 Subject: [PATCH 033/150] Refactored save_dict_to_hdf5 to compute event from S. --- .../extractors/tdt_recording_extractor.py | 67 +++++-------------- 1 file changed, 15 insertions(+), 52 deletions(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 71c8d29..530ccc5 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -25,7 +25,7 @@ def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count() def read_tdt_and_save_hdf5(extractor, event, outputPath): S = extractor.readtev(event=event) - extractor.save_dict_to_hdf5(S=S, event=event, outputPath=outputPath) + extractor.save_dict_to_hdf5(S=S, outputPath=outputPath) if extractor.event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): extractor.split_event_data(S, event, outputPath) logger.info("Data for event {} fetched and stored.".format(event)) @@ -145,6 +145,17 @@ def readtev(self, event): return S + def read(self, events): + output_dicts = [] + for event in events: + S = self.readtev(event=event) + if self.event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): + event_dicts = self.split_event_data(S, event, None) + else: + event_dicts = [S] + output_dicts.extend(event_dicts) + return output_dicts + # check if a particular element is there in an array or not def ismember(self, arr, element): # TODO: replace this function with more standard usage res = [1 if i == element else 0 for i in arr] @@ -183,7 +194,7 @@ def split_event_data(self, S, event, outputPath): storesList = np.concatenate( (storesList, [[new_event + str(int(i_d[i]))], [new_event + "_" + str(int(i_d[i]))]]), axis=1 ) - self.save_dict_to_hdf5(new_S, new_event + str(int(i_d[i])), outputPath) + self.save_dict_to_hdf5(new_S, outputPath) idx = np.where(storesList[0] == event)[0] storesList = np.delete(storesList, idx, axis=1) @@ -198,7 +209,8 @@ def split_event_data(self, S, event, outputPath): ) # function to save data read from tev file to hdf5 file - def save_dict_to_hdf5(self, S, event, outputPath): + def save_dict_to_hdf5(self, S, outputPath): + event = S["storename"] write_hdf5(S["storename"], event, outputPath, "storename") write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") write_hdf5(S["timestamps"], event, outputPath, "timestamps") @@ -206,52 +218,3 @@ def save_dict_to_hdf5(self, S, event, outputPath): write_hdf5(S["data"], event, outputPath, "data") write_hdf5(S["npoints"], event, outputPath, "npoints") write_hdf5(S["channels"], event, outputPath, "channels") - - # function to check event data (checking whether event timestamps belongs to same event or multiple events) - def check_data(self, S, event, 
outputPath): # TODO: fold this function into the main read/get function - # logger.info("Checking event storename data for creating multiple event names from single event storename...") - new_event = event.replace("\\", "") - new_event = event.replace("/", "") - diff = np.diff(S["data"]) - arr = np.full(diff.shape[0], 1) - - storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape( - 2, -1 - ) - - if diff.shape[0] == 0: - return 0 - - if S["sampling_rate"] == 0 and np.all(diff == diff[0]) == False: - logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") - logger.debug( - "\033[1m" - + "Create timestamp files for individual new event and change the stores list file." - + "\033[0m" - ) - i_d = np.unique(S["data"]) - for i in range(i_d.shape[0]): - new_S = dict() - idx = np.where(S["data"] == i_d[i])[0] - new_S["timestamps"] = S["timestamps"][idx] - new_S["storename"] = new_event + str(int(i_d[i])) - new_S["sampling_rate"] = S["sampling_rate"] - new_S["data"] = S["data"] - new_S["npoints"] = S["npoints"] - new_S["channels"] = S["channels"] - storesList = np.concatenate( - (storesList, [[new_event + str(int(i_d[i]))], [new_event + "_" + str(int(i_d[i]))]]), axis=1 - ) - self.save_dict_to_hdf5(new_S, new_event + str(int(i_d[i])), outputPath) - - idx = np.where(storesList[0] == event)[0] - storesList = np.delete(storesList, idx, axis=1) - if not os.path.exists(os.path.join(outputPath, ".cache_storesList.csv")): - os.rename(os.path.join(outputPath, "storesList.csv"), os.path.join(outputPath, ".cache_storesList.csv")) - if idx.shape[0] == 0: - pass - else: - np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") - logger.info( - "\033[1m Timestamp files for individual new event are created and the stores list file is changed.\033[0m" - ) From 11922663bd537b4c6dddf5f460ddb959ff1cc993 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 12:27:33 -0800 Subject: [PATCH 034/150] Peeled split_event_storesList from split_event_data. 
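With storesList handling peeled off, reading and saving can be driven as two separate calls. A minimal usage sketch of the code as of this patch (the paths are placeholders, the event names are taken from the sample session used in the tests, and the output folder is assumed to already hold a storesList.csv from Step 2, since split_event_storesList() reads and rewrites that file as a side effect):

    from guppy.extractors.tdt_recording_extractor import TdtRecordingExtractor

    folder_path = "path/to/tdt_session"    # placeholder: folder containing the .tsq/.tev files
    output_path = "path/to/output_folder"  # placeholder: must already contain storesList.csv

    extractor = TdtRecordingExtractor(folder_path=folder_path)
    # read() returns one plain dict per store, plus any split sub-events;
    # storesList.csv in output_path is rewritten when an event gets split.
    output_dicts = extractor.read(events=["405R", "490R", "PAB/"], outputPath=output_path)
    for S in output_dicts:
        extractor.save_dict_to_hdf5(S=S, outputPath=output_path)

This mirrors what execute_readtev() does in this patch; later patches wrap the same read/save pair back into the multiprocessing pool.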
--- .../extractors/tdt_recording_extractor.py | 65 +++++++++++++------ 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 530ccc5..0659d3a 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -3,7 +3,6 @@ import multiprocessing as mp import os import time -from itertools import repeat import numpy as np import pandas as pd @@ -14,23 +13,32 @@ logger = logging.getLogger(__name__) -# function to execute readtev function using multiprocessing to make it faster +# # function to execute readtev function using multiprocessing to make it faster +# def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count()): +# extractor = TdtRecordingExtractor(folder_path=folder_path) +# start = time.time() +# with mp.Pool(numProcesses) as p: +# p.starmap(read_tdt_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) +# logger.info("Time taken = {0:.5f}".format(time.time() - start)) + + +# def read_tdt_and_save_hdf5(extractor, event, outputPath): +# S = extractor.readtev(event=event) +# extractor.save_dict_to_hdf5(S=S, outputPath=outputPath) +# if extractor.event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): +# extractor.split_event_data(S, event, outputPath) +# logger.info("Data for event {} fetched and stored.".format(event)) + + def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count()): extractor = TdtRecordingExtractor(folder_path=folder_path) start = time.time() - with mp.Pool(numProcesses) as p: - p.starmap(read_tdt_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) + output_dicts = extractor.read(events=events, outputPath=outputPath) + for S in output_dicts: + extractor.save_dict_to_hdf5(S=S, outputPath=outputPath) logger.info("Time taken = {0:.5f}".format(time.time() - start)) -def read_tdt_and_save_hdf5(extractor, event, outputPath): - S = extractor.readtev(event=event) - extractor.save_dict_to_hdf5(S=S, outputPath=outputPath) - if extractor.event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): - extractor.split_event_data(S, event, outputPath) - logger.info("Data for event {} fetched and stored.".format(event)) - - class TdtRecordingExtractor: def __init__(self, folder_path): @@ -145,12 +153,13 @@ def readtev(self, event): return S - def read(self, events): + def read(self, events, outputPath): output_dicts = [] for event in events: S = self.readtev(event=event) if self.event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): - event_dicts = self.split_event_data(S, event, None) + event_dicts = self.split_event_data(S, event) + self.split_event_storesList(S, event, outputPath) else: event_dicts = [S] output_dicts.extend(event_dicts) @@ -169,19 +178,17 @@ def event_needs_splitting(self, data, sampling_rate): return True return False - def split_event_data(self, S, event, outputPath): + def split_event_data(self, S, event): # Note that new_event is only used for the new storesList and event is still used for the old storesList new_event = event.replace("\\", "") new_event = event.replace("/", "") logger.info("Checking event storename data for creating multiple event names from single event storename...") - storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape( - 2, -1 - ) logger.info("\033[1m" + "Data in event {} belongs to multiple 
behavior".format(event) + "\033[0m") logger.debug( "\033[1m" + "Create timestamp files for individual new event and change the stores list file." + "\033[0m" ) i_d = np.unique(S["data"]) + event_dicts = [S] for i in range(i_d.shape[0]): new_S = dict() idx = np.where(S["data"] == i_d[i])[0] @@ -191,10 +198,30 @@ def split_event_data(self, S, event, outputPath): new_S["data"] = S["data"] new_S["npoints"] = S["npoints"] new_S["channels"] = S["channels"] + event_dicts.append(new_S) + logger.info( + "\033[1m Timestamp files for individual new event are created and the stores list file is changed.\033[0m" + ) + + return event_dicts + + def split_event_storesList(self, S, event, outputPath): + # Note that new_event is only used for the new storesList and event is still used for the old storesList + new_event = event.replace("\\", "") + new_event = event.replace("/", "") + logger.info("Checking event storename data for creating multiple event names from single event storename...") + storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape( + 2, -1 + ) + logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") + logger.debug( + "\033[1m" + "Create timestamp files for individual new event and change the stores list file." + "\033[0m" + ) + i_d = np.unique(S["data"]) + for i in range(i_d.shape[0]): storesList = np.concatenate( (storesList, [[new_event + str(int(i_d[i]))], [new_event + "_" + str(int(i_d[i]))]]), axis=1 ) - self.save_dict_to_hdf5(new_S, outputPath) idx = np.where(storesList[0] == event)[0] storesList = np.delete(storesList, idx, axis=1) From 9231f5fc01192b810eb82d216bee819a06bc934e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 12:31:49 -0800 Subject: [PATCH 035/150] updated logging. --- .../extractors/tdt_recording_extractor.py | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 0659d3a..4743185 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -171,6 +171,7 @@ def ismember(self, arr, element): # TODO: replace this function with more stand return np.asarray(res) def event_needs_splitting(self, data, sampling_rate): + logger.info("Checking event storename data for creating multiple event names from single event storename...") diff = np.diff(data) if diff.shape[0] == 0: return False @@ -182,11 +183,8 @@ def split_event_data(self, S, event): # Note that new_event is only used for the new storesList and event is still used for the old storesList new_event = event.replace("\\", "") new_event = event.replace("/", "") - logger.info("Checking event storename data for creating multiple event names from single event storename...") logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") - logger.debug( - "\033[1m" + "Create timestamp files for individual new event and change the stores list file." + "\033[0m" - ) + logger.debug("\033[1m" + "Create timestamp files for individual new event." 
+ "\033[0m") i_d = np.unique(S["data"]) event_dicts = [S] for i in range(i_d.shape[0]): @@ -199,9 +197,7 @@ def split_event_data(self, S, event): new_S["npoints"] = S["npoints"] new_S["channels"] = S["channels"] event_dicts.append(new_S) - logger.info( - "\033[1m Timestamp files for individual new event are created and the stores list file is changed.\033[0m" - ) + logger.info("\033[1m Timestamp files for individual new event are created.\033[0m") return event_dicts @@ -209,14 +205,11 @@ def split_event_storesList(self, S, event, outputPath): # Note that new_event is only used for the new storesList and event is still used for the old storesList new_event = event.replace("\\", "") new_event = event.replace("/", "") - logger.info("Checking event storename data for creating multiple event names from single event storename...") storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape( 2, -1 ) - logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") - logger.debug( - "\033[1m" + "Create timestamp files for individual new event and change the stores list file." + "\033[0m" - ) + logger.info("\033[1m" + "StoresList in event {} belongs to multiple behavior".format(event) + "\033[0m") + logger.debug("\033[1m" + "Change the stores list file for individual new event." + "\033[0m") i_d = np.unique(S["data"]) for i in range(i_d.shape[0]): storesList = np.concatenate( @@ -231,9 +224,7 @@ def split_event_storesList(self, S, event, outputPath): pass else: np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") - logger.info( - "\033[1m Timestamp files for individual new event are created and the stores list file is changed.\033[0m" - ) + logger.info("\033[1m The stores list file is changed.\033[0m") # function to save data read from tev file to hdf5 file def save_dict_to_hdf5(self, S, outputPath): From ddf6ae5a34effe3e835e5107b18242307dcaa42c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 12:37:10 -0800 Subject: [PATCH 036/150] Added high-level save --- src/guppy/extractors/tdt_recording_extractor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 4743185..a503bb2 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -34,8 +34,7 @@ def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count() extractor = TdtRecordingExtractor(folder_path=folder_path) start = time.time() output_dicts = extractor.read(events=events, outputPath=outputPath) - for S in output_dicts: - extractor.save_dict_to_hdf5(S=S, outputPath=outputPath) + extractor.save(output_dicts=output_dicts, outputPath=outputPath) logger.info("Time taken = {0:.5f}".format(time.time() - start)) @@ -236,3 +235,7 @@ def save_dict_to_hdf5(self, S, outputPath): write_hdf5(S["data"], event, outputPath, "data") write_hdf5(S["npoints"], event, outputPath, "npoints") write_hdf5(S["channels"], event, outputPath, "channels") + + def save(self, output_dicts, outputPath): + for S in output_dicts: + self.save_dict_to_hdf5(S=S, outputPath=outputPath) From 212c7c5a7cf3f22e84804d77762978493d06aa5c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 12:40:03 -0800 Subject: [PATCH 037/150] Added TODO --- src/guppy/extractors/tdt_recording_extractor.py | 3 +++ 1 file changed, 3 insertions(+) diff 
--git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index a503bb2..b5dc670 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -200,6 +200,9 @@ def split_event_data(self, S, event): return event_dicts + # This function saves a new storesList.csv file, which is a bit of a side effect in the overall read path, + # which is supposed to just return a list of dictionaries. + # TODO: long term I'd like to move these storesList shenanigans somewhere else, likely outside of the extractor. def split_event_storesList(self, S, event, outputPath): # Note that new_event is only used for the new storesList and event is still used for the old storesList new_event = event.replace("\\", "") From 33682d26b074ac9f44bc8fd64f9c9bcae5171656 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 12:55:18 -0800 Subject: [PATCH 038/150] Added multi-processing back in. --- .../extractors/tdt_recording_extractor.py | 23 +++++-------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index b5dc670..58cde99 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -3,6 +3,7 @@ import multiprocessing as mp import os import time +from itertools import repeat import numpy as np import pandas as pd @@ -13,28 +14,16 @@ logger = logging.getLogger(__name__) -# # function to execute readtev function using multiprocessing to make it faster -# def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count()): -# extractor = TdtRecordingExtractor(folder_path=folder_path) -# start = time.time() -# with mp.Pool(numProcesses) as p: -# p.starmap(read_tdt_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) -# logger.info("Time taken = {0:.5f}".format(time.time() - start)) - - -# def read_tdt_and_save_hdf5(extractor, event, outputPath): -# S = extractor.readtev(event=event) -# extractor.save_dict_to_hdf5(S=S, outputPath=outputPath) -# if extractor.event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): -# extractor.split_event_data(S, event, outputPath) -# logger.info("Data for event {} fetched and stored.".format(event)) +def read_and_save_tdt(extractor, event, outputPath): + output_dicts = extractor.read(events=[event], outputPath=outputPath) + extractor.save(output_dicts=output_dicts, outputPath=outputPath) def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count()): extractor = TdtRecordingExtractor(folder_path=folder_path) start = time.time() - output_dicts = extractor.read(events=events, outputPath=outputPath) - extractor.save(output_dicts=output_dicts, outputPath=outputPath) + with mp.Pool(numProcesses) as p: + p.starmap(read_and_save_tdt, zip(repeat(extractor), events, repeat(outputPath))) logger.info("Time taken = {0:.5f}".format(time.time() - start)) From f84c550bb181fa51a53587fd4374266746c6c88e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 14:47:45 -0800 Subject: [PATCH 039/150] Fixed test_step5.py for tdt_check_data --- tests/test_step5.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_step5.py b/tests/test_step5.py index 4bed772..870fb7c 100644 --- a/tests/test_step5.py +++ b/tests/test_step5.py @@ -95,7 +95,7 @@ "PAB/": "ttl", }, "region", - "ttl", + ["PAB_0", "PAB_16", "PAB_2064"], # This 
session has an event which gets split into three sub-events. "tdt", ), ( @@ -278,7 +278,13 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r assert os.path.exists(stores_fp), "Missing storesList.csv after Steps 2-5" # Expected PSTH outputs (defaults compute z_score PSTH) - only for datasets with TTLs - if expected_ttl is not None: + if expected_ttl is None: + expected_ttls = [] + elif isinstance(expected_ttl, str): + expected_ttls = [expected_ttl] + else: + expected_ttls = expected_ttl + for expected_ttl in expected_ttls: psth_h5 = os.path.join(out_dir, f"{expected_ttl}_{expected_region}_z_score_{expected_region}.h5") psth_baseline_uncorr_h5 = os.path.join( out_dir, f"{expected_ttl}_{expected_region}_baselineUncorrected_z_score_{expected_region}.h5" From c55a230bd8034a608d5e7cbd259bed5d20a4b282 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 14:50:04 -0800 Subject: [PATCH 040/150] Fixed test_step4.py for tdt_check_data --- tests/test_step4.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_step4.py b/tests/test_step4.py index cdaf0ec..109e7da 100644 --- a/tests/test_step4.py +++ b/tests/test_step4.py @@ -95,7 +95,7 @@ "PAB/": "ttl", }, "region", - "ttl", + ["PAB_0", "PAB_16", "PAB_2064"], # This session has an event which gets split into three sub-events. "tdt", ), ( @@ -272,7 +272,13 @@ def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r assert "timestampNew" in f, f"Expected 'timestampNew' dataset in {timecorr}" # If TTLs exist, check their per-region 'ts' outputs - if expected_ttl is not None: + if expected_ttl is None: + expected_ttls = [] + elif isinstance(expected_ttl, str): + expected_ttls = [expected_ttl] + else: + expected_ttls = expected_ttl + for expected_ttl in expected_ttls: ttl_fp = os.path.join(out_dir, f"{expected_ttl}_{expected_region}.hdf5") assert os.path.exists(ttl_fp), f"Missing TTL-aligned file {ttl_fp}" with h5py.File(ttl_fp, "r") as f: From 03ffd54c7c61d449cab7ac077f3bf1e746e206fb Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 21 Nov 2025 15:00:59 -0800 Subject: [PATCH 041/150] Renamed test_case from tdt_check_data to tdt_split_event. 
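The new id reflects what the case actually exercises: the "PAB/" store is split into the PAB_0 / PAB_16 / PAB_2064 sub-events, and the step 4/5 tests now accept expected_ttl as None, a single string, or a list. For reference, the same normalization the tests inline, written as a small standalone helper (the helper name is invented for illustration):

    def as_ttl_list(expected_ttl):
        # None -> no TTL outputs, str -> single TTL, list -> already-split sub-events.
        if expected_ttl is None:
            return []
        if isinstance(expected_ttl, str):
            return [expected_ttl]
        return list(expected_ttl)

    assert as_ttl_list(None) == []
    assert as_ttl_list("ttl") == ["ttl"]
    assert as_ttl_list(["PAB_0", "PAB_16", "PAB_2064"]) == ["PAB_0", "PAB_16", "PAB_2064"]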
--- tests/test_step2.py | 2 +- tests/test_step3.py | 2 +- tests/test_step4.py | 2 +- tests/test_step5.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_step2.py b/tests/test_step2.py index b34fe64..f7e34d1 100644 --- a/tests/test_step2.py +++ b/tests/test_step2.py @@ -136,7 +136,7 @@ "sample_doric_4", "sample_doric_5", "tdt_clean", - "tdt_check_data", + "tdt_split_event", "tdt_with_artifacts", "sample_npm_2", "sample_npm_3", diff --git a/tests/test_step3.py b/tests/test_step3.py index 330d017..26dac14 100644 --- a/tests/test_step3.py +++ b/tests/test_step3.py @@ -145,7 +145,7 @@ def storenames_map(): "sample_doric_4", "sample_doric_5", "tdt_clean", - "tdt_check_data", + "tdt_split_event", "tdt_with_artifacts", "sample_npm_2", "sample_npm_3", diff --git a/tests/test_step4.py b/tests/test_step4.py index 109e7da..df18f75 100644 --- a/tests/test_step4.py +++ b/tests/test_step4.py @@ -161,7 +161,7 @@ "sample_doric_4", "sample_doric_5", "tdt_clean", - "tdt_check_data", + "tdt_split_event", "tdt_with_artifacts", "sample_npm_2", "sample_npm_3", diff --git a/tests/test_step5.py b/tests/test_step5.py index 870fb7c..a8cdeb4 100644 --- a/tests/test_step5.py +++ b/tests/test_step5.py @@ -161,7 +161,7 @@ "sample_doric_4", "sample_doric_5", "tdt_clean", - "tdt_check_data", + "tdt_split_event", "tdt_with_artifacts", "sample_npm_2", "sample_npm_3", From 27acc6cecee233b83dd8d6961bbf5fc5bb669a74 Mon Sep 17 00:00:00 2001 From: Paul Adkisson-Floro Date: Mon, 1 Dec 2025 19:43:14 -0500 Subject: [PATCH 042/150] Standardize read and save (#188) --- .../extractors/csv_recording_extractor.py | 23 ++- .../extractors/doric_recording_extractor.py | 139 +++++++++++------- .../extractors/npm_recording_extractor.py | 23 ++- 3 files changed, 126 insertions(+), 59 deletions(-) diff --git a/src/guppy/extractors/csv_recording_extractor.py b/src/guppy/extractors/csv_recording_extractor.py index 3df76f6..5a42bd1 100644 --- a/src/guppy/extractors/csv_recording_extractor.py +++ b/src/guppy/extractors/csv_recording_extractor.py @@ -19,13 +19,13 @@ def execute_import_csv(filepath, events, outputPath, numProcesses=mp.cpu_count() extractor = CsvRecordingExtractor(folder_path=filepath) start = time.time() with mp.Pool(numProcesses) as p: - p.starmap(read_csv_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) + p.starmap(read_and_save_csv, zip(repeat(extractor), events, repeat(outputPath))) logger.info("Time taken = {0:.5f}".format(time.time() - start)) -def read_csv_and_save_hdf5(extractor, event, outputPath): - df = extractor.read_csv(event=event) - extractor.save_to_hdf5(df=df, event=event, outputPath=outputPath) +def read_and_save_csv(extractor, event, outputPath): + output_dicts = extractor.read(events=[event], outputPath=outputPath) + extractor.save(output_dicts=output_dicts, outputPath=outputPath) logger.info("Data for event {} fetched and stored.".format(event)) @@ -178,3 +178,18 @@ def save_to_hdf5(self, df, event, outputPath): write_hdf5(df[key[i]].dropna(), event, outputPath, key[i].lower()) logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") + + def read(self, events, outputPath): + output_dicts = [] + for event in events: + df = self.read_csv(event=event) + S = df.to_dict() + S["storename"] = event + output_dicts.append(S) + return output_dicts + + def save(self, output_dicts, outputPath): + for S in output_dicts: + event = S.pop("storename") + df = pd.DataFrame.from_dict(S) + self.save_to_hdf5(df=df, event=event, 
outputPath=outputPath) diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index e5a97cb..2966ec6 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -15,18 +15,12 @@ def execute_import_doric(folder_path, storesList, outputPath): extractor = DoricRecordingExtractor(folder_path=folder_path) - flag = extractor.check_doric(folder_path) - - if flag == "doric_csv": - extractor.read_doric_csv(folder_path, storesList, outputPath) - elif flag == "doric_doric": - extractor.read_doric_doric(folder_path, storesList, outputPath) - else: - logger.error("Doric file not found or not recognized.") - raise FileNotFoundError("Doric file not found or not recognized.") + output_dicts = extractor.read(storesList=storesList) + extractor.save(output_dicts=output_dicts, outputPath=outputPath) class DoricRecordingExtractor: + # TODO: consolidate duplicate flag logic between the `__init__` and the `check_doric` method. def __init__(self, folder_path): self.folder_path = folder_path @@ -110,9 +104,9 @@ def separate_last_element(self, arr): l = arr[-1] return arr[:-1], l - def check_doric(self, filepath): + def check_doric(self): logger.debug("Checking if doric file exists") - path = glob.glob(os.path.join(filepath, "*.csv")) + glob.glob(os.path.join(filepath, "*.doric")) + path = glob.glob(os.path.join(self.folder_path, "*.csv")) + glob.glob(os.path.join(self.folder_path, "*.doric")) flag_arr = [] for i in range(len(path)): @@ -141,44 +135,50 @@ def check_doric(self, filepath): logger.info("Doric file found.") return flag_arr[0] - def read_doric_csv(self, filepath, storesList, outputPath): - path = glob.glob(os.path.join(filepath, "*.csv")) + def read_doric_csv(self, storesList): + path = glob.glob(os.path.join(self.folder_path, "*.csv")) if len(path) > 1: logger.error("An error occurred : More than one Doric csv file present at the location") raise Exception("More than one Doric csv file present at the location") - else: - df = pd.read_csv(path[0], header=1, index_col=False) - df = df.dropna(axis=1, how="all") - df = df.dropna(axis=0, how="any") - df["Time(s)"] = df["Time(s)"] - df["Time(s)"].to_numpy()[0] - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: - timestamps = np.array(df["Time(s)"]) - sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(df["Time(s)"].to_numpy(), storesList[0, i], outputPath, "timestamps") - write_hdf5(df[storesList[0, i]].to_numpy(), storesList[0, i], outputPath, "data") - else: - ttl = df[storesList[0, i]] - indices = np.where(ttl <= 0)[0] - diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5( - df["Time(s)"][indices[diff_indices] + 1].to_numpy(), storesList[0, i], outputPath, "timestamps" - ) - - def read_doric_doric(self, filepath, storesList, outputPath): - path = glob.glob(os.path.join(filepath, "*.doric")) + + df = pd.read_csv(path[0], header=1, index_col=False) + df = df.dropna(axis=1, how="all") + df = df.dropna(axis=0, how="any") + df["Time(s)"] = df["Time(s)"] - df["Time(s)"].to_numpy()[0] + + output_dicts = [] + for i in range(storesList.shape[1]): + if "control" in storesList[1, i] or "signal" in storesList[1, i]: + timestamps = np.array(df["Time(s)"]) + sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) + data = np.array(df[storesList[0, i]]) + storename = 
storesList[0, i] + S = {"storename": storename, "sampling_rate": sampling_rate, "timestamps": timestamps, "data": data} + output_dicts.append(S) + else: + ttl = df[storesList[0, i]] + indices = np.where(ttl <= 0)[0] + diff_indices = np.where(np.diff(indices) > 1)[0] + timestamps = df["Time(s)"][indices[diff_indices] + 1].to_numpy() + storename = storesList[0, i] + S = {"storename": storename, "timestamps": timestamps} + output_dicts.append(S) + + return output_dicts + + def read_doric_doric(self, storesList): + path = glob.glob(os.path.join(self.folder_path, "*.doric")) if len(path) > 1: logger.error("An error occurred : More than one Doric file present at the location") raise Exception("More than one Doric file present at the location") - else: - with h5py.File(path[0], "r") as f: - if "Traces" in list(f.keys()): - keys = self.access_data_doricV1(f, storesList, outputPath) - elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - keys = self.access_data_doricV6(f, storesList, outputPath) + with h5py.File(path[0], "r") as f: + if "Traces" in list(f.keys()): + output_dicts = self.access_data_doricV1(f, storesList) + elif list(f.keys()) == ["Configurations", "DataAcquisition"]: + output_dicts = self.access_data_doricV6(f, storesList) + return output_dicts - def access_data_doricV6(self, doric_file, storesList, outputPath): + def access_data_doricV6(self, doric_file, storesList): data = [doric_file["DataAcquisition"]] res = [] while len(data) != 0: @@ -201,6 +201,7 @@ def access_data_doricV6(self, doric_file, storesList, outputPath): if f"{sep_values[-2]}/{sep_values[-1]}" in storesList[0, :]: decide_path.append(element) + output_dicts = [] for i in range(storesList.shape[1]): if "control" in storesList[1, i] or "signal" in storesList[1, i]: regex = re.compile("(.*?)" + str(storesList[0, i]) + "(.*?)") @@ -212,9 +213,9 @@ def access_data_doricV6(self, doric_file, storesList, outputPath): data = np.array(doric_file[decide_path[idx]]) timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") - write_hdf5(data, storesList[0, i], outputPath, "data") + storename = storesList[0, i] + S = {"storename": storename, "sampling_rate": sampling_rate, "timestamps": timestamps, "data": data} + output_dicts.append(S) else: regex = re.compile("(.*?)" + storesList[0, i] + "$") idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] @@ -226,21 +227,57 @@ def access_data_doricV6(self, doric_file, storesList, outputPath): timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) indices = np.where(ttl <= 0)[0] diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") + timestamps = timestamps[indices[diff_indices] + 1] + storename = storesList[0, i] + S = {"storename": storename, "timestamps": timestamps} + output_dicts.append(S) - def access_data_doricV1(self, doric_file, storesList, outputPath): + return output_dicts + + def access_data_doricV1(self, doric_file, storesList): keys = list(doric_file["Traces"]["Console"].keys()) + output_dicts = [] for i in range(storesList.shape[1]): if "control" in storesList[1, i] or "signal" in storesList[1, i]: timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) sampling_rate = 
np.array([1 / (timestamps[-1] - timestamps[-2])]) data = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") - write_hdf5(data, storesList[0, i], outputPath, "data") + storename = storesList[0, i] + S = {"storename": storename, "sampling_rate": sampling_rate, "timestamps": timestamps, "data": data} + output_dicts.append(S) else: timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) ttl = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) indices = np.where(ttl <= 0)[0] diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") + timestamps = timestamps[indices[diff_indices] + 1] + storename = storesList[0, i] + S = {"storename": storename, "timestamps": timestamps} + output_dicts.append(S) + + return output_dicts + + def save_dict_to_hdf5(self, S, outputPath): + event = S["storename"] + write_hdf5(S["timestamps"], event, outputPath, "timestamps") + + if "sampling_rate" in S: + write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") + if "data" in S: + write_hdf5(S["data"], event, outputPath, "data") + + def read(self, storesList): + flag = self.check_doric() + if flag == "doric_csv": + output_dicts = self.read_doric_csv(storesList) + elif flag == "doric_doric": + output_dicts = self.read_doric_doric(storesList) + else: + logger.error("Doric file not found or not recognized.") + raise FileNotFoundError("Doric file not found or not recognized.") + + return output_dicts + + def save(self, output_dicts, outputPath): + for S in output_dicts: + self.save_dict_to_hdf5(S=S, outputPath=outputPath) diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py index a8cfd98..bc9b210 100644 --- a/src/guppy/extractors/npm_recording_extractor.py +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -24,13 +24,13 @@ def execute_import_npm(folder_path, num_ch, inputParameters, events, outputPath, extractor = NpmRecordingExtractor(folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters) start = time.time() with mp.Pool(numProcesses) as p: - p.starmap(read_npm_and_save_hdf5, zip(repeat(extractor), events, repeat(outputPath))) + p.starmap(read_and_save_npm, zip(repeat(extractor), events, repeat(outputPath))) logger.info("Time taken = {0:.5f}".format(time.time() - start)) -def read_npm_and_save_hdf5(extractor, event, outputPath): - df = extractor.read_npm(event=event) - extractor.save_to_hdf5(df=df, event=event, outputPath=outputPath) +def read_and_save_npm(extractor, event, outputPath): + output_dicts = extractor.read(events=[event], outputPath=outputPath) + extractor.save(output_dicts=output_dicts, outputPath=outputPath) logger.info("Data for event {} fetched and stored.".format(event)) @@ -488,3 +488,18 @@ def save_to_hdf5(self, df, event, outputPath): write_hdf5(df[key[i]].dropna(), event, outputPath, key[i].lower()) logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") + + def read(self, events, outputPath): + output_dicts = [] + for event in events: + df = self.read_npm(event=event) + S = df.to_dict() + S["storename"] = event + output_dicts.append(S) + return output_dicts + + def save(self, output_dicts, outputPath): + for S in output_dicts: + event = S.pop("storename") + 
df = pd.DataFrame.from_dict(S) + self.save_to_hdf5(df=df, event=event, outputPath=outputPath) From a633550144b26b2ed6cc1a4d86696f1296a6e9f1 Mon Sep 17 00:00:00 2001 From: Paul Adkisson-Floro Date: Wed, 3 Dec 2025 13:18:21 -0500 Subject: [PATCH 043/150] Remove tkinter from NPM (#189) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../extractors/npm_recording_extractor.py | 293 ++++++++++-------- src/guppy/saveStoresList.py | 118 +++++++ src/guppy/testing/api.py | 78 +++-- tests/test_step2.py | 19 +- tests/test_step3.py | 22 +- tests/test_step4.py | 26 +- tests/test_step5.py | 30 +- 7 files changed, 387 insertions(+), 199 deletions(-) diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py index bc9b210..ae4f540 100644 --- a/src/guppy/extractors/npm_recording_extractor.py +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -3,9 +3,7 @@ import multiprocessing as mp import os import time -import tkinter as tk from itertools import repeat -from tkinter import StringVar, messagebox, ttk import numpy as np import pandas as pd @@ -36,7 +34,7 @@ def read_and_save_npm(extractor, event, outputPath): class NpmRecordingExtractor: - def __init__(self, folder_path, num_ch, inputParameters=None): + def __init__(self, folder_path, num_ch, inputParameters=None): # TODO: make inputParameters mandatory self.folder_path = folder_path self.num_ch = num_ch self.inputParameters = inputParameters @@ -44,18 +42,70 @@ def __init__(self, folder_path, num_ch, inputParameters=None): folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters ) + @classmethod + def has_multiple_event_ttls(cls, folder_path): + path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) + path_chev = glob.glob(os.path.join(folder_path, "*chev*")) + path_chod = glob.glob(os.path.join(folder_path, "*chod*")) + path_chpr = glob.glob(os.path.join(folder_path, "*chpr*")) + path_event = glob.glob(os.path.join(folder_path, "event*")) + path_chev_chod_event = path_chev + path_chod + path_event + path_chpr + + path = sorted(list(set(path) - set(path_chev_chod_event))) + multiple_event_ttls = [] + for i in range(len(path)): + df = pd.read_csv(path[i], index_col=False) + _, value = cls.check_header(df) + + # check dataframe structure and read data accordingly + if len(value) > 0: + columns_isstr = False + df = pd.read_csv(path[i], header=None) + cols = np.array(list(df.columns), dtype=str) + else: + columns_isstr = True + cols = np.array(list(df.columns), dtype=str) + if len(cols) == 2: + flag = "event_or_data_np" + elif len(cols) > 2: + flag = "data_np" + else: + logger.error("Number of columns in csv file does not make sense.") + raise Exception("Number of columns in csv file does not make sense.") + + # used assigned flags to process the files and read the data + if flag == "event_or_data_np": + arr = list(df.iloc[:, 1]) + check_float = [True for i in arr if isinstance(i, float)] + if len(arr) == len(check_float) and columns_isstr == False: + flag = "data_np" + elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): + flag = "event_np" + else: + flag = "event_np" + + if flag == "event_np": + type_val = np.array(df.iloc[:, 1]) + type_val_unique = np.unique(type_val) + if len(type_val_unique) > 1: + multiple_event_ttls.append(True) + else: + multiple_event_ttls.append(False) + else: + multiple_event_ttls.append(False) + + return multiple_event_ttls + def import_npm(self, folder_path, num_ch, inputParameters=None): logger.debug("If 
it exists, importing NPM file based on the structure of file") # Headless configuration (used to avoid any UI prompts when running tests) headless = bool(os.environ.get("GUPPY_BASE_DIR")) - npm_timestamp_column_name = None - npm_time_unit = None - npm_split_events = None if isinstance(inputParameters, dict): - npm_timestamp_column_name = inputParameters.get("npm_timestamp_column_name") - npm_time_unit = inputParameters.get("npm_time_unit", "seconds") - npm_split_events = inputParameters.get("npm_split_events", True) + npm_timestamp_column_names = inputParameters.get("npm_timestamp_column_names") + npm_time_units = inputParameters.get("npm_time_units") + # TODO: come up with a better name for npm_split_events that can be appropriately pluralized for a list + npm_split_events = inputParameters.get("npm_split_events") path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) + sorted( glob.glob(os.path.join(folder_path, "*.doric")) ) @@ -71,6 +121,20 @@ def import_npm(self, folder_path, num_ch, inputParameters=None): event_from_filename = [] flag_arr = [] for i in range(len(path)): + # TODO: validate npm_timestamp_column_names, npm_time_units, npm_split_events lengths + if npm_timestamp_column_names is None: + npm_timestamp_column_name = None + else: + npm_timestamp_column_name = npm_timestamp_column_names[i] + if npm_time_units is None: + npm_time_unit = "seconds" + else: + npm_time_unit = npm_time_units[i] + if npm_split_events is None: + split_events = False + else: + split_events = npm_split_events[i] + dirname = os.path.dirname(path[i]) ext = os.path.basename(path[i]).split(".")[-1] assert ext != "doric", "Doric files are not supported by import_npm function." @@ -103,7 +167,7 @@ def import_npm(self, folder_path, num_ch, inputParameters=None): assert len(cols) != 3, "File appears to be data .csv. This function only supports NPM .csv files." if len(cols) == 2: flag = "event_or_data_np" - elif len(cols) >= 2: + elif len(cols) > 2: flag = "data_np" else: logger.error("Number of columns in csv file does not make sense.") @@ -150,23 +214,7 @@ def import_npm(self, folder_path, num_ch, inputParameters=None): elif flag == "event_np": type_val = np.array(df.iloc[:, 1]) type_val_unique = np.unique(type_val) - if headless: - response = 1 if bool(npm_split_events) else 0 - else: - window = tk.Tk() - if len(type_val_unique) > 1: - response = messagebox.askyesno( - "Multiple event TTLs", - "Based on the TTL file,\ - it looks like TTLs \ - belongs to multiple behavior type. 
\ - Do you want to create multiple files for each \ - behavior type ?", - ) - else: - response = 0 - window.destroy() - if response == 1: + if split_events: timestamps = np.array(df.iloc[:, 0]) for j in range(len(type_val_unique)): idx = np.where(type_val == type_val_unique[j]) @@ -184,9 +232,8 @@ def import_npm(self, folder_path, num_ch, inputParameters=None): event_from_filename.append("event" + str(0)) else: file = f"file{str(i)}_" - df, ts_unit = self.decide_ts_unit_for_npm( - df, timestamp_column_name=npm_timestamp_column_name, time_unit=npm_time_unit, headless=headless - ) + ts_unit = npm_time_unit + df = self.update_df_with_timestamp_columns(df, timestamp_column_name=npm_timestamp_column_name) df, indices_dict, _ = self.decide_indices(file, df, flag) keys = list(indices_dict.keys()) for k in range(len(keys)): @@ -270,7 +317,8 @@ def import_npm(self, folder_path, num_ch, inputParameters=None): logger.info("Importing of NPM file is done.") return event_from_filename, flag_arr - def check_header(self, df): + @classmethod + def check_header(cls, df): arr = list(df.columns) check_float = [] for i in arr: @@ -283,7 +331,8 @@ def check_header(self, df): # function to decide indices of interleaved channels # in neurophotometrics data - def decide_indices(self, file, df, flag, num_ch=2): + @classmethod + def decide_indices(cls, file, df, flag, num_ch=2): ch_name = [file + "chev", file + "chod", file + "chpr"] if len(ch_name) < num_ch: logger.error( @@ -319,7 +368,7 @@ def decide_indices(self, file, df, flag, num_ch=2): data but column names does not have Flags or LedState" ) - num_ch, ch = self.check_channels(state) + num_ch, ch = cls.check_channels(state) indices_dict = dict() for i in range(num_ch): first_occurrence = np.where(state == ch[i])[0] @@ -330,7 +379,8 @@ def decide_indices(self, file, df, flag, num_ch=2): return df, indices_dict, num_ch # check flag consistency in neurophotometrics data - def check_channels(self, state): + @classmethod + def check_channels(cls, state): state = state.astype(int) unique_state = np.unique(state[2:12]) if unique_state.shape[0] > 3: @@ -345,105 +395,94 @@ def check_channels(self, state): return unique_state.shape[0], unique_state - # function to decide NPM timestamps unit (seconds, ms or us) - def decide_ts_unit_for_npm(self, df, timestamp_column_name=None, time_unit=None, headless=False): - col_names = np.array(list(df.columns)) + @classmethod + def needs_ts_unit(cls, folder_path, num_ch): + path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) + sorted( + glob.glob(os.path.join(folder_path, "*.doric")) + ) + path_chev = glob.glob(os.path.join(folder_path, "*chev*")) + path_chod = glob.glob(os.path.join(folder_path, "*chod*")) + path_chpr = glob.glob(os.path.join(folder_path, "*chpr*")) + path_event = glob.glob(os.path.join(folder_path, "event*")) + # path_sig = glob.glob(os.path.join(filepath, 'sig*')) # TODO: what is this for? 
+ path_chev_chod_event = path_chev + path_chod + path_event + path_chpr + + path = sorted(list(set(path) - set(path_chev_chod_event))) + ts_unit_needs = [] col_names_ts = [""] - for name in col_names: - if "timestamp" in name.lower(): - col_names_ts.append(name) + for i in range(len(path)): + df = pd.read_csv(path[i], index_col=False) + _, value = cls.check_header(df) - ts_unit = "seconds" - if len(col_names_ts) > 2: - # Headless path: auto-select column/unit without any UI - if headless: - if timestamp_column_name is not None: - assert ( - timestamp_column_name in col_names_ts - ), f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" - chosen = timestamp_column_name - else: - chosen = col_names_ts[1] - df.insert(1, "Timestamp", df[chosen]) - df = df.drop(col_names_ts[1:], axis=1) - valid_units = {"seconds", "milliseconds", "microseconds"} - ts_unit = time_unit if (isinstance(time_unit, str) and time_unit in valid_units) else "seconds" - return df, ts_unit - # def comboBoxSelected(event): - # logger.info(event.widget.get()) - - window = tk.Tk() - window.title("Select appropriate options for timestamps") - window.geometry("500x200") - holdComboboxValues = dict() - - timestamps_label = ttk.Label(window, text="Select which timestamps to use : ").grid( - row=0, column=1, pady=25, padx=25 - ) - holdComboboxValues["timestamps"] = StringVar() - timestamps_combo = ttk.Combobox(window, values=col_names_ts, textvariable=holdComboboxValues["timestamps"]) - timestamps_combo.grid(row=0, column=2, pady=25, padx=25) - timestamps_combo.current(0) - # timestamps_combo.bind("<>", comboBoxSelected) - - time_unit_label = ttk.Label(window, text="Select timestamps unit : ").grid( - row=1, column=1, pady=25, padx=25 - ) - holdComboboxValues["time_unit"] = StringVar() - time_unit_combo = ttk.Combobox( - window, - values=["", "seconds", "milliseconds", "microseconds"], - textvariable=holdComboboxValues["time_unit"], - ) - time_unit_combo.grid(row=1, column=2, pady=25, padx=25) - time_unit_combo.current(0) - # time_unit_combo.bind("<>", comboBoxSelected) - window.lift() - window.after(500, lambda: window.lift()) - window.mainloop() - - if holdComboboxValues["timestamps"].get(): - df.insert(1, "Timestamp", df[holdComboboxValues["timestamps"].get()]) - df = df.drop(col_names_ts[1:], axis=1) + # check dataframe structure and read data accordingly + if len(value) > 0: + df = pd.read_csv(path[i], header=None) + cols = np.array(list(df.columns), dtype=str) + columns_isstr = False else: - messagebox.showerror( - "All options not selected", - "All the options for timestamps \ - were not selected. Please select appropriate options", - ) - logger.error( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - raise Exception( - "All the options for timestamps \ - were not selected. 
Please select appropriate options" - ) - if holdComboboxValues["time_unit"].get(): - if holdComboboxValues["time_unit"].get() == "seconds": - ts_unit = holdComboboxValues["time_unit"].get() - elif holdComboboxValues["time_unit"].get() == "milliseconds": - ts_unit = holdComboboxValues["time_unit"].get() + columns_isstr = True + cols = np.array(list(df.columns), dtype=str) + # check the structure of dataframe and assign flag to the type of file + if len(cols) == 2: + flag = "event_or_data_np" + elif len(cols) > 2: + flag = "data_np" + else: + logger.error("Number of columns in csv file does not make sense.") + raise Exception("Number of columns in csv file does not make sense.") + + if columns_isstr == True and ( + "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) + ): + flag = flag + "_v2" + + # used assigned flags to process the files and read the data + if flag == "event_or_data_np": + arr = list(df.iloc[:, 1]) + check_float = [True for i in arr if isinstance(i, float)] + if len(arr) == len(check_float) and columns_isstr == False: + flag = "data_np" + elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): + flag = "event_np" else: - ts_unit = holdComboboxValues["time_unit"].get() + flag = "event_np" + + if flag == "data_np": + file = f"file{str(i)}_" + df, _, _ = cls.decide_indices(file, df, flag, num_ch) + + if flag == "event_np" or flag == "data_np": + ts_unit_needs.append(False) + continue + + col_names = np.array(list(df.columns)) + for name in col_names: + if "timestamp" in name.lower(): + col_names_ts.append(name) + + if len(col_names_ts) > 2: + ts_unit_needs.append(True) else: - messagebox.showerror( - "All options not selected", - "All the options for timestamps \ - were not selected. Please select appropriate options", - ) - logger.error( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - raise Exception( - "All the options for timestamps \ - were not selected. 
Please select appropriate options" - ) - else: - pass + ts_unit_needs.append(False) - return df, ts_unit + return ts_unit_needs, col_names_ts + + def update_df_with_timestamp_columns(self, df, timestamp_column_name): + col_names = np.array(list(df.columns)) + col_names_ts = [""] + for name in col_names: + if "timestamp" in name.lower(): + col_names_ts.append(name) + if len(col_names_ts) <= 2: + return df + + timestamp_column_name = timestamp_column_name if timestamp_column_name is not None else col_names_ts[1] + assert ( + timestamp_column_name in col_names_ts + ), f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" + df.insert(1, "Timestamp", df[timestamp_column_name]) + df = df.drop(col_names_ts[1:], axis=1) + return df def read_npm(self, event): logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index daf7457..552d76c 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -9,8 +9,10 @@ import logging import os import socket +import tkinter as tk from pathlib import Path from random import randint +from tkinter import StringVar, messagebox, ttk import holoviews as hv import numpy as np @@ -602,6 +604,23 @@ def execute(inputParameters): flag = extractor.flags elif modality == "npm": + headless = bool(os.environ.get("GUPPY_BASE_DIR")) + if not headless: + # Resolve multiple event TTLs + multiple_event_ttls = NpmRecordingExtractor.has_multiple_event_ttls(folder_path=filepath) + responses = get_multi_event_responses(multiple_event_ttls) + inputParameters["npm_split_events"] = responses + + # Resolve timestamp units and columns + ts_unit_needs, col_names_ts = NpmRecordingExtractor.needs_ts_unit( + folder_path=filepath, num_ch=num_ch + ) + ts_units, npm_timestamp_column_names = get_timestamp_configuration(ts_unit_needs, col_names_ts) + inputParameters["npm_time_units"] = ts_units if ts_units else None + inputParameters["npm_timestamp_column_names"] = ( + npm_timestamp_column_names if npm_timestamp_column_names else None + ) + data = 0 extractor = NpmRecordingExtractor(folder_path=filepath, num_ch=num_ch, inputParameters=inputParameters) event_name = extractor.events @@ -614,3 +633,102 @@ def execute(inputParameters): except Exception as e: logger.error(str(e)) raise e + + +def get_multi_event_responses(multiple_event_ttls): + responses = [] + for has_multiple in multiple_event_ttls: + if not has_multiple: + responses.append(False) + continue + window = tk.Tk() + response = messagebox.askyesno( + "Multiple event TTLs", + ( + "Based on the TTL file, " + "it looks like TTLs " + "belong to multiple behavior types. " + "Do you want to create multiple files for each " + "behavior type?" 
+ ), + ) + window.destroy() + responses.append(response) + return responses + + +def get_timestamp_configuration(ts_unit_needs, col_names_ts): + ts_units, npm_timestamp_column_names = [], [] + for need in ts_unit_needs: + if not need: + ts_units.append("seconds") + npm_timestamp_column_names.append(None) + continue + window = tk.Tk() + window.title("Select appropriate options for timestamps") + window.geometry("500x200") + holdComboboxValues = dict() + + timestamps_label = ttk.Label(window, text="Select which timestamps to use : ").grid( + row=0, column=1, pady=25, padx=25 + ) + holdComboboxValues["timestamps"] = StringVar() + timestamps_combo = ttk.Combobox(window, values=col_names_ts, textvariable=holdComboboxValues["timestamps"]) + timestamps_combo.grid(row=0, column=2, pady=25, padx=25) + timestamps_combo.current(0) + # timestamps_combo.bind("<>", comboBoxSelected) + + time_unit_label = ttk.Label(window, text="Select timestamps unit : ").grid(row=1, column=1, pady=25, padx=25) + holdComboboxValues["time_unit"] = StringVar() + time_unit_combo = ttk.Combobox( + window, + values=["", "seconds", "milliseconds", "microseconds"], + textvariable=holdComboboxValues["time_unit"], + ) + time_unit_combo.grid(row=1, column=2, pady=25, padx=25) + time_unit_combo.current(0) + # time_unit_combo.bind("<>", comboBoxSelected) + window.lift() + window.after(500, lambda: window.lift()) + window.mainloop() + + if holdComboboxValues["timestamps"].get(): + npm_timestamp_column_name = holdComboboxValues["timestamps"].get() + else: + messagebox.showerror( + "All options not selected", + "All the options for timestamps \ + were not selected. Please select appropriate options", + ) + logger.error( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + raise Exception( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + if holdComboboxValues["time_unit"].get(): + if holdComboboxValues["time_unit"].get() == "seconds": + ts_unit = holdComboboxValues["time_unit"].get() + elif holdComboboxValues["time_unit"].get() == "milliseconds": + ts_unit = holdComboboxValues["time_unit"].get() + else: + ts_unit = holdComboboxValues["time_unit"].get() + else: + messagebox.showerror( + "All options not selected", + "All the options for timestamps \ + were not selected. Please select appropriate options", + ) + logger.error( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + raise Exception( + "All the options for timestamps \ + were not selected. Please select appropriate options" + ) + ts_units.append(ts_unit) + npm_timestamp_column_names.append(npm_timestamp_column_name) + return ts_units, npm_timestamp_column_names diff --git a/src/guppy/testing/api.py b/src/guppy/testing/api.py index d7e390d..c647907 100644 --- a/src/guppy/testing/api.py +++ b/src/guppy/testing/api.py @@ -69,9 +69,9 @@ def step2( selected_folders: Iterable[str], storenames_map: dict[str, str], modality: str = "tdt", - npm_timestamp_column_name: str | None = None, - npm_time_unit: str = "seconds", - npm_split_events: bool = True, + npm_timestamp_column_names: list[str | None] | None = None, + npm_time_units: list[str] | None = None, + npm_split_events: list[bool] | None = None, ) -> None: """ Run pipeline Step 2 (Save Storenames) via the actual Panel-backed logic. @@ -94,6 +94,14 @@ def step2( storenames_map : dict[str, str] Mapping from raw storenames (e.g., "Dv1A") to semantic names (e.g., "control_DMS"). 
Insertion order is preserved. + modality : str + Data acquisition modality (e.g., 'tdt', 'csv', 'doric', 'npm'). + npm_timestamp_column_names : list[str | None] | None + List of timestamp column names for NPM files, one per CSV file. None if not applicable. + npm_time_units : list[str] | None + List of time units for NPM files, one per CSV file (e.g., 'seconds', 'milliseconds'). None if not applicable. + npm_split_events : list[bool] | None + List of booleans indicating whether to split events for NPM files, one per CSV file. None if not applicable. Raises ------ @@ -155,8 +163,8 @@ def step2( input_params["modality"] = modality # Add npm parameters - input_params["npm_timestamp_column_name"] = npm_timestamp_column_name - input_params["npm_time_unit"] = npm_time_unit + input_params["npm_timestamp_column_names"] = npm_timestamp_column_names + input_params["npm_time_units"] = npm_time_units input_params["npm_split_events"] = npm_split_events # Call the underlying Step 2 executor (now headless-aware) @@ -168,9 +176,9 @@ def step3( base_dir: str, selected_folders: Iterable[str], modality: str = "tdt", - npm_timestamp_column_name: str | None = None, - npm_time_unit: str = "seconds", - npm_split_events: bool = True, + npm_timestamp_column_names: list[str | None] | None = None, + npm_time_units: list[str] | None = None, + npm_split_events: list[bool] | None = None, ) -> None: """ Run pipeline Step 3 (Read Raw Data) via the actual Panel-backed logic, headlessly. @@ -188,6 +196,14 @@ def step3( must reside directly under this path. selected_folders : Iterable[str] Absolute paths to the session directories to process. + modality : str + Data acquisition modality (e.g., 'tdt', 'csv', 'doric', 'npm'). + npm_timestamp_column_names : list[str | None] | None + List of timestamp column names for NPM files, one per CSV file. None if not applicable. + npm_time_units : list[str] | None + List of time units for NPM files, one per CSV file (e.g., 'seconds', 'milliseconds'). None if not applicable. + npm_split_events : list[bool] | None + List of booleans indicating whether to split events for NPM files, one per CSV file. None if not applicable. Raises ------ @@ -232,9 +248,9 @@ def step3( template._widgets["files_1"].value = abs_sessions input_params = template._hooks["getInputParameters"]() - # Inject explicit NPM parameters (match Step 2 style) - input_params["npm_timestamp_column_name"] = npm_timestamp_column_name - input_params["npm_time_unit"] = npm_time_unit + # Inject explicit NPM parameters + input_params["npm_timestamp_column_names"] = npm_timestamp_column_names + input_params["npm_time_units"] = npm_time_units input_params["npm_split_events"] = npm_split_events # Inject modality @@ -249,9 +265,9 @@ def step4( base_dir: str, selected_folders: Iterable[str], modality: str = "tdt", - npm_timestamp_column_name: str | None = None, - npm_time_unit: str = "seconds", - npm_split_events: bool = True, + npm_timestamp_column_names: list[str | None] | None = None, + npm_time_units: list[str] | None = None, + npm_split_events: list[bool] | None = None, ) -> None: """ Run pipeline Step 4 (Extract timestamps and signal) via the Panel-backed logic, headlessly. @@ -269,6 +285,14 @@ def step4( must reside directly under this path. selected_folders : Iterable[str] Absolute paths to the session directories to process. + modality : str + Data acquisition modality (e.g., 'tdt', 'csv', 'doric', 'npm'). + npm_timestamp_column_names : list[str | None] | None + List of timestamp column names for NPM files, one per CSV file. 
None if not applicable. + npm_time_units : list[str] | None + List of time units for NPM files, one per CSV file (e.g., 'seconds', 'milliseconds'). None if not applicable. + npm_split_events : list[bool] | None + List of booleans indicating whether to split events for NPM files, one per CSV file. None if not applicable. Raises ------ @@ -313,9 +337,9 @@ def step4( template._widgets["files_1"].value = abs_sessions input_params = template._hooks["getInputParameters"]() - # Inject explicit NPM parameters (match Step 2 style) - input_params["npm_timestamp_column_name"] = npm_timestamp_column_name - input_params["npm_time_unit"] = npm_time_unit + # Inject explicit NPM parameters + input_params["npm_timestamp_column_names"] = npm_timestamp_column_names + input_params["npm_time_units"] = npm_time_units input_params["npm_split_events"] = npm_split_events # Inject modality @@ -330,9 +354,9 @@ def step5( base_dir: str, selected_folders: Iterable[str], modality: str = "tdt", - npm_timestamp_column_name: str | None = None, - npm_time_unit: str = "seconds", - npm_split_events: bool = True, + npm_timestamp_column_names: list[str | None] | None = None, + npm_time_units: list[str] | None = None, + npm_split_events: list[bool] | None = None, ) -> None: """ Run pipeline Step 5 (PSTH Computation) via the Panel-backed logic, headlessly. @@ -350,6 +374,14 @@ def step5( must reside directly under this path. selected_folders : Iterable[str] Absolute paths to the session directories to process. + modality : str + Data acquisition modality (e.g., 'tdt', 'csv', 'doric', 'npm'). + npm_timestamp_column_names : list[str | None] | None + List of timestamp column names for NPM files, one per CSV file. None if not applicable. + npm_time_units : list[str] | None + List of time units for NPM files, one per CSV file (e.g., 'seconds', 'milliseconds'). None if not applicable. + npm_split_events : list[bool] | None + List of booleans indicating whether to split events for NPM files, one per CSV file. None if not applicable. 
Raises ------ @@ -394,9 +426,9 @@ def step5( template._widgets["files_1"].value = abs_sessions input_params = template._hooks["getInputParameters"]() - # Inject explicit NPM parameters (match Step 2 style) - input_params["npm_timestamp_column_name"] = npm_timestamp_column_name - input_params["npm_time_unit"] = npm_time_unit + # Inject explicit NPM parameters + input_params["npm_timestamp_column_names"] = npm_timestamp_column_names + input_params["npm_time_units"] = npm_time_units input_params["npm_split_events"] = npm_split_events # Inject modality diff --git a/tests/test_step2.py b/tests/test_step2.py index f7e34d1..6ab85eb 100644 --- a/tests/test_step2.py +++ b/tests/test_step2.py @@ -154,16 +154,15 @@ def test_step2(tmp_path, session_subdir, storenames_map, modality): - Asserts storesList.csv exists and exactly matches the provided mapping (2xN) """ if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": - npm_timestamp_column_name = "ComputerTimestamp" - npm_time_unit = "milliseconds" + npm_timestamp_column_names = ["ComputerTimestamp", None] + npm_time_units = ["milliseconds", "seconds"] + npm_split_events = [False, True] else: - npm_timestamp_column_name = None - npm_time_unit = None + npm_timestamp_column_names = None + npm_time_units = None + npm_split_events = [True, True] if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": - npm_split_events = False - else: - npm_split_events = True - + npm_split_events = None # Source sample data src_base_dir = str(Path(".") / "testing_data") src_session = os.path.join(src_base_dir, session_subdir) @@ -193,8 +192,8 @@ def test_step2(tmp_path, session_subdir, storenames_map, modality): selected_folders=[str(session_copy)], storenames_map=storenames_map, modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) diff --git a/tests/test_step3.py b/tests/test_step3.py index 26dac14..e4b5150 100644 --- a/tests/test_step3.py +++ b/tests/test_step3.py @@ -167,15 +167,15 @@ def test_step3(tmp_path, storenames_map, session_subdir, modality): the temp copy (never touching the original sample path). 
""" if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": - npm_timestamp_column_name = "ComputerTimestamp" - npm_time_unit = "milliseconds" + npm_timestamp_column_names = ["ComputerTimestamp", None] + npm_time_units = ["milliseconds", "seconds"] + npm_split_events = [False, True] else: - npm_timestamp_column_name = None - npm_time_unit = None + npm_timestamp_column_names = None + npm_time_units = None + npm_split_events = [True, True] if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": - npm_split_events = False - else: - npm_split_events = True + npm_split_events = None src_base_dir = str(Path(".") / "testing_data") src_session = os.path.join(src_base_dir, session_subdir) @@ -205,8 +205,8 @@ def test_step3(tmp_path, storenames_map, session_subdir, modality): selected_folders=[str(session_copy)], storenames_map=storenames_map, modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) @@ -215,8 +215,8 @@ def test_step3(tmp_path, storenames_map, session_subdir, modality): base_dir=str(tmp_base), selected_folders=[str(session_copy)], modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) diff --git a/tests/test_step4.py b/tests/test_step4.py index df18f75..8e5f989 100644 --- a/tests/test_step4.py +++ b/tests/test_step4.py @@ -185,15 +185,15 @@ def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r - Assertions confirm creation of key HDF5 outputs expected from Step 4. 
""" if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": - npm_timestamp_column_name = "ComputerTimestamp" - npm_time_unit = "milliseconds" + npm_timestamp_column_names = ["ComputerTimestamp", None] + npm_time_units = ["milliseconds", "seconds"] + npm_split_events = [False, True] else: - npm_timestamp_column_name = None - npm_time_unit = None + npm_timestamp_column_names = None + npm_time_units = None + npm_split_events = [True, True] if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": - npm_split_events = False - else: - npm_split_events = True + npm_split_events = None # Use the CSV sample session src_base_dir = str(Path(".") / "testing_data") @@ -227,8 +227,8 @@ def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r selected_folders=[str(session_copy)], storenames_map=storenames_map, modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) @@ -237,8 +237,8 @@ def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r base_dir=str(tmp_base), selected_folders=[str(session_copy)], modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) @@ -247,8 +247,8 @@ def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r base_dir=str(tmp_base), selected_folders=[str(session_copy)], modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) diff --git a/tests/test_step5.py b/tests/test_step5.py index a8cdeb4..1837ebf 100644 --- a/tests/test_step5.py +++ b/tests/test_step5.py @@ -187,15 +187,15 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r - Defaults are used for input parameters; PSTH computation defaults to z_score. 
""" if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": - npm_timestamp_column_name = "ComputerTimestamp" - npm_time_unit = "milliseconds" + npm_timestamp_column_names = ["ComputerTimestamp", None] + npm_time_units = ["milliseconds", "seconds"] + npm_split_events = [False, True] else: - npm_timestamp_column_name = None - npm_time_unit = None + npm_timestamp_column_names = None + npm_time_units = None + npm_split_events = [True, True] if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": - npm_split_events = False - else: - npm_split_events = True + npm_split_events = None # Use the sample session src_base_dir = str(Path(".") / "testing_data") @@ -229,8 +229,8 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r selected_folders=[str(session_copy)], storenames_map=storenames_map, modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) @@ -239,8 +239,8 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r base_dir=str(tmp_base), selected_folders=[str(session_copy)], modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) @@ -249,8 +249,8 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r base_dir=str(tmp_base), selected_folders=[str(session_copy)], modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) @@ -259,8 +259,8 @@ def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_r base_dir=str(tmp_base), selected_folders=[str(session_copy)], modality=modality, - npm_timestamp_column_name=npm_timestamp_column_name, - npm_time_unit=npm_time_unit, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, npm_split_events=npm_split_events, ) From d55bba7887bfc2c94c05b6c26214dc4350495395 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 10:59:09 -0800 Subject: [PATCH 044/150] Defined BaseRecordingExtractor. 
--- src/guppy/extractors/__init__.py | 1 + .../extractors/base_recording_extractor.py | 128 ++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 src/guppy/extractors/base_recording_extractor.py diff --git a/src/guppy/extractors/__init__.py b/src/guppy/extractors/__init__.py index b876012..75933c7 100644 --- a/src/guppy/extractors/__init__.py +++ b/src/guppy/extractors/__init__.py @@ -1,3 +1,4 @@ +from .base_recording_extractor import BaseRecordingExtractor from .tdt_recording_extractor import TdtRecordingExtractor, execute_readtev from .csv_recording_extractor import CsvRecordingExtractor, execute_import_csv from .doric_recording_extractor import DoricRecordingExtractor, execute_import_doric diff --git a/src/guppy/extractors/base_recording_extractor.py b/src/guppy/extractors/base_recording_extractor.py new file mode 100644 index 0000000..7058a0a --- /dev/null +++ b/src/guppy/extractors/base_recording_extractor.py @@ -0,0 +1,128 @@ +"""Base class for recording extractors.""" + +import os +from abc import ABC, abstractmethod +from typing import Any + +import h5py +import numpy as np + + +class BaseRecordingExtractor(ABC): + """ + Abstract base class for recording extractors. + + Defines the interface contract for reading and saving fiber photometry + data from various acquisition formats (TDT, Doric, CSV, NPM, etc.). + """ + + @property + @abstractmethod + def events(self) -> list[str]: + """ + List of available event/store names in the data. + + Returns + ------- + list of str + Names of all events or stores available in the dataset. + """ + pass + + @property + @abstractmethod + def flags(self) -> list: + """ + Format indicators or file type flags. + + Returns + ------- + list + Flags indicating file types or data formats. + """ + pass + + @abstractmethod + def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: + """ + Read data from source files for specified events. + + Parameters + ---------- + events : list of str + List of event/store names to extract from the data. + outputPath : str + Path to the output directory. + **kwargs + Additional extractor-specific parameters. + + Returns + ------- + list of dict + List of dictionaries containing extracted data. Each dictionary + represents one event/store and contains keys such as 'storename', + 'timestamps', 'data', 'sampling_rate', etc. + """ + pass + + @abstractmethod + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: + """ + Save extracted data dictionaries to HDF5 format. + + Parameters + ---------- + output_dicts : list of dict + List of data dictionaries from read(). + outputPath : str + Path to the output directory. + **kwargs + Additional extractor-specific parameters. + """ + pass + + @staticmethod + def _write_hdf5(data: Any, storename: str, output_path: str, key: str) -> None: + """ + Write data to HDF5 file. + + Parameters + ---------- + data : array-like + Data to write to the HDF5 file. + storename : str + Name of the store/event. + output_path : str + Directory path where HDF5 file will be written. + key : str + Key name for this data field in the HDF5 file. 
+ """ + # Replace invalid characters in storename to avoid filesystem errors + storename = storename.replace("\\", "_") + storename = storename.replace("/", "_") + + filepath = os.path.join(output_path, storename + ".hdf5") + + # Create new file if it doesn't exist + if not os.path.exists(filepath): + with h5py.File(filepath, "w") as f: + if isinstance(data, np.ndarray): + f.create_dataset(key, data=data, maxshape=(None,), chunks=True) + else: + f.create_dataset(key, data=data) + # Append to existing file + else: + with h5py.File(filepath, "r+") as f: + if key in list(f.keys()): + if isinstance(data, np.ndarray): + f[key].resize(data.shape) + arr = f[key] + arr[:] = data + else: + arr = f[key] + arr[()] = data + else: + if isinstance(data, np.ndarray): + f.create_dataset(key, data=data, maxshape=(None,), chunks=True) + else: + f.create_dataset(key, data=data) From 1689b7ef15c188e62f9f3a38fa63cc7329b08d2c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 11:00:47 -0800 Subject: [PATCH 045/150] Removed obsolete intermediates extractor steps --- src/guppy/csv_step2.py | 110 ----------- src/guppy/csv_step3.py | 66 ------- src/guppy/doric_step2.py | 92 --------- src/guppy/doric_step3.py | 159 --------------- src/guppy/npm_step2.py | 411 --------------------------------------- src/guppy/tdt_step2.py | 28 --- src/guppy/tdt_step3.py | 207 -------------------- 7 files changed, 1073 deletions(-) delete mode 100644 src/guppy/csv_step2.py delete mode 100644 src/guppy/csv_step3.py delete mode 100644 src/guppy/doric_step2.py delete mode 100644 src/guppy/doric_step3.py delete mode 100644 src/guppy/npm_step2.py delete mode 100644 src/guppy/tdt_step2.py delete mode 100644 src/guppy/tdt_step3.py diff --git a/src/guppy/csv_step2.py b/src/guppy/csv_step2.py deleted file mode 100644 index ba4b34f..0000000 --- a/src/guppy/csv_step2.py +++ /dev/null @@ -1,110 +0,0 @@ -import glob -import logging -import os - -import numpy as np -import pandas as pd - -logger = logging.getLogger(__name__) - - -def check_header(df): - arr = list(df.columns) - check_float = [] - for i in arr: - try: - check_float.append(float(i)) - except: - pass - - return arr, check_float - - -def import_csv_step2(filepath): - logger.debug("If it exists, importing either NPM or Doric or csv file based on the structure of file") - path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) - - path = sorted(list(set(path))) - flag = "None" - event_from_filename = [] - flag_arr = [] - for i in range(len(path)): - ext = os.path.basename(path[i]).split(".")[-1] - assert ext == "csv", "Only .csv files are supported by import_csv function." - df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) - df = df.dropna(axis=1, how="all") - df_arr = np.array(df).flatten() - check_all_str = [] - for element in df_arr: - try: - float(element) - except: - check_all_str.append(i) - assert len(check_all_str) != len( - df_arr - ), "This file appears to be doric .csv. This function only supports standard .csv files." 
- df = pd.read_csv(path[i], index_col=False) - - _, value = check_header(df) - - # check dataframe structure and read data accordingly - if len(value) > 0: - columns_isstr = False - df = pd.read_csv(path[i], header=None) - cols = np.array(list(df.columns), dtype=str) - else: - df = df - columns_isstr = True - cols = np.array(list(df.columns), dtype=str) - # check the structure of dataframe and assign flag to the type of file - if len(cols) == 1: - if cols[0].lower() != "timestamps": - logger.error("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") - raise Exception("\033[1m" + "Column name should be timestamps (all lower-cases)" + "\033[0m") - else: - flag = "event_csv" - elif len(cols) == 3: - arr1 = np.array(["timestamps", "data", "sampling_rate"]) - arr2 = np.char.lower(np.array(cols)) - if (np.sort(arr1) == np.sort(arr2)).all() == False: - logger.error( - "\033[1m" - + "Column names should be timestamps, data and sampling_rate (all lower-cases)" - + "\033[0m" - ) - raise Exception( - "\033[1m" - + "Column names should be timestamps, data and sampling_rate (all lower-cases)" - + "\033[0m" - ) - else: - flag = "data_csv" - elif len(cols) == 2: - raise ValueError( - "Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data." - ) - elif len(cols) >= 2: - raise ValueError( - "Data appears to be Neurophotometrics csv. Please use import_npm_csv function to import the data." - ) - else: - logger.error("Number of columns in csv file does not make sense.") - raise Exception("Number of columns in csv file does not make sense.") - - if columns_isstr == True and ( - "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) - ): - flag = flag + "_v2" - else: - flag = flag - - flag_arr.append(flag) - logger.info(flag) - assert ( - flag == "event_csv" or flag == "data_csv" - ), "This function only supports standard event_csv and data_csv files." - name = os.path.basename(path[i]).split(".")[0] - event_from_filename.append(name) - - logger.info("Importing of csv file is done.") - return event_from_filename, flag_arr diff --git a/src/guppy/csv_step3.py b/src/guppy/csv_step3.py deleted file mode 100644 index 985959a..0000000 --- a/src/guppy/csv_step3.py +++ /dev/null @@ -1,66 +0,0 @@ -import logging -import multiprocessing as mp -import os -import time -from itertools import repeat - -import numpy as np -import pandas as pd - -from guppy.common_step3 import write_hdf5 - -logger = logging.getLogger(__name__) - - -def execute_import_csv(filepath, event, outputPath, numProcesses=mp.cpu_count()): - # logger.info("Reading data for event {} ...".format(event)) - - start = time.time() - with mp.Pool(numProcesses) as p: - p.starmap(import_csv, zip(repeat(filepath), event, repeat(outputPath))) - logger.info("Time taken = {0:.5f}".format(time.time() - start)) - - -# function to read event timestamps csv file. 
-def import_csv(filepath, event, outputPath): - logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") - if not os.path.exists(os.path.join(filepath, event + ".csv")): - logger.error("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") - raise Exception("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") - - df = pd.read_csv(os.path.join(filepath, event + ".csv"), index_col=False) - data = df - key = list(df.columns) - - if len(key) == 3: - arr1 = np.array(["timestamps", "data", "sampling_rate"]) - arr2 = np.char.lower(np.array(key)) - if (np.sort(arr1) == np.sort(arr2)).all() == False: - logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - raise Exception("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - - if len(key) == 1: - if key[0].lower() != "timestamps": - logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - raise Exception("\033[1m" + "Column name should be timestamps" + "\033[0m") - - if len(key) != 3 and len(key) != 1: - logger.error( - "\033[1m" - + "Number of columns in csv file should be either three or one. Three columns if \ - the file is for control or signal data or one column if the file is for event TTLs." - + "\033[0m" - ) - raise Exception( - "\033[1m" - + "Number of columns in csv file should be either three or one. Three columns if \ - the file is for control or signal data or one column if the file is for event TTLs." - + "\033[0m" - ) - - for i in range(len(key)): - write_hdf5(data[key[i]].dropna(), event, outputPath, key[i].lower()) - - logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") - - return data, key diff --git a/src/guppy/doric_step2.py b/src/guppy/doric_step2.py deleted file mode 100644 index 26ab22e..0000000 --- a/src/guppy/doric_step2.py +++ /dev/null @@ -1,92 +0,0 @@ -import glob -import logging -import os - -import h5py -import numpy as np -import pandas as pd - -logger = logging.getLogger(__name__) - - -def import_doric(filepath): - - logger.debug("If it exists, importing Doric file based on the structure of file") - path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) + sorted(glob.glob(os.path.join(filepath, "*.doric"))) - - path = sorted(list(set(path))) - flag = "None" - event_from_filename = [] - flag_arr = [] - for i in range(len(path)): - ext = os.path.basename(path[i]).split(".")[-1] - if ext == "doric": - key_names = read_doric(path[i]) - event_from_filename.extend(key_names) - flag = "doric_doric" - else: - df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) - df = df.dropna(axis=1, how="all") - df_arr = np.array(df).flatten() - check_all_str = [] - for element in df_arr: - try: - float(element) - except: - check_all_str.append(i) - assert len(check_all_str) == len( - df_arr - ), "This file appears to be standard .csv. This function only supports doric .csv files." 
- df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) - df = df.drop(["Time(s)"], axis=1) - event_from_filename.extend(list(df.columns)) - flag = "doric_csv" - logger.info(flag) - logger.info("Importing of Doric file is done.") - return event_from_filename, flag_arr - - -def read_doric(filepath): - with h5py.File(filepath, "r") as f: - if "Traces" in list(f.keys()): - keys = access_keys_doricV1(f) - elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - keys = access_keys_doricV6(f) - - return keys - - -def access_keys_doricV6(doric_file): - data = [doric_file["DataAcquisition"]] - res = [] - while len(data) != 0: - members = len(data) - while members != 0: - members -= 1 - data, last_element = separate_last_element(data) - if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): - res.append(last_element.name) - elif isinstance(last_element, h5py.Group): - data.extend(reversed([last_element[k] for k in last_element.keys()])) - - keys = [] - for element in res: - sep_values = element.split("/") - if sep_values[-1] == "Values": - keys.append(f"{sep_values[-3]}/{sep_values[-2]}") - else: - keys.append(f"{sep_values[-2]}/{sep_values[-1]}") - - return keys - - -def access_keys_doricV1(doric_file): - keys = list(doric_file["Traces"]["Console"].keys()) - keys.remove("Time(s)") - - return keys - - -def separate_last_element(arr): - l = arr[-1] - return arr[:-1], l diff --git a/src/guppy/doric_step3.py b/src/guppy/doric_step3.py deleted file mode 100644 index e9fd7cc..0000000 --- a/src/guppy/doric_step3.py +++ /dev/null @@ -1,159 +0,0 @@ -import glob -import logging -import os -import re -import warnings - -import h5py -import numpy as np -import pandas as pd - -from guppy.common_step3 import write_hdf5 - -logger = logging.getLogger(__name__) - - -def check_doric(filepath): - logger.debug("Checking if doric file exists") - path = glob.glob(os.path.join(filepath, "*.csv")) + glob.glob(os.path.join(filepath, "*.doric")) - - flag_arr = [] - for i in range(len(path)): - ext = os.path.basename(path[i]).split(".")[-1] - if ext == "csv": - with warnings.catch_warnings(): - warnings.simplefilter("error") - try: - df = pd.read_csv(path[i], index_col=False, dtype=float) - except: - df = pd.read_csv(path[i], header=1, index_col=False, nrows=10) - flag = "doric_csv" - flag_arr.append(flag) - elif ext == "doric": - flag = "doric_doric" - flag_arr.append(flag) - else: - pass - - if len(flag_arr) > 1: - logger.error("Two doric files are present at the same location") - raise Exception("Two doric files are present at the same location") - if len(flag_arr) == 0: - logger.error("\033[1m" + "Doric file not found." 
+ "\033[1m") - return 0 - logger.info("Doric file found.") - return flag_arr[0] - - -def execute_import_doric(filepath, storesList, flag, outputPath): - flag = check_doric(filepath) - - if flag == "doric_csv": - path = glob.glob(os.path.join(filepath, "*.csv")) - if len(path) > 1: - logger.error("An error occurred : More than one Doric csv file present at the location") - raise Exception("More than one Doric csv file present at the location") - else: - df = pd.read_csv(path[0], header=1, index_col=False) - df = df.dropna(axis=1, how="all") - df = df.dropna(axis=0, how="any") - df["Time(s)"] = df["Time(s)"] - df["Time(s)"].to_numpy()[0] - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: - timestamps = np.array(df["Time(s)"]) - sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(df["Time(s)"].to_numpy(), storesList[0, i], outputPath, "timestamps") - write_hdf5(df[storesList[0, i]].to_numpy(), storesList[0, i], outputPath, "data") - else: - ttl = df[storesList[0, i]] - indices = np.where(ttl <= 0)[0] - diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5( - df["Time(s)"][indices[diff_indices] + 1].to_numpy(), storesList[0, i], outputPath, "timestamps" - ) - else: - path = glob.glob(os.path.join(filepath, "*.doric")) - if len(path) > 1: - logger.error("An error occurred : More than one Doric file present at the location") - raise Exception("More than one Doric file present at the location") - else: - with h5py.File(path[0], "r") as f: - if "Traces" in list(f.keys()): - keys = access_data_doricV1(f, storesList, outputPath) - elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - keys = access_data_doricV6(f, storesList, outputPath) - - -def access_data_doricV6(doric_file, storesList, outputPath): - data = [doric_file["DataAcquisition"]] - res = [] - while len(data) != 0: - members = len(data) - while members != 0: - members -= 1 - data, last_element = separate_last_element(data) - if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): - res.append(last_element.name) - elif isinstance(last_element, h5py.Group): - data.extend(reversed([last_element[k] for k in last_element.keys()])) - - decide_path = [] - for element in res: - sep_values = element.split("/") - if sep_values[-1] == "Values": - if f"{sep_values[-3]}/{sep_values[-2]}" in storesList[0, :]: - decide_path.append(element) - else: - if f"{sep_values[-2]}/{sep_values[-1]}" in storesList[0, :]: - decide_path.append(element) - - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: - regex = re.compile("(.*?)" + str(storesList[0, i]) + "(.*?)") - idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] - if len(idx) > 1: - logger.error("More than one string matched (which should not be the case)") - raise Exception("More than one string matched (which should not be the case)") - idx = idx[0] - data = np.array(doric_file[decide_path[idx]]) - timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) - sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") - write_hdf5(data, storesList[0, i], outputPath, "data") - else: - regex = re.compile("(.*?)" + storesList[0, i] + "$") - idx = [i for i in 
range(len(decide_path)) if regex.match(decide_path[i])] - if len(idx) > 1: - logger.error("More than one string matched (which should not be the case)") - raise Exception("More than one string matched (which should not be the case)") - idx = idx[0] - ttl = np.array(doric_file[decide_path[idx]]) - timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) - indices = np.where(ttl <= 0)[0] - diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") - - -def access_data_doricV1(doric_file, storesList, outputPath): - keys = list(doric_file["Traces"]["Console"].keys()) - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: - timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) - sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - data = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) - write_hdf5(sampling_rate, storesList[0, i], outputPath, "sampling_rate") - write_hdf5(timestamps, storesList[0, i], outputPath, "timestamps") - write_hdf5(data, storesList[0, i], outputPath, "data") - else: - timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) - ttl = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) - indices = np.where(ttl <= 0)[0] - diff_indices = np.where(np.diff(indices) > 1)[0] - write_hdf5(timestamps[indices[diff_indices] + 1], storesList[0, i], outputPath, "timestamps") - - -def separate_last_element(arr): - l = arr[-1] - return arr[:-1], l diff --git a/src/guppy/npm_step2.py b/src/guppy/npm_step2.py deleted file mode 100644 index 14b776f..0000000 --- a/src/guppy/npm_step2.py +++ /dev/null @@ -1,411 +0,0 @@ -import glob -import logging -import os -import tkinter as tk -from tkinter import StringVar, messagebox, ttk - -import numpy as np -import pandas as pd -import panel as pn - -pn.extension() - -logger = logging.getLogger(__name__) - - -def import_npm(filepath, num_ch, inputParameters=None): - - logger.debug("If it exists, importing NPM file based on the structure of file") - # Headless configuration (used to avoid any UI prompts when running tests) - headless = bool(os.environ.get("GUPPY_BASE_DIR")) - npm_timestamp_column_name = None - npm_time_unit = None - npm_split_events = None - if isinstance(inputParameters, dict): - npm_timestamp_column_name = inputParameters.get("npm_timestamp_column_name") - npm_time_unit = inputParameters.get("npm_time_unit", "seconds") - npm_split_events = inputParameters.get("npm_split_events", True) - path = sorted(glob.glob(os.path.join(filepath, "*.csv"))) + sorted(glob.glob(os.path.join(filepath, "*.doric"))) - path_chev = glob.glob(os.path.join(filepath, "*chev*")) - path_chod = glob.glob(os.path.join(filepath, "*chod*")) - path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) - path_event = glob.glob(os.path.join(filepath, "event*")) - # path_sig = glob.glob(os.path.join(filepath, 'sig*')) # TODO: what is this for? - path_chev_chod_event = path_chev + path_chod + path_event + path_chpr - - path = sorted(list(set(path) - set(path_chev_chod_event))) - flag = "None" - event_from_filename = [] - flag_arr = [] - for i in range(len(path)): - dirname = os.path.dirname(path[i]) - ext = os.path.basename(path[i]).split(".")[-1] - assert ext != "doric", "Doric files are not supported by import_npm function." 
- df = pd.read_csv(path[i], header=None, nrows=2, index_col=False, dtype=str) - df = df.dropna(axis=1, how="all") - df_arr = np.array(df).flatten() - check_all_str = [] - for element in df_arr: - try: - float(element) - except: - check_all_str.append(i) - assert len(check_all_str) != len( - df_arr - ), "This file appears to be doric .csv. This function only supports NPM .csv files." - df = pd.read_csv(path[i], index_col=False) - _, value = check_header(df) - - # check dataframe structure and read data accordingly - if len(value) > 0: - columns_isstr = False - df = pd.read_csv(path[i], header=None) - cols = np.array(list(df.columns), dtype=str) - else: - df = df - columns_isstr = True - cols = np.array(list(df.columns), dtype=str) - # check the structure of dataframe and assign flag to the type of file - assert len(cols) != 1, "File appears to be event .csv. This function only supports NPM .csv files." - assert len(cols) != 3, "File appears to be data .csv. This function only supports NPM .csv files." - if len(cols) == 2: - flag = "event_or_data_np" - elif len(cols) >= 2: - flag = "data_np" - else: - logger.error("Number of columns in csv file does not make sense.") - raise Exception("Number of columns in csv file does not make sense.") - - if columns_isstr == True and ( - "flags" in np.char.lower(np.array(cols)) or "ledstate" in np.char.lower(np.array(cols)) - ): - flag = flag + "_v2" - else: - flag = flag - - # used assigned flags to process the files and read the data - if flag == "event_or_data_np": - arr = list(df.iloc[:, 1]) - check_float = [True for i in arr if isinstance(i, float)] - if len(arr) == len(check_float) and columns_isstr == False: - flag = "data_np" - elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): - flag = "event_np" - else: - flag = "event_np" - - flag_arr.append(flag) - logger.info(flag) - if flag == "data_np": - file = f"file{str(i)}_" - df, indices_dict, _ = decide_indices(file, df, flag, num_ch) - keys = list(indices_dict.keys()) - for k in range(len(keys)): - for j in range(df.shape[1]): - if j == 0: - timestamps = df.iloc[:, j][indices_dict[keys[k]]] - # timestamps_odd = df.iloc[:,j][odd_indices] - else: - d = dict() - d["timestamps"] = timestamps - d["data"] = df.iloc[:, j][indices_dict[keys[k]]] - - df_ch = pd.DataFrame(d) - df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) - event_from_filename.append(keys[k] + str(j)) - - elif flag == "event_np": - type_val = np.array(df.iloc[:, 1]) - type_val_unique = np.unique(type_val) - if headless: - response = 1 if bool(npm_split_events) else 0 - else: - window = tk.Tk() - if len(type_val_unique) > 1: - response = messagebox.askyesno( - "Multiple event TTLs", - "Based on the TTL file,\ - it looks like TTLs \ - belongs to multiple behavior type. 
\ - Do you want to create multiple files for each \ - behavior type ?", - ) - else: - response = 0 - window.destroy() - if response == 1: - timestamps = np.array(df.iloc[:, 0]) - for j in range(len(type_val_unique)): - idx = np.where(type_val == type_val_unique[j]) - d = dict() - d["timestamps"] = timestamps[idx] - df_new = pd.DataFrame(d) - df_new.to_csv(os.path.join(dirname, "event" + str(type_val_unique[j]) + ".csv"), index=False) - event_from_filename.append("event" + str(type_val_unique[j])) - else: - timestamps = np.array(df.iloc[:, 0]) - d = dict() - d["timestamps"] = timestamps - df_new = pd.DataFrame(d) - df_new.to_csv(os.path.join(dirname, "event" + str(0) + ".csv"), index=False) - event_from_filename.append("event" + str(0)) - else: - file = f"file{str(i)}_" - df, ts_unit = decide_ts_unit_for_npm( - df, timestamp_column_name=npm_timestamp_column_name, time_unit=npm_time_unit, headless=headless - ) - df, indices_dict, _ = decide_indices(file, df, flag) - keys = list(indices_dict.keys()) - for k in range(len(keys)): - for j in range(df.shape[1]): - if j == 0: - timestamps = df.iloc[:, j][indices_dict[keys[k]]] - # timestamps_odd = df.iloc[:,j][odd_indices] - else: - d = dict() - d["timestamps"] = timestamps - d["data"] = df.iloc[:, j][indices_dict[keys[k]]] - - df_ch = pd.DataFrame(d) - df_ch.to_csv(os.path.join(dirname, keys[k] + str(j) + ".csv"), index=False) - event_from_filename.append(keys[k] + str(j)) - - path_chev = glob.glob(os.path.join(filepath, "*chev*")) - path_chod = glob.glob(os.path.join(filepath, "*chod*")) - path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) - path_event = glob.glob(os.path.join(filepath, "event*")) - # path_sig = glob.glob(os.path.join(filepath, 'sig*')) - path_chev_chod_chpr = [path_chev, path_chod, path_chpr] - if ( - ("data_np_v2" in flag_arr or "data_np" in flag_arr) and ("event_np" in flag_arr) and (i == len(path) - 1) - ) or ( - ("data_np_v2" in flag_arr or "data_np" in flag_arr) and (i == len(path) - 1) - ): # i==len(path)-1 and or 'event_np' in flag - num_path_chev, num_path_chod, num_path_chpr = len(path_chev), len(path_chod), len(path_chpr) - arr_len, no_ch = [], [] - for i in range(len(path_chev_chod_chpr)): - if len(path_chev_chod_chpr[i]) > 0: - arr_len.append(len(path_chev_chod_chpr[i])) - else: - continue - - unique_arr_len = np.unique(np.array(arr_len)) - if "data_np_v2" in flag_arr: - if ts_unit == "seconds": - divisor = 1 - elif ts_unit == "milliseconds": - divisor = 1e3 - else: - divisor = 1e6 - else: - divisor = 1000 - - for j in range(len(path_event)): - df_event = pd.read_csv(path_event[j]) - df_chev = pd.read_csv(path_chev[0]) - df_event["timestamps"] = (df_event["timestamps"] - df_chev["timestamps"][0]) / divisor - df_event.to_csv(path_event[j], index=False) - if unique_arr_len.shape[0] == 1: - for j in range(len(path_chev)): - if file + "chev" in indices_dict.keys(): - df_chev = pd.read_csv(path_chev[j]) - df_chev["timestamps"] = (df_chev["timestamps"] - df_chev["timestamps"][0]) / divisor - df_chev["sampling_rate"] = np.full(df_chev.shape[0], np.nan) - df_chev.at[0, "sampling_rate"] = df_chev.shape[0] / ( - df_chev["timestamps"].iloc[-1] - df_chev["timestamps"].iloc[0] - ) - df_chev.to_csv(path_chev[j], index=False) - - if file + "chod" in indices_dict.keys(): - df_chod = pd.read_csv(path_chod[j]) - df_chod["timestamps"] = df_chev["timestamps"] - df_chod["sampling_rate"] = np.full(df_chod.shape[0], np.nan) - df_chod.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] - df_chod.to_csv(path_chod[j], index=False) - - 
if file + "chpr" in indices_dict.keys(): - df_chpr = pd.read_csv(path_chpr[j]) - df_chpr["timestamps"] = df_chev["timestamps"] - df_chpr["sampling_rate"] = np.full(df_chpr.shape[0], np.nan) - df_chpr.at[0, "sampling_rate"] = df_chev["sampling_rate"][0] - df_chpr.to_csv(path_chpr[j], index=False) - else: - logger.error("Number of channels should be same for all regions.") - raise Exception("Number of channels should be same for all regions.") - logger.info("Importing of NPM file is done.") - return event_from_filename, flag_arr - - -def check_header(df): - arr = list(df.columns) - check_float = [] - for i in arr: - try: - check_float.append(float(i)) - except: - pass - - return arr, check_float - - -# function to decide indices of interleaved channels -# in neurophotometrics data -def decide_indices(file, df, flag, num_ch=2): - ch_name = [file + "chev", file + "chod", file + "chpr"] - if len(ch_name) < num_ch: - logger.error( - "Number of channels parameters in Input Parameters GUI is more than 3. \ - Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - raise Exception( - "Number of channels parameters in Input Parameters GUI is more than 3. \ - Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - if flag == "data_np": - indices_dict = dict() - for i in range(num_ch): - indices_dict[ch_name[i]] = np.arange(i, df.shape[0], num_ch) - - else: - cols = np.array(list(df.columns)) - if "flags" in np.char.lower(np.array(cols)): - arr = ["FrameCounter", "Flags"] - state = np.array(df["Flags"]) - elif "ledstate" in np.char.lower(np.array(cols)): - arr = ["FrameCounter", "LedState"] - state = np.array(df["LedState"]) - else: - logger.error( - "File type shows Neurophotometrics newer version \ - data but column names does not have Flags or LedState" - ) - raise Exception( - "File type shows Neurophotometrics newer version \ - data but column names does not have Flags or LedState" - ) - - num_ch, ch = check_channels(state) - indices_dict = dict() - for i in range(num_ch): - first_occurrence = np.where(state == ch[i])[0] - indices_dict[ch_name[i]] = np.arange(first_occurrence[0], df.shape[0], num_ch) - - df = df.drop(arr, axis=1) - - return df, indices_dict, num_ch - - -# check flag consistency in neurophotometrics data -def check_channels(state): - state = state.astype(int) - unique_state = np.unique(state[2:12]) - if unique_state.shape[0] > 3: - logger.error( - "Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." - ) - raise Exception( - "Looks like there are more than 3 channels in the file. Reading of these files\ - are not supported. Reach out to us if you get this error message." 
- ) - - return unique_state.shape[0], unique_state - - -# function to decide NPM timestamps unit (seconds, ms or us) -def decide_ts_unit_for_npm(df, timestamp_column_name=None, time_unit=None, headless=False): - col_names = np.array(list(df.columns)) - col_names_ts = [""] - for name in col_names: - if "timestamp" in name.lower(): - col_names_ts.append(name) - - ts_unit = "seconds" - if len(col_names_ts) > 2: - # Headless path: auto-select column/unit without any UI - if headless: - if timestamp_column_name is not None: - assert ( - timestamp_column_name in col_names_ts - ), f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" - chosen = timestamp_column_name - else: - chosen = col_names_ts[1] - df.insert(1, "Timestamp", df[chosen]) - df = df.drop(col_names_ts[1:], axis=1) - valid_units = {"seconds", "milliseconds", "microseconds"} - ts_unit = time_unit if (isinstance(time_unit, str) and time_unit in valid_units) else "seconds" - return df, ts_unit - # def comboBoxSelected(event): - # logger.info(event.widget.get()) - - window = tk.Tk() - window.title("Select appropriate options for timestamps") - window.geometry("500x200") - holdComboboxValues = dict() - - timestamps_label = ttk.Label(window, text="Select which timestamps to use : ").grid( - row=0, column=1, pady=25, padx=25 - ) - holdComboboxValues["timestamps"] = StringVar() - timestamps_combo = ttk.Combobox(window, values=col_names_ts, textvariable=holdComboboxValues["timestamps"]) - timestamps_combo.grid(row=0, column=2, pady=25, padx=25) - timestamps_combo.current(0) - # timestamps_combo.bind("<>", comboBoxSelected) - - time_unit_label = ttk.Label(window, text="Select timestamps unit : ").grid(row=1, column=1, pady=25, padx=25) - holdComboboxValues["time_unit"] = StringVar() - time_unit_combo = ttk.Combobox( - window, values=["", "seconds", "milliseconds", "microseconds"], textvariable=holdComboboxValues["time_unit"] - ) - time_unit_combo.grid(row=1, column=2, pady=25, padx=25) - time_unit_combo.current(0) - # time_unit_combo.bind("<>", comboBoxSelected) - window.lift() - window.after(500, lambda: window.lift()) - window.mainloop() - - if holdComboboxValues["timestamps"].get(): - df.insert(1, "Timestamp", df[holdComboboxValues["timestamps"].get()]) - df = df.drop(col_names_ts[1:], axis=1) - else: - messagebox.showerror( - "All options not selected", - "All the options for timestamps \ - were not selected. Please select appropriate options", - ) - logger.error( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - raise Exception( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - if holdComboboxValues["time_unit"].get(): - if holdComboboxValues["time_unit"].get() == "seconds": - ts_unit = holdComboboxValues["time_unit"].get() - elif holdComboboxValues["time_unit"].get() == "milliseconds": - ts_unit = holdComboboxValues["time_unit"].get() - else: - ts_unit = holdComboboxValues["time_unit"].get() - else: - messagebox.showerror( - "All options not selected", - "All the options for timestamps \ - were not selected. Please select appropriate options", - ) - logger.error( - "All the options for timestamps \ - were not selected. Please select appropriate options" - ) - raise Exception( - "All the options for timestamps \ - were not selected. 
Please select appropriate options" - ) - else: - pass - - return df, ts_unit diff --git a/src/guppy/tdt_step2.py b/src/guppy/tdt_step2.py deleted file mode 100644 index 130ace8..0000000 --- a/src/guppy/tdt_step2.py +++ /dev/null @@ -1,28 +0,0 @@ -import glob -import logging -import os - -import numpy as np -import pandas as pd -from numpy import float32, float64, int32, int64, uint16 - -logger = logging.getLogger(__name__) - - -# function to read 'tsq' file -def readtsq(filepath): - names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") - formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) - offsets = 0, 4, 8, 12, 14, 16, 24, 24, 32, 36 - tsq_dtype = np.dtype({"names": names, "formats": formats, "offsets": offsets}, align=True) - path = glob.glob(os.path.join(filepath, "*.tsq")) - if len(path) > 1: - logger.error("Two tsq files are present at the location.") - raise Exception("Two tsq files are present at the location.") - elif len(path) == 0: - return 0 - else: - path = path[0] - tsq = np.fromfile(path, dtype=tsq_dtype) - df = pd.DataFrame(tsq) - return df diff --git a/src/guppy/tdt_step3.py b/src/guppy/tdt_step3.py deleted file mode 100644 index be92d4c..0000000 --- a/src/guppy/tdt_step3.py +++ /dev/null @@ -1,207 +0,0 @@ -import glob -import logging -import multiprocessing as mp -import os -import time -from itertools import repeat - -import numpy as np -import pandas as pd -from numpy import float32, float64, int32, int64, uint16 - -from guppy.common_step3 import write_hdf5 - -logger = logging.getLogger(__name__) - - -# function to read tsq file -def readtsq(filepath): - logger.debug("Trying to read tsq file.") - names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") - formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) - offsets = 0, 4, 8, 12, 14, 16, 24, 24, 32, 36 - tsq_dtype = np.dtype({"names": names, "formats": formats, "offsets": offsets}, align=True) - path = glob.glob(os.path.join(filepath, "*.tsq")) - if len(path) > 1: - logger.error("Two tsq files are present at the location.") - raise Exception("Two tsq files are present at the location.") - elif len(path) == 0: - logger.info("\033[1m" + "tsq file not found." 
+ "\033[1m") - return 0, 0 - else: - path = path[0] - flag = "tsq" - - # reading tsq file - tsq = np.fromfile(path, dtype=tsq_dtype) - - # creating dataframe of the data - df = pd.DataFrame(tsq) - - logger.info("Data from tsq file fetched.") - return df, flag - - -# function to execute readtev function using multiprocessing to make it faster -def execute_readtev(filepath, event, outputPath, numProcesses=mp.cpu_count()): - data, _ = readtsq(filepath) - - start = time.time() - with mp.Pool(numProcesses) as p: - p.starmap(readtev, zip(repeat(data), repeat(filepath), event, repeat(outputPath))) - # p = mp.Pool(mp.cpu_count()) - # p.starmap(readtev, zip(repeat(data), repeat(filepath), event, repeat(outputPath))) - # p.close() - # p.join() - logger.info("Time taken = {0:.5f}".format(time.time() - start)) - - -# function to read tev file -def readtev(data, filepath, event, outputPath): - - logger.debug("Reading data for event {} ...".format(event)) - tevfilepath = glob.glob(os.path.join(filepath, "*.tev")) - if len(tevfilepath) > 1: - raise Exception("Two tev files are present at the location.") - else: - tevfilepath = tevfilepath[0] - - data["name"] = np.asarray(data["name"], dtype=str) - - allnames = np.unique(data["name"]) - - index = [] - for i in range(len(allnames)): - length = len(str(allnames[i])) - if length < 4: - index.append(i) - - allnames = np.delete(allnames, index, 0) - - eventNew = np.array(list(event)) - - # logger.info(allnames) - # logger.info(eventNew) - row = ismember(data["name"], event) - - if sum(row) == 0: - logger.error("\033[1m" + "Requested store name " + event + " not found (case-sensitive)." + "\033[0m") - logger.error("\033[1m" + "File contains the following TDT store names:" + "\033[0m") - logger.error("\033[1m" + str(allnames) + "\033[0m") - logger.error("\033[1m" + "TDT store name " + str(event) + " not found." 
+ "\033[0m") - raise ValueError("Requested store name not found.") - - allIndexesWhereEventIsPresent = np.where(row == 1) - first_row = allIndexesWhereEventIsPresent[0][0] - - formatNew = data["format"][first_row] + 1 - - table = np.array( - [ - [0, 0, 0, 0], - [0, "float", 1, np.float32], - [0, "long", 1, np.int32], - [0, "short", 2, np.int16], - [0, "byte", 4, np.int8], - ] - ) - - S = dict() - - S["storename"] = str(event) - S["sampling_rate"] = data["frequency"][first_row] - S["timestamps"] = np.asarray(data["timestamp"][allIndexesWhereEventIsPresent[0]]) - S["channels"] = np.asarray(data["chan"][allIndexesWhereEventIsPresent[0]]) - - fp_loc = np.asarray(data["fp_loc"][allIndexesWhereEventIsPresent[0]]) - data_size = np.asarray(data["size"]) - - if formatNew != 5: - nsample = (data_size[first_row,] - 10) * int(table[formatNew, 2]) - S["data"] = np.zeros((len(fp_loc), nsample)) - for i in range(0, len(fp_loc)): - with open(tevfilepath, "rb") as fp: - fp.seek(fp_loc[i], os.SEEK_SET) - S["data"][i, :] = np.fromfile(fp, dtype=table[formatNew, 3], count=nsample).reshape( - 1, nsample, order="F" - ) - # S['data'] = S['data'].swapaxes() - S["npoints"] = nsample - else: - S["data"] = np.asarray(data["strobe"][allIndexesWhereEventIsPresent[0]]) - S["npoints"] = 1 - S["channels"] = np.tile(1, (S["data"].shape[0],)) - - S["data"] = (S["data"].T).reshape(-1, order="F") - - save_dict_to_hdf5(S, event, outputPath) - - check_data(S, filepath, event, outputPath) - - logger.info("Data for event {} fetched and stored.".format(event)) - - -# check if a particular element is there in an array or not -def ismember(arr, element): - res = [1 if i == element else 0 for i in arr] - return np.asarray(res) - - -# function to save data read from tev file to hdf5 file -def save_dict_to_hdf5(S, event, outputPath): - write_hdf5(S["storename"], event, outputPath, "storename") - write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") - write_hdf5(S["timestamps"], event, outputPath, "timestamps") - - write_hdf5(S["data"], event, outputPath, "data") - write_hdf5(S["npoints"], event, outputPath, "npoints") - write_hdf5(S["channels"], event, outputPath, "channels") - - -# function to check event data (checking whether event timestamps belongs to same event or multiple events) -def check_data(S, filepath, event, outputPath): - # logger.info("Checking event storename data for creating multiple event names from single event storename...") - new_event = event.replace("\\", "") - new_event = event.replace("/", "") - diff = np.diff(S["data"]) - arr = np.full(diff.shape[0], 1) - - storesList = np.genfromtxt(os.path.join(outputPath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) - - if diff.shape[0] == 0: - return 0 - - if S["sampling_rate"] == 0 and np.all(diff == diff[0]) == False: - logger.info("\033[1m" + "Data in event {} belongs to multiple behavior".format(event) + "\033[0m") - logger.debug( - "\033[1m" + "Create timestamp files for individual new event and change the stores list file." 
+ "\033[0m" - ) - i_d = np.unique(S["data"]) - for i in range(i_d.shape[0]): - new_S = dict() - idx = np.where(S["data"] == i_d[i])[0] - new_S["timestamps"] = S["timestamps"][idx] - new_S["storename"] = new_event + str(int(i_d[i])) - new_S["sampling_rate"] = S["sampling_rate"] - new_S["data"] = S["data"] - new_S["npoints"] = S["npoints"] - new_S["channels"] = S["channels"] - storesList = np.concatenate( - (storesList, [[new_event + str(int(i_d[i]))], [new_event + "_" + str(int(i_d[i]))]]), axis=1 - ) - save_dict_to_hdf5(new_S, new_event + str(int(i_d[i])), outputPath) - - idx = np.where(storesList[0] == event)[0] - storesList = np.delete(storesList, idx, axis=1) - if not os.path.exists(os.path.join(outputPath, ".cache_storesList.csv")): - os.rename(os.path.join(outputPath, "storesList.csv"), os.path.join(outputPath, ".cache_storesList.csv")) - if idx.shape[0] == 0: - pass - else: - np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") - logger.info( - "\033[1m" - + "Timestamp files for individual new event are created \ - and the stores list file is changed." - + "\033[0m" - ) From b35e04b0db575f6ca72ea198d9db12bde06e6b68 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 11:08:20 -0800 Subject: [PATCH 046/150] Refactored csv_recording_extractor to inherit from base_recording_extractor. --- .../extractors/csv_recording_extractor.py | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/guppy/extractors/csv_recording_extractor.py b/src/guppy/extractors/csv_recording_extractor.py index 5a42bd1..792ad01 100644 --- a/src/guppy/extractors/csv_recording_extractor.py +++ b/src/guppy/extractors/csv_recording_extractor.py @@ -4,11 +4,12 @@ import os import time from itertools import repeat +from typing import Any import numpy as np import pandas as pd -from guppy.common_step3 import write_hdf5 +from guppy.extractors import BaseRecordingExtractor logger = logging.getLogger(__name__) @@ -29,7 +30,7 @@ def read_and_save_csv(extractor, event, outputPath): logger.info("Data for event {} fetched and stored.".format(event)) -class CsvRecordingExtractor: +class CsvRecordingExtractor(BaseRecordingExtractor): def __init__(self, folder_path): self.folder_path = folder_path @@ -58,7 +59,7 @@ def __init__(self, folder_path): ), "This file appears to be doric .csv. This function only supports standard .csv files." 
df = pd.read_csv(path[i], index_col=False) - _, value = self.check_header(df) + _, value = self._check_header(df) # check dataframe structure and read data accordingly if len(value) > 0: @@ -121,10 +122,18 @@ def __init__(self, folder_path): logger.info("Importing of csv file is done.") - self.events = event_from_filename - self.flags = flag_arr + self._events = event_from_filename + self._flags = flag_arr - def check_header(self, df): + @property + def events(self) -> list[str]: + return self._events + + @property + def flags(self) -> list: + return self._flags + + def _check_header(self, df): arr = list(df.columns) check_float = [] for i in arr: @@ -135,7 +144,7 @@ def check_header(self, df): return arr, check_float - def read_csv(self, event): + def _read_csv(self, event): logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") if not os.path.exists(os.path.join(self.folder_path, event + ".csv")): logger.error("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") @@ -144,7 +153,7 @@ def read_csv(self, event): df = pd.read_csv(os.path.join(self.folder_path, event + ".csv"), index_col=False) return df - def save_to_hdf5(self, df, event, outputPath): + def _save_to_hdf5(self, df, event, outputPath): key = list(df.columns) # TODO: clean up these if branches @@ -175,21 +184,21 @@ def save_to_hdf5(self, df, event, outputPath): ) for i in range(len(key)): - write_hdf5(df[key[i]].dropna(), event, outputPath, key[i].lower()) + self._write_hdf5(df[key[i]].dropna(), event, outputPath, key[i].lower()) logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") - def read(self, events, outputPath): + def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: output_dicts = [] for event in events: - df = self.read_csv(event=event) + df = self._read_csv(event=event) S = df.to_dict() S["storename"] = event output_dicts.append(S) return output_dicts - def save(self, output_dicts, outputPath): + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: for S in output_dicts: event = S.pop("storename") df = pd.DataFrame.from_dict(S) - self.save_to_hdf5(df=df, event=event, outputPath=outputPath) + self._save_to_hdf5(df=df, event=event, outputPath=outputPath) From b330a64b43e87ec20536e3cdfa815efcb3b7f054 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 11:46:39 -0800 Subject: [PATCH 047/150] Refactored tdt_recording_extractor to inherit from base_recording_extractor. 
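As with the CSV extractor in the previous commit, the TDT extractor now inherits from
BaseRecordingExtractor: helper methods gain a leading underscore, events and flags become
read-only properties (events populated from the tsq header), and read() takes the keyword-only
base-class signature. A rough usage sketch under the new interface is shown below (the paths and
store name are placeholders, the output folder is assumed to exist, and save() is assumed to
follow the same keyword-only signature as the CSV extractor):

    from guppy.extractors import TdtRecordingExtractor

    session_path = "/data/example_tdt_session"              # folder holding one .tsq/.tev pair
    output_path = "/data/example_tdt_session/example_output"

    extractor = TdtRecordingExtractor(session_path)
    print(extractor.events)  # store names parsed from the tsq header, e.g. ['Dv1A', ...]

    # Read the requested stores, then persist each one as <storename>.hdf5 in output_path.
    # If a TTL store encodes several behaviors, read() also rewrites storesList.csv there.
    output_dicts = extractor.read(events=["Dv1A"], outputPath=output_path)
    extractor.save(output_dicts=output_dicts, outputPath=output_path)
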
--- .../extractors/tdt_recording_extractor.py | 84 +++++++++++-------- src/guppy/saveStoresList.py | 30 ++----- 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 58cde99..6e712fb 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -4,12 +4,13 @@ import os import time from itertools import repeat +from typing import Any import numpy as np import pandas as pd from numpy import float32, float64, int32, int64, uint16 -from guppy.common_step3 import write_hdf5 +from guppy.extractors import BaseRecordingExtractor logger = logging.getLogger(__name__) @@ -27,13 +28,37 @@ def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count() logger.info("Time taken = {0:.5f}".format(time.time() - start)) -class TdtRecordingExtractor: +class TdtRecordingExtractor(BaseRecordingExtractor): def __init__(self, folder_path): self.folder_path = folder_path - self.header_df, _ = self.readtsq(folder_path) + self._header_df, _ = self._readtsq(folder_path) + + # Populate events from header_df + if isinstance(self._header_df, pd.DataFrame): + self._header_df["name"] = np.asarray(self._header_df["name"], dtype=str) + allnames = np.unique(self._header_df["name"]) + index = [] + for i in range(len(allnames)): + length = len(str(allnames[i])) + if length < 4: + index.append(i) + allnames = np.delete(allnames, index, 0) + self._events = list(allnames) + else: + self._events = [] + + self._flags = [] + + @property + def events(self) -> list[str]: + return self._events - def readtsq(self, folder_path): + @property + def flags(self) -> list: + return self._flags + + def _readtsq(self, folder_path): logger.debug("Trying to read tsq file.") names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) @@ -59,9 +84,8 @@ def readtsq(self, folder_path): logger.info("Data from tsq file fetched.") return df, flag - # function to read tev file - def readtev(self, event): - data = self.header_df + def _readtev(self, event): + data = self._header_df filepath = self.folder_path logger.debug("Reading data for event {} ...".format(event)) @@ -87,7 +111,7 @@ def readtev(self, event): # logger.info(allnames) # logger.info(eventNew) - row = self.ismember(data["name"], event) + row = self._ismember(data["name"], event) if sum(row) == 0: logger.error("\033[1m" + "Requested store name " + event + " not found (case-sensitive)." 
+ "\033[0m") @@ -141,24 +165,23 @@ def readtev(self, event): return S - def read(self, events, outputPath): + def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: output_dicts = [] for event in events: - S = self.readtev(event=event) - if self.event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): - event_dicts = self.split_event_data(S, event) - self.split_event_storesList(S, event, outputPath) + S = self._readtev(event=event) + if self._event_needs_splitting(data=S["data"], sampling_rate=S["sampling_rate"]): + event_dicts = self._split_event_data(S, event) + self._split_event_storesList(S, event, outputPath) else: event_dicts = [S] output_dicts.extend(event_dicts) return output_dicts - # check if a particular element is there in an array or not - def ismember(self, arr, element): # TODO: replace this function with more standard usage + def _ismember(self, arr, element): res = [1 if i == element else 0 for i in arr] return np.asarray(res) - def event_needs_splitting(self, data, sampling_rate): + def _event_needs_splitting(self, data, sampling_rate): logger.info("Checking event storename data for creating multiple event names from single event storename...") diff = np.diff(data) if diff.shape[0] == 0: @@ -167,7 +190,7 @@ def event_needs_splitting(self, data, sampling_rate): return True return False - def split_event_data(self, S, event): + def _split_event_data(self, S, event): # Note that new_event is only used for the new storesList and event is still used for the old storesList new_event = event.replace("\\", "") new_event = event.replace("/", "") @@ -189,10 +212,7 @@ def split_event_data(self, S, event): return event_dicts - # This function saves a new storesList.csv file, which is a bit of a side effect in the overall read path, - # which is supposed to just return a list of dictionaries. - # TODO: long term I'd like to move these storesList shenanigans somewhere else, likely outside of the extractor. 
- def split_event_storesList(self, S, event, outputPath): + def _split_event_storesList(self, S, event, outputPath): # Note that new_event is only used for the new storesList and event is still used for the old storesList new_event = event.replace("\\", "") new_event = event.replace("/", "") @@ -217,17 +237,15 @@ def split_event_storesList(self, S, event, outputPath): np.savetxt(os.path.join(outputPath, "storesList.csv"), storesList, delimiter=",", fmt="%s") logger.info("\033[1m The stores list file is changed.\033[0m") - # function to save data read from tev file to hdf5 file - def save_dict_to_hdf5(self, S, outputPath): + def _save_dict_to_hdf5(self, S, outputPath): event = S["storename"] - write_hdf5(S["storename"], event, outputPath, "storename") - write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") - write_hdf5(S["timestamps"], event, outputPath, "timestamps") - - write_hdf5(S["data"], event, outputPath, "data") - write_hdf5(S["npoints"], event, outputPath, "npoints") - write_hdf5(S["channels"], event, outputPath, "channels") - - def save(self, output_dicts, outputPath): + self._write_hdf5(S["storename"], event, outputPath, "storename") + self._write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") + self._write_hdf5(S["timestamps"], event, outputPath, "timestamps") + self._write_hdf5(S["data"], event, outputPath, "data") + self._write_hdf5(S["npoints"], event, outputPath, "npoints") + self._write_hdf5(S["channels"], event, outputPath, "channels") + + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: for S in output_dicts: - self.save_dict_to_hdf5(S=S, outputPath=outputPath) + self._save_dict_to_hdf5(S=S, outputPath=outputPath) diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 552d76c..74602a5 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -80,7 +80,7 @@ def make_dir(filepath): # function to show GUI and save -def saveStorenames(inputParameters, data, event_name, flag, filepath): +def saveStorenames(inputParameters, event_name, flag, filepath): logger.debug("Saving stores list file.") # getting input parameters @@ -96,20 +96,8 @@ def saveStorenames(inputParameters, data, event_name, flag, filepath): logger.info("Storeslist : \n" + str(arr)) return - # reading storenames from the data fetched using 'readtsq' function - if isinstance(data, pd.DataFrame): - data["name"] = np.asarray(data["name"], dtype=str) - allnames = np.unique(data["name"]) - index = [] - for i in range(len(allnames)): - length = len(str(allnames[i])) - if length < 4: - index.append(i) - allnames = np.delete(allnames, index, 0) - allnames = list(allnames) - - else: - allnames = [] + # Get storenames from extractor's events property + allnames = event_name if "data_np_v2" in flag or "data_np" in flag or "event_np" in flag: path_chev = glob.glob(os.path.join(filepath, "*chev*")) @@ -152,9 +140,6 @@ def plot(plot_select): else: pass - # finalizing all the storenames - allnames = allnames + event_name - # instructions about how to save the storeslist file mark_down = pn.pane.Markdown( """ @@ -589,16 +574,14 @@ def execute(inputParameters): filepath = os.path.join(inputParameters["abspath"], i) if modality == "tdt": extractor = TdtRecordingExtractor(folder_path=filepath) - data = extractor.header_df - event_name, flag = [], [] + event_name = extractor.events + flag = extractor.flags elif modality == "csv": - data = 0 extractor = CsvRecordingExtractor(folder_path=filepath) event_name = 
extractor.events flag = extractor.flags elif modality == "doric": - data = 0 extractor = DoricRecordingExtractor(folder_path=filepath) event_name = extractor.events flag = extractor.flags @@ -621,14 +604,13 @@ def execute(inputParameters): npm_timestamp_column_names if npm_timestamp_column_names else None ) - data = 0 extractor = NpmRecordingExtractor(folder_path=filepath, num_ch=num_ch, inputParameters=inputParameters) event_name = extractor.events flag = extractor.flags else: raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") - saveStorenames(inputParameters, data, event_name, flag, filepath) + saveStorenames(inputParameters, event_name, flag, filepath) logger.info("#" * 400) except Exception as e: logger.error(str(e)) From 8af3b2be7e73eeaa326de65344bb36e8955f4207 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 11:49:11 -0800 Subject: [PATCH 048/150] Updated parameter names for saveStoresList. --- src/guppy/saveStoresList.py | 62 ++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 74602a5..318bc5f 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -80,7 +80,7 @@ def make_dir(filepath): # function to show GUI and save -def saveStorenames(inputParameters, event_name, flag, filepath): +def saveStorenames(inputParameters, events, flags, folder_path): logger.debug("Saving stores list file.") # getting input parameters @@ -89,7 +89,7 @@ def saveStorenames(inputParameters, event_name, flag, filepath): # Headless path: if storenames_map provided, write storesList.csv without building the Panel UI storenames_map = inputParameters.get("storenames_map") if isinstance(storenames_map, dict) and len(storenames_map) > 0: - op = make_dir(filepath) + op = make_dir(folder_path) arr = np.asarray([list(storenames_map.keys()), list(storenames_map.values())], dtype=str) np.savetxt(os.path.join(op, "storesList.csv"), arr, delimiter=",", fmt="%s") logger.info(f"Storeslist file saved at {op}") @@ -97,12 +97,12 @@ def saveStorenames(inputParameters, event_name, flag, filepath): return # Get storenames from extractor's events property - allnames = event_name + allnames = events - if "data_np_v2" in flag or "data_np" in flag or "event_np" in flag: - path_chev = glob.glob(os.path.join(filepath, "*chev*")) - path_chod = glob.glob(os.path.join(filepath, "*chod*")) - path_chpr = glob.glob(os.path.join(filepath, "*chpr*")) + if "data_np_v2" in flags or "data_np" in flags or "event_np" in flags: + path_chev = glob.glob(os.path.join(folder_path, "*chev*")) + path_chod = glob.glob(os.path.join(folder_path, "*chod*")) + path_chpr = glob.glob(os.path.join(folder_path, "*chpr*")) combine_paths = path_chev + path_chod + path_chpr d = dict() for i in range(len(combine_paths)): @@ -179,7 +179,9 @@ def plot(plot_select): ) # creating GUI template - template = pn.template.BootstrapTemplate(title="Storenames GUI - {}".format(os.path.basename(filepath), mark_down)) + template = pn.template.BootstrapTemplate( + title="Storenames GUI - {}".format(os.path.basename(folder_path), mark_down) + ) # creating different buttons and selectors for the GUI cross_selector = pn.widgets.CrossSelector(name="Store Names Selection", value=[], options=allnames, width=600) @@ -253,10 +255,10 @@ def callback(target, event): # on clicking overwrite_button, following function is executed def overwrite_button_actions(event): if event.new == "over_write_file": - 
select_location.options = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) + select_location.options = takeOnlyDirs(glob.glob(os.path.join(folder_path, "*_output_*"))) # select_location.value = select_location.options[0] else: - select_location.options = [show_dir(filepath)] + select_location.options = [show_dir(folder_path)] # select_location.value = select_location.options[0] def fetchValues(event): @@ -513,8 +515,8 @@ def save_button(event=None): # creating widgets, adding them to template and showing a GUI on a new browser window number = scanPortsAndFind(start_port=5000, end_port=5200) - if "data_np_v2" in flag or "data_np" in flag or "event_np" in flag: - widget_1 = pn.Column("# " + os.path.basename(filepath), mark_down, mark_down_np, plot_select, plot) + if "data_np_v2" in flags or "data_np" in flags or "event_np" in flags: + widget_1 = pn.Column("# " + os.path.basename(folder_path), mark_down, mark_down_np, plot_select, plot) widget_2 = pn.Column( repeat_storenames, repeat_storename_wd, @@ -535,7 +537,7 @@ def save_button(event=None): template.main.append(pn.Row(widget_1, widget_2)) else: - widget_1 = pn.Column("# " + os.path.basename(filepath), mark_down) + widget_1 = pn.Column("# " + os.path.basename(folder_path), mark_down) widget_2 = pn.Column( repeat_storenames, repeat_storename_wd, @@ -571,32 +573,32 @@ def execute(inputParameters): try: for i in folderNames: - filepath = os.path.join(inputParameters["abspath"], i) + folder_path = os.path.join(inputParameters["abspath"], i) if modality == "tdt": - extractor = TdtRecordingExtractor(folder_path=filepath) - event_name = extractor.events - flag = extractor.flags + extractor = TdtRecordingExtractor(folder_path=folder_path) + events = extractor.events + flags = extractor.flags elif modality == "csv": - extractor = CsvRecordingExtractor(folder_path=filepath) - event_name = extractor.events - flag = extractor.flags + extractor = CsvRecordingExtractor(folder_path=folder_path) + events = extractor.events + flags = extractor.flags elif modality == "doric": - extractor = DoricRecordingExtractor(folder_path=filepath) - event_name = extractor.events - flag = extractor.flags + extractor = DoricRecordingExtractor(folder_path=folder_path) + events = extractor.events + flags = extractor.flags elif modality == "npm": headless = bool(os.environ.get("GUPPY_BASE_DIR")) if not headless: # Resolve multiple event TTLs - multiple_event_ttls = NpmRecordingExtractor.has_multiple_event_ttls(folder_path=filepath) + multiple_event_ttls = NpmRecordingExtractor.has_multiple_event_ttls(folder_path=folder_path) responses = get_multi_event_responses(multiple_event_ttls) inputParameters["npm_split_events"] = responses # Resolve timestamp units and columns ts_unit_needs, col_names_ts = NpmRecordingExtractor.needs_ts_unit( - folder_path=filepath, num_ch=num_ch + folder_path=folder_path, num_ch=num_ch ) ts_units, npm_timestamp_column_names = get_timestamp_configuration(ts_unit_needs, col_names_ts) inputParameters["npm_time_units"] = ts_units if ts_units else None @@ -604,13 +606,15 @@ def execute(inputParameters): npm_timestamp_column_names if npm_timestamp_column_names else None ) - extractor = NpmRecordingExtractor(folder_path=filepath, num_ch=num_ch, inputParameters=inputParameters) - event_name = extractor.events - flag = extractor.flags + extractor = NpmRecordingExtractor( + folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters + ) + events = extractor.events + flags = extractor.flags else: raise ValueError("Modality not 
recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") - saveStorenames(inputParameters, event_name, flag, filepath) + saveStorenames(inputParameters, events, flags, folder_path) logger.info("#" * 400) except Exception as e: logger.error(str(e)) From 5dc6d78626a796bb2fde267ec9996b372f040eba Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 13:22:03 -0800 Subject: [PATCH 049/150] Refactored npm_recording_extractor to inherit from base_recording_extractor. --- .../extractors/npm_recording_extractor.py | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py index ae4f540..6d9b26a 100644 --- a/src/guppy/extractors/npm_recording_extractor.py +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -4,12 +4,13 @@ import os import time from itertools import repeat +from typing import Any import numpy as np import pandas as pd import panel as pn -from guppy.common_step3 import write_hdf5 +from guppy.extractors import BaseRecordingExtractor pn.extension() @@ -32,16 +33,24 @@ def read_and_save_npm(extractor, event, outputPath): logger.info("Data for event {} fetched and stored.".format(event)) -class NpmRecordingExtractor: +class NpmRecordingExtractor(BaseRecordingExtractor): def __init__(self, folder_path, num_ch, inputParameters=None): # TODO: make inputParameters mandatory self.folder_path = folder_path self.num_ch = num_ch self.inputParameters = inputParameters - self.events, self.flags = self.import_npm( + self._events, self._flags = self._import_npm( folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters ) + @property + def events(self) -> list[str]: + return self._events + + @property + def flags(self) -> list: + return self._flags + @classmethod def has_multiple_event_ttls(cls, folder_path): path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) @@ -96,7 +105,7 @@ def has_multiple_event_ttls(cls, folder_path): return multiple_event_ttls - def import_npm(self, folder_path, num_ch, inputParameters=None): + def _import_npm(self, folder_path, num_ch, inputParameters=None): logger.debug("If it exists, importing NPM file based on the structure of file") # Headless configuration (used to avoid any UI prompts when running tests) @@ -233,7 +242,7 @@ def import_npm(self, folder_path, num_ch, inputParameters=None): else: file = f"file{str(i)}_" ts_unit = npm_time_unit - df = self.update_df_with_timestamp_columns(df, timestamp_column_name=npm_timestamp_column_name) + df = self._update_df_with_timestamp_columns(df, timestamp_column_name=npm_timestamp_column_name) df, indices_dict, _ = self.decide_indices(file, df, flag) keys = list(indices_dict.keys()) for k in range(len(keys)): @@ -467,7 +476,7 @@ def needs_ts_unit(cls, folder_path, num_ch): return ts_unit_needs, col_names_ts - def update_df_with_timestamp_columns(self, df, timestamp_column_name): + def _update_df_with_timestamp_columns(self, df, timestamp_column_name): col_names = np.array(list(df.columns)) col_names_ts = [""] for name in col_names: @@ -484,7 +493,7 @@ def update_df_with_timestamp_columns(self, df, timestamp_column_name): df = df.drop(col_names_ts[1:], axis=1) return df - def read_npm(self, event): + def _read_npm(self, event): logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") if not os.path.exists(os.path.join(self.folder_path, event + ".csv")): logger.error("\033[1m" + "No csv file found for event {}".format(event) + 
"\033[0m") @@ -493,7 +502,7 @@ def read_npm(self, event): df = pd.read_csv(os.path.join(self.folder_path, event + ".csv"), index_col=False) return df - def save_to_hdf5(self, df, event, outputPath): + def _save_to_hdf5(self, df, event, outputPath): key = list(df.columns) # TODO: clean up these if branches @@ -524,21 +533,21 @@ def save_to_hdf5(self, df, event, outputPath): ) for i in range(len(key)): - write_hdf5(df[key[i]].dropna(), event, outputPath, key[i].lower()) + self._write_hdf5(data=df[key[i]].dropna(), storename=event, output_path=outputPath, key=key[i].lower()) logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") - def read(self, events, outputPath): + def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: output_dicts = [] for event in events: - df = self.read_npm(event=event) + df = self._read_npm(event=event) S = df.to_dict() S["storename"] = event output_dicts.append(S) return output_dicts - def save(self, output_dicts, outputPath): + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: for S in output_dicts: event = S.pop("storename") df = pd.DataFrame.from_dict(S) - self.save_to_hdf5(df=df, event=event, outputPath=outputPath) + self._save_to_hdf5(df=df, event=event, outputPath=outputPath) From 861e991cbacaf8165d3023b33ada9961e0f14424 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 13:44:03 -0800 Subject: [PATCH 050/150] Refactored doric_recording_extractor to inherit from base_recording_extractor. --- .../extractors/doric_recording_extractor.py | 128 ++++++++++-------- 1 file changed, 72 insertions(+), 56 deletions(-) diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index 2966ec6..51c22ca 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -3,23 +3,28 @@ import os import re import warnings +from typing import Any import h5py import numpy as np import pandas as pd -from guppy.common_step3 import write_hdf5 +from guppy.extractors import BaseRecordingExtractor logger = logging.getLogger(__name__) def execute_import_doric(folder_path, storesList, outputPath): + # Parse storesList into events and event_types + events = list(storesList[0, :]) + event_types = {storesList[0, i]: storesList[1, i] for i in range(storesList.shape[1])} + extractor = DoricRecordingExtractor(folder_path=folder_path) - output_dicts = extractor.read(storesList=storesList) + output_dicts = extractor.read(events=events, outputPath=outputPath, event_types=event_types) extractor.save(output_dicts=output_dicts, outputPath=outputPath) -class DoricRecordingExtractor: +class DoricRecordingExtractor(BaseRecordingExtractor): # TODO: consolidate duplicate flag logic between the `__init__` and the `check_doric` method. 
def __init__(self, folder_path): @@ -36,7 +41,7 @@ def __init__(self, folder_path): for i in range(len(path)): ext = os.path.basename(path[i]).split(".")[-1] if ext == "doric": - key_names = self.read_doric(path[i]) + key_names = self._read_doric(path[i]) event_from_filename.extend(key_names) flag = "doric_doric" else: @@ -59,26 +64,34 @@ def __init__(self, folder_path): logger.info(flag) logger.info("Importing of Doric file is done.") - self.events = event_from_filename - self.flags = flag_arr + self._events = event_from_filename + self._flags = flag_arr + + @property + def events(self) -> list[str]: + return self._events + + @property + def flags(self) -> list: + return self._flags - def read_doric(self, filepath): + def _read_doric(self, filepath): with h5py.File(filepath, "r") as f: if "Traces" in list(f.keys()): - keys = self.access_keys_doricV1(f) + keys = self._access_keys_doricV1(f) elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - keys = self.access_keys_doricV6(f) + keys = self._access_keys_doricV6(f) return keys - def access_keys_doricV6(self, doric_file): + def _access_keys_doricV6(self, doric_file): data = [doric_file["DataAcquisition"]] res = [] while len(data) != 0: members = len(data) while members != 0: members -= 1 - data, last_element = self.separate_last_element(data) + data, last_element = self._separate_last_element(data) if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): res.append(last_element.name) elif isinstance(last_element, h5py.Group): @@ -94,17 +107,17 @@ def access_keys_doricV6(self, doric_file): return keys - def access_keys_doricV1(self, doric_file): + def _access_keys_doricV1(self, doric_file): keys = list(doric_file["Traces"]["Console"].keys()) keys.remove("Time(s)") return keys - def separate_last_element(self, arr): + def _separate_last_element(self, arr): l = arr[-1] return arr[:-1], l - def check_doric(self): + def _check_doric(self): logger.debug("Checking if doric file exists") path = glob.glob(os.path.join(self.folder_path, "*.csv")) + glob.glob(os.path.join(self.folder_path, "*.doric")) @@ -135,7 +148,7 @@ def check_doric(self): logger.info("Doric file found.") return flag_arr[0] - def read_doric_csv(self, storesList): + def _read_doric_csv(self, events, event_types): path = glob.glob(os.path.join(self.folder_path, "*.csv")) if len(path) > 1: logger.error("An error occurred : More than one Doric csv file present at the location") @@ -147,45 +160,46 @@ def read_doric_csv(self, storesList): df["Time(s)"] = df["Time(s)"] - df["Time(s)"].to_numpy()[0] output_dicts = [] - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: + for event in events: + event_type = event_types[event] + if "control" in event_type or "signal" in event_type: timestamps = np.array(df["Time(s)"]) sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - data = np.array(df[storesList[0, i]]) - storename = storesList[0, i] + data = np.array(df[event]) + storename = event S = {"storename": storename, "sampling_rate": sampling_rate, "timestamps": timestamps, "data": data} output_dicts.append(S) else: - ttl = df[storesList[0, i]] + ttl = df[event] indices = np.where(ttl <= 0)[0] diff_indices = np.where(np.diff(indices) > 1)[0] timestamps = df["Time(s)"][indices[diff_indices] + 1].to_numpy() - storename = storesList[0, i] + storename = event S = {"storename": storename, "timestamps": timestamps} output_dicts.append(S) return output_dicts - def read_doric_doric(self, 
storesList): + def _read_doric_doric(self, events, event_types): path = glob.glob(os.path.join(self.folder_path, "*.doric")) if len(path) > 1: logger.error("An error occurred : More than one Doric file present at the location") raise Exception("More than one Doric file present at the location") with h5py.File(path[0], "r") as f: if "Traces" in list(f.keys()): - output_dicts = self.access_data_doricV1(f, storesList) + output_dicts = self._access_data_doricV1(f, events, event_types) elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - output_dicts = self.access_data_doricV6(f, storesList) + output_dicts = self._access_data_doricV6(f, events, event_types) return output_dicts - def access_data_doricV6(self, doric_file, storesList): + def _access_data_doricV6(self, doric_file, events, event_types): data = [doric_file["DataAcquisition"]] res = [] while len(data) != 0: members = len(data) while members != 0: members -= 1 - data, last_element = self.separate_last_element(data) + data, last_element = self._separate_last_element(data) if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): res.append(last_element.name) elif isinstance(last_element, h5py.Group): @@ -195,16 +209,17 @@ def access_data_doricV6(self, doric_file, storesList): for element in res: sep_values = element.split("/") if sep_values[-1] == "Values": - if f"{sep_values[-3]}/{sep_values[-2]}" in storesList[0, :]: + if f"{sep_values[-3]}/{sep_values[-2]}" in events: decide_path.append(element) else: - if f"{sep_values[-2]}/{sep_values[-1]}" in storesList[0, :]: + if f"{sep_values[-2]}/{sep_values[-1]}" in events: decide_path.append(element) output_dicts = [] - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in storesList[1, i]: - regex = re.compile("(.*?)" + str(storesList[0, i]) + "(.*?)") + for event in events: + event_type = event_types[event] + if "control" in event_type or "signal" in event_type: + regex = re.compile("(.*?)" + str(event) + "(.*?)") idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] if len(idx) > 1: logger.error("More than one string matched (which should not be the case)") @@ -213,11 +228,11 @@ def access_data_doricV6(self, doric_file, storesList): data = np.array(doric_file[decide_path[idx]]) timestamps = np.array(doric_file[decide_path[idx].rsplit("/", 1)[0] + "/Time"]) sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - storename = storesList[0, i] + storename = event S = {"storename": storename, "sampling_rate": sampling_rate, "timestamps": timestamps, "data": data} output_dicts.append(S) else: - regex = re.compile("(.*?)" + storesList[0, i] + "$") + regex = re.compile("(.*?)" + event + "$") idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] if len(idx) > 1: logger.error("More than one string matched (which should not be the case)") @@ -228,56 +243,57 @@ def access_data_doricV6(self, doric_file, storesList): indices = np.where(ttl <= 0)[0] diff_indices = np.where(np.diff(indices) > 1)[0] timestamps = timestamps[indices[diff_indices] + 1] - storename = storesList[0, i] + storename = event S = {"storename": storename, "timestamps": timestamps} output_dicts.append(S) return output_dicts - def access_data_doricV1(self, doric_file, storesList): + def _access_data_doricV1(self, doric_file, events, event_types): keys = list(doric_file["Traces"]["Console"].keys()) output_dicts = [] - for i in range(storesList.shape[1]): - if "control" in storesList[1, i] or "signal" in 
storesList[1, i]: + for event in events: + event_type = event_types[event] + if "control" in event_type or "signal" in event_type: timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) - data = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) - storename = storesList[0, i] + data = np.array(doric_file["Traces"]["Console"][event][event]) + storename = event S = {"storename": storename, "sampling_rate": sampling_rate, "timestamps": timestamps, "data": data} output_dicts.append(S) else: timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) - ttl = np.array(doric_file["Traces"]["Console"][storesList[0, i]][storesList[0, i]]) + ttl = np.array(doric_file["Traces"]["Console"][event][event]) indices = np.where(ttl <= 0)[0] diff_indices = np.where(np.diff(indices) > 1)[0] timestamps = timestamps[indices[diff_indices] + 1] - storename = storesList[0, i] + storename = event S = {"storename": storename, "timestamps": timestamps} output_dicts.append(S) return output_dicts - def save_dict_to_hdf5(self, S, outputPath): - event = S["storename"] - write_hdf5(S["timestamps"], event, outputPath, "timestamps") - - if "sampling_rate" in S: - write_hdf5(S["sampling_rate"], event, outputPath, "sampling_rate") - if "data" in S: - write_hdf5(S["data"], event, outputPath, "data") - - def read(self, storesList): - flag = self.check_doric() + def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: + event_types = kwargs["event_types"] + flag = self._check_doric() if flag == "doric_csv": - output_dicts = self.read_doric_csv(storesList) + output_dicts = self._read_doric_csv(events, event_types) elif flag == "doric_doric": - output_dicts = self.read_doric_doric(storesList) + output_dicts = self._read_doric_doric(events, event_types) else: logger.error("Doric file not found or not recognized.") raise FileNotFoundError("Doric file not found or not recognized.") return output_dicts - def save(self, output_dicts, outputPath): + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: for S in output_dicts: - self.save_dict_to_hdf5(S=S, outputPath=outputPath) + storename = S["storename"] + self._write_hdf5(data=S["timestamps"], storename=storename, output_path=outputPath, key="timestamps") + + if "sampling_rate" in S: + self._write_hdf5( + data=S["sampling_rate"], storename=storename, output_path=outputPath, key="sampling_rate" + ) + if "data" in S: + self._write_hdf5(data=S["data"], storename=storename, output_path=outputPath, key="data") From dd40cb4aa0629e539322e100c908f05509acdd07 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 14:23:53 -0800 Subject: [PATCH 051/150] Refactored doric_recording_extractor to use class method for events and flags. 
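Sketch of the intended call pattern after this change, for reviewers: discovery is now a classmethod that needs only the folder path, while extraction takes the event-name-to-event-type mapping that the stores-list step normally provides. The paths and the example mapping below are hypothetical placeholders, not part of this change:

    from guppy.extractors.doric_recording_extractor import DoricRecordingExtractor

    # Hypothetical paths for illustration only.
    folder_path = "/data/doric/session1"
    output_path = "/data/doric/session1_output_1"

    # Discovery no longer needs an instance or the event-type mapping.
    events, flags = DoricRecordingExtractor.discover_events_and_flags(folder_path)

    # Extraction still needs to know which stores are signal/control vs. TTL events;
    # this mapping is a made-up example of what storesList.csv normally provides.
    event_name_to_event_type = {events[0]: "signal_A", events[1]: "control_A"}

    extractor = DoricRecordingExtractor(
        folder_path=folder_path,
        event_name_to_event_type=event_name_to_event_type,
    )
    output_dicts = extractor.read(events=list(event_name_to_event_type), outputPath=output_path)
    extractor.save(output_dicts=output_dicts, outputPath=output_path)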
--- .../extractors/doric_recording_extractor.py | 87 ++++++++++++------- src/guppy/saveStoresList.py | 4 +- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index 51c22ca..f67e3f1 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -15,33 +15,48 @@ def execute_import_doric(folder_path, storesList, outputPath): - # Parse storesList into events and event_types events = list(storesList[0, :]) - event_types = {storesList[0, i]: storesList[1, i] for i in range(storesList.shape[1])} + event_name_to_event_type = {storesList[0, i]: storesList[1, i] for i in range(storesList.shape[1])} - extractor = DoricRecordingExtractor(folder_path=folder_path) - output_dicts = extractor.read(events=events, outputPath=outputPath, event_types=event_types) + extractor = DoricRecordingExtractor(folder_path=folder_path, event_name_to_event_type=event_name_to_event_type) + output_dicts = extractor.read(events=events, outputPath=outputPath) extractor.save(output_dicts=output_dicts, outputPath=outputPath) class DoricRecordingExtractor(BaseRecordingExtractor): # TODO: consolidate duplicate flag logic between the `__init__` and the `check_doric` method. - def __init__(self, folder_path): - self.folder_path = folder_path - logger.debug("If it exists, importing Doric file based on the structure of file") - path = sorted(glob.glob(os.path.join(self.folder_path, "*.csv"))) + sorted( - glob.glob(os.path.join(self.folder_path, "*.doric")) + @classmethod + def discover_events_and_flags(cls, folder_path): + """ + Discover available events and file format flags from Doric files. + + Parameters + ---------- + folder_path : str + Path to the folder containing Doric files + + Returns + ------- + events : list + List of discovered event names + flags : list + List of format flags (e.g., 'doric_csv', 'doric_doric') + """ + logger.debug("Discovering Doric events from file headers") + path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) + sorted( + glob.glob(os.path.join(folder_path, "*.doric")) ) path = sorted(list(set(path))) flag = "None" event_from_filename = [] flag_arr = [] + for i in range(len(path)): ext = os.path.basename(path[i]).split(".")[-1] if ext == "doric": - key_names = self._read_doric(path[i]) + key_names = cls._read_doric_file(path[i]) event_from_filename.extend(key_names) flag = "doric_doric" else: @@ -62,10 +77,14 @@ def __init__(self, folder_path): event_from_filename.extend(list(df.columns)) flag = "doric_csv" logger.info(flag) - logger.info("Importing of Doric file is done.") - self._events = event_from_filename - self._flags = flag_arr + logger.info("Doric event discovery complete.") + return event_from_filename, flag_arr + + def __init__(self, folder_path, event_name_to_event_type): + self.folder_path = folder_path + self._event_name_to_event_type = event_name_to_event_type + self._events, self._flags = self.discover_events_and_flags(folder_path) @property def events(self) -> list[str]: @@ -75,23 +94,26 @@ def events(self) -> list[str]: def flags(self) -> list: return self._flags - def _read_doric(self, filepath): + @staticmethod + def _read_doric_file(filepath): + """Static helper to read Doric file headers for event discovery.""" with h5py.File(filepath, "r") as f: if "Traces" in list(f.keys()): - keys = self._access_keys_doricV1(f) + keys = DoricRecordingExtractor._access_keys_doricV1(f) elif list(f.keys()) == 
["Configurations", "DataAcquisition"]: - keys = self._access_keys_doricV6(f) + keys = DoricRecordingExtractor._access_keys_doricV6(f) return keys - def _access_keys_doricV6(self, doric_file): + @staticmethod + def _access_keys_doricV6(doric_file): data = [doric_file["DataAcquisition"]] res = [] while len(data) != 0: members = len(data) while members != 0: members -= 1 - data, last_element = self._separate_last_element(data) + data, last_element = DoricRecordingExtractor._separate_last_element(data) if isinstance(last_element, h5py.Dataset) and not last_element.name.endswith("/Time"): res.append(last_element.name) elif isinstance(last_element, h5py.Group): @@ -107,13 +129,15 @@ def _access_keys_doricV6(self, doric_file): return keys - def _access_keys_doricV1(self, doric_file): + @staticmethod + def _access_keys_doricV1(doric_file): keys = list(doric_file["Traces"]["Console"].keys()) keys.remove("Time(s)") return keys - def _separate_last_element(self, arr): + @staticmethod + def _separate_last_element(arr): l = arr[-1] return arr[:-1], l @@ -148,7 +172,7 @@ def _check_doric(self): logger.info("Doric file found.") return flag_arr[0] - def _read_doric_csv(self, events, event_types): + def _read_doric_csv(self, events): path = glob.glob(os.path.join(self.folder_path, "*.csv")) if len(path) > 1: logger.error("An error occurred : More than one Doric csv file present at the location") @@ -161,7 +185,7 @@ def _read_doric_csv(self, events, event_types): output_dicts = [] for event in events: - event_type = event_types[event] + event_type = self._event_name_to_event_type[event] if "control" in event_type or "signal" in event_type: timestamps = np.array(df["Time(s)"]) sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) @@ -180,19 +204,19 @@ def _read_doric_csv(self, events, event_types): return output_dicts - def _read_doric_doric(self, events, event_types): + def _read_doric_doric(self, events): path = glob.glob(os.path.join(self.folder_path, "*.doric")) if len(path) > 1: logger.error("An error occurred : More than one Doric file present at the location") raise Exception("More than one Doric file present at the location") with h5py.File(path[0], "r") as f: if "Traces" in list(f.keys()): - output_dicts = self._access_data_doricV1(f, events, event_types) + output_dicts = self._access_data_doricV1(f, events) elif list(f.keys()) == ["Configurations", "DataAcquisition"]: - output_dicts = self._access_data_doricV6(f, events, event_types) + output_dicts = self._access_data_doricV6(f, events) return output_dicts - def _access_data_doricV6(self, doric_file, events, event_types): + def _access_data_doricV6(self, doric_file, events): data = [doric_file["DataAcquisition"]] res = [] while len(data) != 0: @@ -217,7 +241,7 @@ def _access_data_doricV6(self, doric_file, events, event_types): output_dicts = [] for event in events: - event_type = event_types[event] + event_type = self._event_name_to_event_type[event] if "control" in event_type or "signal" in event_type: regex = re.compile("(.*?)" + str(event) + "(.*?)") idx = [i for i in range(len(decide_path)) if regex.match(decide_path[i])] @@ -249,11 +273,11 @@ def _access_data_doricV6(self, doric_file, events, event_types): return output_dicts - def _access_data_doricV1(self, doric_file, events, event_types): + def _access_data_doricV1(self, doric_file, events): keys = list(doric_file["Traces"]["Console"].keys()) output_dicts = [] for event in events: - event_type = event_types[event] + event_type = self._event_name_to_event_type[event] if "control" 
in event_type or "signal" in event_type: timestamps = np.array(doric_file["Traces"]["Console"]["Time(s)"]["Console_time(s)"]) sampling_rate = np.array([1 / (timestamps[-1] - timestamps[-2])]) @@ -274,12 +298,11 @@ def _access_data_doricV1(self, doric_file, events, event_types): return output_dicts def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: - event_types = kwargs["event_types"] flag = self._check_doric() if flag == "doric_csv": - output_dicts = self._read_doric_csv(events, event_types) + output_dicts = self._read_doric_csv(events) elif flag == "doric_doric": - output_dicts = self._read_doric_doric(events, event_types) + output_dicts = self._read_doric_doric(events) else: logger.error("Doric file not found or not recognized.") raise FileNotFoundError("Doric file not found or not recognized.") diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 318bc5f..acc62f4 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -584,9 +584,7 @@ def execute(inputParameters): flags = extractor.flags elif modality == "doric": - extractor = DoricRecordingExtractor(folder_path=folder_path) - events = extractor.events - flags = extractor.flags + events, flags = DoricRecordingExtractor.discover_events_and_flags(folder_path=folder_path) elif modality == "npm": headless = bool(os.environ.get("GUPPY_BASE_DIR")) From 4619964733e040b64f375254dce8d6dde99d94d1 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 16:23:19 -0800 Subject: [PATCH 052/150] Refactored Extractors to use class method discover_events and flags instead of properties. --- .../extractors/base_recording_extractor.py | 25 +-- .../extractors/csv_recording_extractor.py | 40 +++-- .../extractors/npm_recording_extractor.py | 166 +++++++++--------- .../extractors/tdt_recording_extractor.py | 47 +++-- src/guppy/readTevTsq.py | 2 +- src/guppy/saveStoresList.py | 12 +- 6 files changed, 151 insertions(+), 141 deletions(-) diff --git a/src/guppy/extractors/base_recording_extractor.py b/src/guppy/extractors/base_recording_extractor.py index 7058a0a..76d4f3c 100644 --- a/src/guppy/extractors/base_recording_extractor.py +++ b/src/guppy/extractors/base_recording_extractor.py @@ -16,29 +16,18 @@ class BaseRecordingExtractor(ABC): data from various acquisition formats (TDT, Doric, CSV, NPM, etc.). """ - @property + @classmethod @abstractmethod - def events(self) -> list[str]: + def discover_events_and_flags(cls) -> tuple[list[str], list[str]]: """ - List of available event/store names in the data. + Discover available events and format flags from data files. Returns ------- - list of str - Names of all events or stores available in the dataset. - """ - pass - - @property - @abstractmethod - def flags(self) -> list: - """ - Format indicators or file type flags. - - Returns - ------- - list - Flags indicating file types or data formats. + events : list of str + Names of all events/stores available in the dataset. + flags : list of str + Format indicators or file type flags. 
""" pass diff --git a/src/guppy/extractors/csv_recording_extractor.py b/src/guppy/extractors/csv_recording_extractor.py index 792ad01..41ee7ab 100644 --- a/src/guppy/extractors/csv_recording_extractor.py +++ b/src/guppy/extractors/csv_recording_extractor.py @@ -32,11 +32,25 @@ def read_and_save_csv(extractor, event, outputPath): class CsvRecordingExtractor(BaseRecordingExtractor): - def __init__(self, folder_path): - self.folder_path = folder_path - + @classmethod + def discover_events_and_flags(cls, folder_path) -> tuple[list[str], list[str]]: + """ + Discover available events and format flags from CSV files. + + Parameters + ---------- + folder_path : str + Path to the folder containing CSV files. + + Returns + ------- + events : list of str + Names of all events/stores available in the dataset. + flags : list of str + Format indicators or file type flags. + """ logger.debug("If it exists, importing either NPM or Doric or csv file based on the structure of file") - path = sorted(glob.glob(os.path.join(self.folder_path, "*.csv"))) + path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) path = sorted(list(set(path))) flag = "None" @@ -59,7 +73,7 @@ def __init__(self, folder_path): ), "This file appears to be doric .csv. This function only supports standard .csv files." df = pd.read_csv(path[i], index_col=False) - _, value = self._check_header(df) + _, value = cls._check_header(df) # check dataframe structure and read data accordingly if len(value) > 0: @@ -121,19 +135,13 @@ def __init__(self, folder_path): event_from_filename.append(name) logger.info("Importing of csv file is done.") + return event_from_filename, flag_arr - self._events = event_from_filename - self._flags = flag_arr - - @property - def events(self) -> list[str]: - return self._events - - @property - def flags(self) -> list: - return self._flags + def __init__(self, folder_path): + self.folder_path = folder_path - def _check_header(self, df): + @staticmethod + def _check_header(df): arr = list(df.columns) check_float = [] for i in arr: diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py index 6d9b26a..110ba56 100644 --- a/src/guppy/extractors/npm_recording_extractor.py +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -17,10 +17,10 @@ logger = logging.getLogger(__name__) -def execute_import_npm(folder_path, num_ch, inputParameters, events, outputPath, numProcesses=mp.cpu_count()): +def execute_import_npm(folder_path, events, outputPath, numProcesses=mp.cpu_count()): logger.info("Reading data for event {} ...".format(events)) - extractor = NpmRecordingExtractor(folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters) + extractor = NpmRecordingExtractor(folder_path=folder_path) start = time.time() with mp.Pool(numProcesses) as p: p.starmap(read_and_save_npm, zip(repeat(extractor), events, repeat(outputPath))) @@ -35,81 +35,29 @@ def read_and_save_npm(extractor, event, outputPath): class NpmRecordingExtractor(BaseRecordingExtractor): - def __init__(self, folder_path, num_ch, inputParameters=None): # TODO: make inputParameters mandatory - self.folder_path = folder_path - self.num_ch = num_ch - self.inputParameters = inputParameters - self._events, self._flags = self._import_npm( - folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters - ) - - @property - def events(self) -> list[str]: - return self._events - - @property - def flags(self) -> list: - return self._flags - + # TODO: make inputParameters mandatory @classmethod - 
def has_multiple_event_ttls(cls, folder_path): - path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) - path_chev = glob.glob(os.path.join(folder_path, "*chev*")) - path_chod = glob.glob(os.path.join(folder_path, "*chod*")) - path_chpr = glob.glob(os.path.join(folder_path, "*chpr*")) - path_event = glob.glob(os.path.join(folder_path, "event*")) - path_chev_chod_event = path_chev + path_chod + path_event + path_chpr - - path = sorted(list(set(path) - set(path_chev_chod_event))) - multiple_event_ttls = [] - for i in range(len(path)): - df = pd.read_csv(path[i], index_col=False) - _, value = cls.check_header(df) - - # check dataframe structure and read data accordingly - if len(value) > 0: - columns_isstr = False - df = pd.read_csv(path[i], header=None) - cols = np.array(list(df.columns), dtype=str) - else: - columns_isstr = True - cols = np.array(list(df.columns), dtype=str) - if len(cols) == 2: - flag = "event_or_data_np" - elif len(cols) > 2: - flag = "data_np" - else: - logger.error("Number of columns in csv file does not make sense.") - raise Exception("Number of columns in csv file does not make sense.") - - # used assigned flags to process the files and read the data - if flag == "event_or_data_np": - arr = list(df.iloc[:, 1]) - check_float = [True for i in arr if isinstance(i, float)] - if len(arr) == len(check_float) and columns_isstr == False: - flag = "data_np" - elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): - flag = "event_np" - else: - flag = "event_np" - - if flag == "event_np": - type_val = np.array(df.iloc[:, 1]) - type_val_unique = np.unique(type_val) - if len(type_val_unique) > 1: - multiple_event_ttls.append(True) - else: - multiple_event_ttls.append(False) - else: - multiple_event_ttls.append(False) - - return multiple_event_ttls - - def _import_npm(self, folder_path, num_ch, inputParameters=None): - + def discover_events_and_flags(cls, folder_path, num_ch, inputParameters=None) -> tuple[list[str], list[str]]: + """ + Discover available events and format flags from NPM files. + + Parameters + ---------- + folder_path : str + Path to the folder containing NPM files. + num_ch : int + Number of channels in the recording. + inputParameters : dict, optional + Input parameters containing NPM-specific configuration. + + Returns + ------- + events : list of str + Names of all events/stores available in the dataset. + flags : list of str + Format indicators or file type flags. + """ logger.debug("If it exists, importing NPM file based on the structure of file") - # Headless configuration (used to avoid any UI prompts when running tests) - headless = bool(os.environ.get("GUPPY_BASE_DIR")) if isinstance(inputParameters, dict): npm_timestamp_column_names = inputParameters.get("npm_timestamp_column_names") npm_time_units = inputParameters.get("npm_time_units") @@ -160,7 +108,7 @@ def _import_npm(self, folder_path, num_ch, inputParameters=None): df_arr ), "This file appears to be doric .csv. This function only supports NPM .csv files." 
df = pd.read_csv(path[i], index_col=False) - _, value = self.check_header(df) + _, value = cls.check_header(df) # check dataframe structure and read data accordingly if len(value) > 0: @@ -204,7 +152,7 @@ def _import_npm(self, folder_path, num_ch, inputParameters=None): logger.info(flag) if flag == "data_np": file = f"file{str(i)}_" - df, indices_dict, _ = self.decide_indices(file, df, flag, num_ch) + df, indices_dict, _ = cls.decide_indices(file, df, flag, num_ch) keys = list(indices_dict.keys()) for k in range(len(keys)): for j in range(df.shape[1]): @@ -242,8 +190,8 @@ def _import_npm(self, folder_path, num_ch, inputParameters=None): else: file = f"file{str(i)}_" ts_unit = npm_time_unit - df = self._update_df_with_timestamp_columns(df, timestamp_column_name=npm_timestamp_column_name) - df, indices_dict, _ = self.decide_indices(file, df, flag) + df = cls._update_df_with_timestamp_columns(df, timestamp_column_name=npm_timestamp_column_name) + df, indices_dict, _ = cls.decide_indices(file, df, flag) keys = list(indices_dict.keys()) for k in range(len(keys)): for j in range(df.shape[1]): @@ -326,6 +274,63 @@ def _import_npm(self, folder_path, num_ch, inputParameters=None): logger.info("Importing of NPM file is done.") return event_from_filename, flag_arr + def __init__(self, folder_path): + self.folder_path = folder_path + + @classmethod + def has_multiple_event_ttls(cls, folder_path): + path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) + path_chev = glob.glob(os.path.join(folder_path, "*chev*")) + path_chod = glob.glob(os.path.join(folder_path, "*chod*")) + path_chpr = glob.glob(os.path.join(folder_path, "*chpr*")) + path_event = glob.glob(os.path.join(folder_path, "event*")) + path_chev_chod_event = path_chev + path_chod + path_event + path_chpr + + path = sorted(list(set(path) - set(path_chev_chod_event))) + multiple_event_ttls = [] + for i in range(len(path)): + df = pd.read_csv(path[i], index_col=False) + _, value = cls.check_header(df) + + # check dataframe structure and read data accordingly + if len(value) > 0: + columns_isstr = False + df = pd.read_csv(path[i], header=None) + cols = np.array(list(df.columns), dtype=str) + else: + columns_isstr = True + cols = np.array(list(df.columns), dtype=str) + if len(cols) == 2: + flag = "event_or_data_np" + elif len(cols) > 2: + flag = "data_np" + else: + logger.error("Number of columns in csv file does not make sense.") + raise Exception("Number of columns in csv file does not make sense.") + + # used assigned flags to process the files and read the data + if flag == "event_or_data_np": + arr = list(df.iloc[:, 1]) + check_float = [True for i in arr if isinstance(i, float)] + if len(arr) == len(check_float) and columns_isstr == False: + flag = "data_np" + elif columns_isstr == True and ("value" in np.char.lower(np.array(cols))): + flag = "event_np" + else: + flag = "event_np" + + if flag == "event_np": + type_val = np.array(df.iloc[:, 1]) + type_val_unique = np.unique(type_val) + if len(type_val_unique) > 1: + multiple_event_ttls.append(True) + else: + multiple_event_ttls.append(False) + else: + multiple_event_ttls.append(False) + + return multiple_event_ttls + @classmethod def check_header(cls, df): arr = list(df.columns) @@ -476,7 +481,8 @@ def needs_ts_unit(cls, folder_path, num_ch): return ts_unit_needs, col_names_ts - def _update_df_with_timestamp_columns(self, df, timestamp_column_name): + @staticmethod + def _update_df_with_timestamp_columns(df, timestamp_column_name): col_names = np.array(list(df.columns)) col_names_ts = 
[""] for name in col_names: diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 6e712fb..949c9ec 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -30,35 +30,48 @@ def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count() class TdtRecordingExtractor(BaseRecordingExtractor): - def __init__(self, folder_path): - self.folder_path = folder_path - self._header_df, _ = self._readtsq(folder_path) + @classmethod + def discover_events_and_flags(cls, folder_path) -> tuple[list[str], list[str]]: + """ + Discover available events and format flags from TDT files. + + Parameters + ---------- + folder_path : str + Path to the folder containing TDT files. + + Returns + ------- + events : list of str + Names of all events/stores available in the dataset. + flags : list of str + Format indicators or file type flags. + """ + header_df, _ = cls._readtsq(folder_path) # Populate events from header_df - if isinstance(self._header_df, pd.DataFrame): - self._header_df["name"] = np.asarray(self._header_df["name"], dtype=str) - allnames = np.unique(self._header_df["name"]) + if isinstance(header_df, pd.DataFrame): + header_df["name"] = np.asarray(header_df["name"], dtype=str) + allnames = np.unique(header_df["name"]) index = [] for i in range(len(allnames)): length = len(str(allnames[i])) if length < 4: index.append(i) allnames = np.delete(allnames, index, 0) - self._events = list(allnames) + events = list(allnames) else: - self._events = [] - - self._flags = [] + events = [] - @property - def events(self) -> list[str]: - return self._events + flags = [] + return events, flags - @property - def flags(self) -> list: - return self._flags + def __init__(self, folder_path): + self.folder_path = folder_path + self._header_df, _ = self._readtsq(folder_path) - def _readtsq(self, folder_path): + @staticmethod + def _readtsq(folder_path): logger.debug("Trying to read tsq file.") names = ("size", "type", "name", "chan", "sort_code", "timestamp", "fp_loc", "strobe", "format", "frequency") formats = (int32, int32, "S4", uint16, uint16, float64, int64, float64, int32, float32) diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index f2c9419..2ae0c59 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -80,7 +80,7 @@ def readRawData(inputParameters): elif modality == "csv": execute_import_csv(filepath, events, op, numProcesses) elif modality == "npm": - execute_import_npm(filepath, num_ch, inputParameters, events, op, numProcesses) + execute_import_npm(filepath, events, op, numProcesses) else: raise ValueError("Modality not recognized. 
Please use 'tdt', 'csv', 'doric', or 'npm'.") diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index acc62f4..20a5c94 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -575,13 +575,9 @@ def execute(inputParameters): for i in folderNames: folder_path = os.path.join(inputParameters["abspath"], i) if modality == "tdt": - extractor = TdtRecordingExtractor(folder_path=folder_path) - events = extractor.events - flags = extractor.flags + events, flags = TdtRecordingExtractor.discover_events_and_flags(folder_path=folder_path) elif modality == "csv": - extractor = CsvRecordingExtractor(folder_path=folder_path) - events = extractor.events - flags = extractor.flags + events, flags = CsvRecordingExtractor.discover_events_and_flags(folder_path=folder_path) elif modality == "doric": events, flags = DoricRecordingExtractor.discover_events_and_flags(folder_path=folder_path) @@ -604,11 +600,9 @@ def execute(inputParameters): npm_timestamp_column_names if npm_timestamp_column_names else None ) - extractor = NpmRecordingExtractor( + events, flags = NpmRecordingExtractor.discover_events_and_flags( folder_path=folder_path, num_ch=num_ch, inputParameters=inputParameters ) - events = extractor.events - flags = extractor.flags else: raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") From beb585fab80a38d728f354364d91a7875680db0b Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 16:34:09 -0800 Subject: [PATCH 053/150] Refactored Extractors to use class method discover_events and flags instead of properties. --- src/guppy/extractors/doric_recording_extractor.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index f67e3f1..dd0ecdd 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -84,15 +84,6 @@ def discover_events_and_flags(cls, folder_path): def __init__(self, folder_path, event_name_to_event_type): self.folder_path = folder_path self._event_name_to_event_type = event_name_to_event_type - self._events, self._flags = self.discover_events_and_flags(folder_path) - - @property - def events(self) -> list[str]: - return self._events - - @property - def flags(self) -> list: - return self._flags @staticmethod def _read_doric_file(filepath): From 1b5e8ca6b4ea454636978e295eab8ca70a38027e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 16:42:36 -0800 Subject: [PATCH 054/150] Added comment about discover_events_and_flags signature --- src/guppy/extractors/base_recording_extractor.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/guppy/extractors/base_recording_extractor.py b/src/guppy/extractors/base_recording_extractor.py index 76d4f3c..c71297b 100644 --- a/src/guppy/extractors/base_recording_extractor.py +++ b/src/guppy/extractors/base_recording_extractor.py @@ -29,6 +29,11 @@ def discover_events_and_flags(cls) -> tuple[list[str], list[str]]: flags : list of str Format indicators or file type flags. """ + # NOTE: This method signature is intentionally minimal and flexible. + # Different formats have different discovery requirements: + # - TDT/CSV/Doric: need only folder_path parameter + # - NPM: needs folder_path, num_ch, and optional inputParameters for interleaved channels + # Each child class defines its own signature with the parameters it needs. 
pass @abstractmethod From 2e38ee8afd6b5c061c0f4618ba606619b1ce142c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 16:46:28 -0800 Subject: [PATCH 055/150] Removed unused quarks. --- src/guppy/extractors/base_recording_extractor.py | 8 ++------ src/guppy/extractors/csv_recording_extractor.py | 4 ++-- src/guppy/extractors/doric_recording_extractor.py | 4 ++-- src/guppy/extractors/npm_recording_extractor.py | 4 ++-- src/guppy/extractors/tdt_recording_extractor.py | 4 ++-- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/guppy/extractors/base_recording_extractor.py b/src/guppy/extractors/base_recording_extractor.py index c71297b..839c3db 100644 --- a/src/guppy/extractors/base_recording_extractor.py +++ b/src/guppy/extractors/base_recording_extractor.py @@ -37,7 +37,7 @@ def discover_events_and_flags(cls) -> tuple[list[str], list[str]]: pass @abstractmethod - def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: + def read(self, *, events: list[str], outputPath: str) -> list[dict[str, Any]]: """ Read data from source files for specified events. @@ -47,8 +47,6 @@ def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str List of event/store names to extract from the data. outputPath : str Path to the output directory. - **kwargs - Additional extractor-specific parameters. Returns ------- @@ -60,7 +58,7 @@ def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str pass @abstractmethod - def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str) -> None: """ Save extracted data dictionaries to HDF5 format. @@ -70,8 +68,6 @@ def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) List of data dictionaries from read(). outputPath : str Path to the output directory. - **kwargs - Additional extractor-specific parameters. 
""" pass diff --git a/src/guppy/extractors/csv_recording_extractor.py b/src/guppy/extractors/csv_recording_extractor.py index 41ee7ab..d74cfde 100644 --- a/src/guppy/extractors/csv_recording_extractor.py +++ b/src/guppy/extractors/csv_recording_extractor.py @@ -196,7 +196,7 @@ def _save_to_hdf5(self, df, event, outputPath): logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") - def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: + def read(self, *, events: list[str], outputPath: str) -> list[dict[str, Any]]: output_dicts = [] for event in events: df = self._read_csv(event=event) @@ -205,7 +205,7 @@ def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str output_dicts.append(S) return output_dicts - def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str) -> None: for S in output_dicts: event = S.pop("storename") df = pd.DataFrame.from_dict(S) diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index dd0ecdd..62a8586 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -288,7 +288,7 @@ def _access_data_doricV1(self, doric_file, events): return output_dicts - def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: + def read(self, *, events: list[str], outputPath: str) -> list[dict[str, Any]]: flag = self._check_doric() if flag == "doric_csv": output_dicts = self._read_doric_csv(events) @@ -300,7 +300,7 @@ def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str return output_dicts - def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str) -> None: for S in output_dicts: storename = S["storename"] self._write_hdf5(data=S["timestamps"], storename=storename, output_path=outputPath, key="timestamps") diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py index 110ba56..e3042c2 100644 --- a/src/guppy/extractors/npm_recording_extractor.py +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -543,7 +543,7 @@ def _save_to_hdf5(self, df, event, outputPath): logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") - def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str, Any]]: + def read(self, *, events: list[str], outputPath: str) -> list[dict[str, Any]]: output_dicts = [] for event in events: df = self._read_npm(event=event) @@ -552,7 +552,7 @@ def read(self, *, events: list[str], outputPath: str, **kwargs) -> list[dict[str output_dicts.append(S) return output_dicts - def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str) -> None: for S in output_dicts: event = S.pop("storename") df = pd.DataFrame.from_dict(S) diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index 949c9ec..a877a8b 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -178,7 +178,7 @@ def _readtev(self, event): return S - def read(self, *, events: list[str], outputPath: str, 
**kwargs) -> list[dict[str, Any]]: + def read(self, *, events: list[str], outputPath: str) -> list[dict[str, Any]]: output_dicts = [] for event in events: S = self._readtev(event=event) @@ -259,6 +259,6 @@ def _save_dict_to_hdf5(self, S, outputPath): self._write_hdf5(S["npoints"], event, outputPath, "npoints") self._write_hdf5(S["channels"], event, outputPath, "channels") - def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str, **kwargs) -> None: + def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str) -> None: for S in output_dicts: self._save_dict_to_hdf5(S=S, outputPath=outputPath) From cdecf428d97d4db8e36dbc9cd44510c7d529016f Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 16:51:26 -0800 Subject: [PATCH 056/150] Refactored NpmRecordingExtractor to inherit from CsvRecordingExtractor. --- .../extractors/npm_recording_extractor.py | 87 ++----------------- 1 file changed, 7 insertions(+), 80 deletions(-) diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py index e3042c2..68d13f7 100644 --- a/src/guppy/extractors/npm_recording_extractor.py +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -4,13 +4,12 @@ import os import time from itertools import repeat -from typing import Any import numpy as np import pandas as pd import panel as pn -from guppy.extractors import BaseRecordingExtractor +from guppy.extractors import CsvRecordingExtractor pn.extension() @@ -33,7 +32,9 @@ def read_and_save_npm(extractor, event, outputPath): logger.info("Data for event {} fetched and stored.".format(event)) -class NpmRecordingExtractor(BaseRecordingExtractor): +class NpmRecordingExtractor(CsvRecordingExtractor): + # Inherits from CsvRecordingExtractor to reuse identical read/save logic. + # Only overrides discover_events_and_flags() and adds NPM-specific helper methods. # TODO: make inputParameters mandatory @classmethod @@ -108,7 +109,7 @@ def discover_events_and_flags(cls, folder_path, num_ch, inputParameters=None) -> df_arr ), "This file appears to be doric .csv. This function only supports NPM .csv files." 
df = pd.read_csv(path[i], index_col=False) - _, value = cls.check_header(df) + _, value = cls._check_header(df) # check dataframe structure and read data accordingly if len(value) > 0: @@ -274,9 +275,6 @@ def discover_events_and_flags(cls, folder_path, num_ch, inputParameters=None) -> logger.info("Importing of NPM file is done.") return event_from_filename, flag_arr - def __init__(self, folder_path): - self.folder_path = folder_path - @classmethod def has_multiple_event_ttls(cls, folder_path): path = sorted(glob.glob(os.path.join(folder_path, "*.csv"))) @@ -290,7 +288,7 @@ def has_multiple_event_ttls(cls, folder_path): multiple_event_ttls = [] for i in range(len(path)): df = pd.read_csv(path[i], index_col=False) - _, value = cls.check_header(df) + _, value = cls._check_header(df) # check dataframe structure and read data accordingly if len(value) > 0: @@ -331,18 +329,6 @@ def has_multiple_event_ttls(cls, folder_path): return multiple_event_ttls - @classmethod - def check_header(cls, df): - arr = list(df.columns) - check_float = [] - for i in arr: - try: - check_float.append(float(i)) - except: - pass - - return arr, check_float - # function to decide indices of interleaved channels # in neurophotometrics data @classmethod @@ -426,7 +412,7 @@ def needs_ts_unit(cls, folder_path, num_ch): col_names_ts = [""] for i in range(len(path)): df = pd.read_csv(path[i], index_col=False) - _, value = cls.check_header(df) + _, value = cls._check_header(df) # check dataframe structure and read data accordingly if len(value) > 0: @@ -498,62 +484,3 @@ def _update_df_with_timestamp_columns(df, timestamp_column_name): df.insert(1, "Timestamp", df[timestamp_column_name]) df = df.drop(col_names_ts[1:], axis=1) return df - - def _read_npm(self, event): - logger.debug("\033[1m" + "Trying to read data for {} from csv file.".format(event) + "\033[0m") - if not os.path.exists(os.path.join(self.folder_path, event + ".csv")): - logger.error("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") - raise Exception("\033[1m" + "No csv file found for event {}".format(event) + "\033[0m") - - df = pd.read_csv(os.path.join(self.folder_path, event + ".csv"), index_col=False) - return df - - def _save_to_hdf5(self, df, event, outputPath): - key = list(df.columns) - - # TODO: clean up these if branches - if len(key) == 3: - arr1 = np.array(["timestamps", "data", "sampling_rate"]) - arr2 = np.char.lower(np.array(key)) - if (np.sort(arr1) == np.sort(arr2)).all() == False: - logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - raise Exception("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - - if len(key) == 1: - if key[0].lower() != "timestamps": - logger.error("\033[1m" + "Column names should be timestamps, data and sampling_rate" + "\033[0m") - raise Exception("\033[1m" + "Column name should be timestamps" + "\033[0m") - - if len(key) != 3 and len(key) != 1: - logger.error( - "\033[1m" - + "Number of columns in csv file should be either three or one. Three columns if \ - the file is for control or signal data or one column if the file is for event TTLs." - + "\033[0m" - ) - raise Exception( - "\033[1m" - + "Number of columns in csv file should be either three or one. Three columns if \ - the file is for control or signal data or one column if the file is for event TTLs." 
- + "\033[0m" - ) - - for i in range(len(key)): - self._write_hdf5(data=df[key[i]].dropna(), storename=event, output_path=outputPath, key=key[i].lower()) - - logger.info("\033[1m" + "Reading data for {} from csv file is completed.".format(event) + "\033[0m") - - def read(self, *, events: list[str], outputPath: str) -> list[dict[str, Any]]: - output_dicts = [] - for event in events: - df = self._read_npm(event=event) - S = df.to_dict() - S["storename"] = event - output_dicts.append(S) - return output_dicts - - def save(self, *, output_dicts: list[dict[str, Any]], outputPath: str) -> None: - for S in output_dicts: - event = S.pop("storename") - df = pd.DataFrame.from_dict(S) - self._save_to_hdf5(df=df, event=event, outputPath=outputPath) From d43670ffa39f5a7668867b0f21307d99bd240c48 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 17:41:09 -0800 Subject: [PATCH 057/150] Updated TODO --- src/guppy/extractors/doric_recording_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index 62a8586..13f7fdb 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -24,7 +24,7 @@ def execute_import_doric(folder_path, storesList, outputPath): class DoricRecordingExtractor(BaseRecordingExtractor): - # TODO: consolidate duplicate flag logic between the `__init__` and the `check_doric` method. + # TODO: consolidate duplicate flag logic between the `discover_events_and_flags` and the `check_doric` method. @classmethod def discover_events_and_flags(cls, folder_path): From cd245a165ba8afea06780fbd12e007f33a99f218 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 18:01:33 -0800 Subject: [PATCH 058/150] Centralized read_and_save_all_events and read_and_save_event functions into the base_recording_extractor and removed all duplicates. 
--- src/guppy/extractors/__init__.py | 10 ++++----- .../extractors/base_recording_extractor.py | 21 +++++++++++++++++++ .../extractors/csv_recording_extractor.py | 19 ----------------- .../extractors/doric_recording_extractor.py | 9 -------- .../extractors/npm_recording_extractor.py | 19 ----------------- .../extractors/tdt_recording_extractor.py | 16 -------------- src/guppy/readTevTsq.py | 21 ++++++++++++------- 7 files changed, 39 insertions(+), 76 deletions(-) diff --git a/src/guppy/extractors/__init__.py b/src/guppy/extractors/__init__.py index 75933c7..ca2fbe0 100644 --- a/src/guppy/extractors/__init__.py +++ b/src/guppy/extractors/__init__.py @@ -1,5 +1,5 @@ -from .base_recording_extractor import BaseRecordingExtractor -from .tdt_recording_extractor import TdtRecordingExtractor, execute_readtev -from .csv_recording_extractor import CsvRecordingExtractor, execute_import_csv -from .doric_recording_extractor import DoricRecordingExtractor, execute_import_doric -from .npm_recording_extractor import NpmRecordingExtractor, execute_import_npm +from .base_recording_extractor import BaseRecordingExtractor, read_and_save_event, read_and_save_all_events +from .tdt_recording_extractor import TdtRecordingExtractor +from .csv_recording_extractor import CsvRecordingExtractor +from .doric_recording_extractor import DoricRecordingExtractor +from .npm_recording_extractor import NpmRecordingExtractor diff --git a/src/guppy/extractors/base_recording_extractor.py b/src/guppy/extractors/base_recording_extractor.py index 839c3db..a8f274b 100644 --- a/src/guppy/extractors/base_recording_extractor.py +++ b/src/guppy/extractors/base_recording_extractor.py @@ -1,12 +1,18 @@ """Base class for recording extractors.""" +import logging +import multiprocessing as mp import os +import time from abc import ABC, abstractmethod +from itertools import repeat from typing import Any import h5py import numpy as np +logger = logging.getLogger(__name__) + class BaseRecordingExtractor(ABC): """ @@ -116,3 +122,18 @@ def _write_hdf5(data: Any, storename: str, output_path: str, key: str) -> None: f.create_dataset(key, data=data, maxshape=(None,), chunks=True) else: f.create_dataset(key, data=data) + + +def read_and_save_event(extractor, event, outputPath): + output_dicts = extractor.read(events=[event], outputPath=outputPath) + extractor.save(output_dicts=output_dicts, outputPath=outputPath) + logger.info("Data for event {} fetched and stored.".format(event)) + + +def read_and_save_all_events(extractor, events, outputPath, numProcesses=mp.cpu_count()): + logger.info("Reading data for event {} ...".format(events)) + + start = time.time() + with mp.Pool(numProcesses) as p: + p.starmap(read_and_save_event, zip(repeat(extractor), events, repeat(outputPath))) + logger.info("Time taken = {0:.5f}".format(time.time() - start)) diff --git a/src/guppy/extractors/csv_recording_extractor.py b/src/guppy/extractors/csv_recording_extractor.py index d74cfde..cfa9a8d 100644 --- a/src/guppy/extractors/csv_recording_extractor.py +++ b/src/guppy/extractors/csv_recording_extractor.py @@ -1,9 +1,6 @@ import glob import logging -import multiprocessing as mp import os -import time -from itertools import repeat from typing import Any import numpy as np @@ -14,22 +11,6 @@ logger = logging.getLogger(__name__) -def execute_import_csv(filepath, events, outputPath, numProcesses=mp.cpu_count()): - logger.info("Reading data for event {} ...".format(events)) - - extractor = CsvRecordingExtractor(folder_path=filepath) - start = time.time() - with 
mp.Pool(numProcesses) as p: - p.starmap(read_and_save_csv, zip(repeat(extractor), events, repeat(outputPath))) - logger.info("Time taken = {0:.5f}".format(time.time() - start)) - - -def read_and_save_csv(extractor, event, outputPath): - output_dicts = extractor.read(events=[event], outputPath=outputPath) - extractor.save(output_dicts=output_dicts, outputPath=outputPath) - logger.info("Data for event {} fetched and stored.".format(event)) - - class CsvRecordingExtractor(BaseRecordingExtractor): @classmethod diff --git a/src/guppy/extractors/doric_recording_extractor.py b/src/guppy/extractors/doric_recording_extractor.py index 13f7fdb..047e087 100644 --- a/src/guppy/extractors/doric_recording_extractor.py +++ b/src/guppy/extractors/doric_recording_extractor.py @@ -14,15 +14,6 @@ logger = logging.getLogger(__name__) -def execute_import_doric(folder_path, storesList, outputPath): - events = list(storesList[0, :]) - event_name_to_event_type = {storesList[0, i]: storesList[1, i] for i in range(storesList.shape[1])} - - extractor = DoricRecordingExtractor(folder_path=folder_path, event_name_to_event_type=event_name_to_event_type) - output_dicts = extractor.read(events=events, outputPath=outputPath) - extractor.save(output_dicts=output_dicts, outputPath=outputPath) - - class DoricRecordingExtractor(BaseRecordingExtractor): # TODO: consolidate duplicate flag logic between the `discover_events_and_flags` and the `check_doric` method. diff --git a/src/guppy/extractors/npm_recording_extractor.py b/src/guppy/extractors/npm_recording_extractor.py index 68d13f7..e3455b2 100644 --- a/src/guppy/extractors/npm_recording_extractor.py +++ b/src/guppy/extractors/npm_recording_extractor.py @@ -1,9 +1,6 @@ import glob import logging -import multiprocessing as mp import os -import time -from itertools import repeat import numpy as np import pandas as pd @@ -16,22 +13,6 @@ logger = logging.getLogger(__name__) -def execute_import_npm(folder_path, events, outputPath, numProcesses=mp.cpu_count()): - logger.info("Reading data for event {} ...".format(events)) - - extractor = NpmRecordingExtractor(folder_path=folder_path) - start = time.time() - with mp.Pool(numProcesses) as p: - p.starmap(read_and_save_npm, zip(repeat(extractor), events, repeat(outputPath))) - logger.info("Time taken = {0:.5f}".format(time.time() - start)) - - -def read_and_save_npm(extractor, event, outputPath): - output_dicts = extractor.read(events=[event], outputPath=outputPath) - extractor.save(output_dicts=output_dicts, outputPath=outputPath) - logger.info("Data for event {} fetched and stored.".format(event)) - - class NpmRecordingExtractor(CsvRecordingExtractor): # Inherits from CsvRecordingExtractor to reuse identical read/save logic. # Only overrides discover_events_and_flags() and adds NPM-specific helper methods. 
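The inheritance note above is the crux of this refactor, so a short sketch of the resulting hierarchy and the shared `read`/`save` API may help; it assumes the classes as defined in this patch series, and the folder paths and channel count are placeholders.

```python
# NPM reuses the CSV read/save implementation; only discovery is format-specific.
from guppy.extractors import (
    BaseRecordingExtractor,
    CsvRecordingExtractor,
    NpmRecordingExtractor,
)

assert issubclass(CsvRecordingExtractor, BaseRecordingExtractor)
assert issubclass(NpmRecordingExtractor, CsvRecordingExtractor)

folder_path = "/data/npm_session"              # placeholder
output_path = "/data/npm_session_output_1"     # placeholder

# NPM-specific discovery (handles interleaved channels) ...
events, flags = NpmRecordingExtractor.discover_events_and_flags(folder_path, num_ch=2)

# ... while construction, read() and save() are inherited from CsvRecordingExtractor.
extractor = NpmRecordingExtractor(folder_path=folder_path)
output_dicts = extractor.read(events=events, outputPath=output_path)
extractor.save(output_dicts=output_dicts, outputPath=output_path)
```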
diff --git a/src/guppy/extractors/tdt_recording_extractor.py b/src/guppy/extractors/tdt_recording_extractor.py index a877a8b..f65f7a9 100644 --- a/src/guppy/extractors/tdt_recording_extractor.py +++ b/src/guppy/extractors/tdt_recording_extractor.py @@ -1,9 +1,6 @@ import glob import logging -import multiprocessing as mp import os -import time -from itertools import repeat from typing import Any import numpy as np @@ -15,19 +12,6 @@ logger = logging.getLogger(__name__) -def read_and_save_tdt(extractor, event, outputPath): - output_dicts = extractor.read(events=[event], outputPath=outputPath) - extractor.save(output_dicts=output_dicts, outputPath=outputPath) - - -def execute_readtev(folder_path, events, outputPath, numProcesses=mp.cpu_count()): - extractor = TdtRecordingExtractor(folder_path=folder_path) - start = time.time() - with mp.Pool(numProcesses) as p: - p.starmap(read_and_save_tdt, zip(repeat(extractor), events, repeat(outputPath))) - logger.info("Time taken = {0:.5f}".format(time.time() - start)) - - class TdtRecordingExtractor(BaseRecordingExtractor): @classmethod diff --git a/src/guppy/readTevTsq.py b/src/guppy/readTevTsq.py index 2ae0c59..19a0a4a 100755 --- a/src/guppy/readTevTsq.py +++ b/src/guppy/readTevTsq.py @@ -8,10 +8,11 @@ import numpy as np from guppy.extractors import ( - execute_import_csv, - execute_import_doric, - execute_import_npm, - execute_readtev, + CsvRecordingExtractor, + DoricRecordingExtractor, + NpmRecordingExtractor, + TdtRecordingExtractor, + read_and_save_all_events, ) logger = logging.getLogger(__name__) @@ -74,15 +75,19 @@ def readRawData(inputParameters): events = np.unique(storesList[0, :]) if modality == "tdt": - execute_readtev(filepath, events, op, numProcesses) + extractor = TdtRecordingExtractor(folder_path=filepath) elif modality == "doric": - execute_import_doric(filepath, storesList, op) + event_name_to_event_type = {storesList[0, i]: storesList[1, i] for i in range(storesList.shape[1])} + extractor = DoricRecordingExtractor( + folder_path=filepath, event_name_to_event_type=event_name_to_event_type + ) elif modality == "csv": - execute_import_csv(filepath, events, op, numProcesses) + extractor = CsvRecordingExtractor(folder_path=filepath) elif modality == "npm": - execute_import_npm(filepath, events, op, numProcesses) + extractor = NpmRecordingExtractor(folder_path=filepath) else: raise ValueError("Modality not recognized. Please use 'tdt', 'csv', 'doric', or 'npm'.") + read_and_save_all_events(extractor, events, op, numProcesses) writeToFile(str(10 + ((step + 1) * 10)) + "\n") step += 1 From 7e69cc747dfff63d93dd733ff584c6cdbd459b03 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 3 Dec 2025 18:04:56 -0800 Subject: [PATCH 059/150] Removed redundant intermediate common_step3.py. 
--- src/guppy/common_step3.py | 42 --------------------------------------- 1 file changed, 42 deletions(-) delete mode 100644 src/guppy/common_step3.py diff --git a/src/guppy/common_step3.py b/src/guppy/common_step3.py deleted file mode 100644 index 09e763f..0000000 --- a/src/guppy/common_step3.py +++ /dev/null @@ -1,42 +0,0 @@ -import logging -import os - -import h5py -import numpy as np - -logger = logging.getLogger(__name__) - - -# function to write data to a hdf5 file -def write_hdf5(data, event, filepath, key): - - # replacing \\ or / in storenames with _ (to avoid errors while saving data) - event = event.replace("\\", "_") - event = event.replace("/", "_") - - op = os.path.join(filepath, event + ".hdf5") - - # if file does not exist create a new file - if not os.path.exists(op): - with h5py.File(op, "w") as f: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - - # if file already exists, append data to it or add a new key to it - else: - with h5py.File(op, "r+") as f: - if key in list(f.keys()): - if type(data) is np.ndarray: - f[key].resize(data.shape) - arr = f[key] - arr[:] = data - else: - arr = f[key] - arr = data - else: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) From 792e421ba5c6d22674e6b6558f480524a5f0c461 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 5 Dec 2025 09:27:31 -0800 Subject: [PATCH 060/150] Added Claude code docs to gitignore. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 0628429..f684eec 100755 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ GuPPy/runFiberPhotometryAnalysis.ipynb .clinerules/ testing_data/ + +CLAUDE.md From 60fa0bc67761ed648e08c2944f0da9a413ca5a53 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 5 Dec 2025 11:46:09 -0800 Subject: [PATCH 061/150] Pulled out analysis-specific functions and io_utils from preprocess.py. 
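Because this commit is what makes the numerical core importable on its own, a self-contained sketch of the extracted helpers is included here; the synthetic traces, filter window, and z-score settings are illustrative placeholders, not GuPPy defaults.

```python
# Minimal end-to-end use of the pure analysis helpers moved into guppy/analysis/analysis.py.
import numpy as np

from guppy.analysis.analysis import execute_controlFit_dff, z_score_computation

rng = np.random.default_rng(0)
timestamps = np.arange(0, 60, 0.01)                                 # 100 Hz, 60 s (synthetic)
control = 1.0 + 0.05 * rng.standard_normal(timestamps.size)         # isosbestic-like trace
signal = control + 0.2 * np.exp(-((timestamps - 30.0) ** 2) / 2.0)  # transient riding on control

# Smooth both channels, fit control to signal, and compute dF/F (in percent).
dff, control_fit = execute_controlFit_dff(control, signal, isosbestic_control=True, filter_window=10)

# Convert dF/F to a z-score; the baseline window keys are read even for the standard method.
params = {"zscore_method": "standard z-score", "baselineWindowStart": 0, "baselineWindowEnd": 10}
zscore = z_score_computation(dff, timestamps, params)
```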
--- src/guppy/analysis/__init__.py | 0 src/guppy/analysis/analysis.py | 268 ++++++++++++++++++++ src/guppy/analysis/io_utils.py | 163 ++++++++++++ src/guppy/preprocess.py | 441 +++++---------------------------- step4_data_flow_analysis.md | 348 ++++++++++++++++++++++++++ 5 files changed, 841 insertions(+), 379 deletions(-) create mode 100644 src/guppy/analysis/__init__.py create mode 100644 src/guppy/analysis/analysis.py create mode 100644 src/guppy/analysis/io_utils.py create mode 100644 step4_data_flow_analysis.md diff --git a/src/guppy/analysis/__init__.py b/src/guppy/analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/guppy/analysis/analysis.py b/src/guppy/analysis/analysis.py new file mode 100644 index 0000000..4ec8960 --- /dev/null +++ b/src/guppy/analysis/analysis.py @@ -0,0 +1,268 @@ +import logging + +import numpy as np +from scipy import signal as ss +from scipy.optimize import curve_fit + +from .io_utils import fetchCoords, read_hdf5 + +logger = logging.getLogger(__name__) + + +# Category: Analysis +# Reason: Pure mathematical function for exponential curve fitting - no dependencies, pure computation +# curve fit exponential function +def curveFitFn(x, a, b, c): + return a + (b * np.exp(-(1 / c) * x)) + + +# Category: Analysis +# Reason: Pure algorithmic function - applies Savitzky-Golay filter and curve fitting to generate synthetic control channel +# helper function to create control channel using signal channel +# by curve fitting signal channel to exponential function +# when there is no isosbestic control channel is present +def helper_create_control_channel(signal, timestamps, window): + # check if window is greater than signal shape + if window > signal.shape[0]: + window = ((signal.shape[0] + 1) / 2) + 1 + if window % 2 != 0: + window = window + else: + window = window + 1 + + filtered_signal = ss.savgol_filter(signal, window_length=window, polyorder=3) + + p0 = [5, 50, 60] + + try: + popt, pcov = curve_fit(curveFitFn, timestamps, filtered_signal, p0) + except Exception as e: + logger.error(str(e)) + + # logger.info('Curve Fit Parameters : ', popt) + control = curveFitFn(timestamps, *popt) + + return control + + +# Category: Analysis +# Reason: Data validation function - compares array lengths and returns indices for processing +# function to check control and signal channel has same length +# if not, take a smaller length and do pre-processing +def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList): + + indices = [] + for i in range(channels_arr.shape[1]): + idx_c = np.where(storesList == channels_arr[0, i])[0] + idx_s = np.where(storesList == channels_arr[1, i])[0] + control = read_hdf5(storenames[idx_c[0]], filepath, "data") + signal = read_hdf5(storenames[idx_s[0]], filepath, "data") + if control.shape[0] < signal.shape[0]: + indices.append(storesList[idx_c[0]]) + elif control.shape[0] > signal.shape[0]: + indices.append(storesList[idx_s[0]]) + else: + indices.append(storesList[idx_s[0]]) + + return indices + + +# Category: Analysis +# Reason: Pure algorithmic function - concatenates data chunks based on coordinate boundaries, adjusts timestamps mathematically +# helper function to process control and signal timestamps +def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): + + ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + data = read_hdf5(event, filepath, "data").reshape(-1) + coords = fetchCoords(filepath, naming, ts) + + if (data == 0).all() == True: + data = 
np.zeros(ts.shape[0]) + + arr = np.array([]) + ts_arr = np.array([]) + for i in range(coords.shape[0]): + + index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + + if len(arr) == 0: + arr = np.concatenate((arr, data[index])) + sub = ts[index][0] - timeForLightsTurnOn + new_ts = ts[index] - sub + ts_arr = np.concatenate((ts_arr, new_ts)) + else: + temp = data[index] + # new = temp + (arr[-1]-temp[0]) + temp_ts = ts[index] + new_ts = temp_ts - (temp_ts[0] - ts_arr[-1]) + arr = np.concatenate((arr, temp)) + ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) + + # logger.info(arr.shape, ts_arr.shape) + return arr, ts_arr + + +# Category: Analysis +# Reason: Pure algorithmic function - processes event timestamps based on coordinate boundaries, aligns with data timeline +# helper function to align event timestamps with the control and signal timestamps +def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): + + tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) + coords = fetchCoords(filepath, naming, tsNew) + + ts_arr = np.array([]) + tsNew_arr = np.array([]) + for i in range(coords.shape[0]): + tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] + ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + + if len(tsNew_arr) == 0: + sub = tsNew[tsNew_index][0] - timeForLightsTurnOn + tsNew_arr = np.concatenate((tsNew_arr, tsNew[tsNew_index] - sub)) + ts_arr = np.concatenate((ts_arr, ts[ts_index] - sub)) + else: + temp_tsNew = tsNew[tsNew_index] + temp_ts = ts[ts_index] + new_ts = temp_ts - (temp_tsNew[0] - tsNew_arr[-1]) + new_tsNew = temp_tsNew - (temp_tsNew[0] - tsNew_arr[-1]) + tsNew_arr = np.concatenate((tsNew_arr, new_tsNew + (1 / sampling_rate))) + ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) + + return ts_arr + + +# Category: Analysis +# Reason: Pure algorithmic function - replaces specified data chunks with NaN based on coordinate boundaries +# adding nan values to removed chunks +# when using artifacts removal method - replace with NaN +def addingNaNValues(filepath, event, naming): + + ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + data = read_hdf5(event, filepath, "data").reshape(-1) + coords = fetchCoords(filepath, naming, ts) + + if (data == 0).all() == True: + data = np.zeros(ts.shape[0]) + + arr = np.array([]) + ts_index = np.arange(ts.shape[0]) + for i in range(coords.shape[0]): + + index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + arr = np.concatenate((arr, index)) + + nan_indices = list(set(ts_index).symmetric_difference(arr)) + data[nan_indices] = np.nan + + return data + + +# Category: Analysis +# Reason: Pure algorithmic function - filters event timestamps to exclude artifact regions based on coordinates +# remove event TTLs which falls in the removed chunks +# when using artifacts removal method - replace with NaN +def removeTTLs(filepath, event, naming): + tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) + coords = fetchCoords(filepath, naming, tsNew) + + ts_arr = np.array([]) + for i in range(coords.shape[0]): + ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + ts_arr = np.concatenate((ts_arr, ts[ts_index])) + + return ts_arr + + +# Category: Analysis +# Reason: Pure mathematical computation - calculates deltaF/F from signal and control arrays 
with simple formula +# function to compute deltaF/F using fitted control channel and filtered signal channel +def deltaFF(signal, control): + + res = np.subtract(signal, control) + normData = np.divide(res, control) + # deltaFF = normData + normData = normData * 100 + + return normData + + +# Category: Analysis +# Reason: Pure algorithmic function - performs polynomial linear regression to fit control to signal +# function to fit control channel to signal channel +def controlFit(control, signal): + + p = np.polyfit(control, signal, 1) + arr = (p[0] * control) + p[1] + return arr + + +# Category: Analysis +# Reason: Pure signal processing function - applies moving average filter using scipy filtfilt +def filterSignal(filter_window, signal): + if filter_window == 0: + return signal + elif filter_window > 1: + b = np.divide(np.ones((filter_window,)), filter_window) + a = 1 + filtered_signal = ss.filtfilt(b, a, signal) + return filtered_signal + else: + raise Exception("Moving average filter window value is not correct.") + + +# Category: Routing +# Reason: Orchestrates signal processing pipeline - sequences calls to filterSignal, controlFit, and deltaFF with conditional logic +# function to filter control and signal channel, also execute above two function : controlFit and deltaFF +# function will also take care if there is only signal channel and no control channel +# if there is only signal channel, z-score will be computed using just signal channel +def execute_controlFit_dff(control, signal, isosbestic_control, filter_window): + + if isosbestic_control == False: + signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) + control_fit = controlFit(control, signal_smooth) + norm_data = deltaFF(signal_smooth, control_fit) + else: + control_smooth = filterSignal(filter_window, control) # ss.filtfilt(b, a, control) + signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) + control_fit = controlFit(control_smooth, signal_smooth) + norm_data = deltaFF(signal_smooth, control_fit) + + return norm_data, control_fit + + +# Category: Analysis +# Reason: Pure statistical computation - calculates z-score from deltaF/F using different methods (standard, baseline, robust) +# function to compute z-score based on z-score computation method +def z_score_computation(dff, timestamps, inputParameters): + + zscore_method = inputParameters["zscore_method"] + baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] + + if zscore_method == "standard z-score": + numerator = np.subtract(dff, np.nanmean(dff)) + zscore = np.divide(numerator, np.nanstd(dff)) + elif zscore_method == "baseline z-score": + idx = np.where((timestamps > baseline_start) & (timestamps < baseline_end))[0] + if idx.shape[0] == 0: + logger.error( + "Baseline Window Parameters for baseline z-score computation zscore_method \ + are not correct." + ) + raise Exception( + "Baseline Window Parameters for baseline z-score computation zscore_method \ + are not correct." 
+ ) + else: + baseline_mean = np.nanmean(dff[idx]) + baseline_std = np.nanstd(dff[idx]) + numerator = np.subtract(dff, baseline_mean) + zscore = np.divide(numerator, baseline_std) + else: + median = np.median(dff) + mad = np.median(np.abs(dff - median)) + numerator = 0.6745 * (dff - median) + zscore = np.divide(numerator, mad) + + return zscore diff --git a/src/guppy/analysis/io_utils.py b/src/guppy/analysis/io_utils.py new file mode 100644 index 0000000..33b6650 --- /dev/null +++ b/src/guppy/analysis/io_utils.py @@ -0,0 +1,163 @@ +import fnmatch +import glob +import logging +import os +import re + +import h5py +import numpy as np + +logger = logging.getLogger(__name__) + + +# Category: Analysis +# Reason: Utility function for path filtering - pure data transformation with no GUI or orchestration +def takeOnlyDirs(paths): + removePaths = [] + for p in paths: + if os.path.isfile(p): + removePaths.append(p) + return list(set(paths) - set(removePaths)) + + +# Category: Analysis +# Reason: File system utility for case-insensitive file discovery - pure I/O helper with no orchestration +# find files by ignoring the case sensitivity +def find_files(path, glob_path, ignore_case=False): + rule = ( + re.compile(fnmatch.translate(glob_path), re.IGNORECASE) + if ignore_case + else re.compile(fnmatch.translate(glob_path)) + ) + + no_bytes_path = os.listdir(os.path.expanduser(path)) + str_path = [] + + # converting byte object to string + for x in no_bytes_path: + try: + str_path.append(x.decode("utf-8")) + except: + str_path.append(x) + return [os.path.join(path, n) for n in str_path if rule.match(n)] + + +# Category: Analysis +# Reason: Simple file type detection utility - pure file system check with no orchestration +# check if dealing with TDT files or csv files +def check_TDT(filepath): + path = glob.glob(os.path.join(filepath, "*.tsq")) + if len(path) > 0: + return True + else: + return False + + +# Category: Analysis +# Reason: I/O utility function for reading HDF5 files - pure file access with no business logic or orchestration +# function to read hdf5 file +def read_hdf5(event, filepath, key): + if event: + event = event.replace("\\", "_") + event = event.replace("/", "_") + op = os.path.join(filepath, event + ".hdf5") + else: + op = filepath + + if os.path.exists(op): + with h5py.File(op, "r") as f: + arr = np.asarray(f[key]) + else: + logger.error(f"{event}.hdf5 file does not exist") + raise Exception("{}.hdf5 file does not exist".format(event)) + + return arr + + +# Category: Analysis +# Reason: I/O utility function for writing HDF5 files - pure file access with no business logic or orchestration +# function to write hdf5 file +def write_hdf5(data, event, filepath, key): + event = event.replace("\\", "_") + event = event.replace("/", "_") + op = os.path.join(filepath, event + ".hdf5") + + # if file does not exist create a new file + if not os.path.exists(op): + with h5py.File(op, "w") as f: + if type(data) is np.ndarray: + f.create_dataset(key, data=data, maxshape=(None,), chunks=True) + else: + f.create_dataset(key, data=data) + + # if file already exists, append data to it or add a new key to it + else: + with h5py.File(op, "r+") as f: + if key in list(f.keys()): + if type(data) is np.ndarray: + f[key].resize(data.shape) + arr = f[key] + arr[:] = data + else: + arr = f[key] + arr = data + else: + if type(data) is np.ndarray: + f.create_dataset(key, data=data, maxshape=(None,), chunks=True) + else: + f.create_dataset(key, data=data) + + +# Category: Analysis +# Reason: Validation utility - 
checks file naming conventions and returns structured path array with no orchestration +# function to check if the naming convention for saving storeslist file was followed or not +def decide_naming_convention(filepath): + path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) + + path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) + + path = sorted(path_1 + path_2, key=str.casefold) + if len(path) % 2 != 0: + logger.error("There are not equal number of Control and Signal data") + raise Exception("There are not equal number of Control and Signal data") + + path = np.asarray(path).reshape(2, -1) + + return path + + +# Category: Analysis +# Reason: I/O utility that loads artifact coordinates from .npy file or provides default - pure file loading with simple logic +# function to read coordinates file which was saved by selecting chunks for artifacts removal +def fetchCoords(filepath, naming, data): + + path = os.path.join(filepath, "coordsForPreProcessing_" + naming + ".npy") + + if not os.path.exists(path): + coords = np.array([0, data[-1]]) + else: + coords = np.load(os.path.join(filepath, "coordsForPreProcessing_" + naming + ".npy"))[:, 0] + + if coords.shape[0] % 2 != 0: + logger.error("Number of values in coordsForPreProcessing file is not even.") + raise Exception("Number of values in coordsForPreProcessing file is not even.") + + coords = coords.reshape(-1, 2) + + return coords + + +# Category: Routing +# Reason: Organizes output folders for data combination - loops through numbered outputs and groups related folders +def get_all_stores_for_combining_data(folderNames): + op = [] + for i in range(100): + temp = [] + match = r"[\s\S]*" + "_output_" + str(i) + for j in folderNames: + temp.append(re.findall(match, j)) + temp = sorted(list(np.concatenate(temp).flatten()), key=str.casefold) + if len(temp) > 0: + op.append(temp) + + return op diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 8b79039..69616d9 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -1,95 +1,52 @@ -import fnmatch import glob import json import logging import os -import re import shutil import sys -import h5py import matplotlib.pyplot as plt import numpy as np import pandas as pd -from scipy import signal as ss -from scipy.optimize import curve_fit +from .analysis.analysis import ( + addingNaNValues, + check_cntrl_sig_length, + eliminateData, + eliminateTs, + execute_controlFit_dff, + helper_create_control_channel, + removeTTLs, + z_score_computation, +) +from .analysis.io_utils import ( + check_TDT, + decide_naming_convention, + fetchCoords, + find_files, + get_all_stores_for_combining_data, + read_hdf5, + takeOnlyDirs, + write_hdf5, +) from .combineDataFn import processTimestampsForCombiningData logger = logging.getLogger(__name__) -logger = logging.getLogger(__name__) - # Only set matplotlib backend if not in CI environment if not os.getenv("CI"): plt.switch_backend("TKAgg") -def takeOnlyDirs(paths): - removePaths = [] - for p in paths: - if os.path.isfile(p): - removePaths.append(p) - return list(set(paths) - set(removePaths)) - - +# Category: Visualization/User Input +# Reason: Writes progress updates to file for GUI progress bar - couples backend to GUI feedback mechanism def writeToFile(value: str): with open(os.path.join(os.path.expanduser("~"), "pbSteps.txt"), "a") as file: file.write(value) -# find files by ignoring the case sensitivity -def find_files(path, 
glob_path, ignore_case=False): - rule = ( - re.compile(fnmatch.translate(glob_path), re.IGNORECASE) - if ignore_case - else re.compile(fnmatch.translate(glob_path)) - ) - - no_bytes_path = os.listdir(os.path.expanduser(path)) - str_path = [] - - # converting byte object to string - for x in no_bytes_path: - try: - str_path.append(x.decode("utf-8")) - except: - str_path.append(x) - return [os.path.join(path, n) for n in str_path if rule.match(n)] - - -# curve fit exponential function -def curveFitFn(x, a, b, c): - return a + (b * np.exp(-(1 / c) * x)) - - -# helper function to create control channel using signal channel -# by curve fitting signal channel to exponential function -# when there is no isosbestic control channel is present -def helper_create_control_channel(signal, timestamps, window): - # check if window is greater than signal shape - if window > signal.shape[0]: - window = ((signal.shape[0] + 1) / 2) + 1 - if window % 2 != 0: - window = window - else: - window = window + 1 - - filtered_signal = ss.savgol_filter(signal, window_length=window, polyorder=3) - - p0 = [5, 50, 60] - - try: - popt, pcov = curve_fit(curveFitFn, timestamps, filtered_signal, p0) - except Exception as e: - logger.error(str(e)) - - # logger.info('Curve Fit Parameters : ', popt) - control = curveFitFn(timestamps, *popt) - - return control - - +# Category: Routing +# Reason: Orchestrates reading HDF5 files, calling helper_create_control_channel, and writing results - coordinates I/O with computation # main function to create control channel using # signal channel and save it to a file def create_control_channel(filepath, arr, window=5001): @@ -116,6 +73,8 @@ def create_control_channel(filepath, arr, window=5001): logger.info("Control channel from signal channel created using curve-fitting") +# Category: Routing +# Reason: Orchestrates validation logic, file copying, and storesList updates - coordinates multiple operations and file manipulations # function to add control channel when there is no # isosbestic control channel and update the storeslist file def add_control_channel(filepath, arr): @@ -162,86 +121,8 @@ def add_control_channel(filepath, arr): return arr -# check if dealing with TDT files or csv files -def check_TDT(filepath): - path = glob.glob(os.path.join(filepath, "*.tsq")) - if len(path) > 0: - return True - else: - return False - - -# function to read hdf5 file -def read_hdf5(event, filepath, key): - if event: - event = event.replace("\\", "_") - event = event.replace("/", "_") - op = os.path.join(filepath, event + ".hdf5") - else: - op = filepath - - if os.path.exists(op): - with h5py.File(op, "r") as f: - arr = np.asarray(f[key]) - else: - logger.error(f"{event}.hdf5 file does not exist") - raise Exception("{}.hdf5 file does not exist".format(event)) - - return arr - - -# function to write hdf5 file -def write_hdf5(data, event, filepath, key): - event = event.replace("\\", "_") - event = event.replace("/", "_") - op = os.path.join(filepath, event + ".hdf5") - - # if file does not exist create a new file - if not os.path.exists(op): - with h5py.File(op, "w") as f: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - - # if file already exists, append data to it or add a new key to it - else: - with h5py.File(op, "r+") as f: - if key in list(f.keys()): - if type(data) is np.ndarray: - f[key].resize(data.shape) - arr = f[key] - arr[:] = data - else: - arr = f[key] - arr = data - else: - if type(data) is 
np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - - -# function to check control and signal channel has same length -# if not, take a smaller length and do pre-processing -def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList): - - indices = [] - for i in range(channels_arr.shape[1]): - idx_c = np.where(storesList == channels_arr[0, i])[0] - idx_s = np.where(storesList == channels_arr[1, i])[0] - control = read_hdf5(storenames[idx_c[0]], filepath, "data") - signal = read_hdf5(storenames[idx_s[0]], filepath, "data") - if control.shape[0] < signal.shape[0]: - indices.append(storesList[idx_c[0]]) - elif control.shape[0] > signal.shape[0]: - indices.append(storesList[idx_s[0]]) - else: - indices.append(storesList[idx_s[0]]) - - return indices - - +# Category: Routing +# Reason: Orchestrates timestamp correction workflow - loops through stores, coordinates reading/writing, calls validation and correction logic # function to correct timestamps after eliminating first few seconds of the data (for csv data) def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList): @@ -292,6 +173,8 @@ def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList): logger.info("Timestamps corrected and converted to seconds.") +# Category: Routing +# Reason: Orchestrates timestamp correction workflow for TDT format - loops through stores, coordinates timestamp expansion algorithm with I/O # function to correct timestamps after eliminating first few seconds of the data (for TDT data) def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList): @@ -354,6 +237,8 @@ def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList): # return timeRecStart, correctionIndex, timestampNew +# Category: Routing +# Reason: Orchestrates applying timestamp corrections - reads correction indices, applies different logic based on data type, writes results # function to apply correction to control, signal and event timestamps def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): @@ -395,6 +280,8 @@ def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): # write_hdf5(control, displayName, filepath, 'data') +# Category: Routing +# Reason: Orchestrates naming validation and correction application - loops through channel pairs and delegates to applyCorrection # function to check if naming convention was followed while saving storeslist file # and apply timestamps correction using the function applyCorrection def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList): @@ -423,6 +310,8 @@ def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, logger.info("Timestamps corrections applied to the data and event timestamps.") +# Category: Visualization/User Input +# Reason: Creates matplotlib plots to display z-score results - pure visualization with no computation # function to plot z_score def visualize_z_score(filepath): @@ -445,6 +334,8 @@ def visualize_z_score(filepath): # plt.show() +# Category: Visualization/User Input +# Reason: Creates matplotlib plots to display deltaF/F results - pure visualization with no computation # function to plot deltaF/F def visualize_dff(filepath): name = os.path.basename(filepath) @@ -466,6 +357,8 @@ def visualize_dff(filepath): # plt.show() +# Category: Visualization/User Input +# Reason: Interactive matplotlib GUI with keyboard event handlers for 
artifact selection - core user input mechanism that saves coordinates to disk def visualize(filepath, x, y1, y2, y3, plot_name, removeArtifacts): # plotting control and signal data @@ -555,6 +448,8 @@ def plt_close_event(event): # return fig +# Category: Visualization/User Input +# Reason: Orchestrates visualization of all control/signal pairs - reads data and delegates to visualize() for user interaction # function to plot control and signal, also provide a feature to select chunks for artifacts removal def visualizeControlAndSignal(filepath, removeArtifacts): path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) @@ -590,141 +485,8 @@ def visualizeControlAndSignal(filepath, removeArtifacts): visualize(filepath, ts, control, signal, cntrl_sig_fit, plot_name, removeArtifacts) -# function to check if the naming convention for saving storeslist file was followed or not -def decide_naming_convention(filepath): - path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) - - path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) - - path = sorted(path_1 + path_2, key=str.casefold) - if len(path) % 2 != 0: - logger.error("There are not equal number of Control and Signal data") - raise Exception("There are not equal number of Control and Signal data") - - path = np.asarray(path).reshape(2, -1) - - return path - - -# function to read coordinates file which was saved by selecting chunks for artifacts removal -def fetchCoords(filepath, naming, data): - - path = os.path.join(filepath, "coordsForPreProcessing_" + naming + ".npy") - - if not os.path.exists(path): - coords = np.array([0, data[-1]]) - else: - coords = np.load(os.path.join(filepath, "coordsForPreProcessing_" + naming + ".npy"))[:, 0] - - if coords.shape[0] % 2 != 0: - logger.error("Number of values in coordsForPreProcessing file is not even.") - raise Exception("Number of values in coordsForPreProcessing file is not even.") - - coords = coords.reshape(-1, 2) - - return coords - - -# helper function to process control and signal timestamps -def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - data = read_hdf5(event, filepath, "data").reshape(-1) - coords = fetchCoords(filepath, naming, ts) - - if (data == 0).all() == True: - data = np.zeros(ts.shape[0]) - - arr = np.array([]) - ts_arr = np.array([]) - for i in range(coords.shape[0]): - - index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - - if len(arr) == 0: - arr = np.concatenate((arr, data[index])) - sub = ts[index][0] - timeForLightsTurnOn - new_ts = ts[index] - sub - ts_arr = np.concatenate((ts_arr, new_ts)) - else: - temp = data[index] - # new = temp + (arr[-1]-temp[0]) - temp_ts = ts[index] - new_ts = temp_ts - (temp_ts[0] - ts_arr[-1]) - arr = np.concatenate((arr, temp)) - ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) - - # logger.info(arr.shape, ts_arr.shape) - return arr, ts_arr - - -# helper function to align event timestamps with the control and signal timestamps -def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, naming, tsNew) - - ts_arr = np.array([]) - tsNew_arr = np.array([]) - for i in 
range(coords.shape[0]): - tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] - ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - - if len(tsNew_arr) == 0: - sub = tsNew[tsNew_index][0] - timeForLightsTurnOn - tsNew_arr = np.concatenate((tsNew_arr, tsNew[tsNew_index] - sub)) - ts_arr = np.concatenate((ts_arr, ts[ts_index] - sub)) - else: - temp_tsNew = tsNew[tsNew_index] - temp_ts = ts[ts_index] - new_ts = temp_ts - (temp_tsNew[0] - tsNew_arr[-1]) - new_tsNew = temp_tsNew - (temp_tsNew[0] - tsNew_arr[-1]) - tsNew_arr = np.concatenate((tsNew_arr, new_tsNew + (1 / sampling_rate))) - ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) - - return ts_arr - - -# adding nan values to removed chunks -# when using artifacts removal method - replace with NaN -def addingNaNValues(filepath, event, naming): - - ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - data = read_hdf5(event, filepath, "data").reshape(-1) - coords = fetchCoords(filepath, naming, ts) - - if (data == 0).all() == True: - data = np.zeros(ts.shape[0]) - - arr = np.array([]) - ts_index = np.arange(ts.shape[0]) - for i in range(coords.shape[0]): - - index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - arr = np.concatenate((arr, index)) - - nan_indices = list(set(ts_index).symmetric_difference(arr)) - data[nan_indices] = np.nan - - return data - - -# remove event TTLs which falls in the removed chunks -# when using artifacts removal method - replace with NaN -def removeTTLs(filepath, event, naming): - tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, naming, tsNew) - - ts_arr = np.array([]) - for i in range(coords.shape[0]): - ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - ts_arr = np.concatenate((ts_arr, ts[ts_index])) - - return ts_arr - - +# Category: Routing +# Reason: Orchestrates NaN replacement for all stores - loops through channels and coordinates calls to addingNaNValues and removeTTLs def addingNaNtoChunksWithArtifacts(filepath, events): logger.debug("Replacing chunks with artifacts by NaN values.") @@ -759,6 +521,8 @@ def addingNaNtoChunksWithArtifacts(filepath, events): logger.info("Chunks with artifacts are replaced by NaN values.") +# Category: Routing +# Reason: Orchestrates timestamp concatenation for artifact removal - loops through stores, coordinates eliminateData/eliminateTs calls and writes results # main function to align timestamps for control, signal and event timestamps for artifacts removal def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): @@ -800,89 +564,8 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") -# function to compute deltaF/F using fitted control channel and filtered signal channel -def deltaFF(signal, control): - - res = np.subtract(signal, control) - normData = np.divide(res, control) - # deltaFF = normData - normData = normData * 100 - - return normData - - -# function to fit control channel to signal channel -def controlFit(control, signal): - - p = np.polyfit(control, signal, 1) - arr = (p[0] * control) + p[1] - return arr - - -def filterSignal(filter_window, signal): - if filter_window == 0: - return signal - elif filter_window > 1: - b = np.divide(np.ones((filter_window,)), filter_window) - a = 1 - filtered_signal = 
ss.filtfilt(b, a, signal) - return filtered_signal - else: - raise Exception("Moving average filter window value is not correct.") - - -# function to filter control and signal channel, also execute above two function : controlFit and deltaFF -# function will also take care if there is only signal channel and no control channel -# if there is only signal channel, z-score will be computed using just signal channel -def execute_controlFit_dff(control, signal, isosbestic_control, filter_window): - - if isosbestic_control == False: - signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) - control_fit = controlFit(control, signal_smooth) - norm_data = deltaFF(signal_smooth, control_fit) - else: - control_smooth = filterSignal(filter_window, control) # ss.filtfilt(b, a, control) - signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) - control_fit = controlFit(control_smooth, signal_smooth) - norm_data = deltaFF(signal_smooth, control_fit) - - return norm_data, control_fit - - -# function to compute z-score based on z-score computation method -def z_score_computation(dff, timestamps, inputParameters): - - zscore_method = inputParameters["zscore_method"] - baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] - - if zscore_method == "standard z-score": - numerator = np.subtract(dff, np.nanmean(dff)) - zscore = np.divide(numerator, np.nanstd(dff)) - elif zscore_method == "baseline z-score": - idx = np.where((timestamps > baseline_start) & (timestamps < baseline_end))[0] - if idx.shape[0] == 0: - logger.error( - "Baseline Window Parameters for baseline z-score computation zscore_method \ - are not correct." - ) - raise Exception( - "Baseline Window Parameters for baseline z-score computation zscore_method \ - are not correct." 
- ) - else: - baseline_mean = np.nanmean(dff[idx]) - baseline_std = np.nanstd(dff[idx]) - numerator = np.subtract(dff, baseline_mean) - zscore = np.divide(numerator, baseline_std) - else: - median = np.median(dff) - mad = np.median(np.abs(dff - median)) - numerator = 0.6745 * (dff - median) - zscore = np.divide(numerator, mad) - - return zscore - - +# Category: Routing +# Reason: Orchestrates z-score computation for one channel - handles artifact removal logic, coordinates calls to execute_controlFit_dff and z_score_computation # helper function to compute z-score and deltaF/F def helper_z_score(control, signal, filepath, name, inputParameters): # helper_z_score(control_smooth, signal_smooth): @@ -957,6 +640,8 @@ def helper_z_score(control, signal, filepath, name, inputParameters): # helper_ return z_score_arr, norm_data_arr, control_fit_arr +# Category: Routing +# Reason: Orchestrates z-score computation for all channels in a session - loops through control/signal pairs, calls helper_z_score, writes results # compute z-score and deltaF/F and save it to hdf5 file def compute_z_score(filepath, inputParameters): @@ -1005,6 +690,8 @@ def compute_z_score(filepath, inputParameters): logger.info(f"z-score for the data in {filepath} computed.") +# Category: Routing +# Reason: Top-level orchestrator for timestamp correction across all sessions - loops through folders, coordinates timestamp correction workflow # function to execute timestamps corrections using functions timestampCorrection and decide_naming_convention_and_applyCorrection def execute_timestamp_correction(folderNames, inputParameters): @@ -1044,6 +731,8 @@ def execute_timestamp_correction(folderNames, inputParameters): logger.info(f"Timestamps corrections finished for {filepath}") +# Category: Routing +# Reason: Orchestrates reading and merging storeslist files from multiple sessions - loops through folders and consolidates results # for combining data, reading storeslist file from both data and create a new storeslist array def check_storeslistfile(folderNames): storesList = np.array([[], []]) @@ -1065,20 +754,8 @@ def check_storeslistfile(folderNames): return storesList -def get_all_stores_for_combining_data(folderNames): - op = [] - for i in range(100): - temp = [] - match = r"[\s\S]*" + "_output_" + str(i) - for j in folderNames: - temp.append(re.findall(match, j)) - temp = sorted(list(np.concatenate(temp).flatten()), key=str.casefold) - if len(temp) > 0: - op.append(temp) - - return op - - +# Category: Routing +# Reason: Orchestrates data combination workflow - validates sampling rates, coordinates processTimestampsForCombiningData, manages multi-session I/O # function to combine data when there are two different data files for the same recording session # it will combine the data, do timestamps processing and save the combined data in the first output folder. 
def combineData(folderNames, inputParameters, storesList): @@ -1123,6 +800,8 @@ def combineData(folderNames, inputParameters, storesList): return op +# Category: Routing +# Reason: Top-level orchestrator for z-score computation and artifact removal - coordinates compute_z_score, artifact processing, and visualization calls # function to compute z-score and deltaF/F using functions : compute_z_score and/or processTimestampsForArtifacts def execute_zscore(folderNames, inputParameters): @@ -1175,6 +854,8 @@ def execute_zscore(folderNames, inputParameters): logger.info("Signal data and event timestamps are extracted.") +# Category: Routing +# Reason: Main entry point for Step 4 - orchestrates entire preprocessing workflow including timestamp correction, data combination, and z-score computation def extractTsAndSignal(inputParameters): logger.debug("Extracting signal data and event timestamps...") @@ -1212,6 +893,8 @@ def extractTsAndSignal(inputParameters): execute_zscore(op_folder, inputParameters) +# Category: Routing +# Reason: Top-level entry point wrapper - handles error catching and calls extractTsAndSignal def main(input_parameters): try: extractTsAndSignal(input_parameters) diff --git a/step4_data_flow_analysis.md b/step4_data_flow_analysis.md new file mode 100644 index 0000000..d86e938 --- /dev/null +++ b/step4_data_flow_analysis.md @@ -0,0 +1,348 @@ +# Step 4 (preprocess.py) Data Flow Analysis + +## Overview + +Step 4 processes timestamp-corrected photometry data and computes normalized signals (ΔF/F and z-scores). It handles artifact removal, data combination from multiple sessions, and generates quality control visualizations. + +## High-Level Data Flow + +```mermaid +flowchart TD + A[Entry: extractTsAndSignal] --> B{combine_data?} + + B -->|False| C[execute_timestamp_correction] + B -->|True| D[execute_timestamp_correction] + + C --> E[execute_zscore] + + D --> F[check_storeslistfile] + F --> G[combineData] + G --> H[execute_zscore] + + E --> I[Output: z_score, dff, cntrl_sig_fit HDF5 files] + H --> I + + style A fill:#e1f5ff + style I fill:#d4edda +``` + +## Main Processing Paths + +### Entry Point +**`extractTsAndSignal(inputParameters)`** (line 1178) is the main entry point called by the GUI or API. + +### Path 1: Normal Processing (combine_data = False) +1. `execute_timestamp_correction()` → Correct timestamps and align data +2. `execute_zscore()` → Compute z-scores and ΔF/F + +### Path 2: Combined Data Processing (combine_data = True) +1. `execute_timestamp_correction()` → Correct timestamps for each file +2. `check_storeslistfile()` → Merge store lists from multiple files +3. `combineData()` → Combine data from multiple recording sessions +4. 
`execute_zscore()` → Compute z-scores and ΔF/F on combined data + +## Detailed Processing Stages + +### Stage 1: Timestamp Correction + +```mermaid +flowchart LR + A[Raw HDF5 files] --> B[Read storesList.csv] + B --> C{isosbestic_control?} + C -->|No| D[add_control_channel] + C -->|Yes| E[timestampCorrection_tdt/csv] + D --> E + E --> F[Eliminate first N seconds] + F --> G[decide_naming_convention_and_applyCorrection] + G --> H[applyCorrection for each store] + H --> I{isosbestic_control?} + I -->|No| J[create_control_channel via curve fitting] + I -->|Yes| K[timeCorrection_*.hdf5 files] + J --> K + + style A fill:#e1f5ff + style K fill:#d4edda +``` + +#### Function: `execute_timestamp_correction(folderNames, inputParameters)` + +**Input:** +- Raw HDF5 files from extractors: `control_*.hdf5`, `signal_*.hdf5`, `event_*.hdf5` + +**Process:** +1. For each session folder: + - Read `storesList.csv` (mapping of raw names to semantic names) + - If no isosbestic control: `add_control_channel()` creates placeholder control files + - **`timestampCorrection_tdt()`** or **`timestampCorrection_csv()`**: + - Eliminates first N seconds (`timeForLightsTurnOn`) + - For TDT: expands timestamps from block timestamps + sampling rate + - For CSV: uses timestamps as-is + - Writes `timeCorrection_*.hdf5` with keys: `timestampNew`, `correctionIndex`, `sampling_rate` + - **`decide_naming_convention_and_applyCorrection()`**: + - For each store, calls `applyCorrection()` to crop data using `correctionIndex` + - For control/signal channels: crops data arrays + - For event channels: subtracts time offset from timestamps + - If no isosbestic control: **`create_control_channel()`** generates synthetic control via curve fitting + +**Output:** +- Timestamp-corrected HDF5 files with trimmed data +- `timeCorrection_*.hdf5` files containing corrected timestamps + +### Stage 2: Z-Score Computation + +```mermaid +flowchart TD + A[Timestamp-corrected HDF5] --> B[compute_z_score] + B --> C{removeArtifacts?} + + C -->|No| D[helper_z_score: full data] + C -->|Yes| E[helper_z_score: chunk-by-chunk] + + D --> F[filterSignal] + E --> F + + F --> G[controlFit: linear regression] + G --> H[deltaFF: compute ΔF/F] + H --> I[z_score_computation] + + I --> J{removeArtifacts?} + + J -->|No| K[Write z_score, dff, cntrl_sig_fit] + J -->|Yes| L{artifactsRemovalMethod?} + + L -->|concatenate| M[processTimestampsForArtifacts] + L -->|NaN| N[addingNaNtoChunksWithArtifacts] + + M --> K + N --> K + + K --> O[visualizeControlAndSignal] + + style A fill:#e1f5ff + style K fill:#d4edda + style O fill:#fff3cd +``` + +#### Function: `execute_zscore(folderNames, inputParameters)` + +**Input:** +- Timestamp-corrected HDF5 files + +**Process:** +1. 
For each output folder: + + **`compute_z_score(filepath, inputParameters)`**: + - For each control/signal pair: + - **`helper_z_score(control, signal, filepath, name, inputParameters)`**: + + **Without artifacts removal:** + - `execute_controlFit_dff()`: Filter signals → fit control to signal → compute ΔF/F + - `z_score_computation()`: Compute z-score from ΔF/F + + **With artifacts removal:** + - For each user-selected chunk (from `coordsForPreProcessing_*.npy`): + - If no isosbestic: `helper_create_control_channel()` creates synthetic control + - `execute_controlFit_dff()` on chunk + - Concatenate or NaN-fill between chunks + - `z_score_computation()` on processed data + + - Writes: `z_score_*.hdf5`, `dff_*.hdf5`, `cntrl_sig_fit_*.hdf5` + + **If artifacts removal with concatenate method:** + - **`processTimestampsForArtifacts()`**: + - `eliminateData()`: Concatenates good chunks, adjusts timestamps to be continuous + - `eliminateTs()`: Aligns event timestamps with new timeline + - Overwrites data files with concatenated versions + + **If artifacts removal with NaN method:** + - **`addingNaNtoChunksWithArtifacts()`**: + - `addingNaNValues()`: Replaces bad chunks with NaN + - `removeTTLs()`: Filters event timestamps to keep only valid times + + - **`visualizeControlAndSignal()`**: Plots control, signal, cntrl_sig_fit for QC + +**Output:** +- `z_score_*.hdf5` (z-scored signal) +- `dff_*.hdf5` (ΔF/F) +- `cntrl_sig_fit_*.hdf5` (fitted control channel) + +## Key Data Transformations + +### Signal Processing Pipeline + +```mermaid +flowchart LR + A[Raw Signal] --> B[filterSignal: Moving Average] + C[Raw Control] --> D[filterSignal: Moving Average] + + B --> E[controlFit: Linear Regression] + D --> E + + E --> F[control_fit = p0*control + p1] + F --> G[deltaFF] + + B --> G + + G --> H[ΔF/F = signal - control_fit / control_fit * 100] + H --> I[z_score_computation] + + I --> J{zscore_method?} + J -->|standard| K[z = ΔF/F - mean / std] + J -->|baseline| L[z = ΔF/F - baseline_mean / baseline_std] + J -->|robust| M[z = 0.6745 * ΔF/F - median / MAD] + + K --> N[Z-Score Output] + L --> N + M --> N + + style A fill:#e1f5ff + style C fill:#e1f5ff + style N fill:#d4edda +``` + +### Transformation Functions + +1. **`filterSignal(filter_window, signal)`** (line 822) + - Applies moving average filter with configurable window + - Uses `scipy.signal.filtfilt` for zero-phase filtering + +2. **`controlFit(control, signal)`** (line 815) + - Linear regression: fits control to signal + - Returns: `fitted_control = p[0] * control + p[1]` + +3. **`deltaFF(signal, control)`** (line 804) + - Formula: `((signal - control) / control) * 100` + - Computes normalized fluorescence change + +4. 
**`z_score_computation(dff, timestamps, inputParameters)`** (line 853) + - **Standard z-score:** `(ΔF/F - mean(ΔF/F)) / std(ΔF/F)` + - **Baseline z-score:** `(ΔF/F - mean(baseline)) / std(baseline)` + - **Robust z-score:** `0.6745 * (ΔF/F - median) / MAD` + +## Artifact Removal Workflow + +### Interactive Artifact Selection + +The `visualize()` function (line 469) provides an interactive matplotlib plot: +- **Space key:** Mark artifact boundary (vertical line drawn) +- **'d' key:** Delete last marked boundary +- **Close plot:** Save coordinates to `coordsForPreProcessing_*.npy` + +### Two Removal Methods + +**Concatenate Method:** +- Removes artifact chunks completely +- Concatenates good chunks end-to-end +- Adjusts timestamps to be continuous +- Event timestamps realigned to new timeline + +**NaN Method:** +- Replaces artifact chunks with NaN values +- Preserves original timeline +- Filters out event timestamps in artifact regions + +## Supporting Functions + +### Control Channel Creation + +**`helper_create_control_channel(signal, timestamps, window)`** (line 69) +- Used when no isosbestic control is available +- Applies Savitzky-Golay filter to signal +- Fits to exponential function: `f(x) = a + b * exp(-(1/c) * x)` +- Returns synthetic control channel + +### Data Combination + +**`combineData(folderNames, inputParameters, storesList)`** (line 1084) +- Merges data from multiple recording sessions +- Validates that sampling rates match across sessions +- Calls `processTimestampsForCombiningData()` to align timelines +- Saves combined data to first output folder + +### Coordinate Fetching + +**`fetchCoords(filepath, naming, data)`** (line 610) +- Reads `coordsForPreProcessing_*.npy` (artifact boundary coordinates) +- If file doesn't exist: uses `[0, data[-1]]` (entire recording) +- Validates even number of coordinates (pairs of boundaries) +- Returns reshaped array of coordinate pairs + +## File I/O Summary + +### Files Read + +| File Pattern | Content | Source | +|-------------|---------|--------| +| `control_*.hdf5` | Control channel data | Extractors (Step 3) | +| `signal_*.hdf5` | Signal channel data | Extractors (Step 3) | +| `event_*.hdf5` | Event timestamps | Extractors (Step 3) | +| `storesList.csv` | Channel name mapping | Step 2 | +| `coordsForPreProcessing_*.npy` | Artifact boundaries | User selection (optional) | + +### Files Written + +| File Pattern | Content | Keys | +|-------------|---------|------| +| `timeCorrection_*.hdf5` | Corrected timestamps | `timestampNew`, `correctionIndex`, `sampling_rate`, `timeRecStart` (TDT only) | +| `z_score_*.hdf5` | Z-scored signal | `data` | +| `dff_*.hdf5` | ΔF/F signal | `data` | +| `cntrl_sig_fit_*.hdf5` | Fitted control | `data` | +| `event_*_*.hdf5` | Corrected event timestamps | `ts` | + +## Key Parameters from inputParameters + +| Parameter | Purpose | Default/Options | +|-----------|---------|-----------------| +| `timeForLightsTurnOn` | Seconds to eliminate from start | 1 | +| `filter_window` | Moving average window size | 100 | +| `isosbestic_control` | Use isosbestic control channel? | True/False | +| `removeArtifacts` | Enable artifact removal? | True/False | +| `artifactsRemovalMethod` | How to handle artifacts | "concatenate" / "NaN" | +| `zscore_method` | Z-score computation method | "standard z-score" / "baseline z-score" / "robust z-score" | +| `baselineWindowStart` | Baseline window start (seconds) | 0 | +| `baselineWindowEnd` | Baseline window end (seconds) | 0 | +| `combine_data` | Combine multiple recordings? 
| True/False | + +## Architecture Notes for Refactoring + +### Current Coupling Issues + +1. **GUI Progress Tracking:** `writeToFile()` writes to `~/pbSteps.txt` for progress bar updates (lines 36-38, 1042, 1171, 1203, 1208, 1220) +2. **Interactive Plotting:** `visualize()` requires user interaction (matplotlib event handlers) +3. **File Path Assumptions:** Hard-coded path patterns (`*_output_*`, naming conventions) +4. **Mixed Responsibilities:** Single functions handle both computation and I/O + +### Recommended Separation Points + +**Backend Analysis Layer Should Include:** +- `filterSignal()` - pure signal processing +- `controlFit()` - pure regression +- `deltaFF()` - pure computation +- `z_score_computation()` - pure statistical computation +- `helper_create_control_channel()` - algorithmic control generation +- Core timestamp correction logic (separated from I/O) +- Core artifact removal logic (separated from I/O) + +**Data I/O Layer Should Include:** +- `read_hdf5()`, `write_hdf5()` - file operations +- Store list reading/writing +- Coordinate file handling +- HDF5 file discovery and path management + +**Frontend Visualization Layer Should Include:** +- `visualize()` - interactive artifact selection +- `visualizeControlAndSignal()` - QC plots +- `visualize_z_score()`, `visualize_dff()` - result visualization +- Progress tracking callbacks (replace `writeToFile()`) + +### Potential Refactoring Strategy + +1. **Extract pure computation functions** into a `signal_processing` module +2. **Create data models** (dataclasses) for: + - TimeCorrectionResult + - ProcessedSignal (with z_score, dff, control_fit) + - ArtifactRegions +3. **Separate I/O operations** into `io_utils` module with consistent interfaces +4. **Create processing pipelines** that accept data objects, return data objects +5. **Move visualization to separate module** with callbacks for progress/interaction +6. **Use dependency injection** for progress callbacks instead of hard-coded file writes From eadb22f62670ffd10301ae85eb08060c45f6a133 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 5 Dec 2025 13:26:52 -0800 Subject: [PATCH 062/150] Organized step 4 analysis functions into various conceptual sub-steps. 
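
The z_score sub-step keeps the pure computations (filterSignal, controlFit, deltaFF, z_score_computation) separate from the routing code that reads and writes HDF5. As a rough sketch of how those pieces compose on in-memory arrays (assuming NumPy/SciPy as used throughout GuPPy; the standalone function names and synthetic traces below are illustrative, not the shipped module):

```python
import numpy as np
from scipy import signal as ss


def filter_signal(filter_window, signal):
    # Zero-phase moving-average filter; a window of 0 means "no filtering",
    # mirroring filterSignal in z_score.py.
    if filter_window == 0:
        return signal
    b = np.ones(filter_window) / filter_window
    return ss.filtfilt(b, 1, signal)


def control_fit(control, signal):
    # Least-squares fit of the signal as a linear function of the control,
    # as in controlFit: fitted = p[0] * control + p[1].
    p = np.polyfit(control, signal, 1)
    return p[0] * control + p[1]


def delta_ff(signal, fitted_control):
    # Percent change of the signal relative to the fitted control (deltaFF).
    return (signal - fitted_control) / fitted_control * 100.0


def standard_z_score(dff):
    # The "standard z-score" branch of z_score_computation.
    return (dff - np.nanmean(dff)) / np.nanstd(dff)


# Toy usage on synthetic traces (hypothetical data, 100 Hz for 60 s).
rng = np.random.default_rng(0)
t = np.arange(0, 60, 0.01)
control = 1.0 + 0.001 * t + 0.01 * rng.standard_normal(t.size)
signal = 2.0 + 0.002 * t + 0.05 * rng.standard_normal(t.size)

sig_smooth = filter_signal(100, signal)
ctrl_smooth = filter_signal(100, control)
fit = control_fit(ctrl_smooth, sig_smooth)
z = standard_z_score(delta_ff(sig_smooth, fit))
```

In the shipped execute_controlFit_dff, both channels are smoothed when an isosbestic control is present; when the control is synthesized from the signal, only the signal is smoothed before fitting.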
--- src/guppy/analysis/analysis.py | 268 ---------- src/guppy/analysis/artifact_removal.py | 200 ++++++++ src/guppy/analysis/combine_data.py | 398 ++++++++++++++ src/guppy/analysis/control_channel.py | 42 ++ src/guppy/analysis/io_utils.py | 23 + src/guppy/analysis/timestamp_correction.py | 302 +++++++++++ src/guppy/analysis/z_score.py | 234 +++++++++ src/guppy/preprocess.py | 570 +-------------------- 8 files changed, 1213 insertions(+), 824 deletions(-) delete mode 100644 src/guppy/analysis/analysis.py create mode 100644 src/guppy/analysis/artifact_removal.py create mode 100644 src/guppy/analysis/combine_data.py create mode 100644 src/guppy/analysis/control_channel.py create mode 100644 src/guppy/analysis/timestamp_correction.py create mode 100644 src/guppy/analysis/z_score.py diff --git a/src/guppy/analysis/analysis.py b/src/guppy/analysis/analysis.py deleted file mode 100644 index 4ec8960..0000000 --- a/src/guppy/analysis/analysis.py +++ /dev/null @@ -1,268 +0,0 @@ -import logging - -import numpy as np -from scipy import signal as ss -from scipy.optimize import curve_fit - -from .io_utils import fetchCoords, read_hdf5 - -logger = logging.getLogger(__name__) - - -# Category: Analysis -# Reason: Pure mathematical function for exponential curve fitting - no dependencies, pure computation -# curve fit exponential function -def curveFitFn(x, a, b, c): - return a + (b * np.exp(-(1 / c) * x)) - - -# Category: Analysis -# Reason: Pure algorithmic function - applies Savitzky-Golay filter and curve fitting to generate synthetic control channel -# helper function to create control channel using signal channel -# by curve fitting signal channel to exponential function -# when there is no isosbestic control channel is present -def helper_create_control_channel(signal, timestamps, window): - # check if window is greater than signal shape - if window > signal.shape[0]: - window = ((signal.shape[0] + 1) / 2) + 1 - if window % 2 != 0: - window = window - else: - window = window + 1 - - filtered_signal = ss.savgol_filter(signal, window_length=window, polyorder=3) - - p0 = [5, 50, 60] - - try: - popt, pcov = curve_fit(curveFitFn, timestamps, filtered_signal, p0) - except Exception as e: - logger.error(str(e)) - - # logger.info('Curve Fit Parameters : ', popt) - control = curveFitFn(timestamps, *popt) - - return control - - -# Category: Analysis -# Reason: Data validation function - compares array lengths and returns indices for processing -# function to check control and signal channel has same length -# if not, take a smaller length and do pre-processing -def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList): - - indices = [] - for i in range(channels_arr.shape[1]): - idx_c = np.where(storesList == channels_arr[0, i])[0] - idx_s = np.where(storesList == channels_arr[1, i])[0] - control = read_hdf5(storenames[idx_c[0]], filepath, "data") - signal = read_hdf5(storenames[idx_s[0]], filepath, "data") - if control.shape[0] < signal.shape[0]: - indices.append(storesList[idx_c[0]]) - elif control.shape[0] > signal.shape[0]: - indices.append(storesList[idx_s[0]]) - else: - indices.append(storesList[idx_s[0]]) - - return indices - - -# Category: Analysis -# Reason: Pure algorithmic function - concatenates data chunks based on coordinate boundaries, adjusts timestamps mathematically -# helper function to process control and signal timestamps -def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") 
- data = read_hdf5(event, filepath, "data").reshape(-1) - coords = fetchCoords(filepath, naming, ts) - - if (data == 0).all() == True: - data = np.zeros(ts.shape[0]) - - arr = np.array([]) - ts_arr = np.array([]) - for i in range(coords.shape[0]): - - index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - - if len(arr) == 0: - arr = np.concatenate((arr, data[index])) - sub = ts[index][0] - timeForLightsTurnOn - new_ts = ts[index] - sub - ts_arr = np.concatenate((ts_arr, new_ts)) - else: - temp = data[index] - # new = temp + (arr[-1]-temp[0]) - temp_ts = ts[index] - new_ts = temp_ts - (temp_ts[0] - ts_arr[-1]) - arr = np.concatenate((arr, temp)) - ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) - - # logger.info(arr.shape, ts_arr.shape) - return arr, ts_arr - - -# Category: Analysis -# Reason: Pure algorithmic function - processes event timestamps based on coordinate boundaries, aligns with data timeline -# helper function to align event timestamps with the control and signal timestamps -def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, naming, tsNew) - - ts_arr = np.array([]) - tsNew_arr = np.array([]) - for i in range(coords.shape[0]): - tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] - ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - - if len(tsNew_arr) == 0: - sub = tsNew[tsNew_index][0] - timeForLightsTurnOn - tsNew_arr = np.concatenate((tsNew_arr, tsNew[tsNew_index] - sub)) - ts_arr = np.concatenate((ts_arr, ts[ts_index] - sub)) - else: - temp_tsNew = tsNew[tsNew_index] - temp_ts = ts[ts_index] - new_ts = temp_ts - (temp_tsNew[0] - tsNew_arr[-1]) - new_tsNew = temp_tsNew - (temp_tsNew[0] - tsNew_arr[-1]) - tsNew_arr = np.concatenate((tsNew_arr, new_tsNew + (1 / sampling_rate))) - ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) - - return ts_arr - - -# Category: Analysis -# Reason: Pure algorithmic function - replaces specified data chunks with NaN based on coordinate boundaries -# adding nan values to removed chunks -# when using artifacts removal method - replace with NaN -def addingNaNValues(filepath, event, naming): - - ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - data = read_hdf5(event, filepath, "data").reshape(-1) - coords = fetchCoords(filepath, naming, ts) - - if (data == 0).all() == True: - data = np.zeros(ts.shape[0]) - - arr = np.array([]) - ts_index = np.arange(ts.shape[0]) - for i in range(coords.shape[0]): - - index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - arr = np.concatenate((arr, index)) - - nan_indices = list(set(ts_index).symmetric_difference(arr)) - data[nan_indices] = np.nan - - return data - - -# Category: Analysis -# Reason: Pure algorithmic function - filters event timestamps to exclude artifact regions based on coordinates -# remove event TTLs which falls in the removed chunks -# when using artifacts removal method - replace with NaN -def removeTTLs(filepath, event, naming): - tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, naming, tsNew) - - ts_arr = np.array([]) - for i in range(coords.shape[0]): - ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] - ts_arr = np.concatenate((ts_arr, 
ts[ts_index])) - - return ts_arr - - -# Category: Analysis -# Reason: Pure mathematical computation - calculates deltaF/F from signal and control arrays with simple formula -# function to compute deltaF/F using fitted control channel and filtered signal channel -def deltaFF(signal, control): - - res = np.subtract(signal, control) - normData = np.divide(res, control) - # deltaFF = normData - normData = normData * 100 - - return normData - - -# Category: Analysis -# Reason: Pure algorithmic function - performs polynomial linear regression to fit control to signal -# function to fit control channel to signal channel -def controlFit(control, signal): - - p = np.polyfit(control, signal, 1) - arr = (p[0] * control) + p[1] - return arr - - -# Category: Analysis -# Reason: Pure signal processing function - applies moving average filter using scipy filtfilt -def filterSignal(filter_window, signal): - if filter_window == 0: - return signal - elif filter_window > 1: - b = np.divide(np.ones((filter_window,)), filter_window) - a = 1 - filtered_signal = ss.filtfilt(b, a, signal) - return filtered_signal - else: - raise Exception("Moving average filter window value is not correct.") - - -# Category: Routing -# Reason: Orchestrates signal processing pipeline - sequences calls to filterSignal, controlFit, and deltaFF with conditional logic -# function to filter control and signal channel, also execute above two function : controlFit and deltaFF -# function will also take care if there is only signal channel and no control channel -# if there is only signal channel, z-score will be computed using just signal channel -def execute_controlFit_dff(control, signal, isosbestic_control, filter_window): - - if isosbestic_control == False: - signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) - control_fit = controlFit(control, signal_smooth) - norm_data = deltaFF(signal_smooth, control_fit) - else: - control_smooth = filterSignal(filter_window, control) # ss.filtfilt(b, a, control) - signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) - control_fit = controlFit(control_smooth, signal_smooth) - norm_data = deltaFF(signal_smooth, control_fit) - - return norm_data, control_fit - - -# Category: Analysis -# Reason: Pure statistical computation - calculates z-score from deltaF/F using different methods (standard, baseline, robust) -# function to compute z-score based on z-score computation method -def z_score_computation(dff, timestamps, inputParameters): - - zscore_method = inputParameters["zscore_method"] - baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] - - if zscore_method == "standard z-score": - numerator = np.subtract(dff, np.nanmean(dff)) - zscore = np.divide(numerator, np.nanstd(dff)) - elif zscore_method == "baseline z-score": - idx = np.where((timestamps > baseline_start) & (timestamps < baseline_end))[0] - if idx.shape[0] == 0: - logger.error( - "Baseline Window Parameters for baseline z-score computation zscore_method \ - are not correct." - ) - raise Exception( - "Baseline Window Parameters for baseline z-score computation zscore_method \ - are not correct." 
- ) - else: - baseline_mean = np.nanmean(dff[idx]) - baseline_std = np.nanstd(dff[idx]) - numerator = np.subtract(dff, baseline_mean) - zscore = np.divide(numerator, baseline_std) - else: - median = np.median(dff) - mad = np.median(np.abs(dff - median)) - numerator = 0.6745 * (dff - median) - zscore = np.divide(numerator, mad) - - return zscore diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py new file mode 100644 index 0000000..3c51830 --- /dev/null +++ b/src/guppy/analysis/artifact_removal.py @@ -0,0 +1,200 @@ +import logging +import os + +import numpy as np + +from .io_utils import ( + decide_naming_convention, + fetchCoords, + read_hdf5, + write_hdf5, +) + +logger = logging.getLogger(__name__) + + +# Category: Routing +# Reason: Orchestrates NaN replacement for all stores - loops through channels and coordinates calls to addingNaNValues and removeTTLs +def addingNaNtoChunksWithArtifacts(filepath, events): + + logger.debug("Replacing chunks with artifacts by NaN values.") + storesList = events[1, :] + + path = decide_naming_convention(filepath) + + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") + # dirname = os.path.dirname(path[i]) + if name_1[-1] == name_2[-1]: + name = name_1[-1] + sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] + for i in range(len(storesList)): + if ( + "control_" + name.lower() in storesList[i].lower() + or "signal_" + name.lower() in storesList[i].lower() + ): # changes done + data = addingNaNValues(filepath, storesList[i], name) + write_hdf5(data, storesList[i], filepath, "data") + else: + if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): + continue + else: + ts = removeTTLs(filepath, storesList[i], name) + write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") + + else: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + logger.info("Chunks with artifacts are replaced by NaN values.") + + +# Category: Routing +# Reason: Orchestrates timestamp concatenation for artifact removal - loops through stores, coordinates eliminateData/eliminateTs calls and writes results +# main function to align timestamps for control, signal and event timestamps for artifacts removal +def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): + + logger.debug("Processing timestamps to get rid of artifacts using concatenate method...") + storesList = events[1, :] + + path = decide_naming_convention(filepath) + + timestamp_dict = dict() + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") + # dirname = os.path.dirname(path[i]) + if name_1[-1] == name_2[-1]: + name = name_1[-1] + sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] + + for i in range(len(storesList)): + if ( + "control_" + name.lower() in storesList[i].lower() + or "signal_" + name.lower() in storesList[i].lower() + ): # changes done + data, timestampNew = eliminateData( + filepath, timeForLightsTurnOn, storesList[i], sampling_rate, name + ) + write_hdf5(data, storesList[i], filepath, "data") + else: + if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): + continue + else: + ts = 
eliminateTs(filepath, timeForLightsTurnOn, storesList[i], sampling_rate, name) + write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") + + # timestamp_dict[name] = timestampNew + write_hdf5(timestampNew, "timeCorrection_" + name, filepath, "timestampNew") + else: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") + + +# Category: Analysis +# Reason: Pure algorithmic function - concatenates data chunks based on coordinate boundaries, adjusts timestamps mathematically +# helper function to process control and signal timestamps +def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): + + ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + data = read_hdf5(event, filepath, "data").reshape(-1) + coords = fetchCoords(filepath, naming, ts) + + if (data == 0).all() == True: + data = np.zeros(ts.shape[0]) + + arr = np.array([]) + ts_arr = np.array([]) + for i in range(coords.shape[0]): + + index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + + if len(arr) == 0: + arr = np.concatenate((arr, data[index])) + sub = ts[index][0] - timeForLightsTurnOn + new_ts = ts[index] - sub + ts_arr = np.concatenate((ts_arr, new_ts)) + else: + temp = data[index] + # new = temp + (arr[-1]-temp[0]) + temp_ts = ts[index] + new_ts = temp_ts - (temp_ts[0] - ts_arr[-1]) + arr = np.concatenate((arr, temp)) + ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) + + # logger.info(arr.shape, ts_arr.shape) + return arr, ts_arr + + +# Category: Analysis +# Reason: Pure algorithmic function - processes event timestamps based on coordinate boundaries, aligns with data timeline +# helper function to align event timestamps with the control and signal timestamps +def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): + + tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) + coords = fetchCoords(filepath, naming, tsNew) + + ts_arr = np.array([]) + tsNew_arr = np.array([]) + for i in range(coords.shape[0]): + tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] + ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] + + if len(tsNew_arr) == 0: + sub = tsNew[tsNew_index][0] - timeForLightsTurnOn + tsNew_arr = np.concatenate((tsNew_arr, tsNew[tsNew_index] - sub)) + ts_arr = np.concatenate((ts_arr, ts[ts_index] - sub)) + else: + temp_tsNew = tsNew[tsNew_index] + temp_ts = ts[ts_index] + new_ts = temp_ts - (temp_tsNew[0] - tsNew_arr[-1]) + new_tsNew = temp_tsNew - (temp_tsNew[0] - tsNew_arr[-1]) + tsNew_arr = np.concatenate((tsNew_arr, new_tsNew + (1 / sampling_rate))) + ts_arr = np.concatenate((ts_arr, new_ts + (1 / sampling_rate))) + + return ts_arr + + +# Category: Analysis +# Reason: Pure algorithmic function - replaces specified data chunks with NaN based on coordinate boundaries +# adding nan values to removed chunks +# when using artifacts removal method - replace with NaN +def addingNaNValues(filepath, event, naming): + + ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + data = read_hdf5(event, filepath, "data").reshape(-1) + coords = fetchCoords(filepath, naming, ts) + + if (data == 0).all() == True: + data = np.zeros(ts.shape[0]) + + arr = np.array([]) + ts_index = np.arange(ts.shape[0]) + 
for i in range(coords.shape[0]):
+
+        index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0]
+        arr = np.concatenate((arr, index))
+
+    nan_indices = list(set(ts_index).symmetric_difference(arr))
+    data[nan_indices] = np.nan
+
+    return data
+
+
+# Category: Analysis
+# Reason: Pure algorithmic function - filters event timestamps to exclude artifact regions based on coordinates
+# remove event TTLs which fall in the removed chunks
+# when using artifacts removal method - replace with NaN
+def removeTTLs(filepath, event, naming):
+    tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew")
+    ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1)
+    coords = fetchCoords(filepath, naming, tsNew)
+
+    ts_arr = np.array([])
+    for i in range(coords.shape[0]):
+        ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0]
+        ts_arr = np.concatenate((ts_arr, ts[ts_index]))
+
+    return ts_arr
diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py
new file mode 100644
index 0000000..29e4b9d
--- /dev/null
+++ b/src/guppy/analysis/combine_data.py
@@ -0,0 +1,398 @@
+# TODO: remove redundant function implementations such as eliminateData, eliminateTs, read_hdf5, et cetera.
+
+import fnmatch
+import glob
+import h5py
+import logging
+import os
+import re
+
+import numpy as np
+
+from .io_utils import (
+    get_all_stores_for_combining_data,
+    read_hdf5,
+    takeOnlyDirs,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# Category: Routing
+# Reason: Orchestrates data combination workflow - validates sampling rates, coordinates processTimestampsForCombiningData, manages multi-session I/O
+# function to combine data when there are two different data files for the same recording session
+# it will combine the data, do timestamps processing and save the combined data in the first output folder.
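+# combineData expects each session folder to contain one or more "*_output_*"
+# directories produced by the earlier steps, and it requires every session to
+# share the same sampling rate (read from the timeCorrection_* files); a
+# sampling-rate mismatch raises an exception before any data is combined.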
+def combineData(folderNames, inputParameters, storesList): + + logger.debug("Combining Data from different data files...") + timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] + op_folder = [] + for i in range(len(folderNames)): + filepath = folderNames[i] + op_folder.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) + + op_folder = list(np.concatenate(op_folder).flatten()) + sampling_rate_fp = [] + for i in range(len(folderNames)): + filepath = folderNames[i] + storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) + for j in range(len(storesListPath)): + filepath = storesListPath[j] + storesList_new = np.genfromtxt( + os.path.join(filepath, "storesList.csv"), dtype="str", delimiter="," + ).reshape(2, -1) + sampling_rate_fp.append(glob.glob(os.path.join(filepath, "timeCorrection_*"))) + + # check if sampling rate is same for both data + sampling_rate_fp = np.concatenate(sampling_rate_fp) + sampling_rate = [] + for i in range(sampling_rate_fp.shape[0]): + sampling_rate.append(read_hdf5("", sampling_rate_fp[i], "sampling_rate")) + + res = all(i == sampling_rate[0] for i in sampling_rate) + if res == False: + logger.error("To combine the data, sampling rate for both the data should be same.") + raise Exception("To combine the data, sampling rate for both the data should be same.") + + # get the output folders informatinos + op = get_all_stores_for_combining_data(op_folder) + + # processing timestamps for combining the data + processTimestampsForCombiningData(op, timeForLightsTurnOn, storesList, sampling_rate[0]) + logger.info("Data is combined from different data files.") + + return op + + +def find_files(path, glob_path, ignore_case=False): + rule = ( + re.compile(fnmatch.translate(glob_path), re.IGNORECASE) + if ignore_case + else re.compile(fnmatch.translate(glob_path)) + ) + no_bytes_path = os.listdir(os.path.expanduser(path)) + str_path = [] + + # converting byte object to string + for x in no_bytes_path: + try: + str_path.append(x.decode("utf-8")) + except: + str_path.append(x) + + return [os.path.join(path, n) for n in str_path if rule.match(n)] + + +def read_hdf5(event, filepath, key): + if event: + op = os.path.join(filepath, event + ".hdf5") + else: + op = filepath + + if os.path.exists(op): + with h5py.File(op, "r") as f: + arr = np.asarray(f[key]) + else: + raise Exception("{}.hdf5 file does not exist".format(event)) + + return arr + + +def write_hdf5(data, event, filepath, key): + op = os.path.join(filepath, event + ".hdf5") + + if not os.path.exists(op): + with h5py.File(op, "w") as f: + if type(data) is np.ndarray: + f.create_dataset(key, data=data, maxshape=(None,), chunks=True) + else: + f.create_dataset(key, data=data) + else: + with h5py.File(op, "r+") as f: + if key in list(f.keys()): + if type(data) is np.ndarray: + f[key].resize(data.shape) + arr = f[key] + arr[:] = data + else: + arr = f[key] + arr = data + else: + f.create_dataset(key, data=data, maxshape=(None,), chunks=True) + + +def decide_naming_convention(filepath): + path_1 = find_files(filepath, "control*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) + + path_2 = find_files(filepath, "signal*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) + + path = sorted(path_1 + path_2, key=str.casefold) + + if len(path) % 2 != 0: + raise Exception("There are not equal number of Control and Signal data") + + path = np.asarray(path).reshape(2, -1) + + return path + + +def eliminateData(filepath, timeForLightsTurnOn, event, 
sampling_rate, naming): + + arr = np.array([]) + ts_arr = np.array([]) + for i in range(len(filepath)): + ts = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") + data = read_hdf5(event, filepath[i], "data").reshape(-1) + + # index = np.where((ts>coords[i,0]) & (tscoords[i,0]) & (ts signal.shape[0]: + window = ((signal.shape[0] + 1) / 2) + 1 + if window % 2 != 0: + window = window + else: + window = window + 1 + + filtered_signal = ss.savgol_filter(signal, window_length=window, polyorder=3) + + p0 = [5, 50, 60] + + try: + popt, pcov = curve_fit(curveFitFn, timestamps, filtered_signal, p0) + except Exception as e: + logger.error(str(e)) + + # logger.info('Curve Fit Parameters : ', popt) + control = curveFitFn(timestamps, *popt) + + return control + + +# Category: Analysis +# Reason: Pure mathematical function for exponential curve fitting - no dependencies, pure computation +# curve fit exponential function +def curveFitFn(x, a, b, c): + return a + (b * np.exp(-(1 / c) * x)) diff --git a/src/guppy/analysis/io_utils.py b/src/guppy/analysis/io_utils.py index 33b6650..999c190 100644 --- a/src/guppy/analysis/io_utils.py +++ b/src/guppy/analysis/io_utils.py @@ -161,3 +161,26 @@ def get_all_stores_for_combining_data(folderNames): op.append(temp) return op + + +# Category: Routing +# Reason: Orchestrates reading and merging storeslist files from multiple sessions - loops through folders and consolidates results +# for combining data, reading storeslist file from both data and create a new storeslist array +def check_storeslistfile(folderNames): + storesList = np.array([[], []]) + for i in range(len(folderNames)): + filepath = folderNames[i] + storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) + for j in range(len(storesListPath)): + filepath = storesListPath[j] + storesList = np.concatenate( + ( + storesList, + np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1), + ), + axis=1, + ) + + storesList = np.unique(storesList, axis=1) + + return storesList diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py new file mode 100644 index 0000000..350dd5d --- /dev/null +++ b/src/guppy/analysis/timestamp_correction.py @@ -0,0 +1,302 @@ +import logging +import os +import shutil + +import numpy as np +import pandas as pd + +from .control_channel import helper_create_control_channel +from .io_utils import ( + check_TDT, + read_hdf5, + write_hdf5, +) + +logger = logging.getLogger(__name__) + + +# Category: Routing +# Reason: Orchestrates validation logic, file copying, and storesList updates - coordinates multiple operations and file manipulations +# function to add control channel when there is no +# isosbestic control channel and update the storeslist file +def add_control_channel(filepath, arr): + + storenames = arr[0, :] + storesList = np.char.lower(arr[1, :]) + + keep_control = np.array([]) + # check a case if there is isosbestic control channel present + for i in range(storesList.shape[0]): + if "control" in storesList[i].lower(): + name = storesList[i].split("_")[-1] + new_str = "signal_" + str(name).lower() + find_signal = [True for i in storesList if i == new_str] + if len(find_signal) > 1: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + if len(find_signal) == 0: + logger.error( + "Isosbectic control channel parameter is set to False and still \ + 
storeslist file shows there is control channel present" + ) + raise Exception( + "Isosbectic control channel parameter is set to False and still \ + storeslist file shows there is control channel present" + ) + else: + continue + + for i in range(storesList.shape[0]): + if "signal" in storesList[i].lower(): + name = storesList[i].split("_")[-1] + new_str = "control_" + str(name).lower() + find_signal = [True for i in storesList if i == new_str] + if len(find_signal) == 0: + src, dst = os.path.join(filepath, arr[0, i] + ".hdf5"), os.path.join( + filepath, "cntrl" + str(i) + ".hdf5" + ) + shutil.copyfile(src, dst) + arr = np.concatenate((arr, [["cntrl" + str(i)], ["control_" + str(arr[1, i].split("_")[-1])]]), axis=1) + + np.savetxt(os.path.join(filepath, "storesList.csv"), arr, delimiter=",", fmt="%s") + + return arr + + +# Category: Routing +# Reason: Orchestrates timestamp correction workflow - loops through stores, coordinates reading/writing, calls validation and correction logic +# function to correct timestamps after eliminating first few seconds of the data (for csv data) +def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList): + + logger.debug( + f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" + ) + storenames = storesList[0, :] + storesList = storesList[1, :] + + arr = [] + for i in range(storesList.shape[0]): + if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): + arr.append(storesList[i]) + + arr = sorted(arr, key=str.casefold) + try: + arr = np.asarray(arr).reshape(2, -1) + except: + logger.error("Error in saving stores list file or spelling mistake for control or signal") + raise Exception("Error in saving stores list file or spelling mistake for control or signal") + + indices = check_cntrl_sig_length(filepath, arr, storenames, storesList) + + for i in range(arr.shape[1]): + name_1 = arr[0, i].split("_")[-1] + name_2 = arr[1, i].split("_")[-1] + # dirname = os.path.dirname(path[i]) + idx = np.where(storesList == indices[i])[0] + + if idx.shape[0] == 0: + logger.error(f"{arr[0,i]} does not exist in the stores list file.") + raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) + + timestamp = read_hdf5(storenames[idx][0], filepath, "timestamps") + sampling_rate = read_hdf5(storenames[idx][0], filepath, "sampling_rate") + + if name_1 == name_2: + correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] + timestampNew = timestamp[correctionIndex] + write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") + write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") + write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") + + else: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + + logger.info("Timestamps corrected and converted to seconds.") + + +# Category: Routing +# Reason: Orchestrates timestamp correction workflow for TDT format - loops through stores, coordinates timestamp expansion algorithm with I/O +# function to correct timestamps after eliminating first few seconds of the data (for TDT data) +def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList): + + logger.debug( + f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" + ) + storenames = 
storesList[0, :] + storesList = storesList[1, :] + + arr = [] + for i in range(storesList.shape[0]): + if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): + arr.append(storesList[i]) + + arr = sorted(arr, key=str.casefold) + + try: + arr = np.asarray(arr).reshape(2, -1) + except: + logger.error("Error in saving stores list file or spelling mistake for control or signal") + raise Exception("Error in saving stores list file or spelling mistake for control or signal") + + indices = check_cntrl_sig_length(filepath, arr, storenames, storesList) + + for i in range(arr.shape[1]): + name_1 = arr[0, i].split("_")[-1] + name_2 = arr[1, i].split("_")[-1] + # dirname = os.path.dirname(path[i]) + idx = np.where(storesList == indices[i])[0] + + if idx.shape[0] == 0: + logger.error(f"{arr[0,i]} does not exist in the stores list file.") + raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) + + timestamp = read_hdf5(storenames[idx][0], filepath, "timestamps") + npoints = read_hdf5(storenames[idx][0], filepath, "npoints") + sampling_rate = read_hdf5(storenames[idx][0], filepath, "sampling_rate") + + if name_1 == name_2: + timeRecStart = timestamp[0] + timestamps = np.subtract(timestamp, timeRecStart) + adder = np.arange(npoints) / sampling_rate + lengthAdder = adder.shape[0] + timestampNew = np.zeros((len(timestamps), lengthAdder)) + for i in range(lengthAdder): + timestampNew[:, i] = np.add(timestamps, adder[i]) + timestampNew = (timestampNew.T).reshape(-1, order="F") + correctionIndex = np.where(timestampNew >= timeForLightsTurnOn)[0] + timestampNew = timestampNew[correctionIndex] + + write_hdf5(np.asarray([timeRecStart]), "timeCorrection_" + name_1, filepath, "timeRecStart") + write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") + write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") + write_hdf5(np.asarray([sampling_rate]), "timeCorrection_" + name_1, filepath, "sampling_rate") + else: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + + logger.info("Timestamps corrected and converted to seconds.") + # return timeRecStart, correctionIndex, timestampNew + + +# Category: Routing +# Reason: Orchestrates naming validation and correction application - loops through channel pairs and delegates to applyCorrection +# function to check if naming convention was followed while saving storeslist file +# and apply timestamps correction using the function applyCorrection +def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList): + + logger.debug("Applying correction of timestamps to the data and event timestamps") + storesList = storesList[1, :] + + arr = [] + for i in range(storesList.shape[0]): + if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): + arr.append(storesList[i]) + + arr = sorted(arr, key=str.casefold) + arr = np.asarray(arr).reshape(2, -1) + + for i in range(arr.shape[1]): + name_1 = arr[0, i].split("_")[-1] + name_2 = arr[1, i].split("_")[-1] + # dirname = os.path.dirname(path[i]) + if name_1 == name_2: + applyCorrection(filepath, timeForLightsTurnOn, event, displayName, name_1) + else: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + + logger.info("Timestamps corrections applied to 
the data and event timestamps.") + + +# Category: Routing +# Reason: Orchestrates applying timestamp corrections - reads correction indices, applies different logic based on data type, writes results +# function to apply correction to control, signal and event timestamps +def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): + + cond = check_TDT(os.path.dirname(filepath)) + + if cond == True: + timeRecStart = read_hdf5("timeCorrection_" + naming, filepath, "timeRecStart")[0] + + timestampNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + correctionIndex = read_hdf5("timeCorrection_" + naming, filepath, "correctionIndex") + + if "control" in displayName.lower() or "signal" in displayName.lower(): + split_name = displayName.split("_")[-1] + if split_name == naming: + pass + else: + correctionIndex = read_hdf5("timeCorrection_" + split_name, filepath, "correctionIndex") + arr = read_hdf5(event, filepath, "data") + if (arr == 0).all() == True: + arr = arr + else: + arr = arr[correctionIndex] + write_hdf5(arr, displayName, filepath, "data") + else: + arr = read_hdf5(event, filepath, "timestamps") + if cond == True: + res = (arr >= timeRecStart).all() + if res == True: + arr = np.subtract(arr, timeRecStart) + arr = np.subtract(arr, timeForLightsTurnOn) + else: + arr = np.subtract(arr, timeForLightsTurnOn) + else: + arr = np.subtract(arr, timeForLightsTurnOn) + write_hdf5(arr, displayName + "_" + naming, filepath, "ts") + + # if isosbestic_control==False and 'control' in displayName.lower(): + # control = create_control_channel(filepath, displayName) + # write_hdf5(control, displayName, filepath, 'data') + + +# Category: Routing +# Reason: Orchestrates reading HDF5 files, calling helper_create_control_channel, and writing results - coordinates I/O with computation +# main function to create control channel using +# signal channel and save it to a file +def create_control_channel(filepath, arr, window=5001): + + storenames = arr[0, :] + storesList = arr[1, :] + + for i in range(storesList.shape[0]): + event_name, event = storesList[i], storenames[i] + if "control" in event_name.lower() and "cntrl" in event.lower(): + logger.debug("Creating control channel from signal channel using curve-fitting") + name = event_name.split("_")[-1] + signal = read_hdf5("signal_" + name, filepath, "data") + timestampNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + sampling_rate = np.full(timestampNew.shape, np.nan) + sampling_rate[0] = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] + + control = helper_create_control_channel(signal, timestampNew, window) + + write_hdf5(control, event_name, filepath, "data") + d = {"timestamps": timestampNew, "data": control, "sampling_rate": sampling_rate} + df = pd.DataFrame(d) + df.to_csv(os.path.join(os.path.dirname(filepath), event.lower() + ".csv"), index=False) + logger.info("Control channel from signal channel created using curve-fitting") + + +# Category: Analysis +# Reason: Data validation function - compares array lengths and returns indices for processing +# function to check control and signal channel has same length +# if not, take a smaller length and do pre-processing +def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList): + + indices = [] + for i in range(channels_arr.shape[1]): + idx_c = np.where(storesList == channels_arr[0, i])[0] + idx_s = np.where(storesList == channels_arr[1, i])[0] + control = read_hdf5(storenames[idx_c[0]], filepath, "data") + signal = 
read_hdf5(storenames[idx_s[0]], filepath, "data") + if control.shape[0] < signal.shape[0]: + indices.append(storesList[idx_c[0]]) + elif control.shape[0] > signal.shape[0]: + indices.append(storesList[idx_s[0]]) + else: + indices.append(storesList[idx_s[0]]) + + return indices diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py new file mode 100644 index 0000000..d8cc1bc --- /dev/null +++ b/src/guppy/analysis/z_score.py @@ -0,0 +1,234 @@ +import logging +import os + +import numpy as np +from scipy import signal as ss + +from .control_channel import helper_create_control_channel +from .io_utils import ( + fetchCoords, + find_files, + read_hdf5, + write_hdf5, +) + +logger = logging.getLogger(__name__) + + +# Category: Routing +# Reason: Orchestrates z-score computation for all channels in a session - loops through control/signal pairs, calls helper_z_score, writes results +# compute z-score and deltaF/F and save it to hdf5 file +def compute_z_score(filepath, inputParameters): + + logger.debug(f"Computing z-score for each of the data in {filepath}") + remove_artifacts = inputParameters["removeArtifacts"] + + path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) + path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) + + path = sorted(path_1 + path_2, key=str.casefold) + + b = np.divide(np.ones((100,)), 100) + a = 1 + + if len(path) % 2 != 0: + logger.error("There are not equal number of Control and Signal data") + raise Exception("There are not equal number of Control and Signal data") + + path = np.asarray(path).reshape(2, -1) + + for i in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, i])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, i])).split(".")[0]).split("_") + # dirname = os.path.dirname(path[i]) + + if name_1[-1] == name_2[-1]: + name = name_1[-1] + control = read_hdf5("", path[0, i], "data").reshape(-1) + signal = read_hdf5("", path[1, i], "data").reshape(-1) + # control_smooth = ss.filtfilt(b, a, control) + # signal_smooth = ss.filtfilt(b, a, signal) + # _score, dff = helper_z_score(control_smooth, signal_smooth) + z_score, dff, control_fit = helper_z_score(control, signal, filepath, name, inputParameters) + if remove_artifacts == True: + write_hdf5(z_score, "z_score_" + name, filepath, "data") + write_hdf5(dff, "dff_" + name, filepath, "data") + write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") + else: + write_hdf5(z_score, "z_score_" + name, filepath, "data") + write_hdf5(dff, "dff_" + name, filepath, "data") + write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") + else: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + + logger.info(f"z-score for the data in {filepath} computed.") + + +# Category: Routing +# Reason: Orchestrates z-score computation for one channel - handles artifact removal logic, coordinates calls to execute_controlFit_dff and z_score_computation +# helper function to compute z-score and deltaF/F +def helper_z_score(control, signal, filepath, name, inputParameters): # helper_z_score(control_smooth, signal_smooth): + + removeArtifacts = inputParameters["removeArtifacts"] + artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] + filter_window = inputParameters["filter_window"] + + isosbestic_control = inputParameters["isosbestic_control"] + 
tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + coords_path = os.path.join(filepath, "coordsForPreProcessing_" + name + ".npy") + + logger.info("Remove Artifacts : ", removeArtifacts) + + if (control == 0).all() == True: + control = np.zeros(tsNew.shape[0]) + + z_score_arr = np.array([]) + norm_data_arr = np.full(tsNew.shape[0], np.nan) + control_fit_arr = np.full(tsNew.shape[0], np.nan) + temp_control_arr = np.full(tsNew.shape[0], np.nan) + + if removeArtifacts == True: + coords = fetchCoords(filepath, name, tsNew) + + # for artifacts removal, each chunk which was selected by user is being processed individually and then + # z-score is calculated + for i in range(coords.shape[0]): + tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] + if isosbestic_control == False: + control_arr = helper_create_control_channel(signal[tsNew_index], tsNew[tsNew_index], window=101) + signal_arr = signal[tsNew_index] + norm_data, control_fit = execute_controlFit_dff( + control_arr, signal_arr, isosbestic_control, filter_window + ) + temp_control_arr[tsNew_index] = control_arr + if i < coords.shape[0] - 1: + blank_index = np.where((tsNew > coords[i, 1]) & (tsNew < coords[i + 1, 0]))[0] + temp_control_arr[blank_index] = np.full(blank_index.shape[0], np.nan) + else: + control_arr = control[tsNew_index] + signal_arr = signal[tsNew_index] + norm_data, control_fit = execute_controlFit_dff( + control_arr, signal_arr, isosbestic_control, filter_window + ) + norm_data_arr[tsNew_index] = norm_data + control_fit_arr[tsNew_index] = control_fit + + if artifactsRemovalMethod == "concatenate": + norm_data_arr = norm_data_arr[~np.isnan(norm_data_arr)] + control_fit_arr = control_fit_arr[~np.isnan(control_fit_arr)] + z_score = z_score_computation(norm_data_arr, tsNew, inputParameters) + z_score_arr = np.concatenate((z_score_arr, z_score)) + else: + tsNew_index = np.arange(tsNew.shape[0]) + norm_data, control_fit = execute_controlFit_dff(control, signal, isosbestic_control, filter_window) + z_score = z_score_computation(norm_data, tsNew, inputParameters) + z_score_arr = np.concatenate((z_score_arr, z_score)) + norm_data_arr[tsNew_index] = norm_data # np.concatenate((norm_data_arr, norm_data)) + control_fit_arr[tsNew_index] = control_fit # np.concatenate((control_fit_arr, control_fit)) + + # handle the case if there are chunks being cut in the front and the end + if isosbestic_control == False and removeArtifacts == True: + coords = coords.flatten() + # front chunk + idx = np.where((tsNew >= tsNew[0]) & (tsNew < coords[0]))[0] + temp_control_arr[idx] = np.full(idx.shape[0], np.nan) + # end chunk + idx = np.where((tsNew > coords[-1]) & (tsNew <= tsNew[-1]))[0] + temp_control_arr[idx] = np.full(idx.shape[0], np.nan) + write_hdf5(temp_control_arr, "control_" + name, filepath, "data") + + return z_score_arr, norm_data_arr, control_fit_arr + + +# Category: Routing +# Reason: Orchestrates signal processing pipeline - sequences calls to filterSignal, controlFit, and deltaFF with conditional logic +# function to filter control and signal channel, also execute above two function : controlFit and deltaFF +# function will also take care if there is only signal channel and no control channel +# if there is only signal channel, z-score will be computed using just signal channel +def execute_controlFit_dff(control, signal, isosbestic_control, filter_window): + + if isosbestic_control == False: + signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) + control_fit = 
controlFit(control, signal_smooth) + norm_data = deltaFF(signal_smooth, control_fit) + else: + control_smooth = filterSignal(filter_window, control) # ss.filtfilt(b, a, control) + signal_smooth = filterSignal(filter_window, signal) # ss.filtfilt(b, a, signal) + control_fit = controlFit(control_smooth, signal_smooth) + norm_data = deltaFF(signal_smooth, control_fit) + + return norm_data, control_fit + + +# Category: Analysis +# Reason: Pure mathematical computation - calculates deltaF/F from signal and control arrays with simple formula +# function to compute deltaF/F using fitted control channel and filtered signal channel +def deltaFF(signal, control): + + res = np.subtract(signal, control) + normData = np.divide(res, control) + # deltaFF = normData + normData = normData * 100 + + return normData + + +# Category: Analysis +# Reason: Pure algorithmic function - performs polynomial linear regression to fit control to signal +# function to fit control channel to signal channel +def controlFit(control, signal): + + p = np.polyfit(control, signal, 1) + arr = (p[0] * control) + p[1] + return arr + + +# Category: Analysis +# Reason: Pure signal processing function - applies moving average filter using scipy filtfilt +def filterSignal(filter_window, signal): + if filter_window == 0: + return signal + elif filter_window > 1: + b = np.divide(np.ones((filter_window,)), filter_window) + a = 1 + filtered_signal = ss.filtfilt(b, a, signal) + return filtered_signal + else: + raise Exception("Moving average filter window value is not correct.") + + +# Category: Analysis +# Reason: Pure statistical computation - calculates z-score from deltaF/F using different methods (standard, baseline, robust) +# function to compute z-score based on z-score computation method +def z_score_computation(dff, timestamps, inputParameters): + + zscore_method = inputParameters["zscore_method"] + baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] + + if zscore_method == "standard z-score": + numerator = np.subtract(dff, np.nanmean(dff)) + zscore = np.divide(numerator, np.nanstd(dff)) + elif zscore_method == "baseline z-score": + idx = np.where((timestamps > baseline_start) & (timestamps < baseline_end))[0] + if idx.shape[0] == 0: + logger.error( + "Baseline Window Parameters for baseline z-score computation zscore_method \ + are not correct." + ) + raise Exception( + "Baseline Window Parameters for baseline z-score computation zscore_method \ + are not correct." 
+ ) + else: + baseline_mean = np.nanmean(dff[idx]) + baseline_std = np.nanstd(dff[idx]) + numerator = np.subtract(dff, baseline_mean) + zscore = np.divide(numerator, baseline_std) + else: + median = np.median(dff) + mad = np.median(np.abs(dff - median)) + numerator = 0.6745 * (dff - median) + zscore = np.divide(numerator, mad) + + return zscore diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 69616d9..78f046a 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -2,34 +2,31 @@ import json import logging import os -import shutil import sys import matplotlib.pyplot as plt import numpy as np -import pandas as pd - -from .analysis.analysis import ( - addingNaNValues, - check_cntrl_sig_length, - eliminateData, - eliminateTs, - execute_controlFit_dff, - helper_create_control_channel, - removeTTLs, - z_score_computation, + +from .analysis.artifact_removal import ( + addingNaNtoChunksWithArtifacts, + processTimestampsForArtifacts, ) +from .analysis.combine_data import combineData from .analysis.io_utils import ( + check_storeslistfile, check_TDT, - decide_naming_convention, - fetchCoords, find_files, - get_all_stores_for_combining_data, read_hdf5, takeOnlyDirs, - write_hdf5, +) # Necessary for other modules that depend on preprocess.py +from .analysis.timestamp_correction import ( + add_control_channel, + create_control_channel, + decide_naming_convention_and_applyCorrection, + timestampCorrection_csv, + timestampCorrection_tdt, ) -from .combineDataFn import processTimestampsForCombiningData +from .analysis.z_score import compute_z_score logger = logging.getLogger(__name__) @@ -45,271 +42,6 @@ def writeToFile(value: str): file.write(value) -# Category: Routing -# Reason: Orchestrates reading HDF5 files, calling helper_create_control_channel, and writing results - coordinates I/O with computation -# main function to create control channel using -# signal channel and save it to a file -def create_control_channel(filepath, arr, window=5001): - - storenames = arr[0, :] - storesList = arr[1, :] - - for i in range(storesList.shape[0]): - event_name, event = storesList[i], storenames[i] - if "control" in event_name.lower() and "cntrl" in event.lower(): - logger.debug("Creating control channel from signal channel using curve-fitting") - name = event_name.split("_")[-1] - signal = read_hdf5("signal_" + name, filepath, "data") - timestampNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - sampling_rate = np.full(timestampNew.shape, np.nan) - sampling_rate[0] = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - - control = helper_create_control_channel(signal, timestampNew, window) - - write_hdf5(control, event_name, filepath, "data") - d = {"timestamps": timestampNew, "data": control, "sampling_rate": sampling_rate} - df = pd.DataFrame(d) - df.to_csv(os.path.join(os.path.dirname(filepath), event.lower() + ".csv"), index=False) - logger.info("Control channel from signal channel created using curve-fitting") - - -# Category: Routing -# Reason: Orchestrates validation logic, file copying, and storesList updates - coordinates multiple operations and file manipulations -# function to add control channel when there is no -# isosbestic control channel and update the storeslist file -def add_control_channel(filepath, arr): - - storenames = arr[0, :] - storesList = np.char.lower(arr[1, :]) - - keep_control = np.array([]) - # check a case if there is isosbestic control channel present - for i in range(storesList.shape[0]): - if "control" in 
storesList[i].lower(): - name = storesList[i].split("_")[-1] - new_str = "signal_" + str(name).lower() - find_signal = [True for i in storesList if i == new_str] - if len(find_signal) > 1: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - if len(find_signal) == 0: - logger.error( - "Isosbectic control channel parameter is set to False and still \ - storeslist file shows there is control channel present" - ) - raise Exception( - "Isosbectic control channel parameter is set to False and still \ - storeslist file shows there is control channel present" - ) - else: - continue - - for i in range(storesList.shape[0]): - if "signal" in storesList[i].lower(): - name = storesList[i].split("_")[-1] - new_str = "control_" + str(name).lower() - find_signal = [True for i in storesList if i == new_str] - if len(find_signal) == 0: - src, dst = os.path.join(filepath, arr[0, i] + ".hdf5"), os.path.join( - filepath, "cntrl" + str(i) + ".hdf5" - ) - shutil.copyfile(src, dst) - arr = np.concatenate((arr, [["cntrl" + str(i)], ["control_" + str(arr[1, i].split("_")[-1])]]), axis=1) - - np.savetxt(os.path.join(filepath, "storesList.csv"), arr, delimiter=",", fmt="%s") - - return arr - - -# Category: Routing -# Reason: Orchestrates timestamp correction workflow - loops through stores, coordinates reading/writing, calls validation and correction logic -# function to correct timestamps after eliminating first few seconds of the data (for csv data) -def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList): - - logger.debug( - f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" - ) - storenames = storesList[0, :] - storesList = storesList[1, :] - - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - try: - arr = np.asarray(arr).reshape(2, -1) - except: - logger.error("Error in saving stores list file or spelling mistake for control or signal") - raise Exception("Error in saving stores list file or spelling mistake for control or signal") - - indices = check_cntrl_sig_length(filepath, arr, storenames, storesList) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - # dirname = os.path.dirname(path[i]) - idx = np.where(storesList == indices[i])[0] - - if idx.shape[0] == 0: - logger.error(f"{arr[0,i]} does not exist in the stores list file.") - raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) - - timestamp = read_hdf5(storenames[idx][0], filepath, "timestamps") - sampling_rate = read_hdf5(storenames[idx][0], filepath, "sampling_rate") - - if name_1 == name_2: - correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] - timestampNew = timestamp[correctionIndex] - write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") - write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") - - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - logger.info("Timestamps corrected and converted to seconds.") - - -# Category: Routing -# Reason: 
Orchestrates timestamp correction workflow for TDT format - loops through stores, coordinates timestamp expansion algorithm with I/O -# function to correct timestamps after eliminating first few seconds of the data (for TDT data) -def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList): - - logger.debug( - f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" - ) - storenames = storesList[0, :] - storesList = storesList[1, :] - - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - - try: - arr = np.asarray(arr).reshape(2, -1) - except: - logger.error("Error in saving stores list file or spelling mistake for control or signal") - raise Exception("Error in saving stores list file or spelling mistake for control or signal") - - indices = check_cntrl_sig_length(filepath, arr, storenames, storesList) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - # dirname = os.path.dirname(path[i]) - idx = np.where(storesList == indices[i])[0] - - if idx.shape[0] == 0: - logger.error(f"{arr[0,i]} does not exist in the stores list file.") - raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) - - timestamp = read_hdf5(storenames[idx][0], filepath, "timestamps") - npoints = read_hdf5(storenames[idx][0], filepath, "npoints") - sampling_rate = read_hdf5(storenames[idx][0], filepath, "sampling_rate") - - if name_1 == name_2: - timeRecStart = timestamp[0] - timestamps = np.subtract(timestamp, timeRecStart) - adder = np.arange(npoints) / sampling_rate - lengthAdder = adder.shape[0] - timestampNew = np.zeros((len(timestamps), lengthAdder)) - for i in range(lengthAdder): - timestampNew[:, i] = np.add(timestamps, adder[i]) - timestampNew = (timestampNew.T).reshape(-1, order="F") - correctionIndex = np.where(timestampNew >= timeForLightsTurnOn)[0] - timestampNew = timestampNew[correctionIndex] - - write_hdf5(np.asarray([timeRecStart]), "timeCorrection_" + name_1, filepath, "timeRecStart") - write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") - write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(np.asarray([sampling_rate]), "timeCorrection_" + name_1, filepath, "sampling_rate") - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - logger.info("Timestamps corrected and converted to seconds.") - # return timeRecStart, correctionIndex, timestampNew - - -# Category: Routing -# Reason: Orchestrates applying timestamp corrections - reads correction indices, applies different logic based on data type, writes results -# function to apply correction to control, signal and event timestamps -def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): - - cond = check_TDT(os.path.dirname(filepath)) - - if cond == True: - timeRecStart = read_hdf5("timeCorrection_" + naming, filepath, "timeRecStart")[0] - - timestampNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - correctionIndex = read_hdf5("timeCorrection_" + naming, filepath, "correctionIndex") - - if "control" in displayName.lower() or "signal" in displayName.lower(): - split_name = displayName.split("_")[-1] - if split_name == naming: - 
pass - else: - correctionIndex = read_hdf5("timeCorrection_" + split_name, filepath, "correctionIndex") - arr = read_hdf5(event, filepath, "data") - if (arr == 0).all() == True: - arr = arr - else: - arr = arr[correctionIndex] - write_hdf5(arr, displayName, filepath, "data") - else: - arr = read_hdf5(event, filepath, "timestamps") - if cond == True: - res = (arr >= timeRecStart).all() - if res == True: - arr = np.subtract(arr, timeRecStart) - arr = np.subtract(arr, timeForLightsTurnOn) - else: - arr = np.subtract(arr, timeForLightsTurnOn) - else: - arr = np.subtract(arr, timeForLightsTurnOn) - write_hdf5(arr, displayName + "_" + naming, filepath, "ts") - - # if isosbestic_control==False and 'control' in displayName.lower(): - # control = create_control_channel(filepath, displayName) - # write_hdf5(control, displayName, filepath, 'data') - - -# Category: Routing -# Reason: Orchestrates naming validation and correction application - loops through channel pairs and delegates to applyCorrection -# function to check if naming convention was followed while saving storeslist file -# and apply timestamps correction using the function applyCorrection -def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList): - - logger.debug("Applying correction of timestamps to the data and event timestamps") - storesList = storesList[1, :] - - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - arr = np.asarray(arr).reshape(2, -1) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - # dirname = os.path.dirname(path[i]) - if name_1 == name_2: - applyCorrection(filepath, timeForLightsTurnOn, event, displayName, name_1) - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - logger.info("Timestamps corrections applied to the data and event timestamps.") - - # Category: Visualization/User Input # Reason: Creates matplotlib plots to display z-score results - pure visualization with no computation # function to plot z_score @@ -485,211 +217,6 @@ def visualizeControlAndSignal(filepath, removeArtifacts): visualize(filepath, ts, control, signal, cntrl_sig_fit, plot_name, removeArtifacts) -# Category: Routing -# Reason: Orchestrates NaN replacement for all stores - loops through channels and coordinates calls to addingNaNValues and removeTTLs -def addingNaNtoChunksWithArtifacts(filepath, events): - - logger.debug("Replacing chunks with artifacts by NaN values.") - storesList = events[1, :] - - path = decide_naming_convention(filepath) - - for j in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) - if name_1[-1] == name_2[-1]: - name = name_1[-1] - sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - for i in range(len(storesList)): - if ( - "control_" + name.lower() in storesList[i].lower() - or "signal_" + name.lower() in storesList[i].lower() - ): # changes done - data = addingNaNValues(filepath, storesList[i], name) - write_hdf5(data, storesList[i], filepath, "data") - else: - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - continue - else: - 
ts = removeTTLs(filepath, storesList[i], name) - write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") - - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - logger.info("Chunks with artifacts are replaced by NaN values.") - - -# Category: Routing -# Reason: Orchestrates timestamp concatenation for artifact removal - loops through stores, coordinates eliminateData/eliminateTs calls and writes results -# main function to align timestamps for control, signal and event timestamps for artifacts removal -def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): - - logger.debug("Processing timestamps to get rid of artifacts using concatenate method...") - storesList = events[1, :] - - path = decide_naming_convention(filepath) - - timestamp_dict = dict() - for j in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) - if name_1[-1] == name_2[-1]: - name = name_1[-1] - sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - - for i in range(len(storesList)): - if ( - "control_" + name.lower() in storesList[i].lower() - or "signal_" + name.lower() in storesList[i].lower() - ): # changes done - data, timestampNew = eliminateData( - filepath, timeForLightsTurnOn, storesList[i], sampling_rate, name - ) - write_hdf5(data, storesList[i], filepath, "data") - else: - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - continue - else: - ts = eliminateTs(filepath, timeForLightsTurnOn, storesList[i], sampling_rate, name) - write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") - - # timestamp_dict[name] = timestampNew - write_hdf5(timestampNew, "timeCorrection_" + name, filepath, "timestampNew") - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") - - -# Category: Routing -# Reason: Orchestrates z-score computation for one channel - handles artifact removal logic, coordinates calls to execute_controlFit_dff and z_score_computation -# helper function to compute z-score and deltaF/F -def helper_z_score(control, signal, filepath, name, inputParameters): # helper_z_score(control_smooth, signal_smooth): - - removeArtifacts = inputParameters["removeArtifacts"] - artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] - filter_window = inputParameters["filter_window"] - - isosbestic_control = inputParameters["isosbestic_control"] - tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - coords_path = os.path.join(filepath, "coordsForPreProcessing_" + name + ".npy") - - logger.info("Remove Artifacts : ", removeArtifacts) - - if (control == 0).all() == True: - control = np.zeros(tsNew.shape[0]) - - z_score_arr = np.array([]) - norm_data_arr = np.full(tsNew.shape[0], np.nan) - control_fit_arr = np.full(tsNew.shape[0], np.nan) - temp_control_arr = np.full(tsNew.shape[0], np.nan) - - if removeArtifacts == True: - coords = fetchCoords(filepath, name, tsNew) - - # for artifacts removal, each chunk which was selected by user is being processed individually and then - # z-score is calculated - for i in range(coords.shape[0]): - tsNew_index 
= np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] - if isosbestic_control == False: - control_arr = helper_create_control_channel(signal[tsNew_index], tsNew[tsNew_index], window=101) - signal_arr = signal[tsNew_index] - norm_data, control_fit = execute_controlFit_dff( - control_arr, signal_arr, isosbestic_control, filter_window - ) - temp_control_arr[tsNew_index] = control_arr - if i < coords.shape[0] - 1: - blank_index = np.where((tsNew > coords[i, 1]) & (tsNew < coords[i + 1, 0]))[0] - temp_control_arr[blank_index] = np.full(blank_index.shape[0], np.nan) - else: - control_arr = control[tsNew_index] - signal_arr = signal[tsNew_index] - norm_data, control_fit = execute_controlFit_dff( - control_arr, signal_arr, isosbestic_control, filter_window - ) - norm_data_arr[tsNew_index] = norm_data - control_fit_arr[tsNew_index] = control_fit - - if artifactsRemovalMethod == "concatenate": - norm_data_arr = norm_data_arr[~np.isnan(norm_data_arr)] - control_fit_arr = control_fit_arr[~np.isnan(control_fit_arr)] - z_score = z_score_computation(norm_data_arr, tsNew, inputParameters) - z_score_arr = np.concatenate((z_score_arr, z_score)) - else: - tsNew_index = np.arange(tsNew.shape[0]) - norm_data, control_fit = execute_controlFit_dff(control, signal, isosbestic_control, filter_window) - z_score = z_score_computation(norm_data, tsNew, inputParameters) - z_score_arr = np.concatenate((z_score_arr, z_score)) - norm_data_arr[tsNew_index] = norm_data # np.concatenate((norm_data_arr, norm_data)) - control_fit_arr[tsNew_index] = control_fit # np.concatenate((control_fit_arr, control_fit)) - - # handle the case if there are chunks being cut in the front and the end - if isosbestic_control == False and removeArtifacts == True: - coords = coords.flatten() - # front chunk - idx = np.where((tsNew >= tsNew[0]) & (tsNew < coords[0]))[0] - temp_control_arr[idx] = np.full(idx.shape[0], np.nan) - # end chunk - idx = np.where((tsNew > coords[-1]) & (tsNew <= tsNew[-1]))[0] - temp_control_arr[idx] = np.full(idx.shape[0], np.nan) - write_hdf5(temp_control_arr, "control_" + name, filepath, "data") - - return z_score_arr, norm_data_arr, control_fit_arr - - -# Category: Routing -# Reason: Orchestrates z-score computation for all channels in a session - loops through control/signal pairs, calls helper_z_score, writes results -# compute z-score and deltaF/F and save it to hdf5 file -def compute_z_score(filepath, inputParameters): - - logger.debug(f"Computing z-score for each of the data in {filepath}") - remove_artifacts = inputParameters["removeArtifacts"] - - path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) - path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) - - path = sorted(path_1 + path_2, key=str.casefold) - - b = np.divide(np.ones((100,)), 100) - a = 1 - - if len(path) % 2 != 0: - logger.error("There are not equal number of Control and Signal data") - raise Exception("There are not equal number of Control and Signal data") - - path = np.asarray(path).reshape(2, -1) - - for i in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, i])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, i])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) - - if name_1[-1] == name_2[-1]: - name = name_1[-1] - control = read_hdf5("", path[0, i], "data").reshape(-1) - signal = read_hdf5("", path[1, i], "data").reshape(-1) - # control_smooth = ss.filtfilt(b, a, control) - # 
signal_smooth = ss.filtfilt(b, a, signal) - # _score, dff = helper_z_score(control_smooth, signal_smooth) - z_score, dff, control_fit = helper_z_score(control, signal, filepath, name, inputParameters) - if remove_artifacts == True: - write_hdf5(z_score, "z_score_" + name, filepath, "data") - write_hdf5(dff, "dff_" + name, filepath, "data") - write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") - else: - write_hdf5(z_score, "z_score_" + name, filepath, "data") - write_hdf5(dff, "dff_" + name, filepath, "data") - write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - logger.info(f"z-score for the data in {filepath} computed.") - - # Category: Routing # Reason: Top-level orchestrator for timestamp correction across all sessions - loops through folders, coordinates timestamp correction workflow # function to execute timestamps corrections using functions timestampCorrection and decide_naming_convention_and_applyCorrection @@ -731,75 +258,6 @@ def execute_timestamp_correction(folderNames, inputParameters): logger.info(f"Timestamps corrections finished for {filepath}") -# Category: Routing -# Reason: Orchestrates reading and merging storeslist files from multiple sessions - loops through folders and consolidates results -# for combining data, reading storeslist file from both data and create a new storeslist array -def check_storeslistfile(folderNames): - storesList = np.array([[], []]) - for i in range(len(folderNames)): - filepath = folderNames[i] - storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - for j in range(len(storesListPath)): - filepath = storesListPath[j] - storesList = np.concatenate( - ( - storesList, - np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1), - ), - axis=1, - ) - - storesList = np.unique(storesList, axis=1) - - return storesList - - -# Category: Routing -# Reason: Orchestrates data combination workflow - validates sampling rates, coordinates processTimestampsForCombiningData, manages multi-session I/O -# function to combine data when there are two different data files for the same recording session -# it will combine the data, do timestamps processing and save the combined data in the first output folder. 
-def combineData(folderNames, inputParameters, storesList): - - logger.debug("Combining Data from different data files...") - timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] - op_folder = [] - for i in range(len(folderNames)): - filepath = folderNames[i] - op_folder.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) - - op_folder = list(np.concatenate(op_folder).flatten()) - sampling_rate_fp = [] - for i in range(len(folderNames)): - filepath = folderNames[i] - storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - for j in range(len(storesListPath)): - filepath = storesListPath[j] - storesList_new = np.genfromtxt( - os.path.join(filepath, "storesList.csv"), dtype="str", delimiter="," - ).reshape(2, -1) - sampling_rate_fp.append(glob.glob(os.path.join(filepath, "timeCorrection_*"))) - - # check if sampling rate is same for both data - sampling_rate_fp = np.concatenate(sampling_rate_fp) - sampling_rate = [] - for i in range(sampling_rate_fp.shape[0]): - sampling_rate.append(read_hdf5("", sampling_rate_fp[i], "sampling_rate")) - - res = all(i == sampling_rate[0] for i in sampling_rate) - if res == False: - logger.error("To combine the data, sampling rate for both the data should be same.") - raise Exception("To combine the data, sampling rate for both the data should be same.") - - # get the output folders informatinos - op = get_all_stores_for_combining_data(op_folder) - - # processing timestamps for combining the data - processTimestampsForCombiningData(op, timeForLightsTurnOn, storesList, sampling_rate[0]) - logger.info("Data is combined from different data files.") - - return op - - # Category: Routing # Reason: Top-level orchestrator for z-score computation and artifact removal - coordinates compute_z_score, artifact processing, and visualization calls # function to compute z-score and deltaF/F using functions : compute_z_score and/or processTimestampsForArtifacts From 29d5f9ac7f700957e2c0171e835c5201edb53442 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 5 Dec 2025 13:36:21 -0800 Subject: [PATCH 063/150] Removed categorization comments. 
--- src/guppy/analysis/artifact_removal.py | 12 ------------ src/guppy/analysis/combine_data.py | 2 -- src/guppy/analysis/control_channel.py | 2 -- src/guppy/analysis/io_utils.py | 18 ------------------ src/guppy/analysis/timestamp_correction.py | 14 -------------- src/guppy/analysis/z_score.py | 14 -------------- src/guppy/preprocess.py | 18 ------------------ 7 files changed, 80 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 3c51830..ac483bb 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -13,8 +13,6 @@ logger = logging.getLogger(__name__) -# Category: Routing -# Reason: Orchestrates NaN replacement for all stores - loops through channels and coordinates calls to addingNaNValues and removeTTLs def addingNaNtoChunksWithArtifacts(filepath, events): logger.debug("Replacing chunks with artifacts by NaN values.") @@ -49,8 +47,6 @@ def addingNaNtoChunksWithArtifacts(filepath, events): logger.info("Chunks with artifacts are replaced by NaN values.") -# Category: Routing -# Reason: Orchestrates timestamp concatenation for artifact removal - loops through stores, coordinates eliminateData/eliminateTs calls and writes results # main function to align timestamps for control, signal and event timestamps for artifacts removal def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): @@ -92,8 +88,6 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") -# Category: Analysis -# Reason: Pure algorithmic function - concatenates data chunks based on coordinate boundaries, adjusts timestamps mathematically # helper function to process control and signal timestamps def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): @@ -127,8 +121,6 @@ def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): return arr, ts_arr -# Category: Analysis -# Reason: Pure algorithmic function - processes event timestamps based on coordinate boundaries, aligns with data timeline # helper function to align event timestamps with the control and signal timestamps def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): @@ -157,8 +149,6 @@ def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): return ts_arr -# Category: Analysis -# Reason: Pure algorithmic function - replaces specified data chunks with NaN based on coordinate boundaries # adding nan values to removed chunks # when using artifacts removal method - replace with NaN def addingNaNValues(filepath, event, naming): @@ -183,8 +173,6 @@ def addingNaNValues(filepath, event, naming): return data -# Category: Analysis -# Reason: Pure algorithmic function - filters event timestamps to exclude artifact regions based on coordinates # remove event TTLs which falls in the removed chunks # when using artifacts removal method - replace with NaN def removeTTLs(filepath, event, naming): diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index 29e4b9d..d8f0ce6 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -17,8 +17,6 @@ logger = logging.getLogger(__name__) -# Category: Routing -# Reason: Orchestrates data combination workflow - validates sampling rates, coordinates processTimestampsForCombiningData, manages multi-session I/O # function to combine data when there are two 
different data files for the same recording session # it will combine the data, do timestamps processing and save the combined data in the first output folder. def combineData(folderNames, inputParameters, storesList): diff --git a/src/guppy/analysis/control_channel.py b/src/guppy/analysis/control_channel.py index 96665f2..2da82e2 100644 --- a/src/guppy/analysis/control_channel.py +++ b/src/guppy/analysis/control_channel.py @@ -35,8 +35,6 @@ def helper_create_control_channel(signal, timestamps, window): return control -# Category: Analysis -# Reason: Pure mathematical function for exponential curve fitting - no dependencies, pure computation # curve fit exponential function def curveFitFn(x, a, b, c): return a + (b * np.exp(-(1 / c) * x)) diff --git a/src/guppy/analysis/io_utils.py b/src/guppy/analysis/io_utils.py index 999c190..8b10127 100644 --- a/src/guppy/analysis/io_utils.py +++ b/src/guppy/analysis/io_utils.py @@ -10,8 +10,6 @@ logger = logging.getLogger(__name__) -# Category: Analysis -# Reason: Utility function for path filtering - pure data transformation with no GUI or orchestration def takeOnlyDirs(paths): removePaths = [] for p in paths: @@ -20,8 +18,6 @@ def takeOnlyDirs(paths): return list(set(paths) - set(removePaths)) -# Category: Analysis -# Reason: File system utility for case-insensitive file discovery - pure I/O helper with no orchestration # find files by ignoring the case sensitivity def find_files(path, glob_path, ignore_case=False): rule = ( @@ -42,8 +38,6 @@ def find_files(path, glob_path, ignore_case=False): return [os.path.join(path, n) for n in str_path if rule.match(n)] -# Category: Analysis -# Reason: Simple file type detection utility - pure file system check with no orchestration # check if dealing with TDT files or csv files def check_TDT(filepath): path = glob.glob(os.path.join(filepath, "*.tsq")) @@ -53,8 +47,6 @@ def check_TDT(filepath): return False -# Category: Analysis -# Reason: I/O utility function for reading HDF5 files - pure file access with no business logic or orchestration # function to read hdf5 file def read_hdf5(event, filepath, key): if event: @@ -74,8 +66,6 @@ def read_hdf5(event, filepath, key): return arr -# Category: Analysis -# Reason: I/O utility function for writing HDF5 files - pure file access with no business logic or orchestration # function to write hdf5 file def write_hdf5(data, event, filepath, key): event = event.replace("\\", "_") @@ -108,8 +98,6 @@ def write_hdf5(data, event, filepath, key): f.create_dataset(key, data=data) -# Category: Analysis -# Reason: Validation utility - checks file naming conventions and returns structured path array with no orchestration # function to check if the naming convention for saving storeslist file was followed or not def decide_naming_convention(filepath): path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) @@ -126,8 +114,6 @@ def decide_naming_convention(filepath): return path -# Category: Analysis -# Reason: I/O utility that loads artifact coordinates from .npy file or provides default - pure file loading with simple logic # function to read coordinates file which was saved by selecting chunks for artifacts removal def fetchCoords(filepath, naming, data): @@ -147,8 +133,6 @@ def fetchCoords(filepath, naming, data): return coords -# Category: Routing -# Reason: Organizes output folders for data combination - loops through numbered outputs and groups related folders def get_all_stores_for_combining_data(folderNames): op = [] for i 
in range(100): @@ -163,8 +147,6 @@ def get_all_stores_for_combining_data(folderNames): return op -# Category: Routing -# Reason: Orchestrates reading and merging storeslist files from multiple sessions - loops through folders and consolidates results # for combining data, reading storeslist file from both data and create a new storeslist array def check_storeslistfile(folderNames): storesList = np.array([[], []]) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 350dd5d..2e3185a 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -15,8 +15,6 @@ logger = logging.getLogger(__name__) -# Category: Routing -# Reason: Orchestrates validation logic, file copying, and storesList updates - coordinates multiple operations and file manipulations # function to add control channel when there is no # isosbestic control channel and update the storeslist file def add_control_channel(filepath, arr): @@ -63,8 +61,6 @@ def add_control_channel(filepath, arr): return arr -# Category: Routing -# Reason: Orchestrates timestamp correction workflow - loops through stores, coordinates reading/writing, calls validation and correction logic # function to correct timestamps after eliminating first few seconds of the data (for csv data) def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList): @@ -115,8 +111,6 @@ def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList): logger.info("Timestamps corrected and converted to seconds.") -# Category: Routing -# Reason: Orchestrates timestamp correction workflow for TDT format - loops through stores, coordinates timestamp expansion algorithm with I/O # function to correct timestamps after eliminating first few seconds of the data (for TDT data) def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList): @@ -179,8 +173,6 @@ def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList): # return timeRecStart, correctionIndex, timestampNew -# Category: Routing -# Reason: Orchestrates naming validation and correction application - loops through channel pairs and delegates to applyCorrection # function to check if naming convention was followed while saving storeslist file # and apply timestamps correction using the function applyCorrection def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList): @@ -209,8 +201,6 @@ def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, logger.info("Timestamps corrections applied to the data and event timestamps.") -# Category: Routing -# Reason: Orchestrates applying timestamp corrections - reads correction indices, applies different logic based on data type, writes results # function to apply correction to control, signal and event timestamps def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): @@ -252,8 +242,6 @@ def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): # write_hdf5(control, displayName, filepath, 'data') -# Category: Routing -# Reason: Orchestrates reading HDF5 files, calling helper_create_control_channel, and writing results - coordinates I/O with computation # main function to create control channel using # signal channel and save it to a file def create_control_channel(filepath, arr, window=5001): @@ -280,8 +268,6 @@ def create_control_channel(filepath, arr, window=5001): logger.info("Control channel from signal channel created using 
curve-fitting") -# Category: Analysis -# Reason: Data validation function - compares array lengths and returns indices for processing # function to check control and signal channel has same length # if not, take a smaller length and do pre-processing def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList): diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index d8cc1bc..b5032be 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -15,8 +15,6 @@ logger = logging.getLogger(__name__) -# Category: Routing -# Reason: Orchestrates z-score computation for all channels in a session - loops through control/signal pairs, calls helper_z_score, writes results # compute z-score and deltaF/F and save it to hdf5 file def compute_z_score(filepath, inputParameters): @@ -65,8 +63,6 @@ def compute_z_score(filepath, inputParameters): logger.info(f"z-score for the data in {filepath} computed.") -# Category: Routing -# Reason: Orchestrates z-score computation for one channel - handles artifact removal logic, coordinates calls to execute_controlFit_dff and z_score_computation # helper function to compute z-score and deltaF/F def helper_z_score(control, signal, filepath, name, inputParameters): # helper_z_score(control_smooth, signal_smooth): @@ -141,8 +137,6 @@ def helper_z_score(control, signal, filepath, name, inputParameters): # helper_ return z_score_arr, norm_data_arr, control_fit_arr -# Category: Routing -# Reason: Orchestrates signal processing pipeline - sequences calls to filterSignal, controlFit, and deltaFF with conditional logic # function to filter control and signal channel, also execute above two function : controlFit and deltaFF # function will also take care if there is only signal channel and no control channel # if there is only signal channel, z-score will be computed using just signal channel @@ -161,8 +155,6 @@ def execute_controlFit_dff(control, signal, isosbestic_control, filter_window): return norm_data, control_fit -# Category: Analysis -# Reason: Pure mathematical computation - calculates deltaF/F from signal and control arrays with simple formula # function to compute deltaF/F using fitted control channel and filtered signal channel def deltaFF(signal, control): @@ -174,8 +166,6 @@ def deltaFF(signal, control): return normData -# Category: Analysis -# Reason: Pure algorithmic function - performs polynomial linear regression to fit control to signal # function to fit control channel to signal channel def controlFit(control, signal): @@ -184,8 +174,6 @@ def controlFit(control, signal): return arr -# Category: Analysis -# Reason: Pure signal processing function - applies moving average filter using scipy filtfilt def filterSignal(filter_window, signal): if filter_window == 0: return signal @@ -198,8 +186,6 @@ def filterSignal(filter_window, signal): raise Exception("Moving average filter window value is not correct.") -# Category: Analysis -# Reason: Pure statistical computation - calculates z-score from deltaF/F using different methods (standard, baseline, robust) # function to compute z-score based on z-score computation method def z_score_computation(dff, timestamps, inputParameters): diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 78f046a..5ff8de6 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -35,15 +35,11 @@ plt.switch_backend("TKAgg") -# Category: Visualization/User Input -# Reason: Writes progress updates to file for GUI progress bar - couples backend to GUI 
feedback mechanism def writeToFile(value: str): with open(os.path.join(os.path.expanduser("~"), "pbSteps.txt"), "a") as file: file.write(value) -# Category: Visualization/User Input -# Reason: Creates matplotlib plots to display z-score results - pure visualization with no computation # function to plot z_score def visualize_z_score(filepath): @@ -66,8 +62,6 @@ def visualize_z_score(filepath): # plt.show() -# Category: Visualization/User Input -# Reason: Creates matplotlib plots to display deltaF/F results - pure visualization with no computation # function to plot deltaF/F def visualize_dff(filepath): name = os.path.basename(filepath) @@ -89,8 +83,6 @@ def visualize_dff(filepath): # plt.show() -# Category: Visualization/User Input -# Reason: Interactive matplotlib GUI with keyboard event handlers for artifact selection - core user input mechanism that saves coordinates to disk def visualize(filepath, x, y1, y2, y3, plot_name, removeArtifacts): # plotting control and signal data @@ -180,8 +172,6 @@ def plt_close_event(event): # return fig -# Category: Visualization/User Input -# Reason: Orchestrates visualization of all control/signal pairs - reads data and delegates to visualize() for user interaction # function to plot control and signal, also provide a feature to select chunks for artifacts removal def visualizeControlAndSignal(filepath, removeArtifacts): path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) @@ -217,8 +207,6 @@ def visualizeControlAndSignal(filepath, removeArtifacts): visualize(filepath, ts, control, signal, cntrl_sig_fit, plot_name, removeArtifacts) -# Category: Routing -# Reason: Top-level orchestrator for timestamp correction across all sessions - loops through folders, coordinates timestamp correction workflow # function to execute timestamps corrections using functions timestampCorrection and decide_naming_convention_and_applyCorrection def execute_timestamp_correction(folderNames, inputParameters): @@ -258,8 +246,6 @@ def execute_timestamp_correction(folderNames, inputParameters): logger.info(f"Timestamps corrections finished for {filepath}") -# Category: Routing -# Reason: Top-level orchestrator for z-score computation and artifact removal - coordinates compute_z_score, artifact processing, and visualization calls # function to compute z-score and deltaF/F using functions : compute_z_score and/or processTimestampsForArtifacts def execute_zscore(folderNames, inputParameters): @@ -312,8 +298,6 @@ def execute_zscore(folderNames, inputParameters): logger.info("Signal data and event timestamps are extracted.") -# Category: Routing -# Reason: Main entry point for Step 4 - orchestrates entire preprocessing workflow including timestamp correction, data combination, and z-score computation def extractTsAndSignal(inputParameters): logger.debug("Extracting signal data and event timestamps...") @@ -351,8 +335,6 @@ def extractTsAndSignal(inputParameters): execute_zscore(op_folder, inputParameters) -# Category: Routing -# Reason: Top-level entry point wrapper - handles error catching and calls extractTsAndSignal def main(input_parameters): try: extractTsAndSignal(input_parameters) From a9a65abf0b31e1aca2bc874efd6c4187c0801634 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 5 Dec 2025 15:00:15 -0800 Subject: [PATCH 064/150] Removed redundant fns --- src/guppy/analysis/combine_data.py | 251 +---------------------------- src/guppy/preprocess.py | 3 +- 2 files changed, 4 insertions(+), 250 deletions(-) diff --git 
a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index d8f0ce6..aa5a1dd 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -1,17 +1,17 @@ # TODO: remove redundant function implementations such as eliminateData, eliminateTs, read_hdf5, et cetera. -import fnmatch import glob import logging import os -import re import numpy as np from .io_utils import ( + decide_naming_convention, get_all_stores_for_combining_data, read_hdf5, takeOnlyDirs, + write_hdf5, ) logger = logging.getLogger(__name__) @@ -61,78 +61,6 @@ def combineData(folderNames, inputParameters, storesList): return op -def find_files(path, glob_path, ignore_case=False): - rule = ( - re.compile(fnmatch.translate(glob_path), re.IGNORECASE) - if ignore_case - else re.compile(fnmatch.translate(glob_path)) - ) - no_bytes_path = os.listdir(os.path.expanduser(path)) - str_path = [] - - # converting byte object to string - for x in no_bytes_path: - try: - str_path.append(x.decode("utf-8")) - except: - str_path.append(x) - - return [os.path.join(path, n) for n in str_path if rule.match(n)] - - -def read_hdf5(event, filepath, key): - if event: - op = os.path.join(filepath, event + ".hdf5") - else: - op = filepath - - if os.path.exists(op): - with h5py.File(op, "r") as f: - arr = np.asarray(f[key]) - else: - raise Exception("{}.hdf5 file does not exist".format(event)) - - return arr - - -def write_hdf5(data, event, filepath, key): - op = os.path.join(filepath, event + ".hdf5") - - if not os.path.exists(op): - with h5py.File(op, "w") as f: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - else: - with h5py.File(op, "r+") as f: - if key in list(f.keys()): - if type(data) is np.ndarray: - f[key].resize(data.shape) - arr = f[key] - arr[:] = data - else: - arr = f[key] - arr = data - else: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - - -def decide_naming_convention(filepath): - path_1 = find_files(filepath, "control*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) - - path_2 = find_files(filepath, "signal*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) - - path = sorted(path_1 + path_2, key=str.casefold) - - if len(path) % 2 != 0: - raise Exception("There are not equal number of Control and Signal data") - - path = np.asarray(path).reshape(2, -1) - - return path - - def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): arr = np.array([]) @@ -219,178 +147,3 @@ def processTimestampsForCombiningData(filepath, timeForLightsTurnOn, events, sam else: ts = eliminateTs(filepath[k], timeForLightsTurnOn, storesList[i], sampling_rate, name) write_hdf5(ts, storesList[i] + "_" + name, filepath[k][0], "ts") - - -import h5py -import numpy as np - -logger = logging.getLogger(__name__) - - -def find_files(path, glob_path, ignore_case=False): - rule = ( - re.compile(fnmatch.translate(glob_path), re.IGNORECASE) - if ignore_case - else re.compile(fnmatch.translate(glob_path)) - ) - no_bytes_path = os.listdir(os.path.expanduser(path)) - str_path = [] - - # converting byte object to string - for x in no_bytes_path: - try: - str_path.append(x.decode("utf-8")) - except: - str_path.append(x) - - return [os.path.join(path, n) for n in str_path if rule.match(n)] - - -def read_hdf5(event, filepath, key): - if event: - op = os.path.join(filepath, event + ".hdf5") - else: - op = filepath - - if os.path.exists(op): - with h5py.File(op, 
"r") as f: - arr = np.asarray(f[key]) - else: - raise Exception("{}.hdf5 file does not exist".format(event)) - - return arr - - -def write_hdf5(data, event, filepath, key): - op = os.path.join(filepath, event + ".hdf5") - - if not os.path.exists(op): - with h5py.File(op, "w") as f: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - else: - with h5py.File(op, "r+") as f: - if key in list(f.keys()): - if type(data) is np.ndarray: - f[key].resize(data.shape) - arr = f[key] - arr[:] = data - else: - arr = f[key] - arr = data - else: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - - -def decide_naming_convention(filepath): - path_1 = find_files(filepath, "control*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) - - path_2 = find_files(filepath, "signal*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) - - path = sorted(path_1 + path_2, key=str.casefold) - - if len(path) % 2 != 0: - raise Exception("There are not equal number of Control and Signal data") - - path = np.asarray(path).reshape(2, -1) - - return path - - -def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - arr = np.array([]) - ts_arr = np.array([]) - for i in range(len(filepath)): - ts = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") - data = read_hdf5(event, filepath[i], "data").reshape(-1) - - # index = np.where((ts>coords[i,0]) & (ts Date: Fri, 5 Dec 2025 15:19:35 -0800 Subject: [PATCH 065/150] Removed redundant fns --- src/guppy/analysis/combine_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index aa5a1dd..f89315f 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -1,5 +1,3 @@ -# TODO: remove redundant function implementations such as eliminateData, eliminateTs, read_hdf5, et cetera. - import glob import logging import os From 1bb8de4a2df3544f656bc4f52c48c75c0f0b338e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 5 Dec 2025 17:54:37 -0800 Subject: [PATCH 066/150] Peeled off read operations from timestamp_correction CSV function. 
--- src/guppy/analysis/timestamp_correction.py | 146 ++--- src/guppy/preprocess.py | 58 +- timestamp_correction_analysis.md | 723 +++++++++++++++++++++ 3 files changed, 851 insertions(+), 76 deletions(-) create mode 100644 timestamp_correction_analysis.md diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 2e3185a..e179d26 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -1,6 +1,5 @@ import logging import os -import shutil import numpy as np import pandas as pd @@ -15,91 +14,37 @@ logger = logging.getLogger(__name__) -# function to add control channel when there is no -# isosbestic control channel and update the storeslist file -def add_control_channel(filepath, arr): - - storenames = arr[0, :] - storesList = np.char.lower(arr[1, :]) - - keep_control = np.array([]) - # check a case if there is isosbestic control channel present - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower(): - name = storesList[i].split("_")[-1] - new_str = "signal_" + str(name).lower() - find_signal = [True for i in storesList if i == new_str] - if len(find_signal) > 1: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - if len(find_signal) == 0: - logger.error( - "Isosbectic control channel parameter is set to False and still \ - storeslist file shows there is control channel present" - ) - raise Exception( - "Isosbectic control channel parameter is set to False and still \ - storeslist file shows there is control channel present" - ) - else: - continue - - for i in range(storesList.shape[0]): - if "signal" in storesList[i].lower(): - name = storesList[i].split("_")[-1] - new_str = "control_" + str(name).lower() - find_signal = [True for i in storesList if i == new_str] - if len(find_signal) == 0: - src, dst = os.path.join(filepath, arr[0, i] + ".hdf5"), os.path.join( - filepath, "cntrl" + str(i) + ".hdf5" - ) - shutil.copyfile(src, dst) - arr = np.concatenate((arr, [["cntrl" + str(i)], ["control_" + str(arr[1, i].split("_")[-1])]]), axis=1) - - np.savetxt(os.path.join(filepath, "storesList.csv"), arr, delimiter=",", fmt="%s") - - return arr - - # function to correct timestamps after eliminating first few seconds of the data (for csv data) -def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList): - +def timestampCorrection_csv( + filepath, timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps, name_to_sampling_rate +): logger.debug( f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" ) storenames = storesList[0, :] - storesList = storesList[1, :] + names_for_storenames = storesList[1, :] + arr = get_control_and_signal_channel_names(storesList) - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - try: - arr = np.asarray(arr).reshape(2, -1) - except: - logger.error("Error in saving stores list file or spelling mistake for control or signal") - raise Exception("Error in saving stores list file or spelling mistake for control or signal") - - indices = check_cntrl_sig_length(filepath, arr, storenames, storesList) + indices = check_cntrl_sig_length(arr, name_to_data) for i in range(arr.shape[1]): name_1 = arr[0, i].split("_")[-1] name_2 
= arr[1, i].split("_")[-1] # dirname = os.path.dirname(path[i]) - idx = np.where(storesList == indices[i])[0] + idx = np.where(names_for_storenames == indices[i])[0] if idx.shape[0] == 0: logger.error(f"{arr[0,i]} does not exist in the stores list file.") raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) - timestamp = read_hdf5(storenames[idx][0], filepath, "timestamps") - sampling_rate = read_hdf5(storenames[idx][0], filepath, "sampling_rate") + name = names_for_storenames[idx][0] + timestamp = name_to_timestamps[name] + sampling_rate = name_to_sampling_rate[name] if name_1 == name_2: correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] timestampNew = timestamp[correctionIndex] + # TODO: Pull out write operations into preprocess.py write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") @@ -270,19 +215,72 @@ def create_control_channel(filepath, arr, window=5001): # function to check control and signal channel has same length # if not, take a smaller length and do pre-processing -def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList): +def check_cntrl_sig_length(channels_arr, name_to_data): indices = [] for i in range(channels_arr.shape[1]): - idx_c = np.where(storesList == channels_arr[0, i])[0] - idx_s = np.where(storesList == channels_arr[1, i])[0] - control = read_hdf5(storenames[idx_c[0]], filepath, "data") - signal = read_hdf5(storenames[idx_s[0]], filepath, "data") + control_name = channels_arr[0, i] + signal_name = channels_arr[1, i] + control = name_to_data[control_name] + signal = name_to_data[signal_name] if control.shape[0] < signal.shape[0]: - indices.append(storesList[idx_c[0]]) + indices.append(control_name) elif control.shape[0] > signal.shape[0]: - indices.append(storesList[idx_s[0]]) + indices.append(signal_name) else: - indices.append(storesList[idx_s[0]]) + indices.append(signal_name) return indices + + +def get_control_and_signal_channel_names(storesList): + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + channels_arr = [] + for i in range(names_for_storenames.shape[0]): + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + channels_arr.append(names_for_storenames[i]) + + channels_arr = sorted(channels_arr, key=str.casefold) + try: + channels_arr = np.asarray(channels_arr).reshape(2, -1) + except: + logger.error("Error in saving stores list file or spelling mistake for control or signal") + raise Exception("Error in saving stores list file or spelling mistake for control or signal") + + return channels_arr + + +def read_control_and_signal(filepath, storesList): + channels_arr = get_control_and_signal_channel_names(storesList) + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + name_to_data = {} + name_to_timestamps = {} + name_to_sampling_rate = {} + + for i in range(channels_arr.shape[1]): + control_name = channels_arr[0, i] + signal_name = channels_arr[1, i] + idx_c = np.where(storesList == control_name)[0] + idx_s = np.where(storesList == signal_name)[0] + control_storename = storenames[idx_c[0]] + signal_storename = storenames[idx_s[0]] + + control_data = read_hdf5(control_storename, filepath, "data") + signal_data = read_hdf5(signal_storename, filepath, "data") + control_timestamps = 
read_hdf5(control_storename, filepath, "timestamps") + signal_timestamps = read_hdf5(signal_storename, filepath, "timestamps") + control_sampling_rate = read_hdf5(control_storename, filepath, "sampling_rate") + signal_sampling_rate = read_hdf5(signal_storename, filepath, "sampling_rate") + + name_to_data[control_name] = control_data + name_to_data[signal_name] = signal_data + name_to_timestamps[control_name] = control_timestamps + name_to_timestamps[signal_name] = signal_timestamps + name_to_sampling_rate[control_name] = control_sampling_rate + name_to_sampling_rate[signal_name] = signal_sampling_rate + + return name_to_data, name_to_timestamps, name_to_sampling_rate diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 15c547f..74033f8 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -2,6 +2,7 @@ import json import logging import os +import shutil import sys import matplotlib.pyplot as plt @@ -21,9 +22,9 @@ takeOnlyDirs, ) from .analysis.timestamp_correction import ( - add_control_channel, create_control_channel, decide_naming_convention_and_applyCorrection, + read_control_and_signal, timestampCorrection_csv, timestampCorrection_tdt, ) @@ -208,6 +209,54 @@ def visualizeControlAndSignal(filepath, removeArtifacts): visualize(filepath, ts, control, signal, cntrl_sig_fit, plot_name, removeArtifacts) +# This function just creates placeholder Control-HDF5 files that are then immediately overwritten later on in the pipeline. +# TODO: Refactor this function to avoid unnecessary file creation. +# function to add control channel when there is no +# isosbestic control channel and update the storeslist file +def add_control_channel(filepath, arr): + + storenames = arr[0, :] + storesList = np.char.lower(arr[1, :]) + + keep_control = np.array([]) + # check a case if there is isosbestic control channel present + for i in range(storesList.shape[0]): + if "control" in storesList[i].lower(): + name = storesList[i].split("_")[-1] + new_str = "signal_" + str(name).lower() + find_signal = [True for i in storesList if i == new_str] + if len(find_signal) > 1: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + if len(find_signal) == 0: + logger.error( + "Isosbectic control channel parameter is set to False and still \ + storeslist file shows there is control channel present" + ) + raise Exception( + "Isosbectic control channel parameter is set to False and still \ + storeslist file shows there is control channel present" + ) + else: + continue + + for i in range(storesList.shape[0]): + if "signal" in storesList[i].lower(): + name = storesList[i].split("_")[-1] + new_str = "control_" + str(name).lower() + find_signal = [True for i in storesList if i == new_str] + if len(find_signal) == 0: + src, dst = os.path.join(filepath, arr[0, i] + ".hdf5"), os.path.join( + filepath, "cntrl" + str(i) + ".hdf5" + ) + shutil.copyfile(src, dst) + arr = np.concatenate((arr, [["cntrl" + str(i)], ["control_" + str(arr[1, i].split("_")[-1])]]), axis=1) + + np.savetxt(os.path.join(filepath, "storesList.csv"), arr, delimiter=",", fmt="%s") + + return arr + + # function to execute timestamps corrections using functions timestampCorrection and decide_naming_convention_and_applyCorrection def execute_timestamp_correction(folderNames, inputParameters): @@ -231,7 +280,12 @@ def execute_timestamp_correction(folderNames, inputParameters): if cond == True: timestampCorrection_tdt(filepath, 
timeForLightsTurnOn, storesList) else: - timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList) + + control_and_signal_dicts = read_control_and_signal(filepath, storesList) + name_to_data, name_to_timestamps, name_to_sampling_rate = control_and_signal_dicts + timestampCorrection_csv( + filepath, timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps, name_to_sampling_rate + ) for k in range(storesList.shape[1]): decide_naming_convention_and_applyCorrection( diff --git a/timestamp_correction_analysis.md b/timestamp_correction_analysis.md new file mode 100644 index 0000000..121aa3f --- /dev/null +++ b/timestamp_correction_analysis.md @@ -0,0 +1,723 @@ +# Timestamp Correction Module Analysis + +## Overview + +The `timestamp_correction.py` module handles the correction of timestamps for photometry data, including: +- Eliminating the first N seconds of recording (light stabilization period) +- Expanding TDT block timestamps into continuous timestamps +- Creating synthetic control channels when no isosbestic control is present +- Applying corrections to both data channels and event markers + +## Module Structure + +### Entry Point from preprocess.py + +```python +execute_timestamp_correction(folderNames, inputParameters) # preprocess.py:212 +``` + +This orchestrator loops through all session folders and calls functions in this module. + +## Two-Phase Control Channel Creation Pattern + +### Understanding add_control_channel vs create_control_channel + +These two functions work together in a **two-phase process** to handle synthetic control channel generation. They are **not redundant** but serve distinct purposes: + +#### Phase 1: `add_control_channel` (Called BEFORE timestamp correction) + +**Execution:** Line 229 in `execute_timestamp_correction` + +**Purpose:** Create **PLACEHOLDER** control files to satisfy workflow requirements + +**What it does:** +1. Validates that if `isosbestic_control=False`, no real control channels exist +2. For each signal channel without a matching control: + - Copies the raw signal HDF5 file to `cntrl{i}.hdf5` (placeholder) + - Adds entry to storesList: `[["cntrl{i}"], ["control_{region}"]]` +3. Saves updated `storesList.csv` + +**Files created:** +- `cntrl0.hdf5`, `cntrl1.hdf5`, etc. (copies of **RAW** signal data) +- Updated `storesList.csv` with placeholder entries + +**Why it's needed:** +- Timestamp correction workflow expects **paired** control/signal channels in storesList +- Without placeholders, the pairing logic in `timestampCorrection_xxx` and `check_cntrl_sig_length` would fail +- The placeholder **data is never actually used** - it just satisfies structural requirements + +#### Phase 2: `create_control_channel` (Called AFTER timestamp correction) + +**Execution:** Line 243 in `execute_timestamp_correction` + +**Purpose:** Generate **ACTUAL** synthetic control via curve fitting and overwrite placeholders + +**What it does:** +1. Looks for placeholder files (checks: `"control" in event_name.lower() and "cntrl" in event.lower()`) +2. Reads the **CORRECTED** signal data: `signal_{region}.hdf5` (after timestamp correction) +3. Calls `helper_create_control_channel()` to: + - Apply Savitzky-Golay filter to cleaned signal + - Fit to exponential function: `f(x) = a + b * exp(-(1/c) * x)` +4. **OVERWRITES** the placeholder `control_{region}.hdf5` with real synthetic control +5. 
Also exports to CSV format (legacy) + +**Files written:** +- `control_{region}.hdf5` → `data` (replaces placeholder with curve-fitted control) +- `{raw_name}.csv` (timestamps, data, sampling_rate columns) + +**Why it's separate:** +- Requires **timestamp-corrected** signal data (doesn't exist until after lines 232-239) +- Curve fitting algorithm needs clean timestamps (first N seconds eliminated) +- Cannot be done before timestamp correction without re-correcting the synthetic control + +#### Execution Timeline + +```python +# When isosbestic_control == False: + +# ========== PHASE 1: BEFORE TIMESTAMP CORRECTION ========== +# Line 229: Create placeholders (just file copies) +storesList = add_control_channel(filepath, storesList) +# Result: storesList now has paired structure +# [["Dv1A", "cntrl0"], ["signal_dms", "control_dms"]] +# Files: cntrl0.hdf5 (copy of raw signal, never used) + +# ========== TIMESTAMP CORRECTION PHASE ========== +# Lines 232-234: Process both signal AND placeholder control +timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) +# Result: Creates timeCorrection_dms.hdf5 with correctionIndex + +# Lines 236-239: Apply corrections to all channels +decide_naming_convention_and_applyCorrection(...) +# Result: signal_dms.hdf5 now contains corrected signal data +# control_dms.hdf5 still contains uncorrected placeholder copy + +# ========== PHASE 2: AFTER TIMESTAMP CORRECTION ========== +# Line 243: Generate REAL synthetic controls +create_control_channel(filepath, storesList, window=101) +# Result: control_dms.hdf5 OVERWRITTEN with curve-fitted synthetic control +# Now contains valid control data derived from corrected signal +``` + +#### Why This Design Exists + +This is a **chicken-and-egg problem solved with placeholders:** + +1. **Requirement:** Timestamp correction expects paired control/signal channels +2. **Constraint:** Synthetic control generation requires timestamp-corrected signal data +3. **Solution:** Create dummy placeholders → correct everything → replace placeholders with real data + +#### Visual Flow + +```mermaid +flowchart TD + A[isosbestic_control = False] --> B[add_control_channel] + B --> C[Copy signal.hdf5 to cntrl0.hdf5] + C --> D[Update storesList.csv] + + D --> E[timestampCorrection_xxx] + E --> F[Creates timeCorrection_dms.hdf5] + + F --> G[decide_naming_convention_and_applyCorrection] + G --> H[Corrects signal_dms.hdf5] + G --> I[Corrects control_dms.hdf5
still contains placeholder] + + I --> J[create_control_channel] + J --> K[Read corrected signal_dms.hdf5] + K --> L[helper_create_control_channel<br>curve fit] + L --> M[OVERWRITE control_dms.hdf5
with synthetic control] + + style C fill:#fff3cd + style I fill:#fff3cd + style M fill:#d4edda +``` + +#### Refactoring Opportunity + +This placeholder pattern is a **code smell** indicating potential design improvements: + +**Issues:** +1. **Unnecessary I/O:** Placeholder files are written and then overwritten +2. **Confusing flow:** Hard to understand that placeholders are temporary +3. **Tight coupling:** Timestamp correction assumes paired files exist +4. **Wasted computation:** Placeholder controls get timestamp-corrected unnecessarily + +**Potential Improvements:** + +**Option 1: Lazy Control Creation** +- Modify timestamp correction to handle missing controls gracefully +- Only create synthetic controls after all corrections complete +- Remove placeholder file creation entirely + +**Option 2: Data Structure Refactoring** +- Use a data structure that doesn't require physical paired files upfront +- Track "needs synthetic control" as metadata rather than file presence +- Generate and write controls only once at the end + +**Option 3: Two-Pass Workflow** +- First pass: Correct only signal channels +- Second pass: Generate synthetic controls from corrected signals +- Would require refactoring `check_cntrl_sig_length` and pairing logic + +## Function Catalog + +### 1. add_control_channel +**Location:** `timestamp_correction.py:20` +**Purpose:** Create placeholder control channel files when no isosbestic control exists + +```python +def add_control_channel(filepath, arr) -> arr +``` + +**Input:** +- `filepath`: Path to session output folder +- `arr`: 2D array `[[storenames], [storesList]]` from storesList.csv + +**Process:** +1. Validates that control/signal pairs match (raises error if mismatched) +2. For each signal channel without a matching control: + - Copies signal HDF5 file to `cntrl{i}.hdf5` (placeholder) + - Adds entry to storesList array: `[["cntrl{i}"], ["control_{region}"]]` +3. Writes updated storesList.csv + +**Output:** +- Updated `arr` with new control channel entries +- **Files Written:** Updated `storesList.csv`, copied `cntrl*.hdf5` files + +**I/O Summary:** +- **Reads:** Signal HDF5 files (via shutil.copyfile) +- **Writes:** `storesList.csv`, placeholder `cntrl*.hdf5` files + +--- + +### 2. timestampCorrection_csv +**Location:** `timestamp_correction.py:65` +**Purpose:** Correct timestamps for CSV-format data (Doric, NPM, custom CSV) + +```python +def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList) +``` + +**Input:** +- `filepath`: Path to session output folder +- `timeForLightsTurnOn`: Seconds to eliminate from start (default: 1) +- `storesList`: 2D array `[[storenames], [storesList]]` + +**Process:** +1. Filters storesList to control/signal channels only +2. Pairs control/signal channels, validates naming matches +3. Calls `check_cntrl_sig_length()` to determine which channel to use (shorter one) +4. For each control/signal pair: + - **Reads:** `timestamps` and `sampling_rate` from raw HDF5 + - **Computes:** `correctionIndex = np.where(timestamp >= timeForLightsTurnOn)` + - **Writes:** `timeCorrection_{region}.hdf5` with keys: + - `timestampNew`: Corrected timestamps + - `correctionIndex`: Indices to keep + - `sampling_rate`: Sampling rate + +**Output:** +- **Files Written:** `timeCorrection_{region}.hdf5` for each control/signal pair + +**I/O Summary:** +- **Reads:** `{storename}.hdf5` → `timestamps`, `sampling_rate` +- **Writes:** `timeCorrection_{region}.hdf5` → `timestampNew`, `correctionIndex`, `sampling_rate` + +--- + +### 3. 
timestampCorrection_tdt +**Location:** `timestamp_correction.py:115` +**Purpose:** Correct timestamps for TDT-format data (expands block timestamps) + +```python +def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) +``` + +**Input:** Same as `timestampCorrection_csv` + +**Process:** +1. Filters storesList to control/signal channels only +2. Pairs control/signal channels, validates naming matches +3. Calls `check_cntrl_sig_length()` to determine which channel to use +4. For each control/signal pair: + - **Reads:** `timestamps`, `npoints`, `sampling_rate` from raw HDF5 + - **TDT-specific expansion algorithm:** + ```python + timeRecStart = timestamp[0] + timestamps = np.subtract(timestamp, timeRecStart) # Zero-base + adder = np.arange(npoints) / sampling_rate # Within-block offsets + # Expand: for each block timestamp, add within-block offsets + timestampNew = np.zeros((len(timestamps), lengthAdder)) + for i in range(lengthAdder): + timestampNew[:, i] = np.add(timestamps, adder[i]) + timestampNew = (timestampNew.T).reshape(-1, order="F") # Flatten + correctionIndex = np.where(timestampNew >= timeForLightsTurnOn) + timestampNew = timestampNew[correctionIndex] + ``` + - **Writes:** `timeCorrection_{region}.hdf5` with keys: + - `timeRecStart`: Recording start time (TDT-specific) + - `timestampNew`: Expanded, corrected timestamps + - `correctionIndex`: Indices to keep + - `sampling_rate`: Sampling rate + +**Output:** +- **Files Written:** `timeCorrection_{region}.hdf5` with TDT-specific `timeRecStart` key + +**I/O Summary:** +- **Reads:** `{storename}.hdf5` → `timestamps`, `npoints`, `sampling_rate` +- **Writes:** `timeCorrection_{region}.hdf5` → `timeRecStart`, `timestampNew`, `correctionIndex`, `sampling_rate` + +--- + +### 4. check_cntrl_sig_length +**Location:** `timestamp_correction.py:273` +**Purpose:** Determine which channel (control or signal) to use as reference based on length + +```python +def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList) -> indices +``` + +**Input:** +- `filepath`: Path to session output folder +- `channels_arr`: Paired control/signal array `[["control_A", "control_B"], ["signal_A", "signal_B"]]` +- `storenames`: Raw HDF5 filenames +- `storesList`: Semantic channel names + +**Process:** +1. For each control/signal pair: + - **Reads:** `data` from both control and signal HDF5 + - Compares lengths: `control.shape[0]` vs `signal.shape[0]` + - Returns the shorter one's storename (or signal if equal) + +**Output:** +- List of storenames to use for timestamp correction (one per pair) + +**I/O Summary:** +- **Reads:** `{control_storename}.hdf5` → `data`, `{signal_storename}.hdf5` → `data` + +**Note:** This is a pure analysis function but performs I/O to determine which data to use. + +--- + +### 5. decide_naming_convention_and_applyCorrection +**Location:** `timestamp_correction.py:178` +**Purpose:** Loop through all channels and apply timestamp corrections + +```python +def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList) +``` + +**Input:** +- `filepath`: Path to session output folder +- `timeForLightsTurnOn`: Seconds eliminated from start +- `event`: Raw storename (e.g., "Dv1A") +- `displayName`: Semantic name (e.g., "control_DMS") +- `storesList`: Full storesList array + +**Process:** +1. Filters storesList to control/signal channels +2. Pairs channels and validates naming conventions +3. 
For each pair, calls `applyCorrection(filepath, timeForLightsTurnOn, event, displayName, region)` + +**Output:** +- Delegates to `applyCorrection()` (no direct I/O) + +--- + +### 6. applyCorrection +**Location:** `timestamp_correction.py:205` +**Purpose:** Apply timestamp corrections to data channels or event markers + +```python +def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming) +``` + +**Input:** +- `filepath`: Path to session output folder +- `timeForLightsTurnOn`: Seconds eliminated from start +- `event`: Raw storename +- `displayName`: Semantic display name +- `naming`: Region identifier (e.g., "dms") + +**Process:** + +**For Control/Signal Channels:** +1. **Reads:** `timeCorrection_{naming}.hdf5` → `correctionIndex` +2. **Reads:** `{event}.hdf5` → `data` +3. **Applies:** `arr = arr[correctionIndex]` (crops data) +4. **Writes:** `{displayName}.hdf5` → `data` (overwrites with corrected data) + +**For Event Channels:** +1. Detects TDT format: `check_TDT(os.path.dirname(filepath))` +2. **Reads:** `timeCorrection_{naming}.hdf5` → `timeRecStart` (if TDT) +3. **Reads:** `{event}.hdf5` → `timestamps` +4. **Applies corrections:** + - If TDT and timestamps >= timeRecStart: subtract both `timeRecStart` and `timeForLightsTurnOn` + - Otherwise: subtract only `timeForLightsTurnOn` +5. **Writes:** `{event}_{naming}.hdf5` → `ts` (corrected event timestamps) + +**Output:** +- **Files Written:** + - `{displayName}.hdf5` → `data` (for control/signal) + - `{event}_{naming}.hdf5` → `ts` (for events) + +**I/O Summary:** +- **Reads:** `timeCorrection_{naming}.hdf5`, `{event}.hdf5` +- **Writes:** `{displayName}.hdf5` or `{event}_{naming}.hdf5` + +--- + +### 7. create_control_channel +**Location:** `timestamp_correction.py:247` +**Purpose:** Generate synthetic control channel using curve fitting (when no isosbestic control exists) + +```python +def create_control_channel(filepath, arr, window=5001) +``` + +**Input:** +- `filepath`: Path to session output folder +- `arr`: storesList array `[[storenames], [storesList]]` +- `window`: Savitzky-Golay filter window (default: 5001) + +**Process:** +1. Loops through storesList to find placeholder control channels (`cntrl` in storename) +2. 
For each placeholder: + - **Reads:** `signal_{region}.hdf5` → `data` (corrected signal) + - **Reads:** `timeCorrection_{region}.hdf5` → `timestampNew`, `sampling_rate` + - **Calls:** `helper_create_control_channel(signal, timestampNew, window)` from `control_channel.py` + - Applies Savitzky-Golay filter + - Fits to exponential: `f(x) = a + b * exp(-(1/c) * x)` + - **Writes:** `{control_name}.hdf5` → `data` (synthetic control) + - **Writes:** `{event_name}.csv` with columns: `timestamps`, `data`, `sampling_rate` + +**Output:** +- **Files Written:** + - `control_{region}.hdf5` → `data` (replaces placeholder) + - `{raw_name}.csv` (legacy format export) + +**I/O Summary:** +- **Reads:** `signal_{region}.hdf5` → `data`, `timeCorrection_{region}.hdf5` → `timestampNew`, `sampling_rate` +- **Writes:** `control_{region}.hdf5` → `data`, `{raw_name}.csv` + +--- + +## Data Flow Diagram + +### High-Level Flow (called from execute_timestamp_correction) + +```mermaid +flowchart TD + A[execute_timestamp_correction] --> B[Read storesList.csv] + B --> C{isosbestic_control?} + + C -->|False| D[add_control_channel] + C -->|True| E{Check format} + D --> E + + E -->|TDT| F[timestampCorrection_tdt] + E -->|CSV/Doric/NPM| G[timestampCorrection_csv] + + F --> H[Loop: decide_naming_convention_and_applyCorrection] + G --> H + + H --> I[For each store: applyCorrection] + + I --> J{isosbestic_control?} + J -->|False| K[create_control_channel] + J -->|True| L[Done] + K --> L + + style A fill:#e1f5ff + style L fill:#d4edda +``` + +### Detailed Flow: timestampCorrection Functions + +```mermaid +flowchart LR + A[Raw HDF5 files] --> B[check_cntrl_sig_length] + B --> C[Read control & signal data] + C --> D[Return shorter channel name] + + D --> E{Format?} + E -->|CSV| F[timestampCorrection_csv] + E -->|TDT| G[timestampCorrection_tdt] + + F --> H[Read timestamps from selected channel] + G --> I[Read timestamps, npoints, sampling_rate] + + H --> J[correctionIndex = where >= timeForLightsTurnOn] + I --> K[Expand block timestamps] + K --> J + + J --> L[Write timeCorrection_{region}.hdf5] + + style A fill:#e1f5ff + style L fill:#d4edda +``` + +### Detailed Flow: applyCorrection + +```mermaid +flowchart TD + A[applyCorrection called] --> B{Channel type?} + + B -->|control/signal| C[Read correctionIndex] + B -->|event| D[Read event timestamps] + + C --> E[Read raw data] + E --> F[data = data correctionIndex] + F --> G[Write displayName.hdf5] + + D --> H{TDT format?} + H -->|Yes| I[Read timeRecStart] + H -->|No| J[ts -= timeForLightsTurnOn] + + I --> K[ts -= timeRecStart] + K --> J + J --> L[Write event_region.hdf5] + + style A fill:#e1f5ff + style G fill:#d4edda + style L fill:#d4edda +``` + +### Detailed Flow: Control Channel Creation + +```mermaid +flowchart LR + A[add_control_channel] --> B[For each signal without control] + B --> C[Copy signal.hdf5 to cntrl_i.hdf5] + C --> D[Update storesList.csv] + + D --> E[... timestamp correction ...] 
+ + E --> F[create_control_channel] + F --> G[For each cntrl_i placeholder] + G --> H[Read signal_{region}.hdf5] + H --> I[helper_create_control_channel] + I --> J[Savitzky-Golay filter] + J --> K[Curve fit to exponential] + K --> L[Write control_{region}.hdf5] + L --> M[Export to CSV] + + style A fill:#fff3cd + style M fill:#d4edda +``` + +## Execution Order in execute_timestamp_correction + +```python +# preprocess.py:212-247 +for each session in folderNames: + for each output_folder in session: + # Step 1: Read metadata + storesList = np.genfromtxt("storesList.csv") + + # Step 2: Add placeholder controls if needed + if isosbestic_control == False: + storesList = add_control_channel(filepath, storesList) + + # Step 3: Compute correctionIndex and timestampNew + if check_TDT(folderName): + timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) + else: + timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList) + + # Step 4: Apply corrections to all channels/events + for each store in storesList: + decide_naming_convention_and_applyCorrection( + filepath, timeForLightsTurnOn, storename, displayName, storesList + ) + # ^ This calls applyCorrection for each channel + + # Step 5: Generate synthetic controls via curve fitting + if isosbestic_control == False: + create_control_channel(filepath, storesList, window=101) +``` + +## File I/O Summary + +### Files Read + +| Function | Files Read | Keys | +|----------|-----------|------| +| `add_control_channel` | `signal_*.hdf5` (for copying) | - | +| `timestampCorrection_csv` | `{storename}.hdf5` | `timestamps`, `sampling_rate` | +| `timestampCorrection_tdt` | `{storename}.hdf5` | `timestamps`, `npoints`, `sampling_rate` | +| `check_cntrl_sig_length` | `control_*.hdf5`, `signal_*.hdf5` | `data` | +| `applyCorrection` | `timeCorrection_{region}.hdf5`
`{event}.hdf5` | `correctionIndex`, `timeRecStart` (TDT)<br>`data` or `timestamps` | +| `create_control_channel` | `signal_{region}.hdf5`<br>`timeCorrection_{region}.hdf5` | `data`<br>`timestampNew`, `sampling_rate` | + +### Files Written + +| Function | Files Written | Keys | Notes | +|----------|--------------|------|-------| +| `add_control_channel` | `storesList.csv`<br>`cntrl{i}.hdf5` | -<br>(copy of signal) | Placeholder files | +| `timestampCorrection_csv` | `timeCorrection_{region}.hdf5` | `timestampNew`, `correctionIndex`, `sampling_rate` | One per region | +| `timestampCorrection_tdt` | `timeCorrection_{region}.hdf5` | `timeRecStart`, `timestampNew`, `correctionIndex`, `sampling_rate` | TDT-specific | +| `applyCorrection` | `{displayName}.hdf5`<br>`{event}_{region}.hdf5` | `data`<br>`ts` | Overwrites with corrected data | +| `create_control_channel` | `control_{region}.hdf5`<br>`{raw_name}.csv` | `data`
timestamps, data, sampling_rate | Replaces placeholder | + +## Key Transformations + +### 1. Timestamp Expansion (TDT only) + +**Input:** Block timestamps (one per acquisition block) +**Algorithm:** +```python +timeRecStart = timestamp[0] +timestamps = timestamp - timeRecStart # Zero-base +adder = np.arange(npoints) / sampling_rate # Within-block offsets [0, 1/fs, 2/fs, ...] +# Matrix multiplication to expand: +timestampNew = zeros((n_blocks, npoints)) +for i in range(npoints): + timestampNew[:, i] = timestamps + adder[i] +timestampNew = timestampNew.T.reshape(-1, order='F') # Column-major flatten +``` +**Output:** Continuous timestamps at full sampling rate + +### 2. Correction Index Computation + +**Input:** Timestamps array, `timeForLightsTurnOn` +**Algorithm:** +```python +correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] +``` +**Output:** Indices of timestamps to keep (after eliminating first N seconds) + +### 3. Data Cropping + +**Applied to:** Control/signal data channels +**Algorithm:** +```python +data_corrected = data[correctionIndex] +``` + +### 4. Event Timestamp Adjustment + +**Applied to:** Event markers (TTL pulses) +**Algorithm:** +```python +# CSV format: +ts_corrected = ts - timeForLightsTurnOn + +# TDT format (if ts >= timeRecStart): +ts_corrected = ts - timeRecStart - timeForLightsTurnOn +``` + +### 5. Synthetic Control Generation + +**Input:** Signal channel (already corrected) +**Algorithm:** +1. Apply Savitzky-Golay filter: `filtered_signal = savgol_filter(signal, window, polyorder=3)` +2. Curve fit to exponential: `control = a + b * exp(-(1/c) * t)` +3. Return fitted curve as synthetic control + +## Analysis for I/O Separation + +### Pure Analysis Functions (Minimal I/O) +These could be extracted with I/O injected: +- ❌ None - all functions perform substantial I/O + +### Orchestration Functions (Heavy I/O, Light Analysis) +These coordinate reading/writing and delegate computation: +- `add_control_channel` - File copying and CSV writing +- `decide_naming_convention_and_applyCorrection` - Loops and delegates +- `create_control_channel` - Orchestrates read → process → write + +### Mixed Functions (I/O + Analysis) +These perform both I/O and computation inline: +- `timestampCorrection_csv` - Reads data, computes correctionIndex, writes results +- `timestampCorrection_tdt` - Reads data, expands timestamps, computes correctionIndex, writes +- `applyCorrection` - Reads multiple files, applies transformations, writes +- `check_cntrl_sig_length` - Reads data just to compare lengths + +## Refactoring Recommendations for I/O Separation + +### Option 1: Extract Pure Computation Functions + +Create new pure functions: +```python +# Pure analysis (no I/O) +def compute_correction_index(timestamps, timeForLightsTurnOn): + return np.where(timestamps >= timeForLightsTurnOn)[0] + +def expand_tdt_timestamps(block_timestamps, npoints, sampling_rate): + # TDT expansion algorithm + ... + return expanded_timestamps + +def crop_data_by_index(data, correctionIndex): + return data[correctionIndex] + +def adjust_event_timestamps(ts, timeRecStart, timeForLightsTurnOn, is_tdt): + # Event adjustment logic + ... + return adjusted_ts +``` + +Then modify existing functions to use these pure functions, keeping I/O separate. + +### Option 2: Reader/Writer Pattern + +Create dedicated I/O classes: +```python +class TimestampCorrectionReader: + def read_raw_timestamps(self, filepath, storename): + ... + + def read_correction_data(self, filepath, region): + ... 
+ +class TimestampCorrectionWriter: + def write_correction_file(self, filepath, region, data): + ... + + def write_corrected_data(self, filepath, displayName, data): + ... +``` + +### Option 3: Data Class Pattern + +Return data objects instead of writing directly: +```python +@dataclass +class TimestampCorrection: + timestampNew: np.ndarray + correctionIndex: np.ndarray + sampling_rate: float + timeRecStart: Optional[float] = None # TDT only + +def timestampCorrection_tdt(...) -> TimestampCorrection: + # Compute all values + return TimestampCorrection( + timestampNew=..., + correctionIndex=..., + sampling_rate=..., + timeRecStart=... + ) + +# Separate writer function +def write_timestamp_correction(filepath, region, correction: TimestampCorrection): + write_hdf5(correction.timestampNew, f"timeCorrection_{region}", filepath, "timestampNew") + # ... etc +``` + +## Current I/O Patterns to Refactor + +1. **Inline writes in computation functions:** + - `timestampCorrection_csv` and `timestampCorrection_tdt` compute AND write + - Should separate: compute → return data → write in caller + +2. **Reading for validation only:** + - `check_cntrl_sig_length` reads full data arrays just to compare shapes + - Could be optimized to read only array metadata/shapes + +3. **Side-effect file creation:** + - `add_control_channel` creates files as side effect + - `create_control_channel` both generates data AND writes multiple formats (HDF5 + CSV) + +4. **Mixed responsibilities in applyCorrection:** + - Handles both control/signal cropping AND event timestamp adjustment + - Could be split into two separate functions From aa36e330f790eaccea333f89703e8d49bfb31bfd Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 11 Dec 2025 15:07:26 -0800 Subject: [PATCH 067/150] Inverted name check --- src/guppy/analysis/timestamp_correction.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index e179d26..71b4760 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -30,6 +30,10 @@ def timestampCorrection_csv( for i in range(arr.shape[1]): name_1 = arr[0, i].split("_")[-1] name_2 = arr[1, i].split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + # dirname = os.path.dirname(path[i]) idx = np.where(names_for_storenames == indices[i])[0] @@ -41,17 +45,12 @@ def timestampCorrection_csv( timestamp = name_to_timestamps[name] sampling_rate = name_to_sampling_rate[name] - if name_1 == name_2: - correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] - timestampNew = timestamp[correctionIndex] - # TODO: Pull out write operations into preprocess.py - write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") - write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") - - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") + correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] + timestampNew = timestamp[correctionIndex] + # TODO: Pull out write operations into preprocess.py + write_hdf5(timestampNew, "timeCorrection_" + 
name_1, filepath, "timestampNew") + write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") + write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") logger.info("Timestamps corrected and converted to seconds.") From 2049c4a5bd2337324a8b2bde0a7da88ba2922013 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 11 Dec 2025 15:40:52 -0800 Subject: [PATCH 068/150] Refactored out write --- src/guppy/analysis/timestamp_correction.py | 26 +++++++++++++++------- src/guppy/preprocess.py | 9 +++++--- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 71b4760..8fbb8f9 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -15,12 +15,11 @@ # function to correct timestamps after eliminating first few seconds of the data (for csv data) -def timestampCorrection_csv( - filepath, timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps, name_to_sampling_rate -): +def timestampCorrection_csv(timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps): logger.debug( f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" ) + name_to_timestamps = name_to_timestamps.copy() storenames = storesList[0, :] names_for_storenames = storesList[1, :] arr = get_control_and_signal_channel_names(storesList) @@ -43,16 +42,27 @@ def timestampCorrection_csv( name = names_for_storenames[idx][0] timestamp = name_to_timestamps[name] - sampling_rate = name_to_sampling_rate[name] correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] timestampNew = timestamp[correctionIndex] - # TODO: Pull out write operations into preprocess.py - write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") - write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") + name_to_timestamps[name] = timestampNew logger.info("Timestamps corrected and converted to seconds.") + return name_to_timestamps + + +def write_corrected_timestamps(filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate): + for name, timestamps in name_to_timestamps.items(): + corrected_timestamps = corrected_name_to_timestamps[name] + correctionIndex = np.where(timestamps >= corrected_timestamps[0])[0] + sampling_rate = name_to_sampling_rate[name] + name_1 = name.split("_")[-1] + assert np.array_equal( + corrected_timestamps, timestamps[correctionIndex] + ), "Timestamps do not match after correction" + write_hdf5(corrected_timestamps, "timeCorrection_" + name_1, filepath, "timestampNew") + write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") + write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") # function to correct timestamps after eliminating first few seconds of the data (for TDT data) diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 74033f8..413246d 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -27,6 +27,7 @@ read_control_and_signal, timestampCorrection_csv, timestampCorrection_tdt, + write_corrected_timestamps, ) from .analysis.z_score import compute_z_score @@ -280,11 +281,13 @@ def execute_timestamp_correction(folderNames, inputParameters): if cond == True: 
timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) else: - control_and_signal_dicts = read_control_and_signal(filepath, storesList) name_to_data, name_to_timestamps, name_to_sampling_rate = control_and_signal_dicts - timestampCorrection_csv( - filepath, timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps, name_to_sampling_rate + corrected_name_to_timestamps = timestampCorrection_csv( + timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps + ) + write_corrected_timestamps( + filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate ) for k in range(storesList.shape[1]): From 8b50fb70522732a3413c60262262845548c4e4da Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 12 Dec 2025 11:44:59 -0800 Subject: [PATCH 069/150] Refactored read and write out of timestampcorrection_tdt --- src/guppy/analysis/timestamp_correction.py | 103 +++++++++++---------- src/guppy/preprocess.py | 29 +++++- 2 files changed, 78 insertions(+), 54 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 8fbb8f9..4e37efe 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -20,6 +20,7 @@ def timestampCorrection_csv(timeForLightsTurnOn, storesList, name_to_data, name_ f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" ) name_to_timestamps = name_to_timestamps.copy() + name_to_correctionIndex = {} storenames = storesList[0, :] names_for_storenames = storesList[1, :] arr = get_control_and_signal_channel_names(storesList) @@ -46,85 +47,78 @@ def timestampCorrection_csv(timeForLightsTurnOn, storesList, name_to_data, name_ correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] timestampNew = timestamp[correctionIndex] name_to_timestamps[name] = timestampNew + name_to_correctionIndex[name] = correctionIndex logger.info("Timestamps corrected and converted to seconds.") - return name_to_timestamps + return name_to_timestamps, name_to_correctionIndex -def write_corrected_timestamps(filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate): - for name, timestamps in name_to_timestamps.items(): +def write_corrected_timestamps( + filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate, name_to_correctionIndex +): + for name, correctionIndex in name_to_correctionIndex.items(): + timestamps = name_to_timestamps[name] corrected_timestamps = corrected_name_to_timestamps[name] - correctionIndex = np.where(timestamps >= corrected_timestamps[0])[0] sampling_rate = name_to_sampling_rate[name] + if sampling_rate.shape == (): # numpy scalar + sampling_rate = np.asarray([sampling_rate]) name_1 = name.split("_")[-1] - assert np.array_equal( - corrected_timestamps, timestamps[correctionIndex] - ), "Timestamps do not match after correction" + write_hdf5(np.asarray([timestamps[0]]), "timeCorrection_" + name_1, filepath, "timeRecStart") write_hdf5(corrected_timestamps, "timeCorrection_" + name_1, filepath, "timestampNew") write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(np.asarray(sampling_rate), "timeCorrection_" + name_1, filepath, "sampling_rate") + write_hdf5(sampling_rate, "timeCorrection_" + name_1, filepath, "sampling_rate") # function to correct timestamps after eliminating first few seconds of the data (for TDT data) -def timestampCorrection_tdt(filepath, timeForLightsTurnOn, 
storesList): - +def timestampCorrection_tdt( + filepath, timeForLightsTurnOn, storesList, name_to_timestamps, name_to_data, name_to_sampling_rate, name_to_npoints +): logger.debug( f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" ) + name_to_timestamps = name_to_timestamps.copy() + name_to_correctionIndex = {} storenames = storesList[0, :] - storesList = storesList[1, :] - - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - - try: - arr = np.asarray(arr).reshape(2, -1) - except: - logger.error("Error in saving stores list file or spelling mistake for control or signal") - raise Exception("Error in saving stores list file or spelling mistake for control or signal") + names_for_storenames = storesList[1, :] + arr = get_control_and_signal_channel_names(storesList) - indices = check_cntrl_sig_length(filepath, arr, storenames, storesList) + indices = check_cntrl_sig_length(arr, name_to_data) for i in range(arr.shape[1]): name_1 = arr[0, i].split("_")[-1] name_2 = arr[1, i].split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + # dirname = os.path.dirname(path[i]) - idx = np.where(storesList == indices[i])[0] + idx = np.where(names_for_storenames == indices[i])[0] if idx.shape[0] == 0: logger.error(f"{arr[0,i]} does not exist in the stores list file.") raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) - timestamp = read_hdf5(storenames[idx][0], filepath, "timestamps") - npoints = read_hdf5(storenames[idx][0], filepath, "npoints") - sampling_rate = read_hdf5(storenames[idx][0], filepath, "sampling_rate") + name = names_for_storenames[idx][0] + timestamp = name_to_timestamps[name] + sampling_rate = name_to_sampling_rate[name] + npoints = name_to_npoints[name] + + timeRecStart = timestamp[0] + timestamps = np.subtract(timestamp, timeRecStart) + adder = np.arange(npoints) / sampling_rate + lengthAdder = adder.shape[0] + timestampNew = np.zeros((len(timestamps), lengthAdder)) + for i in range(lengthAdder): + timestampNew[:, i] = np.add(timestamps, adder[i]) + timestampNew = (timestampNew.T).reshape(-1, order="F") + correctionIndex = np.where(timestampNew >= timeForLightsTurnOn)[0] + timestampNew = timestampNew[correctionIndex] - if name_1 == name_2: - timeRecStart = timestamp[0] - timestamps = np.subtract(timestamp, timeRecStart) - adder = np.arange(npoints) / sampling_rate - lengthAdder = adder.shape[0] - timestampNew = np.zeros((len(timestamps), lengthAdder)) - for i in range(lengthAdder): - timestampNew[:, i] = np.add(timestamps, adder[i]) - timestampNew = (timestampNew.T).reshape(-1, order="F") - correctionIndex = np.where(timestampNew >= timeForLightsTurnOn)[0] - timestampNew = timestampNew[correctionIndex] - - write_hdf5(np.asarray([timeRecStart]), "timeCorrection_" + name_1, filepath, "timeRecStart") - write_hdf5(timestampNew, "timeCorrection_" + name_1, filepath, "timestampNew") - write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(np.asarray([sampling_rate]), "timeCorrection_" + name_1, filepath, "sampling_rate") - else: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or 
Error in storesList file") + name_to_timestamps[name] = timestampNew + name_to_correctionIndex[name] = correctionIndex logger.info("Timestamps corrected and converted to seconds.") - # return timeRecStart, correctionIndex, timestampNew + return name_to_timestamps, name_to_correctionIndex # function to check if naming convention was followed while saving storeslist file @@ -269,6 +263,7 @@ def read_control_and_signal(filepath, storesList): name_to_data = {} name_to_timestamps = {} name_to_sampling_rate = {} + name_to_npoints = {} for i in range(channels_arr.shape[1]): control_name = channels_arr[0, i] @@ -284,6 +279,12 @@ def read_control_and_signal(filepath, storesList): signal_timestamps = read_hdf5(signal_storename, filepath, "timestamps") control_sampling_rate = read_hdf5(control_storename, filepath, "sampling_rate") signal_sampling_rate = read_hdf5(signal_storename, filepath, "sampling_rate") + try: # TODO: define npoints for csv datasets + control_npoints = read_hdf5(control_storename, filepath, "npoints") + signal_npoints = read_hdf5(signal_storename, filepath, "npoints") + except KeyError: # npoints is not defined for csv datasets + control_npoints = None + signal_npoints = None name_to_data[control_name] = control_data name_to_data[signal_name] = signal_data @@ -291,5 +292,7 @@ def read_control_and_signal(filepath, storesList): name_to_timestamps[signal_name] = signal_timestamps name_to_sampling_rate[control_name] = control_sampling_rate name_to_sampling_rate[signal_name] = signal_sampling_rate + name_to_npoints[control_name] = control_npoints + name_to_npoints[signal_name] = signal_npoints - return name_to_data, name_to_timestamps, name_to_sampling_rate + return name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 413246d..db9d8d0 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -279,15 +279,36 @@ def execute_timestamp_correction(folderNames, inputParameters): storesList = add_control_channel(filepath, storesList) if cond == True: - timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) + control_and_signal_dicts = read_control_and_signal(filepath, storesList) + name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts + corrected_name_to_timestamps, name_to_correctionIndex = timestampCorrection_tdt( + filepath, + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + ) + write_corrected_timestamps( + filepath, + corrected_name_to_timestamps, + name_to_timestamps, + name_to_sampling_rate, + name_to_correctionIndex, + ) else: control_and_signal_dicts = read_control_and_signal(filepath, storesList) - name_to_data, name_to_timestamps, name_to_sampling_rate = control_and_signal_dicts - corrected_name_to_timestamps = timestampCorrection_csv( + name_to_data, name_to_timestamps, name_to_sampling_rate, _ = control_and_signal_dicts + corrected_name_to_timestamps, name_to_correctionIndex = timestampCorrection_csv( timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps ) write_corrected_timestamps( - filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate + filepath, + corrected_name_to_timestamps, + name_to_timestamps, + name_to_sampling_rate, + name_to_correctionIndex, ) for k in range(storesList.shape[1]): From b73417063e15f8a1dafe9615bbc6abafcdcbcb23 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 12 Dec 2025 11:48:19 
-0800 Subject: [PATCH 070/150] Removed, now unused file path parameter. --- src/guppy/analysis/timestamp_correction.py | 2 +- src/guppy/preprocess.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 4e37efe..cd662bd 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -71,7 +71,7 @@ def write_corrected_timestamps( # function to correct timestamps after eliminating first few seconds of the data (for TDT data) def timestampCorrection_tdt( - filepath, timeForLightsTurnOn, storesList, name_to_timestamps, name_to_data, name_to_sampling_rate, name_to_npoints + timeForLightsTurnOn, storesList, name_to_timestamps, name_to_data, name_to_sampling_rate, name_to_npoints ): logger.debug( f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index db9d8d0..83659bf 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -282,7 +282,6 @@ def execute_timestamp_correction(folderNames, inputParameters): control_and_signal_dicts = read_control_and_signal(filepath, storesList) name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts corrected_name_to_timestamps, name_to_correctionIndex = timestampCorrection_tdt( - filepath, timeForLightsTurnOn, storesList, name_to_timestamps, From 4402cbb20f02273c78020b2aa0d20f98236e1c9c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 12 Dec 2025 12:10:39 -0800 Subject: [PATCH 071/150] Consolidated TDT and CSV timestamp correction functions into a single timestamp_correction function with a mode parameter. 
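Usage note (illustrative sketch only, not one of the applied hunks): the snippet below condenses the new loop body from the preprocess.py change in this patch. Any name not defined here (folderNames, i, filepath, storesList, timeForLightsTurnOn, check_TDT) is assumed to come from the surrounding execute_timestamp_correction code, exactly as in the existing function.

```python
# Sketch of the consolidated call site inside execute_timestamp_correction's loop.
from guppy.analysis.timestamp_correction import (
    read_control_and_signal,
    timestampCorrection,
    write_corrected_timestamps,
)

# Pick the acquisition format once; a single entry point now handles both branches.
mode = "tdt" if check_TDT(folderNames[i]) else "csv"

# Read the control/signal stores up front; the correction itself no longer touches disk.
name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = read_control_and_signal(
    filepath, storesList
)

# mode="tdt" expands block timestamps; mode="csv" only crops the first timeForLightsTurnOn seconds.
corrected_name_to_timestamps, name_to_correctionIndex = timestampCorrection(
    timeForLightsTurnOn,
    storesList,
    name_to_timestamps,
    name_to_data,
    name_to_sampling_rate,
    name_to_npoints,
    mode=mode,
)

# Writes stay in the caller, keeping file I/O separate from the analysis step.
write_corrected_timestamps(
    filepath,
    corrected_name_to_timestamps,
    name_to_timestamps,
    name_to_sampling_rate,
    name_to_correctionIndex,
)
```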
--- src/guppy/analysis/timestamp_correction.py | 72 ++++++---------------- src/guppy/preprocess.py | 54 ++++++---------- 2 files changed, 40 insertions(+), 86 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index cd662bd..df72800 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -14,45 +14,6 @@ logger = logging.getLogger(__name__) -# function to correct timestamps after eliminating first few seconds of the data (for csv data) -def timestampCorrection_csv(timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps): - logger.debug( - f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" - ) - name_to_timestamps = name_to_timestamps.copy() - name_to_correctionIndex = {} - storenames = storesList[0, :] - names_for_storenames = storesList[1, :] - arr = get_control_and_signal_channel_names(storesList) - - indices = check_cntrl_sig_length(arr, name_to_data) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - if name_1 != name_2: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - # dirname = os.path.dirname(path[i]) - idx = np.where(names_for_storenames == indices[i])[0] - - if idx.shape[0] == 0: - logger.error(f"{arr[0,i]} does not exist in the stores list file.") - raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) - - name = names_for_storenames[idx][0] - timestamp = name_to_timestamps[name] - - correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] - timestampNew = timestamp[correctionIndex] - name_to_timestamps[name] = timestampNew - name_to_correctionIndex[name] = correctionIndex - - logger.info("Timestamps corrected and converted to seconds.") - return name_to_timestamps, name_to_correctionIndex - - def write_corrected_timestamps( filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate, name_to_correctionIndex ): @@ -69,13 +30,16 @@ def write_corrected_timestamps( write_hdf5(sampling_rate, "timeCorrection_" + name_1, filepath, "sampling_rate") -# function to correct timestamps after eliminating first few seconds of the data (for TDT data) -def timestampCorrection_tdt( - timeForLightsTurnOn, storesList, name_to_timestamps, name_to_data, name_to_sampling_rate, name_to_npoints +# function to correct timestamps after eliminating first few seconds of the data (for csv or TDT data depending on mode) +def timestampCorrection( + timeForLightsTurnOn, storesList, name_to_timestamps, name_to_data, name_to_sampling_rate, name_to_npoints, mode ): logger.debug( f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" ) + if mode not in ["tdt", "csv"]: + logger.error("Mode should be either 'tdt' or 'csv'") + raise ValueError("Mode should be either 'tdt' or 'csv'") name_to_timestamps = name_to_timestamps.copy() name_to_correctionIndex = {} storenames = storesList[0, :] @@ -103,16 +67,20 @@ def timestampCorrection_tdt( sampling_rate = name_to_sampling_rate[name] npoints = name_to_npoints[name] - timeRecStart = timestamp[0] - timestamps = np.subtract(timestamp, timeRecStart) - adder = np.arange(npoints) / sampling_rate - lengthAdder = adder.shape[0] - timestampNew = np.zeros((len(timestamps), lengthAdder)) - for i in 
range(lengthAdder): - timestampNew[:, i] = np.add(timestamps, adder[i]) - timestampNew = (timestampNew.T).reshape(-1, order="F") - correctionIndex = np.where(timestampNew >= timeForLightsTurnOn)[0] - timestampNew = timestampNew[correctionIndex] + if mode == "tdt": + timeRecStart = timestamp[0] + timestamps = np.subtract(timestamp, timeRecStart) + adder = np.arange(npoints) / sampling_rate + lengthAdder = adder.shape[0] + timestampNew = np.zeros((len(timestamps), lengthAdder)) + for i in range(lengthAdder): + timestampNew[:, i] = np.add(timestamps, adder[i]) + timestampNew = (timestampNew.T).reshape(-1, order="F") + correctionIndex = np.where(timestampNew >= timeForLightsTurnOn)[0] + timestampNew = timestampNew[correctionIndex] + elif mode == "csv": + correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] + timestampNew = timestamp[correctionIndex] name_to_timestamps[name] = timestampNew name_to_correctionIndex[name] = correctionIndex diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 83659bf..19626dd 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -25,8 +25,7 @@ create_control_channel, decide_naming_convention_and_applyCorrection, read_control_and_signal, - timestampCorrection_csv, - timestampCorrection_tdt, + timestampCorrection, write_corrected_timestamps, ) from .analysis.z_score import compute_z_score @@ -267,7 +266,7 @@ def execute_timestamp_correction(folderNames, inputParameters): for i in range(len(folderNames)): filepath = folderNames[i] storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - cond = check_TDT(folderNames[i]) + mode = "tdt" if check_TDT(folderNames[i]) else "csv" logger.debug(f"Timestamps corrections started for {filepath}") for j in range(len(storesListPath)): filepath = storesListPath[j] @@ -278,37 +277,24 @@ def execute_timestamp_correction(folderNames, inputParameters): if isosbestic_control == False: storesList = add_control_channel(filepath, storesList) - if cond == True: - control_and_signal_dicts = read_control_and_signal(filepath, storesList) - name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts - corrected_name_to_timestamps, name_to_correctionIndex = timestampCorrection_tdt( - timeForLightsTurnOn, - storesList, - name_to_timestamps, - name_to_data, - name_to_sampling_rate, - name_to_npoints, - ) - write_corrected_timestamps( - filepath, - corrected_name_to_timestamps, - name_to_timestamps, - name_to_sampling_rate, - name_to_correctionIndex, - ) - else: - control_and_signal_dicts = read_control_and_signal(filepath, storesList) - name_to_data, name_to_timestamps, name_to_sampling_rate, _ = control_and_signal_dicts - corrected_name_to_timestamps, name_to_correctionIndex = timestampCorrection_csv( - timeForLightsTurnOn, storesList, name_to_data, name_to_timestamps - ) - write_corrected_timestamps( - filepath, - corrected_name_to_timestamps, - name_to_timestamps, - name_to_sampling_rate, - name_to_correctionIndex, - ) + control_and_signal_dicts = read_control_and_signal(filepath, storesList) + name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts + corrected_name_to_timestamps, name_to_correctionIndex = timestampCorrection( + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + mode=mode, + ) + write_corrected_timestamps( + filepath, + corrected_name_to_timestamps, + name_to_timestamps, + name_to_sampling_rate, + 
name_to_correctionIndex, + ) for k in range(storesList.shape[1]): decide_naming_convention_and_applyCorrection( From ca735ce723a870e972308131f7cb1cd020a6ab61 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 12 Dec 2025 12:21:50 -0800 Subject: [PATCH 072/150] Cleaned up some inefficient code --- src/guppy/analysis/timestamp_correction.py | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index df72800..efa4c52 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -94,25 +94,16 @@ def timestampCorrection( def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList): logger.debug("Applying correction of timestamps to the data and event timestamps") - storesList = storesList[1, :] - - arr = [] - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - arr.append(storesList[i]) - - arr = sorted(arr, key=str.casefold) - arr = np.asarray(arr).reshape(2, -1) + arr = get_control_and_signal_channel_names(storesList) for i in range(arr.shape[1]): name_1 = arr[0, i].split("_")[-1] name_2 = arr[1, i].split("_")[-1] - # dirname = os.path.dirname(path[i]) - if name_1 == name_2: - applyCorrection(filepath, timeForLightsTurnOn, event, displayName, name_1) - else: + if name_1 != name_2: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") + else: + applyCorrection(filepath, timeForLightsTurnOn, event, displayName, name_1) logger.info("Timestamps corrections applied to the data and event timestamps.") @@ -153,10 +144,6 @@ def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): arr = np.subtract(arr, timeForLightsTurnOn) write_hdf5(arr, displayName + "_" + naming, filepath, "ts") - # if isosbestic_control==False and 'control' in displayName.lower(): - # control = create_control_channel(filepath, displayName) - # write_hdf5(control, displayName, filepath, 'data') - # main function to create control channel using # signal channel and save it to a file From 262681bcab890d51f73d65856ade3533a6b97842 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 12 Dec 2025 13:12:16 -0800 Subject: [PATCH 073/150] Pulled read operations out of the applyCorrection functions. 
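
Now that the read side lives in the caller, preprocess.py builds small lookup
dicts keyed by the trailing channel suffix before handing plain arrays to
applyCorrection. A minimal, self-contained sketch of that keying (the store
names and values below are made up; the dict comprehension itself matches the
ones added to preprocess.py in this patch):

    import numpy as np

    # hypothetical control/signal pair for one fiber, keyed by display name
    name_to_timestamps = {
        "control_dLight": np.array([0.0, 0.1, 0.2, 0.3]),
        "signal_dLight": np.array([0.0, 0.1, 0.2, 0.3]),
    }

    # key by the trailing suffix ("dLight"), the same suffix used for the
    # timeCorrection_<suffix> entries written elsewhere in this module
    name_1_to_timestamps = {
        name.split("_")[-1]: ts for name, ts in name_to_timestamps.items()
    }

    assert list(name_1_to_timestamps) == ["dLight"]

Both members of a pair collapse onto the same suffix key, which is why a
single correction index per pair can be applied to control and signal alike.
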
--- src/guppy/analysis/timestamp_correction.py | 84 +++++++++++++++++----- src/guppy/preprocess.py | 26 ++++++- 2 files changed, 92 insertions(+), 18 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index efa4c52..2da2020 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -91,7 +91,19 @@ def timestampCorrection( # function to check if naming convention was followed while saving storeslist file # and apply timestamps correction using the function applyCorrection -def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList): +def decide_naming_convention_and_applyCorrection( + filepath, + timeForLightsTurnOn, + event, + displayName, + storesList, + name_1_to_corrected_timestamps, + name_1_to_timestamps, + name_1_to_sampling_rate, + name_1_to_correctionIndex, + data, + ttl_timestamps, +): logger.debug("Applying correction of timestamps to the data and event timestamps") arr = get_control_and_signal_channel_names(storesList) @@ -103,36 +115,61 @@ def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") else: - applyCorrection(filepath, timeForLightsTurnOn, event, displayName, name_1) + corrected_timestamps = name_1_to_corrected_timestamps[name_1] + timestamps = name_1_to_timestamps[name_1] + timeRecStart = timestamps[0] + sampling_rate = name_1_to_sampling_rate[name_1] + correctionIndex = name_1_to_correctionIndex[name_1] + applyCorrection( + filepath, + timeForLightsTurnOn, + event, + displayName, + name_1, + corrected_timestamps, + sampling_rate, + correctionIndex, + timeRecStart, + data, + ttl_timestamps, + ) logger.info("Timestamps corrections applied to the data and event timestamps.") # function to apply correction to control, signal and event timestamps -def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming): +def applyCorrection( + filepath, + timeForLightsTurnOn, + event, + displayName, + naming, + corrected_timestamps, + sampling_rate, + correctionIndex, + timeRecStart, + data, + ttl_timestamps, +): cond = check_TDT(os.path.dirname(filepath)) - if cond == True: - timeRecStart = read_hdf5("timeCorrection_" + naming, filepath, "timeRecStart")[0] - - timestampNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - correctionIndex = read_hdf5("timeCorrection_" + naming, filepath, "correctionIndex") - + timestampNew = corrected_timestamps if "control" in displayName.lower() or "signal" in displayName.lower(): - split_name = displayName.split("_")[-1] - if split_name == naming: - pass - else: - correctionIndex = read_hdf5("timeCorrection_" + split_name, filepath, "correctionIndex") - arr = read_hdf5(event, filepath, "data") + # TODO: double-check that this code is not reachable + # split_name = displayName.split("_")[-1] + # if split_name == naming: + # pass + # else: + # correctionIndex = read_hdf5("timeCorrection_" + split_name, filepath, "correctionIndex") + arr = data if (arr == 0).all() == True: arr = arr else: arr = arr[correctionIndex] write_hdf5(arr, displayName, filepath, "data") else: - arr = read_hdf5(event, filepath, "timestamps") + arr = ttl_timestamps if cond == True: res = (arr >= timeRecStart).all() if res == True: @@ -251,3 +288,18 @@ def read_control_and_signal(filepath, 
storesList): name_to_npoints[signal_name] = signal_npoints return name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints + + +def read_ttl(filepath, storesList): + channels_arr = get_control_and_signal_channel_names(storesList) + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + name_to_timestamps = {} + for storename, name in zip(storenames, names_for_storenames): + if storename in channels_arr: + continue + timestamps = read_hdf5(storename, filepath, "timestamps") + name_to_timestamps[name] = timestamps + + return name_to_timestamps diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 19626dd..1715cfc 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -25,6 +25,7 @@ create_control_channel, decide_naming_convention_and_applyCorrection, read_control_and_signal, + read_ttl, timestampCorrection, write_corrected_timestamps, ) @@ -295,10 +296,31 @@ def execute_timestamp_correction(folderNames, inputParameters): name_to_sampling_rate, name_to_correctionIndex, ) - + name_1_to_corrected_timestamps = { + name.split("_")[-1]: ts for name, ts in corrected_name_to_timestamps.items() + } + name_1_to_timestamps = {name.split("_")[-1]: ts for name, ts in name_to_timestamps.items()} + name_1_to_sampling_rate = {name.split("_")[-1]: sr for name, sr in name_to_sampling_rate.items()} + name_1_to_correctionIndex = {name.split("_")[-1]: idx for name, idx in name_to_correctionIndex.items()} + + name_to_timestamps_ttl = read_ttl(filepath, storesList) for k in range(storesList.shape[1]): + data = name_to_data[storesList[1, k]] if storesList[1, k] in name_to_data else None + ttl_timestamps = ( + name_to_timestamps_ttl[storesList[1, k]] if storesList[1, k] in name_to_timestamps_ttl else None + ) decide_naming_convention_and_applyCorrection( - filepath, timeForLightsTurnOn, storesList[0, k], storesList[1, k], storesList + filepath, + timeForLightsTurnOn, + storesList[0, k], + storesList[1, k], + storesList, + name_1_to_corrected_timestamps, + name_1_to_timestamps, + name_1_to_sampling_rate, + name_1_to_correctionIndex, + data, + ttl_timestamps, ) # check if isosbestic control is false and also if new control channel is added From b6173dd889e892f65f7e2c2f096dd10c88acee17 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 12 Dec 2025 16:41:46 -0800 Subject: [PATCH 074/150] split up applyCorrection by ttl vs signal_and_control --- src/guppy/analysis/timestamp_correction.py | 112 ++++++++++++++++++++- src/guppy/preprocess.py | 70 ++++++++----- 2 files changed, 154 insertions(+), 28 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 2da2020..a1088c9 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -137,6 +137,116 @@ def decide_naming_convention_and_applyCorrection( logger.info("Timestamps corrections applied to the data and event timestamps.") +def decide_naming_and_applyCorrection_signal_and_control( + filepath, + storesList, + name_to_correctionIndex, + name_to_data, +): + logger.debug("Applying correction of timestamps to the data and event timestamps") + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + arr = get_control_and_signal_channel_names(storesList) + indices = check_cntrl_sig_length(arr, name_to_data) + + for i in range(arr.shape[1]): + name_1 = arr[0, i].split("_")[-1] + name_2 = arr[1, i].split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of 
files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + + idx = np.where(names_for_storenames == indices[i])[0] + if idx.shape[0] == 0: + logger.error(f"{arr[0,i]} does not exist in the stores list file.") + raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) + + name = names_for_storenames[idx][0] + correctionIndex = name_to_correctionIndex[name] + control_name = arr[0, i] + signal_name = arr[1, i] + control_data = name_to_data[control_name] + signal_data = name_to_data[signal_name] + applyCorrection_signal_and_control(filepath, control_name, correctionIndex, control_data) + applyCorrection_signal_and_control(filepath, signal_name, correctionIndex, signal_data) + + logger.info("Timestamps corrections applied to the data and event timestamps.") + + +def applyCorrection_signal_and_control(filepath, displayName, correctionIndex, data): + arr = data + if (arr == 0).all() == True: + arr = arr + else: + arr = arr[correctionIndex] + write_hdf5(arr, displayName, filepath, "data") + + +def decide_naming_and_applyCorrection_ttl( + filepath, + timeForLightsTurnOn, + storesList, + name_to_timestamps_ttl, + name_to_timestamps, + name_to_data, +): + logger.debug("Applying correction of timestamps to the data and event timestamps") + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + arr = get_control_and_signal_channel_names(storesList) + indices = check_cntrl_sig_length(arr, name_to_data) + + for ttl_name, ttl_timestamps in name_to_timestamps_ttl.items(): + displayName = ttl_name + for i in range(arr.shape[1]): + name_1 = arr[0, i].split("_")[-1] + name_2 = arr[1, i].split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + + idx = np.where(names_for_storenames == indices[i])[0] + if idx.shape[0] == 0: + logger.error(f"{arr[0,i]} does not exist in the stores list file.") + raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) + + name = names_for_storenames[idx][0] + timestamps = name_to_timestamps[name] + timeRecStart = timestamps[0] + applyCorrection_ttl( + filepath, + timeForLightsTurnOn, + displayName, + name_1, + timeRecStart, + ttl_timestamps, + ) + + logger.info("Timestamps corrections applied to the data and event timestamps.") + + +def applyCorrection_ttl( + filepath, + timeForLightsTurnOn, + displayName, + naming, + timeRecStart, + ttl_timestamps, +): + cond = check_TDT(os.path.dirname(filepath)) + arr = ttl_timestamps + if cond == True: + res = (arr >= timeRecStart).all() + if res == True: + arr = np.subtract(arr, timeRecStart) + arr = np.subtract(arr, timeForLightsTurnOn) + else: + arr = np.subtract(arr, timeForLightsTurnOn) + else: + arr = np.subtract(arr, timeForLightsTurnOn) + write_hdf5(arr, displayName + "_" + naming, filepath, "ts") + + # function to apply correction to control, signal and event timestamps def applyCorrection( filepath, @@ -297,7 +407,7 @@ def read_ttl(filepath, storesList): name_to_timestamps = {} for storename, name in zip(storenames, names_for_storenames): - if storename in channels_arr: + if name in channels_arr: continue timestamps = read_hdf5(storename, filepath, "timestamps") name_to_timestamps[name] = timestamps diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 1715cfc..acea813 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -23,7 
+23,8 @@ ) from .analysis.timestamp_correction import ( create_control_channel, - decide_naming_convention_and_applyCorrection, + decide_naming_and_applyCorrection_signal_and_control, + decide_naming_and_applyCorrection_ttl, read_control_and_signal, read_ttl, timestampCorrection, @@ -280,7 +281,7 @@ def execute_timestamp_correction(folderNames, inputParameters): control_and_signal_dicts = read_control_and_signal(filepath, storesList) name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts - corrected_name_to_timestamps, name_to_correctionIndex = timestampCorrection( + name_to_corrected_timestamps, name_to_correctionIndex = timestampCorrection( timeForLightsTurnOn, storesList, name_to_timestamps, @@ -291,37 +292,52 @@ def execute_timestamp_correction(folderNames, inputParameters): ) write_corrected_timestamps( filepath, - corrected_name_to_timestamps, + name_to_corrected_timestamps, name_to_timestamps, name_to_sampling_rate, name_to_correctionIndex, ) - name_1_to_corrected_timestamps = { - name.split("_")[-1]: ts for name, ts in corrected_name_to_timestamps.items() - } - name_1_to_timestamps = {name.split("_")[-1]: ts for name, ts in name_to_timestamps.items()} - name_1_to_sampling_rate = {name.split("_")[-1]: sr for name, sr in name_to_sampling_rate.items()} - name_1_to_correctionIndex = {name.split("_")[-1]: idx for name, idx in name_to_correctionIndex.items()} name_to_timestamps_ttl = read_ttl(filepath, storesList) - for k in range(storesList.shape[1]): - data = name_to_data[storesList[1, k]] if storesList[1, k] in name_to_data else None - ttl_timestamps = ( - name_to_timestamps_ttl[storesList[1, k]] if storesList[1, k] in name_to_timestamps_ttl else None - ) - decide_naming_convention_and_applyCorrection( - filepath, - timeForLightsTurnOn, - storesList[0, k], - storesList[1, k], - storesList, - name_1_to_corrected_timestamps, - name_1_to_timestamps, - name_1_to_sampling_rate, - name_1_to_correctionIndex, - data, - ttl_timestamps, - ) + decide_naming_and_applyCorrection_signal_and_control( + filepath, + storesList, + name_to_correctionIndex, + name_to_data, + ) + decide_naming_and_applyCorrection_ttl( + filepath, + timeForLightsTurnOn, + storesList, + name_to_timestamps_ttl, + name_to_timestamps, + name_to_data, + ) + + # name_1_to_corrected_timestamps = { + # name.split("_")[-1]: ts for name, ts in name_to_corrected_timestamps.items() + # } + # name_1_to_timestamps = {name.split("_")[-1]: ts for name, ts in name_to_timestamps.items()} + # name_1_to_sampling_rate = {name.split("_")[-1]: sr for name, sr in name_to_sampling_rate.items()} + # name_1_to_correctionIndex = {name.split("_")[-1]: idx for name, idx in name_to_correctionIndex.items()} + # for k in range(storesList.shape[1]): # TODO: Refactor nested loops for clarity + # data = name_to_data[storesList[1, k]] if storesList[1, k] in name_to_data else None + # ttl_timestamps = ( + # name_to_timestamps_ttl[storesList[1, k]] if storesList[1, k] in name_to_timestamps_ttl else None + # ) + # decide_naming_convention_and_applyCorrection( + # filepath, + # timeForLightsTurnOn, + # storesList[0, k], + # storesList[1, k], + # storesList, + # name_1_to_corrected_timestamps, + # name_1_to_timestamps, + # name_1_to_sampling_rate, + # name_1_to_correctionIndex, + # data, + # ttl_timestamps, + # ) # check if isosbestic control is false and also if new control channel is added if isosbestic_control == False: From 4bfc1a7c41ca9ab792b4484f1fa68b5f06b8b23e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: 
Fri, 12 Dec 2025 16:42:47 -0800 Subject: [PATCH 075/150] Removed commented section. --- src/guppy/preprocess.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index acea813..543f565 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -314,31 +314,6 @@ def execute_timestamp_correction(folderNames, inputParameters): name_to_data, ) - # name_1_to_corrected_timestamps = { - # name.split("_")[-1]: ts for name, ts in name_to_corrected_timestamps.items() - # } - # name_1_to_timestamps = {name.split("_")[-1]: ts for name, ts in name_to_timestamps.items()} - # name_1_to_sampling_rate = {name.split("_")[-1]: sr for name, sr in name_to_sampling_rate.items()} - # name_1_to_correctionIndex = {name.split("_")[-1]: idx for name, idx in name_to_correctionIndex.items()} - # for k in range(storesList.shape[1]): # TODO: Refactor nested loops for clarity - # data = name_to_data[storesList[1, k]] if storesList[1, k] in name_to_data else None - # ttl_timestamps = ( - # name_to_timestamps_ttl[storesList[1, k]] if storesList[1, k] in name_to_timestamps_ttl else None - # ) - # decide_naming_convention_and_applyCorrection( - # filepath, - # timeForLightsTurnOn, - # storesList[0, k], - # storesList[1, k], - # storesList, - # name_1_to_corrected_timestamps, - # name_1_to_timestamps, - # name_1_to_sampling_rate, - # name_1_to_correctionIndex, - # data, - # ttl_timestamps, - # ) - # check if isosbestic control is false and also if new control channel is added if isosbestic_control == False: create_control_channel(filepath, storesList, window=101) From b01a58f525f20a9a0f29c06b01e30c4672fa3f57 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 12 Dec 2025 16:57:31 -0800 Subject: [PATCH 076/150] Refactored applyCorrection inside timestampCorrection for signal and control --- src/guppy/analysis/timestamp_correction.py | 25 +++++++++++++++++++++- src/guppy/preprocess.py | 8 +------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index a1088c9..3d5c73c 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -32,7 +32,14 @@ def write_corrected_timestamps( # function to correct timestamps after eliminating first few seconds of the data (for csv or TDT data depending on mode) def timestampCorrection( - timeForLightsTurnOn, storesList, name_to_timestamps, name_to_data, name_to_sampling_rate, name_to_npoints, mode + filepath, + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + mode, ): logger.debug( f"Correcting timestamps by getting rid of the first {timeForLightsTurnOn} seconds and convert timestamps to seconds" @@ -49,6 +56,8 @@ def timestampCorrection( indices = check_cntrl_sig_length(arr, name_to_data) for i in range(arr.shape[1]): + control_name = arr[0, i] + signal_name = arr[1, i] name_1 = arr[0, i].split("_")[-1] name_2 = arr[1, i].split("_")[-1] if name_1 != name_2: @@ -85,6 +94,20 @@ def timestampCorrection( name_to_timestamps[name] = timestampNew name_to_correctionIndex[name] = correctionIndex + arr = name_to_data[control_name] + if (arr == 0).all() == True: + arr = arr + else: + arr = arr[correctionIndex] + write_hdf5(arr, control_name, filepath, "data") + + arr = name_to_data[signal_name] + if (arr == 0).all() == True: + arr = arr + else: + arr = arr[correctionIndex] + write_hdf5(arr, signal_name, 
filepath, "data") + logger.info("Timestamps corrected and converted to seconds.") return name_to_timestamps, name_to_correctionIndex diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 543f565..df07c21 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -23,7 +23,6 @@ ) from .analysis.timestamp_correction import ( create_control_channel, - decide_naming_and_applyCorrection_signal_and_control, decide_naming_and_applyCorrection_ttl, read_control_and_signal, read_ttl, @@ -282,6 +281,7 @@ def execute_timestamp_correction(folderNames, inputParameters): control_and_signal_dicts = read_control_and_signal(filepath, storesList) name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts name_to_corrected_timestamps, name_to_correctionIndex = timestampCorrection( + filepath, timeForLightsTurnOn, storesList, name_to_timestamps, @@ -299,12 +299,6 @@ def execute_timestamp_correction(folderNames, inputParameters): ) name_to_timestamps_ttl = read_ttl(filepath, storesList) - decide_naming_and_applyCorrection_signal_and_control( - filepath, - storesList, - name_to_correctionIndex, - name_to_data, - ) decide_naming_and_applyCorrection_ttl( filepath, timeForLightsTurnOn, From 62cb84f921fbb26c6a7b78e76ca037d68a12bb18 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 12 Dec 2025 17:19:28 -0800 Subject: [PATCH 077/150] Pulled write operations back out of timestamp_correction. --- src/guppy/analysis/timestamp_correction.py | 53 ++++++++++------------ src/guppy/preprocess.py | 5 +- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 3d5c73c..e8144f3 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -30,9 +30,13 @@ def write_corrected_timestamps( write_hdf5(sampling_rate, "timeCorrection_" + name_1, filepath, "sampling_rate") +def write_corrected_data(filepath, name_to_corrected_data): + for name, data in name_to_corrected_data.items(): + write_hdf5(data, name, filepath, "data") + + # function to correct timestamps after eliminating first few seconds of the data (for csv or TDT data depending on mode) def timestampCorrection( - filepath, timeForLightsTurnOn, storesList, name_to_timestamps, @@ -47,19 +51,20 @@ def timestampCorrection( if mode not in ["tdt", "csv"]: logger.error("Mode should be either 'tdt' or 'csv'") raise ValueError("Mode should be either 'tdt' or 'csv'") - name_to_timestamps = name_to_timestamps.copy() + name_to_corrected_timestamps = {} name_to_correctionIndex = {} + name_to_corrected_data = {} storenames = storesList[0, :] names_for_storenames = storesList[1, :] - arr = get_control_and_signal_channel_names(storesList) + data = get_control_and_signal_channel_names(storesList) - indices = check_cntrl_sig_length(arr, name_to_data) + indices = check_cntrl_sig_length(data, name_to_data) - for i in range(arr.shape[1]): - control_name = arr[0, i] - signal_name = arr[1, i] - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] + for i in range(data.shape[1]): + control_name = data[0, i] + signal_name = data[1, i] + name_1 = data[0, i].split("_")[-1] + name_2 = data[1, i].split("_")[-1] if name_1 != name_2: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") @@ -68,8 +73,8 @@ def timestampCorrection( idx = np.where(names_for_storenames == 
indices[i])[0] if idx.shape[0] == 0: - logger.error(f"{arr[0,i]} does not exist in the stores list file.") - raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) + logger.error(f"{data[0,i]} does not exist in the stores list file.") + raise Exception("{} does not exist in the stores list file.".format(data[0, i])) name = names_for_storenames[idx][0] timestamp = name_to_timestamps[name] @@ -91,25 +96,17 @@ def timestampCorrection( correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] timestampNew = timestamp[correctionIndex] - name_to_timestamps[name] = timestampNew - name_to_correctionIndex[name] = correctionIndex - - arr = name_to_data[control_name] - if (arr == 0).all() == True: - arr = arr - else: - arr = arr[correctionIndex] - write_hdf5(arr, control_name, filepath, "data") - - arr = name_to_data[signal_name] - if (arr == 0).all() == True: - arr = arr - else: - arr = arr[correctionIndex] - write_hdf5(arr, signal_name, filepath, "data") + for displayName in [control_name, signal_name]: + name_to_corrected_timestamps[displayName] = timestampNew + name_to_correctionIndex[displayName] = correctionIndex + data = name_to_data[displayName] + if (data == 0).all() == True: + name_to_corrected_data[displayName] = data + else: + name_to_corrected_data[displayName] = data[correctionIndex] logger.info("Timestamps corrected and converted to seconds.") - return name_to_timestamps, name_to_correctionIndex + return name_to_corrected_timestamps, name_to_correctionIndex, name_to_corrected_data # function to check if naming convention was followed while saving storeslist file diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index df07c21..4653ce3 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -27,6 +27,7 @@ read_control_and_signal, read_ttl, timestampCorrection, + write_corrected_data, write_corrected_timestamps, ) from .analysis.z_score import compute_z_score @@ -280,8 +281,7 @@ def execute_timestamp_correction(folderNames, inputParameters): control_and_signal_dicts = read_control_and_signal(filepath, storesList) name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts - name_to_corrected_timestamps, name_to_correctionIndex = timestampCorrection( - filepath, + name_to_corrected_timestamps, name_to_correctionIndex, name_to_corrected_data = timestampCorrection( timeForLightsTurnOn, storesList, name_to_timestamps, @@ -297,6 +297,7 @@ def execute_timestamp_correction(folderNames, inputParameters): name_to_sampling_rate, name_to_correctionIndex, ) + write_corrected_data(filepath, name_to_corrected_data) name_to_timestamps_ttl = read_ttl(filepath, storesList) decide_naming_and_applyCorrection_ttl( From 36ba6b848362e827a489d9700e6ae41d29f6f974 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 09:13:33 -0800 Subject: [PATCH 078/150] Pulled write operations out of applyCorrection_ttl. 
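
applyCorrection_ttl now returns the corrected event times instead of writing
them; the caller persists the result under <event>_<suffix> via
write_corrected_ttl_timestamps. The arithmetic is unchanged; a standalone
sketch of it with made-up numbers:

    import numpy as np

    timeForLightsTurnOn = 1.0              # seconds trimmed from the recording start
    timeRecStart = 100.0                   # first control/signal timestamp
    ttl = np.array([105.0, 110.0, 120.0])  # hypothetical TTL event times

    # tdt mode: if the events look like absolute clock times, re-reference
    # them to the recording start before trimming; csv mode only trims the
    # lead-in period
    if (ttl >= timeRecStart).all():
        corrected = ttl - timeRecStart - timeForLightsTurnOn
    else:
        corrected = ttl - timeForLightsTurnOn

    print(corrected)  # [ 4.  9. 19.]
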
--- src/guppy/analysis/timestamp_correction.py | 46 +++++++++++++--------- src/guppy/preprocess.py | 7 ++-- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index e8144f3..d9d873f 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -203,12 +203,12 @@ def applyCorrection_signal_and_control(filepath, displayName, correctionIndex, d def decide_naming_and_applyCorrection_ttl( - filepath, timeForLightsTurnOn, storesList, name_to_timestamps_ttl, name_to_timestamps, name_to_data, + mode, ): logger.debug("Applying correction of timestamps to the data and event timestamps") storenames = storesList[0, :] @@ -216,8 +216,8 @@ def decide_naming_and_applyCorrection_ttl( arr = get_control_and_signal_channel_names(storesList) indices = check_cntrl_sig_length(arr, name_to_data) + compound_name_to_corrected_ttl_timestamps = {} for ttl_name, ttl_timestamps in name_to_timestamps_ttl.items(): - displayName = ttl_name for i in range(arr.shape[1]): name_1 = arr[0, i].split("_")[-1] name_2 = arr[1, i].split("_")[-1] @@ -233,38 +233,46 @@ def decide_naming_and_applyCorrection_ttl( name = names_for_storenames[idx][0] timestamps = name_to_timestamps[name] timeRecStart = timestamps[0] - applyCorrection_ttl( - filepath, + corrected_ttl_timestamps = applyCorrection_ttl( timeForLightsTurnOn, - displayName, - name_1, timeRecStart, ttl_timestamps, + mode, ) + compound_name = ttl_name + "_" + name_1 + compound_name_to_corrected_ttl_timestamps[compound_name] = corrected_ttl_timestamps logger.info("Timestamps corrections applied to the data and event timestamps.") + return compound_name_to_corrected_ttl_timestamps def applyCorrection_ttl( - filepath, timeForLightsTurnOn, - displayName, - naming, timeRecStart, ttl_timestamps, + mode, ): - cond = check_TDT(os.path.dirname(filepath)) - arr = ttl_timestamps - if cond == True: - res = (arr >= timeRecStart).all() + corrected_ttl_timestamps = ttl_timestamps + if mode == "tdt": + res = (corrected_ttl_timestamps >= timeRecStart).all() if res == True: - arr = np.subtract(arr, timeRecStart) - arr = np.subtract(arr, timeForLightsTurnOn) + corrected_ttl_timestamps = np.subtract(corrected_ttl_timestamps, timeRecStart) + corrected_ttl_timestamps = np.subtract(corrected_ttl_timestamps, timeForLightsTurnOn) else: - arr = np.subtract(arr, timeForLightsTurnOn) - else: - arr = np.subtract(arr, timeForLightsTurnOn) - write_hdf5(arr, displayName + "_" + naming, filepath, "ts") + corrected_ttl_timestamps = np.subtract(corrected_ttl_timestamps, timeForLightsTurnOn) + elif mode == "csv": + corrected_ttl_timestamps = np.subtract(corrected_ttl_timestamps, timeForLightsTurnOn) + return corrected_ttl_timestamps + + +def write_corrected_ttl_timestamps( + filepath, + compound_name_to_corrected_ttl_timestamps, +): + logger.debug("Applying correction of timestamps to the data and event timestamps") + for compound_name, corrected_ttl_timestamps in compound_name_to_corrected_ttl_timestamps.items(): + write_hdf5(corrected_ttl_timestamps, compound_name, filepath, "ts") + logger.info("Timestamps corrections applied to the data and event timestamps.") # function to apply correction to control, signal and event timestamps diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 4653ce3..127e929 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -29,6 +29,7 @@ timestampCorrection, write_corrected_data, 
write_corrected_timestamps, + write_corrected_ttl_timestamps, ) from .analysis.z_score import compute_z_score @@ -300,15 +301,15 @@ def execute_timestamp_correction(folderNames, inputParameters): write_corrected_data(filepath, name_to_corrected_data) name_to_timestamps_ttl = read_ttl(filepath, storesList) - decide_naming_and_applyCorrection_ttl( - filepath, + compound_name_to_corrected_ttl_timestamps = decide_naming_and_applyCorrection_ttl( timeForLightsTurnOn, storesList, name_to_timestamps_ttl, name_to_timestamps, name_to_data, + mode=mode, ) - + write_corrected_ttl_timestamps(filepath, compound_name_to_corrected_ttl_timestamps) # check if isosbestic control is false and also if new control channel is added if isosbestic_control == False: create_control_channel(filepath, storesList, window=101) From 05d855ec34dd29adde0d21c1f0685571000adf74 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 09:34:51 -0800 Subject: [PATCH 079/150] Move add_control_channel and create_control_channel to the control_channel module --- src/guppy/analysis/control_channel.py | 81 ++++++++++++++++++++++ src/guppy/analysis/timestamp_correction.py | 28 -------- src/guppy/preprocess.py | 51 +------------- 3 files changed, 82 insertions(+), 78 deletions(-) diff --git a/src/guppy/analysis/control_channel.py b/src/guppy/analysis/control_channel.py index 2da82e2..d9f6ad8 100644 --- a/src/guppy/analysis/control_channel.py +++ b/src/guppy/analysis/control_channel.py @@ -1,12 +1,93 @@ import logging +import os import numpy as np +import pandas as pd from scipy import signal as ss from scipy.optimize import curve_fit +from .io_utils import ( + read_hdf5, + write_hdf5, +) + logger = logging.getLogger(__name__) +# This function just creates placeholder Control-HDF5 files that are then immediately overwritten later on in the pipeline. +# TODO: Refactor this function to avoid unnecessary file creation. 
+# function to add control channel when there is no +# isosbestic control channel and update the storeslist file +def add_control_channel(filepath, arr): + + storenames = arr[0, :] + storesList = np.char.lower(arr[1, :]) + + keep_control = np.array([]) + # check a case if there is isosbestic control channel present + for i in range(storesList.shape[0]): + if "control" in storesList[i].lower(): + name = storesList[i].split("_")[-1] + new_str = "signal_" + str(name).lower() + find_signal = [True for i in storesList if i == new_str] + if len(find_signal) > 1: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + if len(find_signal) == 0: + logger.error( + "Isosbectic control channel parameter is set to False and still \ + storeslist file shows there is control channel present" + ) + raise Exception( + "Isosbectic control channel parameter is set to False and still \ + storeslist file shows there is control channel present" + ) + else: + continue + + for i in range(storesList.shape[0]): + if "signal" in storesList[i].lower(): + name = storesList[i].split("_")[-1] + new_str = "control_" + str(name).lower() + find_signal = [True for i in storesList if i == new_str] + if len(find_signal) == 0: + src, dst = os.path.join(filepath, arr[0, i] + ".hdf5"), os.path.join( + filepath, "cntrl" + str(i) + ".hdf5" + ) + shutil.copyfile(src, dst) + arr = np.concatenate((arr, [["cntrl" + str(i)], ["control_" + str(arr[1, i].split("_")[-1])]]), axis=1) + + np.savetxt(os.path.join(filepath, "storesList.csv"), arr, delimiter=",", fmt="%s") + + return arr + + +# main function to create control channel using +# signal channel and save it to a file +def create_control_channel(filepath, arr, window=5001): + + storenames = arr[0, :] + storesList = arr[1, :] + + for i in range(storesList.shape[0]): + event_name, event = storesList[i], storenames[i] + if "control" in event_name.lower() and "cntrl" in event.lower(): + logger.debug("Creating control channel from signal channel using curve-fitting") + name = event_name.split("_")[-1] + signal = read_hdf5("signal_" + name, filepath, "data") + timestampNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + sampling_rate = np.full(timestampNew.shape, np.nan) + sampling_rate[0] = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] + + control = helper_create_control_channel(signal, timestampNew, window) + + write_hdf5(control, event_name, filepath, "data") + d = {"timestamps": timestampNew, "data": control, "sampling_rate": sampling_rate} + df = pd.DataFrame(d) + df.to_csv(os.path.join(os.path.dirname(filepath), event.lower() + ".csv"), index=False) + logger.info("Control channel from signal channel created using curve-fitting") + + # TODO: figure out why a control channel is created for both timestamp correction and z-score steps. 
# helper function to create control channel using signal channel # by curve fitting signal channel to exponential function diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index d9d873f..709deca 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -2,9 +2,7 @@ import os import numpy as np -import pandas as pd -from .control_channel import helper_create_control_channel from .io_utils import ( check_TDT, read_hdf5, @@ -320,32 +318,6 @@ def applyCorrection( write_hdf5(arr, displayName + "_" + naming, filepath, "ts") -# main function to create control channel using -# signal channel and save it to a file -def create_control_channel(filepath, arr, window=5001): - - storenames = arr[0, :] - storesList = arr[1, :] - - for i in range(storesList.shape[0]): - event_name, event = storesList[i], storenames[i] - if "control" in event_name.lower() and "cntrl" in event.lower(): - logger.debug("Creating control channel from signal channel using curve-fitting") - name = event_name.split("_")[-1] - signal = read_hdf5("signal_" + name, filepath, "data") - timestampNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - sampling_rate = np.full(timestampNew.shape, np.nan) - sampling_rate[0] = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - - control = helper_create_control_channel(signal, timestampNew, window) - - write_hdf5(control, event_name, filepath, "data") - d = {"timestamps": timestampNew, "data": control, "sampling_rate": sampling_rate} - df = pd.DataFrame(d) - df.to_csv(os.path.join(os.path.dirname(filepath), event.lower() + ".csv"), index=False) - logger.info("Control channel from signal channel created using curve-fitting") - - # function to check control and signal channel has same length # if not, take a smaller length and do pre-processing def check_cntrl_sig_length(channels_arr, name_to_data): diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 127e929..9f1f14e 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -2,7 +2,6 @@ import json import logging import os -import shutil import sys import matplotlib.pyplot as plt @@ -13,6 +12,7 @@ processTimestampsForArtifacts, ) from .analysis.combine_data import combineData +from .analysis.control_channel import add_control_channel, create_control_channel from .analysis.io_utils import ( check_storeslistfile, check_TDT, @@ -22,7 +22,6 @@ takeOnlyDirs, ) from .analysis.timestamp_correction import ( - create_control_channel, decide_naming_and_applyCorrection_ttl, read_control_and_signal, read_ttl, @@ -212,54 +211,6 @@ def visualizeControlAndSignal(filepath, removeArtifacts): visualize(filepath, ts, control, signal, cntrl_sig_fit, plot_name, removeArtifacts) -# This function just creates placeholder Control-HDF5 files that are then immediately overwritten later on in the pipeline. -# TODO: Refactor this function to avoid unnecessary file creation. 
-# function to add control channel when there is no -# isosbestic control channel and update the storeslist file -def add_control_channel(filepath, arr): - - storenames = arr[0, :] - storesList = np.char.lower(arr[1, :]) - - keep_control = np.array([]) - # check a case if there is isosbestic control channel present - for i in range(storesList.shape[0]): - if "control" in storesList[i].lower(): - name = storesList[i].split("_")[-1] - new_str = "signal_" + str(name).lower() - find_signal = [True for i in storesList if i == new_str] - if len(find_signal) > 1: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - if len(find_signal) == 0: - logger.error( - "Isosbectic control channel parameter is set to False and still \ - storeslist file shows there is control channel present" - ) - raise Exception( - "Isosbectic control channel parameter is set to False and still \ - storeslist file shows there is control channel present" - ) - else: - continue - - for i in range(storesList.shape[0]): - if "signal" in storesList[i].lower(): - name = storesList[i].split("_")[-1] - new_str = "control_" + str(name).lower() - find_signal = [True for i in storesList if i == new_str] - if len(find_signal) == 0: - src, dst = os.path.join(filepath, arr[0, i] + ".hdf5"), os.path.join( - filepath, "cntrl" + str(i) + ".hdf5" - ) - shutil.copyfile(src, dst) - arr = np.concatenate((arr, [["cntrl" + str(i)], ["control_" + str(arr[1, i].split("_")[-1])]]), axis=1) - - np.savetxt(os.path.join(filepath, "storesList.csv"), arr, delimiter=",", fmt="%s") - - return arr - - # function to execute timestamps corrections using functions timestampCorrection and decide_naming_convention_and_applyCorrection def execute_timestamp_correction(folderNames, inputParameters): From 1f65c14b838096c4625e5895a791fb5d0976a64e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 10:57:02 -0800 Subject: [PATCH 080/150] Moved read and write to standard_io.py. 
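
get_control_and_signal_channel_names, now in io_utils, pairs channels with
nothing more than a case-insensitive sort and a reshape. A self-contained
sketch of that pairing with made-up store names (the callers still verify
that the suffixes in each column match and raise if they do not):

    import numpy as np

    names = ["signal_regionA", "control_regionA", "Signal_regionB", "Control_regionB"]

    # the case-insensitive sort puts every control_* entry ahead of every
    # signal_* entry, so reshape(2, -1) leaves controls in row 0 and signals
    # in row 1, column-aligned by suffix
    channels_arr = np.asarray(sorted(names, key=str.casefold)).reshape(2, -1)

    print(channels_arr[0])  # ['control_regionA' 'Control_regionB']
    print(channels_arr[1])  # ['signal_regionA' 'Signal_regionB']
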
--- src/guppy/analysis/io_utils.py | 19 +++ src/guppy/analysis/timestamp_correction.py | 138 +++++---------------- src/guppy/preprocess.py | 28 ++--- 3 files changed, 66 insertions(+), 119 deletions(-) diff --git a/src/guppy/analysis/io_utils.py b/src/guppy/analysis/io_utils.py index 8b10127..c11edba 100644 --- a/src/guppy/analysis/io_utils.py +++ b/src/guppy/analysis/io_utils.py @@ -166,3 +166,22 @@ def check_storeslistfile(folderNames): storesList = np.unique(storesList, axis=1) return storesList + + +def get_control_and_signal_channel_names(storesList): + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + channels_arr = [] + for i in range(names_for_storenames.shape[0]): + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + channels_arr.append(names_for_storenames[i]) + + channels_arr = sorted(channels_arr, key=str.casefold) + try: + channels_arr = np.asarray(channels_arr).reshape(2, -1) + except: + logger.error("Error in saving stores list file or spelling mistake for control or signal") + raise Exception("Error in saving stores list file or spelling mistake for control or signal") + + return channels_arr diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 709deca..f48a255 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -5,32 +5,47 @@ from .io_utils import ( check_TDT, - read_hdf5, + get_control_and_signal_channel_names, write_hdf5, ) logger = logging.getLogger(__name__) -def write_corrected_timestamps( - filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate, name_to_correctionIndex +def correct_timestamps( + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + name_to_timestamps_ttl, + mode, ): - for name, correctionIndex in name_to_correctionIndex.items(): - timestamps = name_to_timestamps[name] - corrected_timestamps = corrected_name_to_timestamps[name] - sampling_rate = name_to_sampling_rate[name] - if sampling_rate.shape == (): # numpy scalar - sampling_rate = np.asarray([sampling_rate]) - name_1 = name.split("_")[-1] - write_hdf5(np.asarray([timestamps[0]]), "timeCorrection_" + name_1, filepath, "timeRecStart") - write_hdf5(corrected_timestamps, "timeCorrection_" + name_1, filepath, "timestampNew") - write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, "correctionIndex") - write_hdf5(sampling_rate, "timeCorrection_" + name_1, filepath, "sampling_rate") - + name_to_corrected_timestamps, name_to_correctionIndex, name_to_corrected_data = timestampCorrection( + timeForLightsTurnOn, + storesList, + name_to_timestamps, + name_to_data, + name_to_sampling_rate, + name_to_npoints, + mode=mode, + ) + compound_name_to_corrected_ttl_timestamps = decide_naming_and_applyCorrection_ttl( + timeForLightsTurnOn, + storesList, + name_to_timestamps_ttl, + name_to_timestamps, + name_to_data, + mode=mode, + ) -def write_corrected_data(filepath, name_to_corrected_data): - for name, data in name_to_corrected_data.items(): - write_hdf5(data, name, filepath, "data") + return ( + name_to_corrected_timestamps, + name_to_correctionIndex, + name_to_corrected_data, + compound_name_to_corrected_ttl_timestamps, + ) # function to correct timestamps after eliminating first few seconds of the data (for csv or TDT data depending on mode) @@ -263,16 +278,6 @@ def applyCorrection_ttl( return corrected_ttl_timestamps -def 
write_corrected_ttl_timestamps( - filepath, - compound_name_to_corrected_ttl_timestamps, -): - logger.debug("Applying correction of timestamps to the data and event timestamps") - for compound_name, corrected_ttl_timestamps in compound_name_to_corrected_ttl_timestamps.items(): - write_hdf5(corrected_ttl_timestamps, compound_name, filepath, "ts") - logger.info("Timestamps corrections applied to the data and event timestamps.") - - # function to apply correction to control, signal and event timestamps def applyCorrection( filepath, @@ -336,80 +341,3 @@ def check_cntrl_sig_length(channels_arr, name_to_data): indices.append(signal_name) return indices - - -def get_control_and_signal_channel_names(storesList): - storenames = storesList[0, :] - names_for_storenames = storesList[1, :] - - channels_arr = [] - for i in range(names_for_storenames.shape[0]): - if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): - channels_arr.append(names_for_storenames[i]) - - channels_arr = sorted(channels_arr, key=str.casefold) - try: - channels_arr = np.asarray(channels_arr).reshape(2, -1) - except: - logger.error("Error in saving stores list file or spelling mistake for control or signal") - raise Exception("Error in saving stores list file or spelling mistake for control or signal") - - return channels_arr - - -def read_control_and_signal(filepath, storesList): - channels_arr = get_control_and_signal_channel_names(storesList) - storenames = storesList[0, :] - names_for_storenames = storesList[1, :] - - name_to_data = {} - name_to_timestamps = {} - name_to_sampling_rate = {} - name_to_npoints = {} - - for i in range(channels_arr.shape[1]): - control_name = channels_arr[0, i] - signal_name = channels_arr[1, i] - idx_c = np.where(storesList == control_name)[0] - idx_s = np.where(storesList == signal_name)[0] - control_storename = storenames[idx_c[0]] - signal_storename = storenames[idx_s[0]] - - control_data = read_hdf5(control_storename, filepath, "data") - signal_data = read_hdf5(signal_storename, filepath, "data") - control_timestamps = read_hdf5(control_storename, filepath, "timestamps") - signal_timestamps = read_hdf5(signal_storename, filepath, "timestamps") - control_sampling_rate = read_hdf5(control_storename, filepath, "sampling_rate") - signal_sampling_rate = read_hdf5(signal_storename, filepath, "sampling_rate") - try: # TODO: define npoints for csv datasets - control_npoints = read_hdf5(control_storename, filepath, "npoints") - signal_npoints = read_hdf5(signal_storename, filepath, "npoints") - except KeyError: # npoints is not defined for csv datasets - control_npoints = None - signal_npoints = None - - name_to_data[control_name] = control_data - name_to_data[signal_name] = signal_data - name_to_timestamps[control_name] = control_timestamps - name_to_timestamps[signal_name] = signal_timestamps - name_to_sampling_rate[control_name] = control_sampling_rate - name_to_sampling_rate[signal_name] = signal_sampling_rate - name_to_npoints[control_name] = control_npoints - name_to_npoints[signal_name] = signal_npoints - - return name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints - - -def read_ttl(filepath, storesList): - channels_arr = get_control_and_signal_channel_names(storesList) - storenames = storesList[0, :] - names_for_storenames = storesList[1, :] - - name_to_timestamps = {} - for storename, name in zip(storenames, names_for_storenames): - if name in channels_arr: - continue - timestamps = read_hdf5(storename, filepath, "timestamps") - 
name_to_timestamps[name] = timestamps - - return name_to_timestamps diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 9f1f14e..aa0c761 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -21,15 +21,14 @@ read_hdf5, takeOnlyDirs, ) -from .analysis.timestamp_correction import ( - decide_naming_and_applyCorrection_ttl, +from .analysis.standard_io import ( read_control_and_signal, read_ttl, - timestampCorrection, write_corrected_data, write_corrected_timestamps, write_corrected_ttl_timestamps, ) +from .analysis.timestamp_correction import correct_timestamps from .analysis.z_score import compute_z_score logger = logging.getLogger(__name__) @@ -233,15 +232,25 @@ def execute_timestamp_correction(folderNames, inputParameters): control_and_signal_dicts = read_control_and_signal(filepath, storesList) name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = control_and_signal_dicts - name_to_corrected_timestamps, name_to_correctionIndex, name_to_corrected_data = timestampCorrection( + name_to_timestamps_ttl = read_ttl(filepath, storesList) + + timestamps_dicts = correct_timestamps( timeForLightsTurnOn, storesList, name_to_timestamps, name_to_data, name_to_sampling_rate, name_to_npoints, + name_to_timestamps_ttl, mode=mode, ) + ( + name_to_corrected_timestamps, + name_to_correctionIndex, + name_to_corrected_data, + compound_name_to_corrected_ttl_timestamps, + ) = timestamps_dicts + write_corrected_timestamps( filepath, name_to_corrected_timestamps, @@ -250,17 +259,8 @@ def execute_timestamp_correction(folderNames, inputParameters): name_to_correctionIndex, ) write_corrected_data(filepath, name_to_corrected_data) - - name_to_timestamps_ttl = read_ttl(filepath, storesList) - compound_name_to_corrected_ttl_timestamps = decide_naming_and_applyCorrection_ttl( - timeForLightsTurnOn, - storesList, - name_to_timestamps_ttl, - name_to_timestamps, - name_to_data, - mode=mode, - ) write_corrected_ttl_timestamps(filepath, compound_name_to_corrected_ttl_timestamps) + # check if isosbestic control is false and also if new control channel is added if isosbestic_control == False: create_control_channel(filepath, storesList, window=101) From b628232b16de5a59260e8caa09b75a3504a56c40 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 10:57:18 -0800 Subject: [PATCH 081/150] Moved read and write to standard_io.py. 
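
With the read/write halves collected here, execute_timestamp_correction
reduces to read -> correct -> write. A condensed sketch of that call order:
the path, stores list and parameter values are placeholders, imports assume
the package is importable as guppy (as elsewhere in the repo), and it assumes
the per-store HDF5 files from the extraction step already exist under the
output folder; the function names and signatures are the ones introduced in
this series:

    import numpy as np

    from guppy.analysis.standard_io import (
        read_control_and_signal,
        read_ttl,
        write_corrected_data,
        write_corrected_timestamps,
        write_corrected_ttl_timestamps,
    )
    from guppy.analysis.timestamp_correction import correct_timestamps

    filepath = "/path/to/session_output_1"      # placeholder output folder
    storesList = np.array([["Dv1A", "LNnR"],    # placeholder storenames
                           ["control_dLight", "signal_dLight"]])
    timeForLightsTurnOn = 1
    mode = "csv"                                # or "tdt"

    name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints = (
        read_control_and_signal(filepath, storesList)
    )
    name_to_timestamps_ttl = read_ttl(filepath, storesList)

    (
        name_to_corrected_timestamps,
        name_to_correctionIndex,
        name_to_corrected_data,
        compound_name_to_corrected_ttl_timestamps,
    ) = correct_timestamps(
        timeForLightsTurnOn,
        storesList,
        name_to_timestamps,
        name_to_data,
        name_to_sampling_rate,
        name_to_npoints,
        name_to_timestamps_ttl,
        mode=mode,
    )

    write_corrected_timestamps(
        filepath,
        name_to_corrected_timestamps,
        name_to_timestamps,
        name_to_sampling_rate,
        name_to_correctionIndex,
    )
    write_corrected_data(filepath, name_to_corrected_data)
    write_corrected_ttl_timestamps(filepath, compound_name_to_corrected_ttl_timestamps)
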
--- src/guppy/analysis/standard_io.py | 100 ++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 src/guppy/analysis/standard_io.py diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py new file mode 100644 index 0000000..2ce8189 --- /dev/null +++ b/src/guppy/analysis/standard_io.py @@ -0,0 +1,100 @@ +import logging + +import numpy as np + +from .io_utils import ( + get_control_and_signal_channel_names, + read_hdf5, + write_hdf5, +) + +logger = logging.getLogger(__name__) + + +def read_control_and_signal(filepath, storesList): + channels_arr = get_control_and_signal_channel_names(storesList) + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + name_to_data = {} + name_to_timestamps = {} + name_to_sampling_rate = {} + name_to_npoints = {} + + for i in range(channels_arr.shape[1]): + control_name = channels_arr[0, i] + signal_name = channels_arr[1, i] + idx_c = np.where(storesList == control_name)[0] + idx_s = np.where(storesList == signal_name)[0] + control_storename = storenames[idx_c[0]] + signal_storename = storenames[idx_s[0]] + + control_data = read_hdf5(control_storename, filepath, "data") + signal_data = read_hdf5(signal_storename, filepath, "data") + control_timestamps = read_hdf5(control_storename, filepath, "timestamps") + signal_timestamps = read_hdf5(signal_storename, filepath, "timestamps") + control_sampling_rate = read_hdf5(control_storename, filepath, "sampling_rate") + signal_sampling_rate = read_hdf5(signal_storename, filepath, "sampling_rate") + try: # TODO: define npoints for csv datasets + control_npoints = read_hdf5(control_storename, filepath, "npoints") + signal_npoints = read_hdf5(signal_storename, filepath, "npoints") + except KeyError: # npoints is not defined for csv datasets + control_npoints = None + signal_npoints = None + + name_to_data[control_name] = control_data + name_to_data[signal_name] = signal_data + name_to_timestamps[control_name] = control_timestamps + name_to_timestamps[signal_name] = signal_timestamps + name_to_sampling_rate[control_name] = control_sampling_rate + name_to_sampling_rate[signal_name] = signal_sampling_rate + name_to_npoints[control_name] = control_npoints + name_to_npoints[signal_name] = signal_npoints + + return name_to_data, name_to_timestamps, name_to_sampling_rate, name_to_npoints + + +def read_ttl(filepath, storesList): + channels_arr = get_control_and_signal_channel_names(storesList) + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + + name_to_timestamps = {} + for storename, name in zip(storenames, names_for_storenames): + if name in channels_arr: + continue + timestamps = read_hdf5(storename, filepath, "timestamps") + name_to_timestamps[name] = timestamps + + return name_to_timestamps + + +def write_corrected_timestamps( + filepath, corrected_name_to_timestamps, name_to_timestamps, name_to_sampling_rate, name_to_correctionIndex +): + for name, correctionIndex in name_to_correctionIndex.items(): + timestamps = name_to_timestamps[name] + corrected_timestamps = corrected_name_to_timestamps[name] + sampling_rate = name_to_sampling_rate[name] + if sampling_rate.shape == (): # numpy scalar + sampling_rate = np.asarray([sampling_rate]) + name_1 = name.split("_")[-1] + write_hdf5(np.asarray([timestamps[0]]), "timeCorrection_" + name_1, filepath, "timeRecStart") + write_hdf5(corrected_timestamps, "timeCorrection_" + name_1, filepath, "timestampNew") + write_hdf5(correctionIndex, "timeCorrection_" + name_1, filepath, 
"correctionIndex") + write_hdf5(sampling_rate, "timeCorrection_" + name_1, filepath, "sampling_rate") + + +def write_corrected_data(filepath, name_to_corrected_data): + for name, data in name_to_corrected_data.items(): + write_hdf5(data, name, filepath, "data") + + +def write_corrected_ttl_timestamps( + filepath, + compound_name_to_corrected_ttl_timestamps, +): + logger.debug("Applying correction of timestamps to the data and event timestamps") + for compound_name, corrected_ttl_timestamps in compound_name_to_corrected_ttl_timestamps.items(): + write_hdf5(corrected_ttl_timestamps, compound_name, filepath, "ts") + logger.info("Timestamps corrections applied to the data and event timestamps.") From 90e838bccde583051ddbf52e5d8902f4c4f01c00 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 11:06:02 -0800 Subject: [PATCH 082/150] Removed unused functions after the refactor. --- src/guppy/analysis/timestamp_correction.py | 145 +-------------------- 1 file changed, 1 insertion(+), 144 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index f48a255..60cf76a 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -1,13 +1,8 @@ import logging -import os import numpy as np -from .io_utils import ( - check_TDT, - get_control_and_signal_channel_names, - write_hdf5, -) +from .io_utils import get_control_and_signal_channel_names logger = logging.getLogger(__name__) @@ -122,99 +117,6 @@ def timestampCorrection( return name_to_corrected_timestamps, name_to_correctionIndex, name_to_corrected_data -# function to check if naming convention was followed while saving storeslist file -# and apply timestamps correction using the function applyCorrection -def decide_naming_convention_and_applyCorrection( - filepath, - timeForLightsTurnOn, - event, - displayName, - storesList, - name_1_to_corrected_timestamps, - name_1_to_timestamps, - name_1_to_sampling_rate, - name_1_to_correctionIndex, - data, - ttl_timestamps, -): - - logger.debug("Applying correction of timestamps to the data and event timestamps") - arr = get_control_and_signal_channel_names(storesList) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - if name_1 != name_2: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - else: - corrected_timestamps = name_1_to_corrected_timestamps[name_1] - timestamps = name_1_to_timestamps[name_1] - timeRecStart = timestamps[0] - sampling_rate = name_1_to_sampling_rate[name_1] - correctionIndex = name_1_to_correctionIndex[name_1] - applyCorrection( - filepath, - timeForLightsTurnOn, - event, - displayName, - name_1, - corrected_timestamps, - sampling_rate, - correctionIndex, - timeRecStart, - data, - ttl_timestamps, - ) - - logger.info("Timestamps corrections applied to the data and event timestamps.") - - -def decide_naming_and_applyCorrection_signal_and_control( - filepath, - storesList, - name_to_correctionIndex, - name_to_data, -): - logger.debug("Applying correction of timestamps to the data and event timestamps") - storenames = storesList[0, :] - names_for_storenames = storesList[1, :] - arr = get_control_and_signal_channel_names(storesList) - indices = check_cntrl_sig_length(arr, name_to_data) - - for i in range(arr.shape[1]): - name_1 = arr[0, i].split("_")[-1] - name_2 = arr[1, i].split("_")[-1] - if name_1 != 
name_2: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - idx = np.where(names_for_storenames == indices[i])[0] - if idx.shape[0] == 0: - logger.error(f"{arr[0,i]} does not exist in the stores list file.") - raise Exception("{} does not exist in the stores list file.".format(arr[0, i])) - - name = names_for_storenames[idx][0] - correctionIndex = name_to_correctionIndex[name] - control_name = arr[0, i] - signal_name = arr[1, i] - control_data = name_to_data[control_name] - signal_data = name_to_data[signal_name] - applyCorrection_signal_and_control(filepath, control_name, correctionIndex, control_data) - applyCorrection_signal_and_control(filepath, signal_name, correctionIndex, signal_data) - - logger.info("Timestamps corrections applied to the data and event timestamps.") - - -def applyCorrection_signal_and_control(filepath, displayName, correctionIndex, data): - arr = data - if (arr == 0).all() == True: - arr = arr - else: - arr = arr[correctionIndex] - write_hdf5(arr, displayName, filepath, "data") - - def decide_naming_and_applyCorrection_ttl( timeForLightsTurnOn, storesList, @@ -278,51 +180,6 @@ def applyCorrection_ttl( return corrected_ttl_timestamps -# function to apply correction to control, signal and event timestamps -def applyCorrection( - filepath, - timeForLightsTurnOn, - event, - displayName, - naming, - corrected_timestamps, - sampling_rate, - correctionIndex, - timeRecStart, - data, - ttl_timestamps, -): - - cond = check_TDT(os.path.dirname(filepath)) - - timestampNew = corrected_timestamps - if "control" in displayName.lower() or "signal" in displayName.lower(): - # TODO: double-check that this code is not reachable - # split_name = displayName.split("_")[-1] - # if split_name == naming: - # pass - # else: - # correctionIndex = read_hdf5("timeCorrection_" + split_name, filepath, "correctionIndex") - arr = data - if (arr == 0).all() == True: - arr = arr - else: - arr = arr[correctionIndex] - write_hdf5(arr, displayName, filepath, "data") - else: - arr = ttl_timestamps - if cond == True: - res = (arr >= timeRecStart).all() - if res == True: - arr = np.subtract(arr, timeRecStart) - arr = np.subtract(arr, timeForLightsTurnOn) - else: - arr = np.subtract(arr, timeForLightsTurnOn) - else: - arr = np.subtract(arr, timeForLightsTurnOn) - write_hdf5(arr, displayName + "_" + naming, filepath, "ts") - - # function to check control and signal channel has same length # if not, take a smaller length and do pre-processing def check_cntrl_sig_length(channels_arr, name_to_data): From bf57616f1671a0c5a0ca674cceb6c36cbdbc8fe5 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 11:47:54 -0800 Subject: [PATCH 083/150] Refactored artifact removal separate from z score --- src/guppy/preprocess.py | 62 ++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index aa0c761..4f72929 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -270,15 +270,11 @@ def execute_timestamp_correction(folderNames, inputParameters): logger.info(f"Timestamps corrections finished for {filepath}") -# function to compute z-score and deltaF/F using functions : compute_z_score and/or processTimestampsForArtifacts +# function to compute z-score and deltaF/F def execute_zscore(folderNames, inputParameters): - timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] - 
remove_artifacts = inputParameters["removeArtifacts"] - artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] plot_zScore_dff = inputParameters["plot_zScore_dff"] combine_data = inputParameters["combine_data"] - isosbestic_control = inputParameters["isosbestic_control"] storesListPath = [] for i in range(len(folderNames)): @@ -292,20 +288,9 @@ def execute_zscore(folderNames, inputParameters): for j in range(len(storesListPath)): filepath = storesListPath[j] - storesList = np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) - if remove_artifacts == True: - logger.debug("Removing Artifacts from the data and correcting timestamps...") - compute_z_score(filepath, inputParameters) - if artifactsRemovalMethod == "concatenate": - processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList) - else: - addingNaNtoChunksWithArtifacts(filepath, storesList) - visualizeControlAndSignal(filepath, remove_artifacts) - logger.info("Artifacts from the data are removed and timestamps are corrected.") - else: - compute_z_score(filepath, inputParameters) - visualizeControlAndSignal(filepath, remove_artifacts) + compute_z_score(filepath, inputParameters) + visualizeControlAndSignal(filepath, removeArtifacts=False) if plot_zScore_dff == "z_score": visualize_z_score(filepath) @@ -319,7 +304,42 @@ def execute_zscore(folderNames, inputParameters): inputParameters["step"] += 1 plt.show() - logger.info("Signal data and event timestamps are extracted.") + logger.info("Z-score computation completed.") + + +# function to remove artifacts from z-score data +def execute_artifact_removal(folderNames, inputParameters): + + timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] + artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] + combine_data = inputParameters["combine_data"] + + storesListPath = [] + for i in range(len(folderNames)): + if combine_data == True: + storesListPath.append([folderNames[i][0]]) + else: + filepath = folderNames[i] + storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) + + storesListPath = np.concatenate(storesListPath) + + for j in range(len(storesListPath)): + filepath = storesListPath[j] + storesList = np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) + + logger.debug("Removing artifacts from the data...") + if artifactsRemovalMethod == "concatenate": + processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList) + else: + addingNaNtoChunksWithArtifacts(filepath, storesList) + visualizeControlAndSignal(filepath, removeArtifacts=True) + logger.info("Artifacts removed and timestamps corrected.") + + writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + + logger.info("Artifact removal completed.") def extractTsAndSignal(inputParameters): @@ -350,6 +370,8 @@ def extractTsAndSignal(inputParameters): writeToFile(str((pbMaxValue + 1) * 10) + "\n" + str(10) + "\n") execute_timestamp_correction(folderNames, inputParameters) execute_zscore(folderNames, inputParameters) + if remove_artifacts == True: + execute_artifact_removal(folderNames, inputParameters) else: pbMaxValue = 1 + len(folderNames) writeToFile(str((pbMaxValue) * 10) + "\n" + str(10) + "\n") @@ -357,6 +379,8 @@ def extractTsAndSignal(inputParameters): storesList = check_storeslistfile(folderNames) op_folder = combineData(folderNames, inputParameters, storesList) execute_zscore(op_folder, inputParameters) + if 
remove_artifacts == True: + execute_artifact_removal(op_folder, inputParameters) def main(input_parameters): From a03d018fb3ee1a5cf5558a8a8afc34f8019d665a Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 14:27:38 -0800 Subject: [PATCH 084/150] Added artifact removal parameter back to execute_zscore. --- src/guppy/preprocess.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 4f72929..ad4507e 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -275,6 +275,7 @@ def execute_zscore(folderNames, inputParameters): plot_zScore_dff = inputParameters["plot_zScore_dff"] combine_data = inputParameters["combine_data"] + remove_artifacts = inputParameters["removeArtifacts"] storesListPath = [] for i in range(len(folderNames)): @@ -290,7 +291,8 @@ def execute_zscore(folderNames, inputParameters): filepath = storesListPath[j] compute_z_score(filepath, inputParameters) - visualizeControlAndSignal(filepath, removeArtifacts=False) + if not remove_artifacts: + visualizeControlAndSignal(filepath, removeArtifacts=remove_artifacts) if plot_zScore_dff == "z_score": visualize_z_score(filepath) @@ -334,11 +336,11 @@ def execute_artifact_removal(folderNames, inputParameters): else: addingNaNtoChunksWithArtifacts(filepath, storesList) visualizeControlAndSignal(filepath, removeArtifacts=True) - logger.info("Artifacts removed and timestamps corrected.") writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") inputParameters["step"] += 1 + plt.show() logger.info("Artifact removal completed.") From e0a4ca80e6b470c6d9d53e2a8c3032e93246e5a9 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 14:31:13 -0800 Subject: [PATCH 085/150] Removed idle removeArtifacts parameter from compute z-score function. 
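Both branches of the removed conditional wrote the same three datasets, so the writes are now unconditional. For reference, a downstream reader can fetch the stored results as in this minimal sketch; the output folder and channel suffix below are hypothetical:

    from guppy.analysis.io_utils import read_hdf5

    filepath = "/data/session1_output_1"  # hypothetical output folder
    name = "region1"                      # hypothetical channel suffix

    # compute_z_score stores these three datasets regardless of removeArtifacts
    z_score = read_hdf5("z_score_" + name, filepath, "data")
    dff = read_hdf5("dff_" + name, filepath, "data")
    control_fit = read_hdf5("cntrl_sig_fit_" + name, filepath, "data")
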
--- src/guppy/analysis/z_score.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index b5032be..87bf184 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -19,7 +19,6 @@ def compute_z_score(filepath, inputParameters): logger.debug(f"Computing z-score for each of the data in {filepath}") - remove_artifacts = inputParameters["removeArtifacts"] path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) @@ -48,14 +47,9 @@ def compute_z_score(filepath, inputParameters): # signal_smooth = ss.filtfilt(b, a, signal) # _score, dff = helper_z_score(control_smooth, signal_smooth) z_score, dff, control_fit = helper_z_score(control, signal, filepath, name, inputParameters) - if remove_artifacts == True: - write_hdf5(z_score, "z_score_" + name, filepath, "data") - write_hdf5(dff, "dff_" + name, filepath, "data") - write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") - else: - write_hdf5(z_score, "z_score_" + name, filepath, "data") - write_hdf5(dff, "dff_" + name, filepath, "data") - write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") + write_hdf5(z_score, "z_score_" + name, filepath, "data") + write_hdf5(dff, "dff_" + name, filepath, "data") + write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") else: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") From 44292ae41c2e7cc7ff2a94c93040da30ddba739d Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 15:55:25 -0800 Subject: [PATCH 086/150] Streamlined remove artifact branch of the helper_z_score function. 
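When artifacts are not being removed, the branch now builds a single chunk spanning the whole recording, padded by one sample interval on each side, so the per-chunk loop handles both cases with one code path. A minimal check of that default, using a synthetic evenly spaced timestamp vector:

    import numpy as np

    tsNew = np.linspace(0.0, 10.0, 1001)  # synthetic timestamps
    dt = tsNew[1] - tsNew[0]
    coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]])

    # The strict inequalities used in the chunk loop still select every sample,
    # because the bounds are padded by one sample interval on each side.
    tsNew_index = np.where((tsNew > coords[0, 0]) & (tsNew < coords[0, 1]))[0]
    assert np.array_equal(tsNew_index, np.arange(tsNew.shape[0]))
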
--- src/guppy/analysis/z_score.py | 62 +++++++++++++++-------------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 87bf184..5f64d7f 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -80,42 +80,34 @@ def helper_z_score(control, signal, filepath, name, inputParameters): # helper_ if removeArtifacts == True: coords = fetchCoords(filepath, name, tsNew) - - # for artifacts removal, each chunk which was selected by user is being processed individually and then - # z-score is calculated - for i in range(coords.shape[0]): - tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] - if isosbestic_control == False: - control_arr = helper_create_control_channel(signal[tsNew_index], tsNew[tsNew_index], window=101) - signal_arr = signal[tsNew_index] - norm_data, control_fit = execute_controlFit_dff( - control_arr, signal_arr, isosbestic_control, filter_window - ) - temp_control_arr[tsNew_index] = control_arr - if i < coords.shape[0] - 1: - blank_index = np.where((tsNew > coords[i, 1]) & (tsNew < coords[i + 1, 0]))[0] - temp_control_arr[blank_index] = np.full(blank_index.shape[0], np.nan) - else: - control_arr = control[tsNew_index] - signal_arr = signal[tsNew_index] - norm_data, control_fit = execute_controlFit_dff( - control_arr, signal_arr, isosbestic_control, filter_window - ) - norm_data_arr[tsNew_index] = norm_data - control_fit_arr[tsNew_index] = control_fit - - if artifactsRemovalMethod == "concatenate": - norm_data_arr = norm_data_arr[~np.isnan(norm_data_arr)] - control_fit_arr = control_fit_arr[~np.isnan(control_fit_arr)] - z_score = z_score_computation(norm_data_arr, tsNew, inputParameters) - z_score_arr = np.concatenate((z_score_arr, z_score)) else: - tsNew_index = np.arange(tsNew.shape[0]) - norm_data, control_fit = execute_controlFit_dff(control, signal, isosbestic_control, filter_window) - z_score = z_score_computation(norm_data, tsNew, inputParameters) - z_score_arr = np.concatenate((z_score_arr, z_score)) - norm_data_arr[tsNew_index] = norm_data # np.concatenate((norm_data_arr, norm_data)) - control_fit_arr[tsNew_index] = control_fit # np.concatenate((control_fit_arr, control_fit)) + dt = tsNew[1] - tsNew[0] + coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]]) + + # for artifacts removal, each chunk which was selected by user is being processed individually and then + # z-score is calculated + for i in range(coords.shape[0]): + tsNew_index = np.where((tsNew > coords[i, 0]) & (tsNew < coords[i, 1]))[0] + if isosbestic_control == False: + control_arr = helper_create_control_channel(signal[tsNew_index], tsNew[tsNew_index], window=101) + signal_arr = signal[tsNew_index] + norm_data, control_fit = execute_controlFit_dff(control_arr, signal_arr, isosbestic_control, filter_window) + temp_control_arr[tsNew_index] = control_arr + if i < coords.shape[0] - 1: + blank_index = np.where((tsNew > coords[i, 1]) & (tsNew < coords[i + 1, 0]))[0] + temp_control_arr[blank_index] = np.full(blank_index.shape[0], np.nan) + else: + control_arr = control[tsNew_index] + signal_arr = signal[tsNew_index] + norm_data, control_fit = execute_controlFit_dff(control_arr, signal_arr, isosbestic_control, filter_window) + norm_data_arr[tsNew_index] = norm_data + control_fit_arr[tsNew_index] = control_fit + + if artifactsRemovalMethod == "concatenate": + norm_data_arr = norm_data_arr[~np.isnan(norm_data_arr)] + control_fit_arr = control_fit_arr[~np.isnan(control_fit_arr)] + z_score 
= z_score_computation(norm_data_arr, tsNew, inputParameters) + z_score_arr = np.concatenate((z_score_arr, z_score)) # handle the case if there are chunks being cut in the front and the end if isosbestic_control == False and removeArtifacts == True: From 6da97c08ec9da0448b9a7ace28f31ebea463b62b Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 16:02:49 -0800 Subject: [PATCH 087/150] Streamlined remove artifact branch of the helper_z_score function pt 2 --- src/guppy/analysis/control_channel.py | 1 + src/guppy/analysis/z_score.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/guppy/analysis/control_channel.py b/src/guppy/analysis/control_channel.py index d9f6ad8..605bd17 100644 --- a/src/guppy/analysis/control_channel.py +++ b/src/guppy/analysis/control_channel.py @@ -1,5 +1,6 @@ import logging import os +import shutil import numpy as np import pandas as pd diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 5f64d7f..9472322 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -110,7 +110,7 @@ def helper_z_score(control, signal, filepath, name, inputParameters): # helper_ z_score_arr = np.concatenate((z_score_arr, z_score)) # handle the case if there are chunks being cut in the front and the end - if isosbestic_control == False and removeArtifacts == True: + if isosbestic_control == False: coords = coords.flatten() # front chunk idx = np.where((tsNew >= tsNew[0]) & (tsNew < coords[0]))[0] From d8bfcc0d8ba9c1e06b9c484613dd6e4c7fec3d05 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 16:14:14 -0800 Subject: [PATCH 088/150] Pulled remove_artifact code out of helper_z_score --- src/guppy/analysis/z_score.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 9472322..60bb88a 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -46,7 +46,14 @@ def compute_z_score(filepath, inputParameters): # control_smooth = ss.filtfilt(b, a, control) # signal_smooth = ss.filtfilt(b, a, signal) # _score, dff = helper_z_score(control_smooth, signal_smooth) - z_score, dff, control_fit = helper_z_score(control, signal, filepath, name, inputParameters) + tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + removeArtifacts = inputParameters["removeArtifacts"] + if removeArtifacts == True: + coords = fetchCoords(filepath, name, tsNew) + else: + dt = tsNew[1] - tsNew[0] + coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]]) + z_score, dff, control_fit = helper_z_score(control, signal, filepath, name, inputParameters, coords) write_hdf5(z_score, "z_score_" + name, filepath, "data") write_hdf5(dff, "dff_" + name, filepath, "data") write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") @@ -58,9 +65,10 @@ def compute_z_score(filepath, inputParameters): # helper function to compute z-score and deltaF/F -def helper_z_score(control, signal, filepath, name, inputParameters): # helper_z_score(control_smooth, signal_smooth): +def helper_z_score( + control, signal, filepath, name, inputParameters, coords +): # helper_z_score(control_smooth, signal_smooth): - removeArtifacts = inputParameters["removeArtifacts"] artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] filter_window = inputParameters["filter_window"] @@ -68,8 +76,6 @@ def helper_z_score(control, signal, filepath, name, inputParameters): # helper_ tsNew = 
read_hdf5("timeCorrection_" + name, filepath, "timestampNew") coords_path = os.path.join(filepath, "coordsForPreProcessing_" + name + ".npy") - logger.info("Remove Artifacts : ", removeArtifacts) - if (control == 0).all() == True: control = np.zeros(tsNew.shape[0]) @@ -78,12 +84,6 @@ def helper_z_score(control, signal, filepath, name, inputParameters): # helper_ control_fit_arr = np.full(tsNew.shape[0], np.nan) temp_control_arr = np.full(tsNew.shape[0], np.nan) - if removeArtifacts == True: - coords = fetchCoords(filepath, name, tsNew) - else: - dt = tsNew[1] - tsNew[0] - coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]]) - # for artifacts removal, each chunk which was selected by user is being processed individually and then # z-score is calculated for i in range(coords.shape[0]): From b33c522ed317376f771794f166003d98bc815f4c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 16:28:20 -0800 Subject: [PATCH 089/150] Pulled remove_artifact code into dedicated fn --- src/guppy/analysis/z_score.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 60bb88a..7537f9d 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -25,35 +25,26 @@ def compute_z_score(filepath, inputParameters): path = sorted(path_1 + path_2, key=str.casefold) - b = np.divide(np.ones((100,)), 100) - a = 1 - if len(path) % 2 != 0: logger.error("There are not equal number of Control and Signal data") raise Exception("There are not equal number of Control and Signal data") path = np.asarray(path).reshape(2, -1) + removeArtifacts = inputParameters["removeArtifacts"] for i in range(path.shape[1]): name_1 = ((os.path.basename(path[0, i])).split(".")[0]).split("_") name_2 = ((os.path.basename(path[1, i])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) if name_1[-1] == name_2[-1]: name = name_1[-1] control = read_hdf5("", path[0, i], "data").reshape(-1) signal = read_hdf5("", path[1, i], "data").reshape(-1) - # control_smooth = ss.filtfilt(b, a, control) - # signal_smooth = ss.filtfilt(b, a, signal) - # _score, dff = helper_z_score(control_smooth, signal_smooth) tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - removeArtifacts = inputParameters["removeArtifacts"] - if removeArtifacts == True: - coords = fetchCoords(filepath, name, tsNew) - else: - dt = tsNew[1] - tsNew[0] - coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]]) - z_score, dff, control_fit = helper_z_score(control, signal, filepath, name, inputParameters, coords) + + coords = get_coords(filepath, name, tsNew, removeArtifacts) + z_score, dff, control_fit = helper_z_score(control, signal, tsNew, filepath, name, inputParameters, coords) + write_hdf5(z_score, "z_score_" + name, filepath, "data") write_hdf5(dff, "dff_" + name, filepath, "data") write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") @@ -64,17 +55,23 @@ def compute_z_score(filepath, inputParameters): logger.info(f"z-score for the data in {filepath} computed.") +def get_coords(filepath, name, tsNew, removeArtifacts): # TODO: Make less redundant with fetchCoords + if removeArtifacts == True: + coords = fetchCoords(filepath, name, tsNew) + else: + dt = tsNew[1] - tsNew[0] + coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]]) + return coords + + # helper function to compute z-score and deltaF/F def helper_z_score( - control, signal, filepath, name, inputParameters, coords + control, signal, tsNew, 
filepath, name, inputParameters, coords ): # helper_z_score(control_smooth, signal_smooth): artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] filter_window = inputParameters["filter_window"] - isosbestic_control = inputParameters["isosbestic_control"] - tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - coords_path = os.path.join(filepath, "coordsForPreProcessing_" + name + ".npy") if (control == 0).all() == True: control = np.zeros(tsNew.shape[0]) From e87c80963224de6e298fab3c50514598cf6a0009 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 16:44:41 -0800 Subject: [PATCH 090/150] Pulled write code out of helper_z_score --- src/guppy/analysis/z_score.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 7537f9d..0dd4171 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -43,11 +43,15 @@ def compute_z_score(filepath, inputParameters): tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") coords = get_coords(filepath, name, tsNew, removeArtifacts) - z_score, dff, control_fit = helper_z_score(control, signal, tsNew, filepath, name, inputParameters, coords) + z_score, dff, control_fit, temp_control_arr = helper_z_score( + control, signal, tsNew, filepath, name, inputParameters, coords + ) write_hdf5(z_score, "z_score_" + name, filepath, "data") write_hdf5(dff, "dff_" + name, filepath, "data") write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") + if temp_control_arr is not None: + write_hdf5(temp_control_arr, "control_" + name, filepath, "data") else: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") @@ -115,9 +119,10 @@ def helper_z_score( # end chunk idx = np.where((tsNew > coords[-1]) & (tsNew <= tsNew[-1]))[0] temp_control_arr[idx] = np.full(idx.shape[0], np.nan) - write_hdf5(temp_control_arr, "control_" + name, filepath, "data") + else: + temp_control_arr = None - return z_score_arr, norm_data_arr, control_fit_arr + return z_score_arr, norm_data_arr, control_fit_arr, temp_control_arr # function to filter control and signal channel, also execute above two function : controlFit and deltaFF From cf7345888e6c42e330263ca596271348b36d57a7 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 16:47:26 -0800 Subject: [PATCH 091/150] inverted input handling --- src/guppy/analysis/z_score.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 0dd4171..8fc598b 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -35,27 +35,26 @@ def compute_z_score(filepath, inputParameters): for i in range(path.shape[1]): name_1 = ((os.path.basename(path[0, i])).split(".")[0]).split("_") name_2 = ((os.path.basename(path[1, i])).split(".")[0]).split("_") - - if name_1[-1] == name_2[-1]: - name = name_1[-1] - control = read_hdf5("", path[0, i], "data").reshape(-1) - signal = read_hdf5("", path[1, i], "data").reshape(-1) - tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - - coords = get_coords(filepath, name, tsNew, removeArtifacts) - z_score, dff, control_fit, temp_control_arr = helper_z_score( - control, signal, tsNew, filepath, name, inputParameters, coords - ) - - write_hdf5(z_score, "z_score_" + name, filepath, 
"data") - write_hdf5(dff, "dff_" + name, filepath, "data") - write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") - if temp_control_arr is not None: - write_hdf5(temp_control_arr, "control_" + name, filepath, "data") - else: + if name_1[-1] != name_2[-1]: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") + name = name_1[-1] + control = read_hdf5("", path[0, i], "data").reshape(-1) + signal = read_hdf5("", path[1, i], "data").reshape(-1) + tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + + coords = get_coords(filepath, name, tsNew, removeArtifacts) + z_score, dff, control_fit, temp_control_arr = helper_z_score( + control, signal, tsNew, filepath, name, inputParameters, coords + ) + + write_hdf5(z_score, "z_score_" + name, filepath, "data") + write_hdf5(dff, "dff_" + name, filepath, "data") + write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") + if temp_control_arr is not None: + write_hdf5(temp_control_arr, "control_" + name, filepath, "data") + logger.info(f"z-score for the data in {filepath} computed.") From 7304fae988fdf569532f6918acabf0b6b902b08e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 15 Dec 2025 16:50:34 -0800 Subject: [PATCH 092/150] removed unnecessary parameters --- src/guppy/analysis/z_score.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 8fc598b..1afe9e5 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -45,9 +45,7 @@ def compute_z_score(filepath, inputParameters): tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") coords = get_coords(filepath, name, tsNew, removeArtifacts) - z_score, dff, control_fit, temp_control_arr = helper_z_score( - control, signal, tsNew, filepath, name, inputParameters, coords - ) + z_score, dff, control_fit, temp_control_arr = helper_z_score(control, signal, tsNew, inputParameters, coords) write_hdf5(z_score, "z_score_" + name, filepath, "data") write_hdf5(dff, "dff_" + name, filepath, "data") @@ -68,9 +66,7 @@ def get_coords(filepath, name, tsNew, removeArtifacts): # TODO: Make less redun # helper function to compute z-score and deltaF/F -def helper_z_score( - control, signal, tsNew, filepath, name, inputParameters, coords -): # helper_z_score(control_smooth, signal_smooth): +def helper_z_score(control, signal, tsNew, inputParameters, coords): artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] filter_window = inputParameters["filter_window"] From 965f62b4edc3455c6414eea6432b6325caa69580 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 11:40:26 -0800 Subject: [PATCH 093/150] purified helper_z_score --- src/guppy/analysis/z_score.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 1afe9e5..167863a 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -18,6 +18,10 @@ # compute z-score and deltaF/F and save it to hdf5 file def compute_z_score(filepath, inputParameters): + artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] + filter_window = inputParameters["filter_window"] + isosbestic_control = inputParameters["isosbestic_control"] + logger.debug(f"Computing z-score for each of the data in {filepath}") path_1 = find_files(filepath, "control_*", 
ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) @@ -45,7 +49,9 @@ def compute_z_score(filepath, inputParameters): tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") coords = get_coords(filepath, name, tsNew, removeArtifacts) - z_score, dff, control_fit, temp_control_arr = helper_z_score(control, signal, tsNew, inputParameters, coords) + z_score, dff, control_fit, temp_control_arr = helper_z_score( + control, signal, tsNew, inputParameters, coords, artifactsRemovalMethod, filter_window, isosbestic_control + ) write_hdf5(z_score, "z_score_" + name, filepath, "data") write_hdf5(dff, "dff_" + name, filepath, "data") @@ -66,12 +72,9 @@ def get_coords(filepath, name, tsNew, removeArtifacts): # TODO: Make less redun # helper function to compute z-score and deltaF/F -def helper_z_score(control, signal, tsNew, inputParameters, coords): - - artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] - filter_window = inputParameters["filter_window"] - isosbestic_control = inputParameters["isosbestic_control"] - +def helper_z_score( + control, signal, tsNew, inputParameters, coords, artifactsRemovalMethod, filter_window, isosbestic_control +): if (control == 0).all() == True: control = np.zeros(tsNew.shape[0]) From c49d05f32bf2933abdc02bdeac73ed4ad2043607 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 11:44:38 -0800 Subject: [PATCH 094/150] purified z_score_computation --- src/guppy/analysis/z_score.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 167863a..7dae540 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -75,6 +75,8 @@ def get_coords(filepath, name, tsNew, removeArtifacts): # TODO: Make less redun def helper_z_score( control, signal, tsNew, inputParameters, coords, artifactsRemovalMethod, filter_window, isosbestic_control ): + zscore_method = inputParameters["zscore_method"] + baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] if (control == 0).all() == True: control = np.zeros(tsNew.shape[0]) @@ -105,7 +107,7 @@ def helper_z_score( if artifactsRemovalMethod == "concatenate": norm_data_arr = norm_data_arr[~np.isnan(norm_data_arr)] control_fit_arr = control_fit_arr[~np.isnan(control_fit_arr)] - z_score = z_score_computation(norm_data_arr, tsNew, inputParameters) + z_score = z_score_computation(norm_data_arr, tsNew, zscore_method, baseline_start, baseline_end) z_score_arr = np.concatenate((z_score_arr, z_score)) # handle the case if there are chunks being cut in the front and the end @@ -173,11 +175,7 @@ def filterSignal(filter_window, signal): # function to compute z-score based on z-score computation method -def z_score_computation(dff, timestamps, inputParameters): - - zscore_method = inputParameters["zscore_method"] - baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] - +def z_score_computation(dff, timestamps, zscore_method, baseline_start, baseline_end): if zscore_method == "standard z-score": numerator = np.subtract(dff, np.nanmean(dff)) zscore = np.divide(numerator, np.nanstd(dff)) From a88c026aef77be33f3154caaed65b2d595be11d8 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 11:46:53 -0800 Subject: [PATCH 095/150] purified helper_z_score --- src/guppy/analysis/z_score.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git 
a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 7dae540..31645b5 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -21,6 +21,8 @@ def compute_z_score(filepath, inputParameters): artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] filter_window = inputParameters["filter_window"] isosbestic_control = inputParameters["isosbestic_control"] + zscore_method = inputParameters["zscore_method"] + baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] logger.debug(f"Computing z-score for each of the data in {filepath}") @@ -50,7 +52,16 @@ def compute_z_score(filepath, inputParameters): coords = get_coords(filepath, name, tsNew, removeArtifacts) z_score, dff, control_fit, temp_control_arr = helper_z_score( - control, signal, tsNew, inputParameters, coords, artifactsRemovalMethod, filter_window, isosbestic_control + control, + signal, + tsNew, + coords, + artifactsRemovalMethod, + filter_window, + isosbestic_control, + zscore_method, + baseline_start, + baseline_end, ) write_hdf5(z_score, "z_score_" + name, filepath, "data") @@ -73,10 +84,17 @@ def get_coords(filepath, name, tsNew, removeArtifacts): # TODO: Make less redun # helper function to compute z-score and deltaF/F def helper_z_score( - control, signal, tsNew, inputParameters, coords, artifactsRemovalMethod, filter_window, isosbestic_control + control, + signal, + tsNew, + coords, + artifactsRemovalMethod, + filter_window, + isosbestic_control, + zscore_method, + baseline_start, + baseline_end, ): - zscore_method = inputParameters["zscore_method"] - baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] if (control == 0).all() == True: control = np.zeros(tsNew.shape[0]) From bf268f81147a5b471d9506c63a26ab34080074f9 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 12:12:07 -0800 Subject: [PATCH 096/150] Refactored zscore to use a single high-level compute_zscore function that is pure and moved all the impure code into execute_zscore in preprocess.py. 
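With the I/O pulled out, the per-channel flow in execute_zscore reduces to read, compute, write. A sketch of that flow for one control/signal pair follows; the folder, channel suffix, file locations, and parameter values are illustrative assumptions, not fixed defaults:

    from guppy.analysis.io_utils import get_coords
    from guppy.analysis.standard_io import read_corrected_data, write_zscore
    from guppy.analysis.z_score import compute_z_score

    filepath = "/data/session1_output_1"               # hypothetical output folder
    name = "region1"                                   # hypothetical channel suffix
    control_path = filepath + "/control_region1.hdf5"  # hypothetical file paths
    signal_path = filepath + "/signal_region1.hdf5"

    control, signal, tsNew = read_corrected_data(control_path, signal_path, filepath, name)
    coords = get_coords(filepath, name, tsNew, removeArtifacts=False)
    z_score, dff, control_fit, temp_control_arr = compute_z_score(
        control,
        signal,
        tsNew,
        coords,
        artifactsRemovalMethod="concatenate",          # assumed settings
        filter_window=100,
        isosbestic_control=True,
        zscore_method="standard z-score",
        baseline_start=0,
        baseline_end=0,
    )
    write_zscore(filepath, name, z_score, dff, control_fit, temp_control_arr)
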
--- src/guppy/analysis/io_utils.py | 9 ++++ src/guppy/analysis/standard_io.py | 16 +++++++ src/guppy/analysis/z_score.py | 78 +------------------------------ src/guppy/preprocess.py | 44 ++++++++++++++++- 4 files changed, 69 insertions(+), 78 deletions(-) diff --git a/src/guppy/analysis/io_utils.py b/src/guppy/analysis/io_utils.py index c11edba..b467c37 100644 --- a/src/guppy/analysis/io_utils.py +++ b/src/guppy/analysis/io_utils.py @@ -133,6 +133,15 @@ def fetchCoords(filepath, naming, data): return coords +def get_coords(filepath, name, tsNew, removeArtifacts): # TODO: Make less redundant with fetchCoords + if removeArtifacts == True: + coords = fetchCoords(filepath, name, tsNew) + else: + dt = tsNew[1] - tsNew[0] + coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]]) + return coords + + def get_all_stores_for_combining_data(folderNames): op = [] for i in range(100): diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index 2ce8189..b6fcd8a 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -98,3 +98,19 @@ def write_corrected_ttl_timestamps( for compound_name, corrected_ttl_timestamps in compound_name_to_corrected_ttl_timestamps.items(): write_hdf5(corrected_ttl_timestamps, compound_name, filepath, "ts") logger.info("Timestamps corrections applied to the data and event timestamps.") + + +def read_corrected_data(control_path, signal_path, filepath, name): + control = read_hdf5("", control_path, "data").reshape(-1) + signal = read_hdf5("", signal_path, "data").reshape(-1) + tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + + return control, signal, tsNew + + +def write_zscore(filepath, name, z_score, dff, control_fit, temp_control_arr): + write_hdf5(z_score, "z_score_" + name, filepath, "data") + write_hdf5(dff, "dff_" + name, filepath, "data") + write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") + if temp_control_arr is not None: + write_hdf5(temp_control_arr, "control_" + name, filepath, "data") diff --git a/src/guppy/analysis/z_score.py b/src/guppy/analysis/z_score.py index 31645b5..34b29ee 100644 --- a/src/guppy/analysis/z_score.py +++ b/src/guppy/analysis/z_score.py @@ -1,89 +1,15 @@ import logging -import os import numpy as np from scipy import signal as ss from .control_channel import helper_create_control_channel -from .io_utils import ( - fetchCoords, - find_files, - read_hdf5, - write_hdf5, -) logger = logging.getLogger(__name__) -# compute z-score and deltaF/F and save it to hdf5 file -def compute_z_score(filepath, inputParameters): - - artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] - filter_window = inputParameters["filter_window"] - isosbestic_control = inputParameters["isosbestic_control"] - zscore_method = inputParameters["zscore_method"] - baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] - - logger.debug(f"Computing z-score for each of the data in {filepath}") - - path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) - path_2 = find_files(filepath, "signal_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) - - path = sorted(path_1 + path_2, key=str.casefold) - - if len(path) % 2 != 0: - logger.error("There are not equal number of Control and Signal data") - raise Exception("There are not equal number of Control and Signal data") - - path = np.asarray(path).reshape(2, -1) - removeArtifacts = 
inputParameters["removeArtifacts"] - - for i in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, i])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, i])).split(".")[0]).split("_") - if name_1[-1] != name_2[-1]: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - - name = name_1[-1] - control = read_hdf5("", path[0, i], "data").reshape(-1) - signal = read_hdf5("", path[1, i], "data").reshape(-1) - tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - - coords = get_coords(filepath, name, tsNew, removeArtifacts) - z_score, dff, control_fit, temp_control_arr = helper_z_score( - control, - signal, - tsNew, - coords, - artifactsRemovalMethod, - filter_window, - isosbestic_control, - zscore_method, - baseline_start, - baseline_end, - ) - - write_hdf5(z_score, "z_score_" + name, filepath, "data") - write_hdf5(dff, "dff_" + name, filepath, "data") - write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") - if temp_control_arr is not None: - write_hdf5(temp_control_arr, "control_" + name, filepath, "data") - - logger.info(f"z-score for the data in {filepath} computed.") - - -def get_coords(filepath, name, tsNew, removeArtifacts): # TODO: Make less redundant with fetchCoords - if removeArtifacts == True: - coords = fetchCoords(filepath, name, tsNew) - else: - dt = tsNew[1] - tsNew[0] - coords = np.array([[tsNew[0] - dt, tsNew[-1] + dt]]) - return coords - - -# helper function to compute z-score and deltaF/F -def helper_z_score( +# high-level function to compute z-score and deltaF/F +def compute_z_score( control, signal, tsNew, diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index ad4507e..5829a2d 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -18,15 +18,18 @@ check_TDT, find_files, get_all_stores_for_combining_data, # noqa: F401 -- Necessary for other modules that depend on preprocess.py + get_coords, read_hdf5, takeOnlyDirs, ) from .analysis.standard_io import ( read_control_and_signal, + read_corrected_data, read_ttl, write_corrected_data, write_corrected_timestamps, write_corrected_ttl_timestamps, + write_zscore, ) from .analysis.timestamp_correction import correct_timestamps from .analysis.z_score import compute_z_score @@ -276,6 +279,11 @@ def execute_zscore(folderNames, inputParameters): plot_zScore_dff = inputParameters["plot_zScore_dff"] combine_data = inputParameters["combine_data"] remove_artifacts = inputParameters["removeArtifacts"] + artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] + filter_window = inputParameters["filter_window"] + isosbestic_control = inputParameters["isosbestic_control"] + zscore_method = inputParameters["zscore_method"] + baseline_start, baseline_end = inputParameters["baselineWindowStart"], inputParameters["baselineWindowEnd"] storesListPath = [] for i in range(len(folderNames)): @@ -284,13 +292,45 @@ def execute_zscore(folderNames, inputParameters): else: filepath = folderNames[i] storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) - storesListPath = np.concatenate(storesListPath) for j in range(len(storesListPath)): filepath = storesListPath[j] + logger.debug(f"Computing z-score for each of the data in {filepath}") + path_1 = find_files(filepath, "control_*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) + path_2 = find_files(filepath, "signal_*", ignore_case=True) # 
glob.glob(os.path.join(filepath, 'signal*')) + path = sorted(path_1 + path_2, key=str.casefold) + if len(path) % 2 != 0: + logger.error("There are not equal number of Control and Signal data") + raise Exception("There are not equal number of Control and Signal data") + path = np.asarray(path).reshape(2, -1) + + for i in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, i])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, i])).split(".")[0]).split("_") + if name_1[-1] != name_2[-1]: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + name = name_1[-1] + + control, signal, tsNew = read_corrected_data(path[0, i], path[1, i], filepath, name) + coords = get_coords(filepath, name, tsNew, remove_artifacts) + z_score, dff, control_fit, temp_control_arr = compute_z_score( + control, + signal, + tsNew, + coords, + artifactsRemovalMethod, + filter_window, + isosbestic_control, + zscore_method, + baseline_start, + baseline_end, + ) + write_zscore(filepath, name, z_score, dff, control_fit, temp_control_arr) + + logger.info(f"z-score for the data in {filepath} computed.") - compute_z_score(filepath, inputParameters) if not remove_artifacts: visualizeControlAndSignal(filepath, removeArtifacts=remove_artifacts) From 4d49fd973f34b31af1b24bd66086e056004ea076 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 13:47:15 -0800 Subject: [PATCH 097/150] Refactored read-out of addingNaNValues --- src/guppy/analysis/artifact_removal.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index ac483bb..0106ec6 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -27,12 +27,15 @@ def addingNaNtoChunksWithArtifacts(filepath, events): if name_1[-1] == name_2[-1]: name = name_1[-1] sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] + ts = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + coords = fetchCoords(filepath, name, ts) for i in range(len(storesList)): if ( "control_" + name.lower() in storesList[i].lower() or "signal_" + name.lower() in storesList[i].lower() ): # changes done - data = addingNaNValues(filepath, storesList[i], name) + data = read_hdf5(storesList[i], filepath, "data").reshape(-1) + data = addingNaNValues(data=data, ts=ts, coords=coords) write_hdf5(data, storesList[i], filepath, "data") else: if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): @@ -151,11 +154,7 @@ def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): # adding nan values to removed chunks # when using artifacts removal method - replace with NaN -def addingNaNValues(filepath, event, naming): - - ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - data = read_hdf5(event, filepath, "data").reshape(-1) - coords = fetchCoords(filepath, naming, ts) +def addingNaNValues(*, data, ts, coords): if (data == 0).all() == True: data = np.zeros(ts.shape[0]) From a80f080e3f0b9fc69c5b3b83f42020ab599f82f9 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 13:54:44 -0800 Subject: [PATCH 098/150] Refactored read out of removeTTLs --- src/guppy/analysis/artifact_removal.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py 
b/src/guppy/analysis/artifact_removal.py index 0106ec6..599372e 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -41,7 +41,8 @@ def addingNaNtoChunksWithArtifacts(filepath, events): if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): continue else: - ts = removeTTLs(filepath, storesList[i], name) + ts = read_hdf5(storesList[i] + "_" + name, filepath, "ts").reshape(-1) + ts = removeTTLs(ts=ts, coords=coords) write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") else: @@ -174,11 +175,7 @@ def addingNaNValues(*, data, ts, coords): # remove event TTLs which falls in the removed chunks # when using artifacts removal method - replace with NaN -def removeTTLs(filepath, event, naming): - tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, naming, tsNew) - +def removeTTLs(*, ts, coords): ts_arr = np.array([]) for i in range(coords.shape[0]): ts_index = np.where((ts > coords[i, 0]) & (ts < coords[i, 1]))[0] From 1b2066d5db7d2ad364c21e7f968a02fffd490f73 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 14:08:48 -0800 Subject: [PATCH 099/150] Refactored read out of eliminateData and eliminateTs --- src/guppy/analysis/artifact_removal.py | 32 ++++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 599372e..f7e95a3 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -73,15 +73,31 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): "control_" + name.lower() in storesList[i].lower() or "signal_" + name.lower() in storesList[i].lower() ): # changes done + ts = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + data = read_hdf5(storesList[i], filepath, "data").reshape(-1) + coords = fetchCoords(filepath, name, ts) data, timestampNew = eliminateData( - filepath, timeForLightsTurnOn, storesList[i], sampling_rate, name + data=data, + ts=ts, + coords=coords, + timeForLightsTurnOn=timeForLightsTurnOn, + sampling_rate=sampling_rate, ) write_hdf5(data, storesList[i], filepath, "data") else: if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): continue else: - ts = eliminateTs(filepath, timeForLightsTurnOn, storesList[i], sampling_rate, name) + tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + ts = read_hdf5(storesList[i] + "_" + name, filepath, "ts").reshape(-1) + coords = fetchCoords(filepath, name, tsNew) + ts = eliminateTs( + ts=ts, + tsNew=tsNew, + coords=coords, + timeForLightsTurnOn=timeForLightsTurnOn, + sampling_rate=sampling_rate, + ) write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") # timestamp_dict[name] = timestampNew @@ -93,11 +109,7 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): # helper function to process control and signal timestamps -def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - ts = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - data = read_hdf5(event, filepath, "data").reshape(-1) - coords = fetchCoords(filepath, naming, ts) +def eliminateData(*, data, ts, coords, timeForLightsTurnOn, sampling_rate): if (data == 0).all() == True: data = np.zeros(ts.shape[0]) @@ -126,11 +138,7 @@ def eliminateData(filepath, timeForLightsTurnOn, event, 
sampling_rate, naming): # helper function to align event timestamps with the control and signal timestamps -def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - tsNew = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - ts = read_hdf5(event + "_" + naming, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, naming, tsNew) +def eliminateTs(*, ts, tsNew, coords, timeForLightsTurnOn, sampling_rate): ts_arr = np.array([]) tsNew_arr = np.array([]) From 7275b50342300cbb73146824d7e317663506b089 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 14:31:34 -0800 Subject: [PATCH 100/150] cleaned up addingNaNtoChunksWithArtifacts --- src/guppy/analysis/artifact_removal.py | 47 ++++++++++++-------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index f7e95a3..a17eb0e 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -13,41 +13,38 @@ logger = logging.getLogger(__name__) -def addingNaNtoChunksWithArtifacts(filepath, events): +def addingNaNtoChunksWithArtifacts(filepath, storesList): logger.debug("Replacing chunks with artifacts by NaN values.") - storesList = events[1, :] + names_for_storenames = storesList[1, :] path = decide_naming_convention(filepath) for j in range(path.shape[1]): name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) - if name_1[-1] == name_2[-1]: - name = name_1[-1] - sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - ts = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - coords = fetchCoords(filepath, name, ts) - for i in range(len(storesList)): - if ( - "control_" + name.lower() in storesList[i].lower() - or "signal_" + name.lower() in storesList[i].lower() - ): # changes done - data = read_hdf5(storesList[i], filepath, "data").reshape(-1) - data = addingNaNValues(data=data, ts=ts, coords=coords) - write_hdf5(data, storesList[i], filepath, "data") - else: - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - continue - else: - ts = read_hdf5(storesList[i] + "_" + name, filepath, "ts").reshape(-1) - ts = removeTTLs(ts=ts, coords=coords) - write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") - - else: + if name_1[-1] != name_2[-1]: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") + name = name_1[-1] + + sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] + ts = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + coords = fetchCoords(filepath, name, ts) + for i in range(len(names_for_storenames)): + if ( + "control_" + name.lower() in names_for_storenames[i].lower() + or "signal_" + name.lower() in names_for_storenames[i].lower() + ): # changes done + data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) + data = addingNaNValues(data=data, ts=ts, coords=coords) + write_hdf5(data, names_for_storenames[i], filepath, "data") + else: + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + continue + ts = read_hdf5(names_for_storenames[i] + "_" + name, filepath, "ts").reshape(-1) + ts = removeTTLs(ts=ts, coords=coords) + write_hdf5(ts, names_for_storenames[i] + 
"_" + name, filepath, "ts") logger.info("Chunks with artifacts are replaced by NaN values.") From 07dcfa80ede5a9b5ba15ab7b27da5319aa2ec709 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 15:08:14 -0800 Subject: [PATCH 101/150] moved read to the top of addingNaNtoChunksWithArtifacts --- src/guppy/analysis/artifact_removal.py | 32 +++++++++----- src/guppy/analysis/standard_io.py | 59 ++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 10 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index a17eb0e..3af3001 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -9,11 +9,21 @@ read_hdf5, write_hdf5, ) +from .standard_io import ( + read_control_and_signal, + read_coords_pairwise, + read_corrected_timestamps_pairwise, + read_corrected_ttl_timestamps, +) logger = logging.getLogger(__name__) def addingNaNtoChunksWithArtifacts(filepath, storesList): + name_to_data, _, _, _ = read_control_and_signal(filepath, storesList) + pair_name_to_tsNew = read_corrected_timestamps_pairwise(filepath) + pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) + compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList, pair_name_to_tsNew) logger.debug("Replacing chunks with artifacts by NaN values.") names_for_storenames = storesList[1, :] @@ -26,25 +36,27 @@ def addingNaNtoChunksWithArtifacts(filepath, storesList): if name_1[-1] != name_2[-1]: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") - name = name_1[-1] + pair_name = name_1[-1] - sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - ts = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - coords = fetchCoords(filepath, name, ts) + tsNew = pair_name_to_tsNew[pair_name] + coords = pair_name_to_coords[pair_name] for i in range(len(names_for_storenames)): if ( - "control_" + name.lower() in names_for_storenames[i].lower() - or "signal_" + name.lower() in names_for_storenames[i].lower() + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() ): # changes done - data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) - data = addingNaNValues(data=data, ts=ts, coords=coords) + # data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) + data = name_to_data[names_for_storenames[i]].reshape(-1) + data = addingNaNValues(data=data, ts=tsNew, coords=coords) write_hdf5(data, names_for_storenames[i], filepath, "data") else: if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue - ts = read_hdf5(names_for_storenames[i] + "_" + name, filepath, "ts").reshape(-1) + ttl_name = names_for_storenames[i] + compound_name = ttl_name + "_" + pair_name + ts = compound_name_to_ttl_timestamps[compound_name].reshape(-1) ts = removeTTLs(ts=ts, coords=coords) - write_hdf5(ts, names_for_storenames[i] + "_" + name, filepath, "ts") + write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, filepath, "ts") logger.info("Chunks with artifacts are replaced by NaN values.") diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index b6fcd8a..9c2b7c5 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -1,8 +1,11 @@ import logging +import os 
import numpy as np from .io_utils import ( + decide_naming_convention, + fetchCoords, get_control_and_signal_channel_names, read_hdf5, write_hdf5, @@ -114,3 +117,59 @@ def write_zscore(filepath, name, z_score, dff, control_fit, temp_control_arr): write_hdf5(control_fit, "cntrl_sig_fit_" + name, filepath, "data") if temp_control_arr is not None: write_hdf5(temp_control_arr, "control_" + name, filepath, "data") + + +def read_corrected_timestamps_pairwise(filepath): + pair_name_to_tsNew = {} + path = decide_naming_convention(filepath) + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") + if name_1[-1] != name_2[-1]: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + name = name_1[-1] + + tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + pair_name_to_tsNew[name] = tsNew + return pair_name_to_tsNew + + +def read_coords_pairwise(filepath, pair_name_to_tsNew): + pair_name_to_coords = {} + path = decide_naming_convention(filepath) + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") + if name_1[-1] != name_2[-1]: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + pair_name = name_1[-1] + + tsNew = pair_name_to_tsNew[pair_name] + coords = fetchCoords(filepath, pair_name, tsNew) + pair_name_to_coords[pair_name] = coords + return pair_name_to_coords + + +def read_corrected_ttl_timestamps(filepath, storesList): + compound_name_to_ttl_timestamps = {} + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + arr = get_control_and_signal_channel_names(storesList) + + for storename, name in zip(storenames, names_for_storenames): + if name in arr: + continue + ttl_name = name + for i in range(arr.shape[1]): + name_1 = arr[0, i].split("_")[-1] + name_2 = arr[1, i].split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + compound_name = ttl_name + "_" + name_1 + ts = read_hdf5(compound_name, filepath, "ts") + compound_name_to_ttl_timestamps[compound_name] = ts + + return compound_name_to_ttl_timestamps From 8e037759ed2ac405ff6e615ec7ca572156b8723c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 15:18:43 -0800 Subject: [PATCH 102/150] moved read out of addingNaNtoChunksWithArtifacts --- src/guppy/analysis/artifact_removal.py | 16 +++------------- src/guppy/preprocess.py | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 3af3001..97e24f3 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -9,22 +9,13 @@ read_hdf5, write_hdf5, ) -from .standard_io import ( - read_control_and_signal, - read_coords_pairwise, - read_corrected_timestamps_pairwise, - read_corrected_ttl_timestamps, -) logger = logging.getLogger(__name__) -def addingNaNtoChunksWithArtifacts(filepath, storesList): - name_to_data, _, _, _ = read_control_and_signal(filepath, storesList) - pair_name_to_tsNew = 
read_corrected_timestamps_pairwise(filepath) - pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) - compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList, pair_name_to_tsNew) - +def addingNaNtoChunksWithArtifacts( + filepath, storesList, pair_name_to_tsNew, pair_name_to_coords, name_to_data, compound_name_to_ttl_timestamps +): logger.debug("Replacing chunks with artifacts by NaN values.") names_for_storenames = storesList[1, :] @@ -45,7 +36,6 @@ def addingNaNtoChunksWithArtifacts(filepath, storesList): "control_" + pair_name.lower() in names_for_storenames[i].lower() or "signal_" + pair_name.lower() in names_for_storenames[i].lower() ): # changes done - # data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) data = name_to_data[names_for_storenames[i]].reshape(-1) data = addingNaNValues(data=data, ts=tsNew, coords=coords) write_hdf5(data, names_for_storenames[i], filepath, "data") diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 5829a2d..184d9fa 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -24,7 +24,10 @@ ) from .analysis.standard_io import ( read_control_and_signal, + read_coords_pairwise, read_corrected_data, + read_corrected_timestamps_pairwise, + read_corrected_ttl_timestamps, read_ttl, write_corrected_data, write_corrected_timestamps, @@ -374,7 +377,18 @@ def execute_artifact_removal(folderNames, inputParameters): if artifactsRemovalMethod == "concatenate": processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList) else: - addingNaNtoChunksWithArtifacts(filepath, storesList) + name_to_data, _, _, _ = read_control_and_signal(filepath, storesList) + pair_name_to_tsNew = read_corrected_timestamps_pairwise(filepath) + pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) + compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) + addingNaNtoChunksWithArtifacts( + filepath, + storesList, + pair_name_to_tsNew, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + ) visualizeControlAndSignal(filepath, removeArtifacts=True) writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") From a87c507144e8ebe5968a22d68718f716dee44d67 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 17:15:18 -0800 Subject: [PATCH 103/150] fixed data read bug --- src/guppy/analysis/standard_io.py | 15 +++++++++++++++ src/guppy/preprocess.py | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index 9c2b7c5..f8d291b 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -152,6 +152,21 @@ def read_coords_pairwise(filepath, pair_name_to_tsNew): return pair_name_to_coords +def read_corrected_data_dict(filepath, storesList): # TODO: coordinate with read_corrected_data + name_to_corrected_data = {} + storenames = storesList[0, :] + names_for_storenames = storesList[1, :] + control_and_signal_names = get_control_and_signal_channel_names(storesList) + + for storename, name in zip(storenames, names_for_storenames): + if name not in control_and_signal_names: + continue + data = read_hdf5(name, filepath, "data").reshape(-1) + name_to_corrected_data[name] = data + + return name_to_corrected_data + + def read_corrected_ttl_timestamps(filepath, storesList): compound_name_to_ttl_timestamps = {} storenames = storesList[0, :] diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py 
index 184d9fa..0c0e176 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -26,6 +26,7 @@ read_control_and_signal, read_coords_pairwise, read_corrected_data, + read_corrected_data_dict, read_corrected_timestamps_pairwise, read_corrected_ttl_timestamps, read_ttl, @@ -377,7 +378,7 @@ def execute_artifact_removal(folderNames, inputParameters): if artifactsRemovalMethod == "concatenate": processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList) else: - name_to_data, _, _, _ = read_control_and_signal(filepath, storesList) + name_to_data = read_corrected_data_dict(filepath, storesList) pair_name_to_tsNew = read_corrected_timestamps_pairwise(filepath) pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) From b1cbc836971c2faee6c1b633a7ee3d7122e398c2 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Tue, 16 Dec 2025 17:29:08 -0800 Subject: [PATCH 104/150] Refactored write operations out of addingNaNtoChunksWithArtifacts --- src/guppy/analysis/artifact_removal.py | 8 ++++++-- src/guppy/analysis/standard_io.py | 13 +++++++++++++ src/guppy/preprocess.py | 7 ++++++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 97e24f3..db40e64 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -21,6 +21,8 @@ def addingNaNtoChunksWithArtifacts( path = decide_naming_convention(filepath) + name_to_corrected_data = {} + compound_name_to_corrected_ttl_timestamps = {} for j in range(path.shape[1]): name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") @@ -38,7 +40,7 @@ def addingNaNtoChunksWithArtifacts( ): # changes done data = name_to_data[names_for_storenames[i]].reshape(-1) data = addingNaNValues(data=data, ts=tsNew, coords=coords) - write_hdf5(data, names_for_storenames[i], filepath, "data") + name_to_corrected_data[names_for_storenames[i]] = data else: if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue @@ -46,9 +48,11 @@ def addingNaNtoChunksWithArtifacts( compound_name = ttl_name + "_" + pair_name ts = compound_name_to_ttl_timestamps[compound_name].reshape(-1) ts = removeTTLs(ts=ts, coords=coords) - write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, filepath, "ts") + compound_name_to_corrected_ttl_timestamps[compound_name] = ts logger.info("Chunks with artifacts are replaced by NaN values.") + return name_to_corrected_data, compound_name_to_corrected_ttl_timestamps + # main function to align timestamps for control, signal and event timestamps for artifacts removal def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index f8d291b..ad7408e 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -188,3 +188,16 @@ def read_corrected_ttl_timestamps(filepath, storesList): compound_name_to_ttl_timestamps[compound_name] = ts return compound_name_to_ttl_timestamps + + +def write_nan_corrected_data(filepath, name_to_corrected_data): + for name, data in name_to_corrected_data.items(): + write_hdf5(data, name, filepath, "data") + + +def write_nan_corrected_ttl_timestamps( + filepath, + compound_name_to_corrected_ttl_timestamps, +): + for compound_name, 
corrected_ttl_timestamps in compound_name_to_corrected_ttl_timestamps.items(): + write_hdf5(corrected_ttl_timestamps, compound_name, filepath, "ts") diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 0c0e176..a625bc9 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -33,6 +33,8 @@ write_corrected_data, write_corrected_timestamps, write_corrected_ttl_timestamps, + write_nan_corrected_data, + write_nan_corrected_ttl_timestamps, write_zscore, ) from .analysis.timestamp_correction import correct_timestamps @@ -382,7 +384,7 @@ def execute_artifact_removal(folderNames, inputParameters): pair_name_to_tsNew = read_corrected_timestamps_pairwise(filepath) pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) - addingNaNtoChunksWithArtifacts( + name_to_data, compound_name_to_ttl_timestamps = addingNaNtoChunksWithArtifacts( filepath, storesList, pair_name_to_tsNew, @@ -390,6 +392,9 @@ def execute_artifact_removal(folderNames, inputParameters): name_to_data, compound_name_to_ttl_timestamps, ) + write_nan_corrected_data(filepath, name_to_data) + write_nan_corrected_ttl_timestamps(filepath, compound_name_to_ttl_timestamps) + visualizeControlAndSignal(filepath, removeArtifacts=True) writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") From 393d3aa79fbbccb9335d73612d2747ef131d1421 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 08:15:06 -0800 Subject: [PATCH 105/150] Refactored filepath out of addingNaNtoChunksWithArtifacts --- src/guppy/analysis/artifact_removal.py | 14 +++----------- src/guppy/preprocess.py | 1 - 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index db40e64..556a719 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -14,23 +14,15 @@ def addingNaNtoChunksWithArtifacts( - filepath, storesList, pair_name_to_tsNew, pair_name_to_coords, name_to_data, compound_name_to_ttl_timestamps + storesList, pair_name_to_tsNew, pair_name_to_coords, name_to_data, compound_name_to_ttl_timestamps ): logger.debug("Replacing chunks with artifacts by NaN values.") names_for_storenames = storesList[1, :] - - path = decide_naming_convention(filepath) + pair_names = pair_name_to_tsNew.keys() name_to_corrected_data = {} compound_name_to_corrected_ttl_timestamps = {} - for j in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - if name_1[-1] != name_2[-1]: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - pair_name = name_1[-1] - + for pair_name in pair_names: tsNew = pair_name_to_tsNew[pair_name] coords = pair_name_to_coords[pair_name] for i in range(len(names_for_storenames)): diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index a625bc9..8555b55 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -385,7 +385,6 @@ def execute_artifact_removal(folderNames, inputParameters): pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) name_to_data, compound_name_to_ttl_timestamps = addingNaNtoChunksWithArtifacts( - filepath, storesList, 
pair_name_to_tsNew, pair_name_to_coords, From 22f4f182c24851a6aa2e9abb62f85dc71e96551d Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 08:18:09 -0800 Subject: [PATCH 106/150] Renamed some variables in processTimestampsForArtifacts --- src/guppy/analysis/artifact_removal.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 556a719..51c2d19 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -47,10 +47,10 @@ def addingNaNtoChunksWithArtifacts( # main function to align timestamps for control, signal and event timestamps for artifacts removal -def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): +def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList): logger.debug("Processing timestamps to get rid of artifacts using concatenate method...") - storesList = events[1, :] + names_for_storenames = storesList[1, :] path = decide_naming_convention(filepath) @@ -63,13 +63,13 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): name = name_1[-1] sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] - for i in range(len(storesList)): + for i in range(len(names_for_storenames)): if ( - "control_" + name.lower() in storesList[i].lower() - or "signal_" + name.lower() in storesList[i].lower() + "control_" + name.lower() in names_for_storenames[i].lower() + or "signal_" + name.lower() in names_for_storenames[i].lower() ): # changes done ts = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - data = read_hdf5(storesList[i], filepath, "data").reshape(-1) + data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) coords = fetchCoords(filepath, name, ts) data, timestampNew = eliminateData( data=data, @@ -78,13 +78,13 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): timeForLightsTurnOn=timeForLightsTurnOn, sampling_rate=sampling_rate, ) - write_hdf5(data, storesList[i], filepath, "data") + write_hdf5(data, names_for_storenames[i], filepath, "data") else: - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue else: tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - ts = read_hdf5(storesList[i] + "_" + name, filepath, "ts").reshape(-1) + ts = read_hdf5(names_for_storenames[i] + "_" + name, filepath, "ts").reshape(-1) coords = fetchCoords(filepath, name, tsNew) ts = eliminateTs( ts=ts, @@ -93,7 +93,7 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, events): timeForLightsTurnOn=timeForLightsTurnOn, sampling_rate=sampling_rate, ) - write_hdf5(ts, storesList[i] + "_" + name, filepath, "ts") + write_hdf5(ts, names_for_storenames[i] + "_" + name, filepath, "ts") # timestamp_dict[name] = timestampNew write_hdf5(timestampNew, "timeCorrection_" + name, filepath, "timestampNew") From a4a162f2267a295ed89d3ee3aca7188f23e596fb Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 08:23:21 -0800 Subject: [PATCH 107/150] Refactored read out of processTimestampsForArtifacts --- src/guppy/analysis/artifact_removal.py | 24 ++++++++++++++++-------- src/guppy/preprocess.py | 14 +++++++++++++- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py 
b/src/guppy/analysis/artifact_removal.py index 51c2d19..8e78669 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -5,7 +5,6 @@ from .io_utils import ( decide_naming_convention, - fetchCoords, read_hdf5, write_hdf5, ) @@ -47,7 +46,15 @@ def addingNaNtoChunksWithArtifacts( # main function to align timestamps for control, signal and event timestamps for artifacts removal -def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList): +def processTimestampsForArtifacts( + filepath, + timeForLightsTurnOn, + storesList, + pair_name_to_tsNew, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, +): logger.debug("Processing timestamps to get rid of artifacts using concatenate method...") names_for_storenames = storesList[1, :] @@ -68,9 +75,9 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList): "control_" + name.lower() in names_for_storenames[i].lower() or "signal_" + name.lower() in names_for_storenames[i].lower() ): # changes done - ts = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) - coords = fetchCoords(filepath, name, ts) + ts = pair_name_to_tsNew[name] + data = name_to_data[names_for_storenames[i]] + coords = pair_name_to_coords[name] data, timestampNew = eliminateData( data=data, ts=ts, @@ -83,9 +90,10 @@ def processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList): if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue else: - tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - ts = read_hdf5(names_for_storenames[i] + "_" + name, filepath, "ts").reshape(-1) - coords = fetchCoords(filepath, name, tsNew) + compound_name = names_for_storenames[i] + "_" + name + tsNew = pair_name_to_tsNew[name] + ts = compound_name_to_ttl_timestamps[compound_name] + coords = pair_name_to_coords[name] ts = eliminateTs( ts=ts, tsNew=tsNew, diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 8555b55..dd02bd0 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -378,7 +378,19 @@ def execute_artifact_removal(folderNames, inputParameters): logger.debug("Removing artifacts from the data...") if artifactsRemovalMethod == "concatenate": - processTimestampsForArtifacts(filepath, timeForLightsTurnOn, storesList) + name_to_data = read_corrected_data_dict(filepath, storesList) + pair_name_to_tsNew = read_corrected_timestamps_pairwise(filepath) + pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) + compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) + processTimestampsForArtifacts( + filepath, + timeForLightsTurnOn, + storesList, + pair_name_to_tsNew, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + ) else: name_to_data = read_corrected_data_dict(filepath, storesList) pair_name_to_tsNew = read_corrected_timestamps_pairwise(filepath) From a25e7acaecf27e7ad1fd4667478af72509205e35 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 08:31:16 -0800 Subject: [PATCH 108/150] Refactored read out of processTimestampsForArtifacts --- src/guppy/analysis/artifact_removal.py | 5 ++--- src/guppy/analysis/standard_io.py | 5 ++++- src/guppy/preprocess.py | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 8e78669..852cfb8 
100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -5,7 +5,6 @@ from .io_utils import ( decide_naming_convention, - read_hdf5, write_hdf5, ) @@ -51,6 +50,7 @@ def processTimestampsForArtifacts( timeForLightsTurnOn, storesList, pair_name_to_tsNew, + pair_name_to_sampling_rate, pair_name_to_coords, name_to_data, compound_name_to_ttl_timestamps, @@ -65,10 +65,9 @@ def processTimestampsForArtifacts( for j in range(path.shape[1]): name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - # dirname = os.path.dirname(path[i]) if name_1[-1] == name_2[-1]: name = name_1[-1] - sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] + sampling_rate = pair_name_to_sampling_rate[name] for i in range(len(names_for_storenames)): if ( diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index ad7408e..bba3d20 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -121,6 +121,7 @@ def write_zscore(filepath, name, z_score, dff, control_fit, temp_control_arr): def read_corrected_timestamps_pairwise(filepath): pair_name_to_tsNew = {} + pair_name_to_sampling_rate = {} path = decide_naming_convention(filepath) for j in range(path.shape[1]): name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") @@ -131,8 +132,10 @@ def read_corrected_timestamps_pairwise(filepath): name = name_1[-1] tsNew = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") + sampling_rate = read_hdf5("timeCorrection_" + name, filepath, "sampling_rate")[0] pair_name_to_tsNew[name] = tsNew - return pair_name_to_tsNew + pair_name_to_sampling_rate[name] = sampling_rate + return pair_name_to_tsNew, pair_name_to_sampling_rate def read_coords_pairwise(filepath, pair_name_to_tsNew): diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index dd02bd0..b618deb 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -379,7 +379,7 @@ def execute_artifact_removal(folderNames, inputParameters): logger.debug("Removing artifacts from the data...") if artifactsRemovalMethod == "concatenate": name_to_data = read_corrected_data_dict(filepath, storesList) - pair_name_to_tsNew = read_corrected_timestamps_pairwise(filepath) + pair_name_to_tsNew, pair_name_to_sampling_rate = read_corrected_timestamps_pairwise(filepath) pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) processTimestampsForArtifacts( @@ -387,13 +387,14 @@ def execute_artifact_removal(folderNames, inputParameters): timeForLightsTurnOn, storesList, pair_name_to_tsNew, + pair_name_to_sampling_rate, pair_name_to_coords, name_to_data, compound_name_to_ttl_timestamps, ) else: name_to_data = read_corrected_data_dict(filepath, storesList) - pair_name_to_tsNew = read_corrected_timestamps_pairwise(filepath) + pair_name_to_tsNew, _ = read_corrected_timestamps_pairwise(filepath) pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) name_to_data, compound_name_to_ttl_timestamps = addingNaNtoChunksWithArtifacts( From 3c7057916867bf451f809156735288c329984b4a Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 08:36:35 -0800 Subject: [PATCH 109/150] Reorganized processTimestampsForArtifacts --- src/guppy/analysis/artifact_removal.py | 
77 ++++++++++++-------------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 852cfb8..ebc1df9 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -61,52 +61,47 @@ def processTimestampsForArtifacts( path = decide_naming_convention(filepath) - timestamp_dict = dict() for j in range(path.shape[1]): name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - if name_1[-1] == name_2[-1]: - name = name_1[-1] - sampling_rate = pair_name_to_sampling_rate[name] - - for i in range(len(names_for_storenames)): - if ( - "control_" + name.lower() in names_for_storenames[i].lower() - or "signal_" + name.lower() in names_for_storenames[i].lower() - ): # changes done - ts = pair_name_to_tsNew[name] - data = name_to_data[names_for_storenames[i]] - coords = pair_name_to_coords[name] - data, timestampNew = eliminateData( - data=data, - ts=ts, - coords=coords, - timeForLightsTurnOn=timeForLightsTurnOn, - sampling_rate=sampling_rate, - ) - write_hdf5(data, names_for_storenames[i], filepath, "data") - else: - if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): - continue - else: - compound_name = names_for_storenames[i] + "_" + name - tsNew = pair_name_to_tsNew[name] - ts = compound_name_to_ttl_timestamps[compound_name] - coords = pair_name_to_coords[name] - ts = eliminateTs( - ts=ts, - tsNew=tsNew, - coords=coords, - timeForLightsTurnOn=timeForLightsTurnOn, - sampling_rate=sampling_rate, - ) - write_hdf5(ts, names_for_storenames[i] + "_" + name, filepath, "ts") - - # timestamp_dict[name] = timestampNew - write_hdf5(timestampNew, "timeCorrection_" + name, filepath, "timestampNew") - else: + if name_1[-1] != name_2[-1]: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") + name = name_1[-1] + + sampling_rate = pair_name_to_sampling_rate[name] + tsNew = pair_name_to_tsNew[name] + coords = pair_name_to_coords[name] + + for i in range(len(names_for_storenames)): + if ( + "control_" + name.lower() in names_for_storenames[i].lower() + or "signal_" + name.lower() in names_for_storenames[i].lower() + ): # changes done + data = name_to_data[names_for_storenames[i]] + data, timestampNew = eliminateData( + data=data, + ts=tsNew, + coords=coords, + timeForLightsTurnOn=timeForLightsTurnOn, + sampling_rate=sampling_rate, + ) + write_hdf5(data, names_for_storenames[i], filepath, "data") + else: + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + continue + compound_name = names_for_storenames[i] + "_" + name + ts = compound_name_to_ttl_timestamps[compound_name] + ts = eliminateTs( + ts=ts, + tsNew=tsNew, + coords=coords, + timeForLightsTurnOn=timeForLightsTurnOn, + sampling_rate=sampling_rate, + ) + write_hdf5(ts, names_for_storenames[i] + "_" + name, filepath, "ts") + + write_hdf5(timestampNew, "timeCorrection_" + name, filepath, "timestampNew") logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") From b7d054967b992e113a404139306bf53fbe5baab8 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 08:56:43 -0800 Subject: [PATCH 110/150] Removed write from processTimestampsForArtifacts --- src/guppy/analysis/artifact_removal.py | 33 
+++++++++++++++++--------- src/guppy/analysis/standard_io.py | 5 ++++ src/guppy/preprocess.py | 6 ++++- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index ebc1df9..08ffc98 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -5,7 +5,6 @@ from .io_utils import ( decide_naming_convention, - write_hdf5, ) logger = logging.getLogger(__name__) @@ -61,22 +60,25 @@ def processTimestampsForArtifacts( path = decide_naming_convention(filepath) + name_to_corrected_data = {} + pair_name_to_corrected_timestamps = {} + compound_name_to_corrected_ttl_timestamps = {} for j in range(path.shape[1]): name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") if name_1[-1] != name_2[-1]: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") - name = name_1[-1] + pair_name = name_1[-1] - sampling_rate = pair_name_to_sampling_rate[name] - tsNew = pair_name_to_tsNew[name] - coords = pair_name_to_coords[name] + sampling_rate = pair_name_to_sampling_rate[pair_name] + tsNew = pair_name_to_tsNew[pair_name] + coords = pair_name_to_coords[pair_name] for i in range(len(names_for_storenames)): if ( - "control_" + name.lower() in names_for_storenames[i].lower() - or "signal_" + name.lower() in names_for_storenames[i].lower() + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() ): # changes done data = name_to_data[names_for_storenames[i]] data, timestampNew = eliminateData( @@ -86,11 +88,13 @@ def processTimestampsForArtifacts( timeForLightsTurnOn=timeForLightsTurnOn, sampling_rate=sampling_rate, ) - write_hdf5(data, names_for_storenames[i], filepath, "data") + name_to_corrected_data[names_for_storenames[i]] = data + pair_name_to_corrected_timestamps[pair_name] = timestampNew + # write_hdf5(data, names_for_storenames[i], filepath, "data") else: if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue - compound_name = names_for_storenames[i] + "_" + name + compound_name = names_for_storenames[i] + "_" + pair_name ts = compound_name_to_ttl_timestamps[compound_name] ts = eliminateTs( ts=ts, @@ -99,11 +103,18 @@ def processTimestampsForArtifacts( timeForLightsTurnOn=timeForLightsTurnOn, sampling_rate=sampling_rate, ) - write_hdf5(ts, names_for_storenames[i] + "_" + name, filepath, "ts") + compound_name_to_corrected_ttl_timestamps[compound_name] = ts + # write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, filepath, "ts") - write_hdf5(timestampNew, "timeCorrection_" + name, filepath, "timestampNew") + # write_hdf5(timestampNew, "timeCorrection_" + pair_name, filepath, "timestampNew") logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") + return ( + name_to_corrected_data, + pair_name_to_corrected_timestamps, + compound_name_to_corrected_ttl_timestamps, + ) + # helper function to process control and signal timestamps def eliminateData(*, data, ts, coords, timeForLightsTurnOn, sampling_rate): diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index bba3d20..3131da5 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -204,3 +204,8 @@ def 
write_nan_corrected_ttl_timestamps( ): for compound_name, corrected_ttl_timestamps in compound_name_to_corrected_ttl_timestamps.items(): write_hdf5(corrected_ttl_timestamps, compound_name, filepath, "ts") + + +def write_concat_corrected_timestamps(filepath, pair_name_to_corrected_timestamps): + for pair_name, timestamps in pair_name_to_corrected_timestamps.items(): + write_hdf5(timestamps, "timeCorrection_" + pair_name, filepath, "timestampNew") diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index b618deb..3f899a9 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -30,6 +30,7 @@ read_corrected_timestamps_pairwise, read_corrected_ttl_timestamps, read_ttl, + write_concat_corrected_timestamps, write_corrected_data, write_corrected_timestamps, write_corrected_ttl_timestamps, @@ -382,7 +383,7 @@ def execute_artifact_removal(folderNames, inputParameters): pair_name_to_tsNew, pair_name_to_sampling_rate = read_corrected_timestamps_pairwise(filepath) pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) - processTimestampsForArtifacts( + name_to_data, pair_name_to_timestamps, compound_name_to_ttl_timestamps = processTimestampsForArtifacts( filepath, timeForLightsTurnOn, storesList, @@ -392,6 +393,9 @@ def execute_artifact_removal(folderNames, inputParameters): name_to_data, compound_name_to_ttl_timestamps, ) + write_nan_corrected_data(filepath, name_to_data) + write_concat_corrected_timestamps(filepath, pair_name_to_timestamps) + write_nan_corrected_ttl_timestamps(filepath, compound_name_to_ttl_timestamps) else: name_to_data = read_corrected_data_dict(filepath, storesList) pair_name_to_tsNew, _ = read_corrected_timestamps_pairwise(filepath) From 61b2712d1aceb8bf894ad1d5868c66760b2b75f5 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 08:57:15 -0800 Subject: [PATCH 111/150] Removed write from processTimestampsForArtifacts --- src/guppy/analysis/artifact_removal.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 08ffc98..4ac22c9 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -90,7 +90,6 @@ def processTimestampsForArtifacts( ) name_to_corrected_data[names_for_storenames[i]] = data pair_name_to_corrected_timestamps[pair_name] = timestampNew - # write_hdf5(data, names_for_storenames[i], filepath, "data") else: if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue @@ -104,9 +103,7 @@ def processTimestampsForArtifacts( sampling_rate=sampling_rate, ) compound_name_to_corrected_ttl_timestamps[compound_name] = ts - # write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, filepath, "ts") - # write_hdf5(timestampNew, "timeCorrection_" + pair_name, filepath, "timestampNew") logger.info("Timestamps processed, artifacts are removed and good chunks are concatenated.") return ( From 2dc18cc51a47a8efb03e6f093df4355a6c473a7f Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 08:59:30 -0800 Subject: [PATCH 112/150] Refactored filepath out of processTimestampsForArtifacts --- src/guppy/analysis/artifact_removal.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index 4ac22c9..c661e49 100644 --- a/src/guppy/analysis/artifact_removal.py +++ 
b/src/guppy/analysis/artifact_removal.py @@ -1,12 +1,7 @@ import logging -import os import numpy as np -from .io_utils import ( - decide_naming_convention, -) - logger = logging.getLogger(__name__) @@ -45,7 +40,6 @@ def addingNaNtoChunksWithArtifacts( # main function to align timestamps for control, signal and event timestamps for artifacts removal def processTimestampsForArtifacts( - filepath, timeForLightsTurnOn, storesList, pair_name_to_tsNew, @@ -54,23 +48,14 @@ def processTimestampsForArtifacts( name_to_data, compound_name_to_ttl_timestamps, ): - logger.debug("Processing timestamps to get rid of artifacts using concatenate method...") names_for_storenames = storesList[1, :] - - path = decide_naming_convention(filepath) + pair_names = pair_name_to_tsNew.keys() name_to_corrected_data = {} pair_name_to_corrected_timestamps = {} compound_name_to_corrected_ttl_timestamps = {} - for j in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - if name_1[-1] != name_2[-1]: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - pair_name = name_1[-1] - + for pair_name in pair_names: sampling_rate = pair_name_to_sampling_rate[pair_name] tsNew = pair_name_to_tsNew[pair_name] coords = pair_name_to_coords[pair_name] From bfb18e058f3cae8abffde64412895d913a5a2c46 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 09:10:11 -0800 Subject: [PATCH 113/150] Consolidated write operations --- src/guppy/analysis/standard_io.py | 23 +++++++++++------------ src/guppy/preprocess.py | 25 ++++++++----------------- 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index 3131da5..89f1b40 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -193,19 +193,18 @@ def read_corrected_ttl_timestamps(filepath, storesList): return compound_name_to_ttl_timestamps -def write_nan_corrected_data(filepath, name_to_corrected_data): - for name, data in name_to_corrected_data.items(): - write_hdf5(data, name, filepath, "data") +def write_artifact_corrected_timestamps(filepath, pair_name_to_corrected_timestamps): + for pair_name, timestamps in pair_name_to_corrected_timestamps.items(): + write_hdf5(timestamps, "timeCorrection_" + pair_name, filepath, "timestampNew") -def write_nan_corrected_ttl_timestamps( +def write_artifact_removal( filepath, - compound_name_to_corrected_ttl_timestamps, + name_to_corrected_data, + pair_name_to_corrected_timestamps, + compound_name_to_corrected_ttl_timestamps=None, ): - for compound_name, corrected_ttl_timestamps in compound_name_to_corrected_ttl_timestamps.items(): - write_hdf5(corrected_ttl_timestamps, compound_name, filepath, "ts") - - -def write_concat_corrected_timestamps(filepath, pair_name_to_corrected_timestamps): - for pair_name, timestamps in pair_name_to_corrected_timestamps.items(): - write_hdf5(timestamps, "timeCorrection_" + pair_name, filepath, "timestampNew") + write_corrected_data(filepath, name_to_corrected_data) + write_corrected_ttl_timestamps(filepath, compound_name_to_corrected_ttl_timestamps) + if pair_name_to_corrected_timestamps is not None: + write_artifact_corrected_timestamps(filepath, pair_name_to_corrected_timestamps) diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 3f899a9..fc90b77 100755 --- 
a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -30,12 +30,10 @@ read_corrected_timestamps_pairwise, read_corrected_ttl_timestamps, read_ttl, - write_concat_corrected_timestamps, + write_artifact_removal, write_corrected_data, write_corrected_timestamps, write_corrected_ttl_timestamps, - write_nan_corrected_data, - write_nan_corrected_ttl_timestamps, write_zscore, ) from .analysis.timestamp_correction import correct_timestamps @@ -377,14 +375,14 @@ def execute_artifact_removal(folderNames, inputParameters): filepath = storesListPath[j] storesList = np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) + name_to_data = read_corrected_data_dict(filepath, storesList) + pair_name_to_tsNew, pair_name_to_sampling_rate = read_corrected_timestamps_pairwise(filepath) + pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) + compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) + logger.debug("Removing artifacts from the data...") if artifactsRemovalMethod == "concatenate": - name_to_data = read_corrected_data_dict(filepath, storesList) - pair_name_to_tsNew, pair_name_to_sampling_rate = read_corrected_timestamps_pairwise(filepath) - pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) - compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) name_to_data, pair_name_to_timestamps, compound_name_to_ttl_timestamps = processTimestampsForArtifacts( - filepath, timeForLightsTurnOn, storesList, pair_name_to_tsNew, @@ -393,14 +391,7 @@ def execute_artifact_removal(folderNames, inputParameters): name_to_data, compound_name_to_ttl_timestamps, ) - write_nan_corrected_data(filepath, name_to_data) - write_concat_corrected_timestamps(filepath, pair_name_to_timestamps) - write_nan_corrected_ttl_timestamps(filepath, compound_name_to_ttl_timestamps) else: - name_to_data = read_corrected_data_dict(filepath, storesList) - pair_name_to_tsNew, _ = read_corrected_timestamps_pairwise(filepath) - pair_name_to_coords = read_coords_pairwise(filepath, pair_name_to_tsNew) - compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) name_to_data, compound_name_to_ttl_timestamps = addingNaNtoChunksWithArtifacts( storesList, pair_name_to_tsNew, @@ -408,9 +399,9 @@ def execute_artifact_removal(folderNames, inputParameters): name_to_data, compound_name_to_ttl_timestamps, ) - write_nan_corrected_data(filepath, name_to_data) - write_nan_corrected_ttl_timestamps(filepath, compound_name_to_ttl_timestamps) + pair_name_to_timestamps = None + write_artifact_removal(filepath, name_to_data, pair_name_to_timestamps, compound_name_to_ttl_timestamps) visualizeControlAndSignal(filepath, removeArtifacts=True) writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") From d4f3de43f207f84d3b7ff6ad67021f59e9263cc1 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 09:30:02 -0800 Subject: [PATCH 114/150] Consolidated into single remove_artifacts fn --- src/guppy/analysis/artifact_removal.py | 40 ++++++++++++++++++++++++++ src/guppy/preprocess.py | 34 +++++++--------------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/src/guppy/analysis/artifact_removal.py b/src/guppy/analysis/artifact_removal.py index c661e49..d3da042 100644 --- a/src/guppy/analysis/artifact_removal.py +++ b/src/guppy/analysis/artifact_removal.py @@ -5,6 +5,46 @@ logger = logging.getLogger(__name__) +def remove_artifacts( + timeForLightsTurnOn, + storesList, 
+ pair_name_to_tsNew, + pair_name_to_sampling_rate, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + method, +): + if method == "concatenate": + name_to_corrected_data, pair_name_to_corrected_timestamps, compound_name_to_corrected_ttl_timestamps = ( + processTimestampsForArtifacts( + timeForLightsTurnOn, + storesList, + pair_name_to_tsNew, + pair_name_to_sampling_rate, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + ) + ) + logger.info("Artifacts removed using concatenate method.") + elif method == "replace with NaN": + name_to_corrected_data, compound_name_to_corrected_ttl_timestamps = addingNaNtoChunksWithArtifacts( + storesList, + pair_name_to_tsNew, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + ) + pair_name_to_corrected_timestamps = None + logger.info("Artifacts removed using NaN replacement method.") + else: + logger.error("Invalid artifact removal method specified.") + raise ValueError("Invalid artifact removal method specified.") + + return name_to_corrected_data, pair_name_to_corrected_timestamps, compound_name_to_corrected_ttl_timestamps + + def addingNaNtoChunksWithArtifacts( storesList, pair_name_to_tsNew, pair_name_to_coords, name_to_data, compound_name_to_ttl_timestamps ): diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index fc90b77..46fc7c7 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -7,10 +7,7 @@ import matplotlib.pyplot as plt import numpy as np -from .analysis.artifact_removal import ( - addingNaNtoChunksWithArtifacts, - processTimestampsForArtifacts, -) +from .analysis.artifact_removal import remove_artifacts from .analysis.combine_data import combineData from .analysis.control_channel import add_control_channel, create_control_channel from .analysis.io_utils import ( @@ -381,25 +378,16 @@ def execute_artifact_removal(folderNames, inputParameters): compound_name_to_ttl_timestamps = read_corrected_ttl_timestamps(filepath, storesList) logger.debug("Removing artifacts from the data...") - if artifactsRemovalMethod == "concatenate": - name_to_data, pair_name_to_timestamps, compound_name_to_ttl_timestamps = processTimestampsForArtifacts( - timeForLightsTurnOn, - storesList, - pair_name_to_tsNew, - pair_name_to_sampling_rate, - pair_name_to_coords, - name_to_data, - compound_name_to_ttl_timestamps, - ) - else: - name_to_data, compound_name_to_ttl_timestamps = addingNaNtoChunksWithArtifacts( - storesList, - pair_name_to_tsNew, - pair_name_to_coords, - name_to_data, - compound_name_to_ttl_timestamps, - ) - pair_name_to_timestamps = None + name_to_data, pair_name_to_timestamps, compound_name_to_ttl_timestamps = remove_artifacts( + timeForLightsTurnOn, + storesList, + pair_name_to_tsNew, + pair_name_to_sampling_rate, + pair_name_to_coords, + name_to_data, + compound_name_to_ttl_timestamps, + method=artifactsRemovalMethod, + ) write_artifact_removal(filepath, name_to_data, pair_name_to_timestamps, compound_name_to_ttl_timestamps) visualizeControlAndSignal(filepath, removeArtifacts=True) From c23aa1ddf12ba4c2525574f64e5952586db03113 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 13:17:21 -0800 Subject: [PATCH 115/150] fixed bug with read_control_and_signal --- src/guppy/analysis/standard_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index 89f1b40..e7fe8e0 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py 
@@ -27,8 +27,8 @@ def read_control_and_signal(filepath, storesList): for i in range(channels_arr.shape[1]): control_name = channels_arr[0, i] signal_name = channels_arr[1, i] - idx_c = np.where(storesList == control_name)[0] - idx_s = np.where(storesList == signal_name)[0] + idx_c = np.where(names_for_storenames == control_name)[0] + idx_s = np.where(names_for_storenames == signal_name)[0] control_storename = storenames[idx_c[0]] signal_storename = storenames[idx_s[0]] From 1cda972960addc599f378d699d6d8eaa2da9e12e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 17 Dec 2025 13:18:32 -0800 Subject: [PATCH 116/150] fixed naming bug in timestampCorrection --- src/guppy/analysis/timestamp_correction.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/guppy/analysis/timestamp_correction.py b/src/guppy/analysis/timestamp_correction.py index 60cf76a..0806fb8 100644 --- a/src/guppy/analysis/timestamp_correction.py +++ b/src/guppy/analysis/timestamp_correction.py @@ -64,15 +64,15 @@ def timestampCorrection( name_to_corrected_data = {} storenames = storesList[0, :] names_for_storenames = storesList[1, :] - data = get_control_and_signal_channel_names(storesList) + channels_arr = get_control_and_signal_channel_names(storesList) - indices = check_cntrl_sig_length(data, name_to_data) + indices = check_cntrl_sig_length(channels_arr, name_to_data) - for i in range(data.shape[1]): - control_name = data[0, i] - signal_name = data[1, i] - name_1 = data[0, i].split("_")[-1] - name_2 = data[1, i].split("_")[-1] + for i in range(channels_arr.shape[1]): + control_name = channels_arr[0, i] + signal_name = channels_arr[1, i] + name_1 = channels_arr[0, i].split("_")[-1] + name_2 = channels_arr[1, i].split("_")[-1] if name_1 != name_2: logger.error("Error in naming convention of files or Error in storesList file") raise Exception("Error in naming convention of files or Error in storesList file") @@ -81,8 +81,8 @@ def timestampCorrection( idx = np.where(names_for_storenames == indices[i])[0] if idx.shape[0] == 0: - logger.error(f"{data[0,i]} does not exist in the stores list file.") - raise Exception("{} does not exist in the stores list file.".format(data[0, i])) + logger.error(f"{channels_arr[0,i]} does not exist in the stores list file.") + raise Exception("{} does not exist in the stores list file.".format(channels_arr[0, i])) name = names_for_storenames[idx][0] timestamp = name_to_timestamps[name] From 19986c81c974138d2007982badd8ae2a8dcc679a Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 19 Dec 2025 16:36:18 -0800 Subject: [PATCH 117/150] Fixed combinedata bug --- src/guppy/analysis/combine_data.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index f89315f..cf96835 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -66,6 +66,8 @@ def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): for i in range(len(filepath)): ts = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") data = read_hdf5(event, filepath[i], "data").reshape(-1) + print(f"{ts.shape = }") + print(f"{data.shape = }") # index = np.where((ts>coords[i,0]) & (ts Date: Fri, 19 Dec 2025 16:42:28 -0800 Subject: [PATCH 118/150] Fixed combinedata bug --- src/guppy/analysis/combine_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index cf96835..3da338d 100644 --- 
a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -66,8 +66,6 @@ def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): for i in range(len(filepath)): ts = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") data = read_hdf5(event, filepath[i], "data").reshape(-1) - print(f"{ts.shape = }") - print(f"{data.shape = }") # index = np.where((ts>coords[i,0]) & (ts Date: Fri, 19 Dec 2025 16:51:22 -0800 Subject: [PATCH 119/150] Reorganized into execute_combined_data and combine_data. --- src/guppy/analysis/combine_data.py | 49 +----------------------------- src/guppy/preprocess.py | 48 +++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 50 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index 3da338d..3ab73d3 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -1,4 +1,3 @@ -import glob import logging import os @@ -6,59 +5,13 @@ from .io_utils import ( decide_naming_convention, - get_all_stores_for_combining_data, read_hdf5, - takeOnlyDirs, write_hdf5, ) logger = logging.getLogger(__name__) -# function to combine data when there are two different data files for the same recording session -# it will combine the data, do timestamps processing and save the combined data in the first output folder. -def combineData(folderNames, inputParameters, storesList): - - logger.debug("Combining Data from different data files...") - timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] - op_folder = [] - for i in range(len(folderNames)): - filepath = folderNames[i] - op_folder.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) - - op_folder = list(np.concatenate(op_folder).flatten()) - sampling_rate_fp = [] - for i in range(len(folderNames)): - filepath = folderNames[i] - storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - for j in range(len(storesListPath)): - filepath = storesListPath[j] - storesList_new = np.genfromtxt( - os.path.join(filepath, "storesList.csv"), dtype="str", delimiter="," - ).reshape(2, -1) - sampling_rate_fp.append(glob.glob(os.path.join(filepath, "timeCorrection_*"))) - - # check if sampling rate is same for both data - sampling_rate_fp = np.concatenate(sampling_rate_fp) - sampling_rate = [] - for i in range(sampling_rate_fp.shape[0]): - sampling_rate.append(read_hdf5("", sampling_rate_fp[i], "sampling_rate")) - - res = all(i == sampling_rate[0] for i in sampling_rate) - if res == False: - logger.error("To combine the data, sampling rate for both the data should be same.") - raise Exception("To combine the data, sampling rate for both the data should be same.") - - # get the output folders informatinos - op = get_all_stores_for_combining_data(op_folder) - - # processing timestamps for combining the data - processTimestampsForCombiningData(op, timeForLightsTurnOn, storesList, sampling_rate[0]) - logger.info("Data is combined from different data files.") - - return op - - def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): arr = np.array([]) @@ -113,7 +66,7 @@ def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): return ts_arr -def processTimestampsForCombiningData(filepath, timeForLightsTurnOn, events, sampling_rate): +def combine_data(filepath, timeForLightsTurnOn, events, sampling_rate): logger.debug("Processing timestamps for combining data...") diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 
46fc7c7..17d1fb5 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -8,7 +8,7 @@ import numpy as np from .analysis.artifact_removal import remove_artifacts -from .analysis.combine_data import combineData +from .analysis.combine_data import combine_data from .analysis.control_channel import add_control_channel, create_control_channel from .analysis.io_utils import ( check_storeslistfile, @@ -399,6 +399,50 @@ def execute_artifact_removal(folderNames, inputParameters): logger.info("Artifact removal completed.") +# function to combine data when there are two different data files for the same recording session +# it will combine the data, do timestamps processing and save the combined data in the first output folder. +def execute_combine_data(folderNames, inputParameters, storesList): + + logger.debug("Combining Data from different data files...") + timeForLightsTurnOn = inputParameters["timeForLightsTurnOn"] + op_folder = [] + for i in range(len(folderNames)): + filepath = folderNames[i] + op_folder.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) + + op_folder = list(np.concatenate(op_folder).flatten()) + sampling_rate_fp = [] + for i in range(len(folderNames)): + filepath = folderNames[i] + storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) + for j in range(len(storesListPath)): + filepath = storesListPath[j] + storesList_new = np.genfromtxt( + os.path.join(filepath, "storesList.csv"), dtype="str", delimiter="," + ).reshape(2, -1) + sampling_rate_fp.append(glob.glob(os.path.join(filepath, "timeCorrection_*"))) + + # check if sampling rate is same for both data + sampling_rate_fp = np.concatenate(sampling_rate_fp) + sampling_rate = [] + for i in range(sampling_rate_fp.shape[0]): + sampling_rate.append(read_hdf5("", sampling_rate_fp[i], "sampling_rate")) + + res = all(i == sampling_rate[0] for i in sampling_rate) + if res == False: + logger.error("To combine the data, sampling rate for both the data should be same.") + raise Exception("To combine the data, sampling rate for both the data should be same.") + + # get the output folders informatinos + op = get_all_stores_for_combining_data(op_folder) + + # processing timestamps for combining the data + combine_data(op, timeForLightsTurnOn, storesList, sampling_rate[0]) + logger.info("Data is combined from different data files.") + + return op + + def extractTsAndSignal(inputParameters): logger.debug("Extracting signal data and event timestamps...") @@ -434,7 +478,7 @@ def extractTsAndSignal(inputParameters): writeToFile(str((pbMaxValue) * 10) + "\n" + str(10) + "\n") execute_timestamp_correction(folderNames, inputParameters) storesList = check_storeslistfile(folderNames) - op_folder = combineData(folderNames, inputParameters, storesList) + op_folder = execute_combine_data(folderNames, inputParameters, storesList) execute_zscore(op_folder, inputParameters) if remove_artifacts == True: execute_artifact_removal(op_folder, inputParameters) From 042fb33c26327376cb6fe67497667e61341089c4 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 19 Dec 2025 17:14:38 -0800 Subject: [PATCH 120/150] Renamed some variables for clarity. 
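
For reference, a minimal sketch of the nested structure that combine_data iterates over, assuming two recording folders that each produced two output folders (the folder names below are illustrative only, not taken from this patch):

    # Each inner list groups the matching *_output_i folders across recordings.
    # combine_data works on one inner list at a time and, per the previous patch,
    # writes the combined arrays back into the first folder of that group.
    op = [
        ["folder1_output_0", "folder2_output_0"],
        ["folder1_output_1", "folder2_output_1"],
    ]
    for single_output_filepaths in op:
        ...  # concatenate data/timestamps across single_output_filepaths
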
--- src/guppy/analysis/combine_data.py | 28 ++++++++++++++++------------ src/guppy/preprocess.py | 1 - 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index 3ab73d3..b89f9e1 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -12,13 +12,13 @@ logger = logging.getLogger(__name__) -def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): +def eliminateData(filepaths, timeForLightsTurnOn, event, sampling_rate, naming): arr = np.array([]) ts_arr = np.array([]) - for i in range(len(filepath)): - ts = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") - data = read_hdf5(event, filepath[i], "data").reshape(-1) + for i in range(len(filepaths)): + ts = read_hdf5("timeCorrection_" + naming, filepaths[i], "timestampNew") + data = read_hdf5(event, filepaths[i], "data").reshape(-1) # index = np.where((ts>coords[i,0]) & (ts Date: Fri, 19 Dec 2025 17:27:49 -0800 Subject: [PATCH 121/150] Refactored read operations out of eliminateData. --- src/guppy/analysis/combine_data.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index b89f9e1..a63be7e 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -12,15 +12,16 @@ logger = logging.getLogger(__name__) -def eliminateData(filepaths, timeForLightsTurnOn, event, sampling_rate, naming): +def eliminateData(filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, event, sampling_rate, naming): arr = np.array([]) ts_arr = np.array([]) - for i in range(len(filepaths)): - ts = read_hdf5("timeCorrection_" + naming, filepaths[i], "timestampNew") - data = read_hdf5(event, filepaths[i], "data").reshape(-1) - - # index = np.where((ts>coords[i,0]) & (ts Date: Fri, 19 Dec 2025 17:36:57 -0800 Subject: [PATCH 122/150] Cleaned up some indentation in combine_data. --- src/guppy/analysis/combine_data.py | 77 +++++++++++++++--------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index a63be7e..e2fb719 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -67,12 +67,12 @@ def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): return ts_arr -def combine_data(filepath: list[list[str]], timeForLightsTurnOn, events, sampling_rate): +def combine_data(filepath: list[list[str]], timeForLightsTurnOn, names_for_storenames, sampling_rate): # filepath = [[folder1_output_0, folder2_output_0], [folder1_output_1, folder2_output_1], ...] logger.debug("Processing timestamps for combining data...") - storesList = events[1, :] + names_for_storenames = names_for_storenames[1, :] for single_output_filepaths in filepath: # single_output_filepaths = [folder1_output_i, folder2_output_i, ...] 
@@ -81,41 +81,42 @@ def combine_data(filepath: list[list[str]], timeForLightsTurnOn, events, samplin pair_name_to_tsNew = {} for j in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_") - name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_") - if name_1[-1] == name_2[-1]: - name = name_1[-1] - - for i in range(len(storesList)): - if ( - "control_" + name.lower() in storesList[i].lower() - or "signal_" + name.lower() in storesList[i].lower() - ): - filepath_to_timestamps = {} - filepath_to_data = {} - for filepath in single_output_filepaths: - ts = read_hdf5("timeCorrection_" + name, filepath, "timestampNew") - data = read_hdf5(storesList[i], filepath, "data").reshape(-1) - filepath_to_timestamps[filepath] = ts - filepath_to_data[filepath] = data - - data, timestampNew = eliminateData( - filepath_to_timestamps, - filepath_to_data, - timeForLightsTurnOn, - storesList[i], - sampling_rate, - name, - ) - write_hdf5(data, storesList[i], single_output_filepaths[0], "data") - pair_name_to_tsNew[name] = timestampNew - else: - if "control" in storesList[i].lower() or "signal" in storesList[i].lower(): - continue - else: - ts = eliminateTs( - single_output_filepaths, timeForLightsTurnOn, storesList[i], sampling_rate, name - ) - write_hdf5(ts, storesList[i] + "_" + name, single_output_filepaths[0], "ts") + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_")[-1] + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + pair_name = name_1 + + for i in range(len(names_for_storenames)): + if ( + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() + ): + filepath_to_timestamps = {} + filepath_to_data = {} + for filepath in single_output_filepaths: + ts = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") + data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) + filepath_to_timestamps[filepath] = ts + filepath_to_data[filepath] = data + + data, timestampNew = eliminateData( + filepath_to_timestamps, + filepath_to_data, + timeForLightsTurnOn, + names_for_storenames[i], + sampling_rate, + pair_name, + ) + write_hdf5(data, names_for_storenames[i], single_output_filepaths[0], "data") + pair_name_to_tsNew[pair_name] = timestampNew + else: + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + continue + ts = eliminateTs( + single_output_filepaths, timeForLightsTurnOn, names_for_storenames[i], sampling_rate, pair_name + ) + write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, single_output_filepaths[0], "ts") for pair_name, tsNew in pair_name_to_tsNew.items(): write_hdf5(tsNew, "timeCorrection_" + pair_name, single_output_filepaths[0], "timestampNew") From d3a8fbc5c302867296f8f4b2a4bb97428e56781d Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 19 Dec 2025 17:41:22 -0800 Subject: [PATCH 123/150] Refactored read operations out of eliminateTs. 
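
In rough outline, the caller now assembles the inputs before calling eliminateTs; when a per-pair TTL file does not exist, an empty array is substituted so the concatenation logic can still run. A condensed sketch of the caller side, using the read_hdf5, os and np names already imported in combine_data.py (event_name and pair_name stand in for names_for_storenames[i] and the file-pair suffix):

    filepath_to_timestamps = {}
    filepath_to_ttl_timestamps = {}
    for fp in single_output_filepaths:
        # corrected photometry timestamps for this output folder
        tsNew = read_hdf5("timeCorrection_" + pair_name, fp, "timestampNew")
        ttl_path = os.path.join(fp, event_name + "_" + pair_name + ".hdf5")
        if os.path.exists(ttl_path):
            ts = read_hdf5(event_name + "_" + pair_name, fp, "ts").reshape(-1)
        else:
            ts = np.array([])  # no TTLs recorded for this event in this folder
        filepath_to_timestamps[fp] = tsNew
        filepath_to_ttl_timestamps[fp] = ts
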
--- src/guppy/analysis/combine_data.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index e2fb719..6c00be6 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -43,11 +43,11 @@ def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): ts_arr = np.array([]) tsNew_arr = np.array([]) for i in range(len(filepath)): - tsNew = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") - if os.path.exists(os.path.join(filepath[i], event + "_" + naming + ".hdf5")): - ts = read_hdf5(event + "_" + naming, filepath[i], "ts").reshape(-1) - else: - ts = np.array([]) + # tsNew = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") + # if os.path.exists(os.path.join(filepath[i], event + "_" + naming + ".hdf5")): + # ts = read_hdf5(event + "_" + naming, filepath[i], "ts").reshape(-1) + # else: + # ts = np.array([]) # logger.info("total time : ", tsNew[-1]) if len(tsNew_arr) == 0: @@ -114,8 +114,24 @@ def combine_data(filepath: list[list[str]], timeForLightsTurnOn, names_for_store else: if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue + filepath_to_timestamps = {} + filepath_to_ttl_timestamps = {} + for filepath in single_output_filepaths: + tsNew = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") + if os.path.exists(os.path.join(filepath, names_for_storenames[i] + "_" + pair_name + ".hdf5")): + ts = read_hdf5(names_for_storenames[i] + "_" + pair_name, filepath, "ts").reshape(-1) + else: + ts = np.array([]) + filepath_to_timestamps[filepath] = tsNew + filepath_to_ttl_timestamps[filepath] = ts + ts = eliminateTs( - single_output_filepaths, timeForLightsTurnOn, names_for_storenames[i], sampling_rate, pair_name + filepath_to_timestamps, + filepath_to_ttl_timestamps, + timeForLightsTurnOn, + names_for_storenames[i], + sampling_rate, + pair_name, ) write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, single_output_filepaths[0], "ts") for pair_name, tsNew in pair_name_to_tsNew.items(): From c481d953aafac5919e1afb676a019a35aa338f89 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 19 Dec 2025 17:43:50 -0800 Subject: [PATCH 124/150] Refactored read operations out of eliminateTs. 
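
One practical benefit of passing dicts instead of a filepath is that the helper can be exercised with in-memory arrays, without any HDF5 files on disk. A toy, self-contained illustration of the calling pattern (concat_per_folder below is illustrative only, not eliminateTs itself):

    import numpy as np

    def concat_per_folder(filepath_to_arr):
        # toy stand-in for a dict-consuming helper: concatenate arrays folder by folder
        return np.concatenate([filepath_to_arr[fp] for fp in filepath_to_arr])

    filepath_to_ttl_timestamps = {
        "folderA_output_0": np.array([12.5, 40.2]),
        "folderB_output_0": np.array([5.0, 60.1]),
    }
    combined = concat_per_folder(filepath_to_ttl_timestamps)
    # combined -> array([12.5, 40.2,  5. , 60.1])
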
--- src/guppy/analysis/combine_data.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index 6c00be6..4cddb32 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -38,11 +38,14 @@ def eliminateData(filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, return arr, ts_arr -def eliminateTs(filepath, timeForLightsTurnOn, event, sampling_rate, naming): +def eliminateTs(filepath_to_timestamps, filepath_to_ttl_timestamps, timeForLightsTurnOn, event, sampling_rate, naming): ts_arr = np.array([]) tsNew_arr = np.array([]) - for i in range(len(filepath)): + filepaths = list(filepath_to_timestamps.keys()) + for filepath in filepaths: + ts = filepath_to_timestamps[filepath] + tsNew = filepath_to_ttl_timestamps[filepath] # tsNew = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") # if os.path.exists(os.path.join(filepath[i], event + "_" + naming + ".hdf5")): # ts = read_hdf5(event + "_" + naming, filepath[i], "ts").reshape(-1) From ebe24b64799cc603ea719d8fcbc970edd43950ec Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 19 Dec 2025 17:47:11 -0800 Subject: [PATCH 125/150] Refactored read operations out of eliminateTs. --- src/guppy/analysis/combine_data.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index 4cddb32..6ccddc0 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -def eliminateData(filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, event, sampling_rate, naming): +def eliminateData(filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, sampling_rate): arr = np.array([]) ts_arr = np.array([]) @@ -20,8 +20,6 @@ def eliminateData(filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, for filepath in filepaths: ts = filepath_to_timestamps[filepath] data = filepath_to_data[filepath] - # ts = read_hdf5("timeCorrection_" + naming, filepaths[i], "timestampNew") - # data = read_hdf5(event, filepaths[i], "data").reshape(-1) if len(arr) == 0: arr = np.concatenate((arr, data)) @@ -38,7 +36,7 @@ def eliminateData(filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, return arr, ts_arr -def eliminateTs(filepath_to_timestamps, filepath_to_ttl_timestamps, timeForLightsTurnOn, event, sampling_rate, naming): +def eliminateTs(filepath_to_timestamps, filepath_to_ttl_timestamps, timeForLightsTurnOn, sampling_rate): ts_arr = np.array([]) tsNew_arr = np.array([]) @@ -46,13 +44,6 @@ def eliminateTs(filepath_to_timestamps, filepath_to_ttl_timestamps, timeForLight for filepath in filepaths: ts = filepath_to_timestamps[filepath] tsNew = filepath_to_ttl_timestamps[filepath] - # tsNew = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") - # if os.path.exists(os.path.join(filepath[i], event + "_" + naming + ".hdf5")): - # ts = read_hdf5(event + "_" + naming, filepath[i], "ts").reshape(-1) - # else: - # ts = np.array([]) - - # logger.info("total time : ", tsNew[-1]) if len(tsNew_arr) == 0: sub = tsNew[0] - timeForLightsTurnOn tsNew_arr = np.concatenate((tsNew_arr, tsNew - sub)) @@ -108,9 +99,7 @@ def combine_data(filepath: list[list[str]], timeForLightsTurnOn, names_for_store filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, - names_for_storenames[i], sampling_rate, - pair_name, ) 
write_hdf5(data, names_for_storenames[i], single_output_filepaths[0], "data") pair_name_to_tsNew[pair_name] = timestampNew @@ -132,9 +121,7 @@ def combine_data(filepath: list[list[str]], timeForLightsTurnOn, names_for_store filepath_to_timestamps, filepath_to_ttl_timestamps, timeForLightsTurnOn, - names_for_storenames[i], sampling_rate, - pair_name, ) write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, single_output_filepaths[0], "ts") for pair_name, tsNew in pair_name_to_tsNew.items(): From ec9623500781ab96a6a1e7b185ef304606c6a605 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 09:20:04 -0800 Subject: [PATCH 126/150] Pulled loop out of combine_data --- src/guppy/analysis/combine_data.py | 121 ++++++++++++++--------------- src/guppy/preprocess.py | 3 +- 2 files changed, 60 insertions(+), 64 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index 6ccddc0..277258c 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -61,68 +61,63 @@ def eliminateTs(filepath_to_timestamps, filepath_to_ttl_timestamps, timeForLight return ts_arr -def combine_data(filepath: list[list[str]], timeForLightsTurnOn, names_for_storenames, sampling_rate): - # filepath = [[folder1_output_0, folder2_output_0], [folder1_output_1, folder2_output_1], ...] - +def combine_data(filepaths_to_combine: list[str], timeForLightsTurnOn, storesList, sampling_rate): + # filepaths_to_combine = [folder1_output_i, folder2_output_i, ...] logger.debug("Processing timestamps for combining data...") - names_for_storenames = names_for_storenames[1, :] - - for single_output_filepaths in filepath: - # single_output_filepaths = [folder1_output_i, folder2_output_i, ...] - - path = decide_naming_convention(single_output_filepaths[0]) - - pair_name_to_tsNew = {} - for j in range(path.shape[1]): - name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_")[-1] - name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_")[-1] - if name_1 != name_2: - logger.error("Error in naming convention of files or Error in storesList file") - raise Exception("Error in naming convention of files or Error in storesList file") - pair_name = name_1 - - for i in range(len(names_for_storenames)): - if ( - "control_" + pair_name.lower() in names_for_storenames[i].lower() - or "signal_" + pair_name.lower() in names_for_storenames[i].lower() - ): - filepath_to_timestamps = {} - filepath_to_data = {} - for filepath in single_output_filepaths: - ts = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") - data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) - filepath_to_timestamps[filepath] = ts - filepath_to_data[filepath] = data - - data, timestampNew = eliminateData( - filepath_to_timestamps, - filepath_to_data, - timeForLightsTurnOn, - sampling_rate, - ) - write_hdf5(data, names_for_storenames[i], single_output_filepaths[0], "data") - pair_name_to_tsNew[pair_name] = timestampNew - else: - if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): - continue - filepath_to_timestamps = {} - filepath_to_ttl_timestamps = {} - for filepath in single_output_filepaths: - tsNew = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") - if os.path.exists(os.path.join(filepath, names_for_storenames[i] + "_" + pair_name + ".hdf5")): - ts = read_hdf5(names_for_storenames[i] + "_" + pair_name, filepath, "ts").reshape(-1) - else: - ts = np.array([]) - 
filepath_to_timestamps[filepath] = tsNew - filepath_to_ttl_timestamps[filepath] = ts - - ts = eliminateTs( - filepath_to_timestamps, - filepath_to_ttl_timestamps, - timeForLightsTurnOn, - sampling_rate, - ) - write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, single_output_filepaths[0], "ts") - for pair_name, tsNew in pair_name_to_tsNew.items(): - write_hdf5(tsNew, "timeCorrection_" + pair_name, single_output_filepaths[0], "timestampNew") + names_for_storenames = storesList[1, :] + path = decide_naming_convention(filepaths_to_combine[0]) + + pair_name_to_tsNew = {} + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_")[-1] + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + pair_name = name_1 + + for i in range(len(names_for_storenames)): + if ( + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() + ): + filepath_to_timestamps = {} + filepath_to_data = {} + for filepath in filepaths_to_combine: + ts = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") + data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) + filepath_to_timestamps[filepath] = ts + filepath_to_data[filepath] = data + + data, timestampNew = eliminateData( + filepath_to_timestamps, + filepath_to_data, + timeForLightsTurnOn, + sampling_rate, + ) + write_hdf5(data, names_for_storenames[i], filepaths_to_combine[0], "data") + pair_name_to_tsNew[pair_name] = timestampNew + else: + if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): + continue + filepath_to_timestamps = {} + filepath_to_ttl_timestamps = {} + for filepath in filepaths_to_combine: + tsNew = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") + if os.path.exists(os.path.join(filepath, names_for_storenames[i] + "_" + pair_name + ".hdf5")): + ts = read_hdf5(names_for_storenames[i] + "_" + pair_name, filepath, "ts").reshape(-1) + else: + ts = np.array([]) + filepath_to_timestamps[filepath] = tsNew + filepath_to_ttl_timestamps[filepath] = ts + + ts = eliminateTs( + filepath_to_timestamps, + filepath_to_ttl_timestamps, + timeForLightsTurnOn, + sampling_rate, + ) + write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, filepaths_to_combine[0], "ts") + for pair_name, tsNew in pair_name_to_tsNew.items(): + write_hdf5(tsNew, "timeCorrection_" + pair_name, filepaths_to_combine[0], "timestampNew") diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 0c41ae4..a7b6e27 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -436,7 +436,8 @@ def execute_combine_data(folderNames, inputParameters, storesList): op = get_all_stores_for_combining_data(op_folder) # processing timestamps for combining the data - combine_data(op, timeForLightsTurnOn, storesList, sampling_rate[0]) + for filepaths_to_combine in op: + combine_data(filepaths_to_combine, timeForLightsTurnOn, storesList, sampling_rate[0]) logger.info("Data is combined from different data files.") return op From fd1fd332453b37dab102e6c71b5b263f2fcef464 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 11:07:48 -0800 Subject: [PATCH 127/150] Pulled read out of combine_data --- src/guppy/analysis/combine_data.py | 39 +++++++--------- 
src/guppy/analysis/standard_io.py | 73 ++++++++++++++++++++++++++++++ src/guppy/preprocess.py | 18 +++++++- 3 files changed, 107 insertions(+), 23 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index 277258c..8cbeace 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -5,7 +5,6 @@ from .io_utils import ( decide_naming_convention, - read_hdf5, write_hdf5, ) @@ -61,7 +60,15 @@ def eliminateTs(filepath_to_timestamps, filepath_to_ttl_timestamps, timeForLight return ts_arr -def combine_data(filepaths_to_combine: list[str], timeForLightsTurnOn, storesList, sampling_rate): +def combine_data( + filepaths_to_combine: list[str], + pair_name_to_filepath_to_timestamps: dict[str, dict[str, np.ndarray]], + display_name_to_filepath_to_data: dict[str, dict[str, np.ndarray]], + compound_name_to_filepath_to_ttl_timestamps: dict[str, dict[str, np.ndarray]], + timeForLightsTurnOn, + storesList, + sampling_rate, +): # filepaths_to_combine = [folder1_output_i, folder2_output_i, ...] logger.debug("Processing timestamps for combining data...") @@ -82,35 +89,23 @@ def combine_data(filepaths_to_combine: list[str], timeForLightsTurnOn, storesLis "control_" + pair_name.lower() in names_for_storenames[i].lower() or "signal_" + pair_name.lower() in names_for_storenames[i].lower() ): - filepath_to_timestamps = {} - filepath_to_data = {} - for filepath in filepaths_to_combine: - ts = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") - data = read_hdf5(names_for_storenames[i], filepath, "data").reshape(-1) - filepath_to_timestamps[filepath] = ts - filepath_to_data[filepath] = data - + display_name = names_for_storenames[i] + filepath_to_timestamps = pair_name_to_filepath_to_timestamps[pair_name] + filepath_to_data = display_name_to_filepath_to_data[display_name] data, timestampNew = eliminateData( filepath_to_timestamps, filepath_to_data, timeForLightsTurnOn, sampling_rate, ) - write_hdf5(data, names_for_storenames[i], filepaths_to_combine[0], "data") + write_hdf5(data, display_name, filepaths_to_combine[0], "data") pair_name_to_tsNew[pair_name] = timestampNew else: if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue - filepath_to_timestamps = {} - filepath_to_ttl_timestamps = {} - for filepath in filepaths_to_combine: - tsNew = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") - if os.path.exists(os.path.join(filepath, names_for_storenames[i] + "_" + pair_name + ".hdf5")): - ts = read_hdf5(names_for_storenames[i] + "_" + pair_name, filepath, "ts").reshape(-1) - else: - ts = np.array([]) - filepath_to_timestamps[filepath] = tsNew - filepath_to_ttl_timestamps[filepath] = ts + compound_name = names_for_storenames[i] + "_" + pair_name + filepath_to_timestamps = pair_name_to_filepath_to_timestamps[pair_name] + filepath_to_ttl_timestamps = compound_name_to_filepath_to_ttl_timestamps[compound_name] ts = eliminateTs( filepath_to_timestamps, @@ -118,6 +113,6 @@ def combine_data(filepaths_to_combine: list[str], timeForLightsTurnOn, storesLis timeForLightsTurnOn, sampling_rate, ) - write_hdf5(ts, names_for_storenames[i] + "_" + pair_name, filepaths_to_combine[0], "ts") + write_hdf5(ts, compound_name, filepaths_to_combine[0], "ts") for pair_name, tsNew in pair_name_to_tsNew.items(): write_hdf5(tsNew, "timeCorrection_" + pair_name, filepaths_to_combine[0], "timestampNew") diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index 
e7fe8e0..02bbe99 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -208,3 +208,76 @@ def write_artifact_removal( write_corrected_ttl_timestamps(filepath, compound_name_to_corrected_ttl_timestamps) if pair_name_to_corrected_timestamps is not None: write_artifact_corrected_timestamps(filepath, pair_name_to_corrected_timestamps) + + +def read_timestamps_for_combining_data(filepaths_to_combine): + path = decide_naming_convention(filepaths_to_combine[0]) + pair_name_to_filepath_to_timestamps: dict[str, dict[str, np.ndarray]] = {} + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_")[-1] + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + pair_name = name_1 + pair_name_to_filepath_to_timestamps[pair_name] = {} + for filepath in filepaths_to_combine: + tsNew = read_hdf5("timeCorrection_" + pair_name, filepath, "timestampNew") + pair_name_to_filepath_to_timestamps[pair_name][filepath] = tsNew + + return pair_name_to_filepath_to_timestamps + + +def read_data_for_combining_data(filepaths_to_combine, storesList): + names_for_storenames = storesList[1, :] + path = decide_naming_convention(filepaths_to_combine[0]) + display_name_to_filepath_to_data: dict[str, dict[str, np.ndarray]] = {} + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_")[-1] + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + pair_name = name_1 + for i in range(len(names_for_storenames)): + if not ( + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() + ): + continue + display_name = names_for_storenames[i] + display_name_to_filepath_to_data[display_name] = {} + for filepath in filepaths_to_combine: + data = read_hdf5(display_name, filepath, "data").reshape(-1) + display_name_to_filepath_to_data[display_name][filepath] = data + + return display_name_to_filepath_to_data + + +def read_ttl_timestamps_for_combining_data(filepaths_to_combine, storesList): + names_for_storenames = storesList[1, :] + path = decide_naming_convention(filepaths_to_combine[0]) + compound_name_to_filepath_to_ttl_timestamps: dict[str, dict[str, np.ndarray]] = {} + for j in range(path.shape[1]): + name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_")[-1] + name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_")[-1] + if name_1 != name_2: + logger.error("Error in naming convention of files or Error in storesList file") + raise Exception("Error in naming convention of files or Error in storesList file") + pair_name = name_1 + for i in range(len(names_for_storenames)): + if ( + "control_" + pair_name.lower() in names_for_storenames[i].lower() + or "signal_" + pair_name.lower() in names_for_storenames[i].lower() + ): + continue + compound_name = names_for_storenames[i] + "_" + pair_name + compound_name_to_filepath_to_ttl_timestamps[compound_name] = {} + for filepath in filepaths_to_combine: + if os.path.exists(os.path.join(filepath, names_for_storenames[i] + "_" + pair_name + ".hdf5")): + ts = 
read_hdf5(names_for_storenames[i] + "_" + pair_name, filepath, "ts").reshape(-1) + else: + ts = np.array([]) + compound_name_to_filepath_to_ttl_timestamps[compound_name][filepath] = ts + + return compound_name_to_filepath_to_ttl_timestamps diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index a7b6e27..17a0cbc 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -26,7 +26,10 @@ read_corrected_data_dict, read_corrected_timestamps_pairwise, read_corrected_ttl_timestamps, + read_data_for_combining_data, + read_timestamps_for_combining_data, read_ttl, + read_ttl_timestamps_for_combining_data, write_artifact_removal, write_corrected_data, write_corrected_timestamps, @@ -437,7 +440,20 @@ def execute_combine_data(folderNames, inputParameters, storesList): # processing timestamps for combining the data for filepaths_to_combine in op: - combine_data(filepaths_to_combine, timeForLightsTurnOn, storesList, sampling_rate[0]) + pair_name_to_filepath_to_timestamps = read_timestamps_for_combining_data(filepaths_to_combine) + display_name_to_filepath_to_data = read_data_for_combining_data(filepaths_to_combine, storesList) + compound_name_to_filepath_to_ttl_timestamps = read_ttl_timestamps_for_combining_data( + filepaths_to_combine, storesList + ) + combine_data( + filepaths_to_combine, + pair_name_to_filepath_to_timestamps, + display_name_to_filepath_to_data, + compound_name_to_filepath_to_ttl_timestamps, + timeForLightsTurnOn, + storesList, + sampling_rate[0], + ) logger.info("Data is combined from different data files.") return op From a8fd7387139bd439c29c7b09a5ed27f3f971e297 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 11:19:05 -0800 Subject: [PATCH 128/150] Pulled write out of combine_data --- src/guppy/analysis/combine_data.py | 11 ++++++----- src/guppy/analysis/standard_io.py | 9 +++++++++ src/guppy/preprocess.py | 5 ++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/guppy/analysis/combine_data.py b/src/guppy/analysis/combine_data.py index 8cbeace..1eac5b6 100644 --- a/src/guppy/analysis/combine_data.py +++ b/src/guppy/analysis/combine_data.py @@ -5,7 +5,6 @@ from .io_utils import ( decide_naming_convention, - write_hdf5, ) logger = logging.getLogger(__name__) @@ -76,6 +75,8 @@ def combine_data( path = decide_naming_convention(filepaths_to_combine[0]) pair_name_to_tsNew = {} + display_name_to_data = {} + compound_name_to_ttl_timestamps = {} for j in range(path.shape[1]): name_1 = ((os.path.basename(path[0, j])).split(".")[0]).split("_")[-1] name_2 = ((os.path.basename(path[1, j])).split(".")[0]).split("_")[-1] @@ -98,8 +99,8 @@ def combine_data( timeForLightsTurnOn, sampling_rate, ) - write_hdf5(data, display_name, filepaths_to_combine[0], "data") pair_name_to_tsNew[pair_name] = timestampNew + display_name_to_data[display_name] = data else: if "control" in names_for_storenames[i].lower() or "signal" in names_for_storenames[i].lower(): continue @@ -113,6 +114,6 @@ def combine_data( timeForLightsTurnOn, sampling_rate, ) - write_hdf5(ts, compound_name, filepaths_to_combine[0], "ts") - for pair_name, tsNew in pair_name_to_tsNew.items(): - write_hdf5(tsNew, "timeCorrection_" + pair_name, filepaths_to_combine[0], "timestampNew") + compound_name_to_ttl_timestamps[compound_name] = ts + + return pair_name_to_tsNew, display_name_to_data, compound_name_to_ttl_timestamps diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index 02bbe99..2baefca 100644 --- a/src/guppy/analysis/standard_io.py +++ 
b/src/guppy/analysis/standard_io.py @@ -281,3 +281,12 @@ def read_ttl_timestamps_for_combining_data(filepaths_to_combine, storesList): compound_name_to_filepath_to_ttl_timestamps[compound_name][filepath] = ts return compound_name_to_filepath_to_ttl_timestamps + + +def write_combined_data(output_filepath, pair_name_to_tsNew, display_name_to_data, compound_name_to_ttl_timestamps): + for pair_name, tsNew in pair_name_to_tsNew.items(): + write_hdf5(tsNew, "timeCorrection_" + pair_name, output_filepath, "timestampNew") + for display_name, data in display_name_to_data.items(): + write_hdf5(data, display_name, output_filepath, "data") + for compound_name, ts in compound_name_to_ttl_timestamps.items(): + write_hdf5(ts, compound_name, output_filepath, "ts") diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 17a0cbc..e4812a2 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -31,6 +31,7 @@ read_ttl, read_ttl_timestamps_for_combining_data, write_artifact_removal, + write_combined_data, write_corrected_data, write_corrected_timestamps, write_corrected_ttl_timestamps, @@ -445,7 +446,7 @@ def execute_combine_data(folderNames, inputParameters, storesList): compound_name_to_filepath_to_ttl_timestamps = read_ttl_timestamps_for_combining_data( filepaths_to_combine, storesList ) - combine_data( + pair_name_to_tsNew, display_name_to_data, compound_name_to_ttl_timestamps = combine_data( filepaths_to_combine, pair_name_to_filepath_to_timestamps, display_name_to_filepath_to_data, @@ -454,6 +455,8 @@ def execute_combine_data(folderNames, inputParameters, storesList): storesList, sampling_rate[0], ) + output_filepath = filepaths_to_combine[0] + write_combined_data(output_filepath, pair_name_to_tsNew, display_name_to_data, compound_name_to_ttl_timestamps) logger.info("Data is combined from different data files.") return op From dccd54a706a2dbe78167ec7abff7caadf1df66a3 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 12:20:45 -0800 Subject: [PATCH 129/150] Added test for combined data. --- src/guppy/testing/api.py | 6 ++ tests/test_combine_data.py | 138 +++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 tests/test_combine_data.py diff --git a/src/guppy/testing/api.py b/src/guppy/testing/api.py index c647907..98939cf 100644 --- a/src/guppy/testing/api.py +++ b/src/guppy/testing/api.py @@ -268,6 +268,7 @@ def step4( npm_timestamp_column_names: list[str | None] | None = None, npm_time_units: list[str] | None = None, npm_split_events: list[bool] | None = None, + combine_data: bool = False, ) -> None: """ Run pipeline Step 4 (Extract timestamps and signal) via the Panel-backed logic, headlessly. @@ -293,6 +294,8 @@ def step4( List of time units for NPM files, one per CSV file (e.g., 'seconds', 'milliseconds'). None if not applicable. npm_split_events : list[bool] | None List of booleans indicating whether to split events for NPM files, one per CSV file. None if not applicable. + combine_data : bool + Whether to enable data combining logic in Step 4. 
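When this flag is enabled, Step 4 ends up driving the read, compute, write pipeline assembled in the preceding patches, with combine_data itself free of HDF5 I/O. A condensed sketch of the caller-side flow, mirroring execute_combine_data in preprocess.py (local names shortened here for readability):

    # One group of matching output folders, e.g. [folder1_output_0, folder2_output_0]
    for filepaths_to_combine in op:
        pair_ts = read_timestamps_for_combining_data(filepaths_to_combine)
        data = read_data_for_combining_data(filepaths_to_combine, storesList)
        ttl_ts = read_ttl_timestamps_for_combining_data(filepaths_to_combine, storesList)

        # Pure computation over in-memory arrays; no file access inside combine_data.
        tsNew, combined, combined_ttl = combine_data(
            filepaths_to_combine, pair_ts, data, ttl_ts,
            timeForLightsTurnOn, storesList, sampling_rate[0],
        )

        # All results land in the first folder of the group.
        write_combined_data(filepaths_to_combine[0], tsNew, combined, combined_ttl)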
Raises ------ @@ -345,6 +348,9 @@ def step4( # Inject modality input_params["modality"] = modality + # Inject combine_data + input_params["combine_data"] = combine_data + # Call the underlying Step 4 worker directly (no subprocess) extractTsAndSignal(input_params) diff --git a/tests/test_combine_data.py b/tests/test_combine_data.py new file mode 100644 index 0000000..f7c0261 --- /dev/null +++ b/tests/test_combine_data.py @@ -0,0 +1,138 @@ +import glob +import os +import shutil +from pathlib import Path + +import h5py +import pytest + +from guppy.testing.api import step2, step3, step4, step5 + + +@pytest.mark.filterwarnings("ignore::UserWarning") +def test_combine_data(tmp_path, monkeypatch): + session_subdirs = [ + "SampleData_Clean/Photo_63_207-181030-103332", + "SampleData_with_artifacts/Photo_048_392-200728-121222", + ] + storenames_map = { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + } + expected_region = "dms" + expected_ttl = "port_entries_dms" + modality = "tdt" + + npm_timestamp_column_names = None + npm_time_units = None + npm_split_events = [True, True] + + # Use the CSV sample session + src_base_dir = str(Path(".") / "testing_data") + src_sessions = [os.path.join(src_base_dir, session_subdir) for session_subdir in session_subdirs] + for src_session in src_sessions: + if not os.path.isdir(src_session): + pytest.skip(f"Sample data not available at expected path: {src_session}") + + # Stub matplotlib.pyplot.show to avoid GUI blocking + import matplotlib.pyplot as plt # noqa: F401 + + monkeypatch.setattr("matplotlib.pyplot.show", lambda *args, **kwargs: None) + + # Stage a clean copy of the session into a temporary workspace + tmp_base = tmp_path / "data_root" + tmp_base.mkdir(parents=True, exist_ok=True) + session_copies = [] + for src_session in src_sessions: + dest_name = os.path.basename(src_session) + session_copy = tmp_base / dest_name + shutil.copytree(src_session, session_copy) + session_copies.append(session_copy) + + for session_copy in session_copies: + # Remove any copied artifacts in the temp session (match only this session's output dirs) + for d in glob.glob(os.path.join(session_copy, f"{dest_name}_output_*")): + assert os.path.isdir(d), f"Expected output directory for cleanup, got non-directory: {d}" + shutil.rmtree(d) + params_fp = session_copy / "GuPPyParamtersUsed.json" + if params_fp.exists(): + params_fp.unlink() + + selected_folders = [str(session_copy) for session_copy in session_copies] + base_dir = str(tmp_base) + + # Step 2: create storesList.csv in the temp copy + step2( + base_dir=base_dir, + selected_folders=selected_folders, + storenames_map=storenames_map, + modality=modality, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, + npm_split_events=npm_split_events, + ) + + # Step 3: read raw data in the temp copy + step3( + base_dir=base_dir, + selected_folders=selected_folders, + modality=modality, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, + npm_split_events=npm_split_events, + ) + + # Step 4: extract timestamps and signal in the temp copy + step4( + base_dir=base_dir, + selected_folders=selected_folders, + modality=modality, + npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, + npm_split_events=npm_split_events, + combine_data=True, + ) + + # Step 5: compute PSTH in the temp copy (headless) + step5( + base_dir=str(tmp_base), + selected_folders=[str(session_copy)], + modality=modality, + 
npm_timestamp_column_names=npm_timestamp_column_names, + npm_time_units=npm_time_units, + npm_split_events=npm_split_events, + ) + + # Validate outputs exist in the temp copy + session_copy = selected_folders[0] # Outputs are written to the first session folder + basename = os.path.basename(session_copy) + output_dirs = sorted(glob.glob(os.path.join(session_copy, f"{basename}_output_*"))) + assert output_dirs, f"No output directories found in {session_copy}" + out_dir = None + for d in output_dirs: + if os.path.exists(os.path.join(d, "storesList.csv")): + out_dir = d + break + assert out_dir is not None, f"No storesList.csv found in any output directory under {session_copy}" + stores_fp = os.path.join(out_dir, "storesList.csv") + assert os.path.exists(stores_fp), "Missing storesList.csv after Step 2/3/4" + + # Ensure timeCorrection_.hdf5 exists with 'timestampNew' + timecorr = os.path.join(out_dir, f"timeCorrection_{expected_region}.hdf5") + assert os.path.exists(timecorr), f"Missing {timecorr}" + with h5py.File(timecorr, "r") as f: + assert "timestampNew" in f, f"Expected 'timestampNew' dataset in {timecorr}" + + # If TTLs exist, check their per-region 'ts' outputs + if expected_ttl is None: + expected_ttls = [] + elif isinstance(expected_ttl, str): + expected_ttls = [expected_ttl] + else: + expected_ttls = expected_ttl + for expected_ttl in expected_ttls: + ttl_fp = os.path.join(out_dir, f"{expected_ttl}_{expected_region}.hdf5") + assert os.path.exists(ttl_fp), f"Missing TTL-aligned file {ttl_fp}" + with h5py.File(ttl_fp, "r") as f: + assert "ts" in f, f"Expected 'ts' dataset in {ttl_fp}" From ba6ced17f9d23042eb43acc3f4a2a8b0595f84fe Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 14:04:30 -0800 Subject: [PATCH 130/150] Reorganized imports for computePsth.py --- src/guppy/computePsth.py | 49 +--------------------------------------- 1 file changed, 1 insertion(+), 48 deletions(-) diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 671d1d3..3153f12 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -12,13 +12,12 @@ from collections import OrderedDict from itertools import repeat -import h5py import numpy as np import pandas as pd from scipy import signal as ss +from .analysis.io_utils import get_all_stores_for_combining_data, read_hdf5, write_hdf5 from .computeCorr import computeCrossCorrelation, getCorrCombinations, make_dir -from .preprocess import get_all_stores_for_combining_data logger = logging.getLogger(__name__) @@ -36,52 +35,6 @@ def writeToFile(value: str): file.write(value) -# function to read hdf5 file -def read_hdf5(event, filepath, key): - if event: - event = event.replace("\\", "_") - event = event.replace("/", "_") - op = os.path.join(filepath, event + ".hdf5") - else: - op = filepath - - if os.path.exists(op): - with h5py.File(op, "r") as f: - arr = np.asarray(f[key]) - else: - raise Exception("{}.hdf5 file does not exist".format(event)) - - return arr - - -# function to write hdf5 file -def write_hdf5(data, event, filepath, key): - event = event.replace("\\", "_") - event = event.replace("/", "_") - op = os.path.join(filepath, event + ".hdf5") - - # if file does not exist create a new file - if not os.path.exists(op): - with h5py.File(op, "w") as f: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - # if file already exists, append data to it or add a new key to it - else: - with h5py.File(op, "r+") as f: - if key in 
list(f.keys()): - if type(data) is np.ndarray: - f[key].resize(data.shape) - arr = f[key] - arr[:] = data - else: - arr = f[key] - arr = data - else: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - - def create_Df_area_peak(filepath, arr, name, index=[]): op = os.path.join(filepath, "peak_AUC_" + name + ".h5") From 62d751c7329ca7d35cfc39a0452dbc4168d694fd Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 14:22:55 -0800 Subject: [PATCH 131/150] Refactored psthForEachStorename into 3 execute fns --- src/guppy/computePsth.py | 205 ++++++++++++++++++++------------------- 1 file changed, 106 insertions(+), 99 deletions(-) diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 3153f12..d6636a9 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -668,8 +668,6 @@ def psthForEachStorename(inputParameters): # storesList = np.genfromtxt(inputParameters['storesListPath'], dtype='str', delimiter=',') - folderNames = inputParameters["folderNames"] - folderNamesForAvg = inputParameters["folderNamesForAvg"] average = inputParameters["averageForGroup"] combine_data = inputParameters["combine_data"] numProcesses = inputParameters["numberOfCores"] @@ -687,112 +685,121 @@ def psthForEachStorename(inputParameters): # for average following if statement will be executed if average == True: - if len(folderNamesForAvg) > 0: - storesListPath = [] - for i in range(len(folderNamesForAvg)): - filepath = folderNamesForAvg[i] - storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) - storesListPath = np.concatenate(storesListPath) - storesList = np.asarray([[], []]) - for i in range(storesListPath.shape[0]): - storesList = np.concatenate( - ( - storesList, - np.genfromtxt( - os.path.join(storesListPath[i], "storesList.csv"), dtype="str", delimiter="," - ).reshape(2, -1), - ), - axis=1, - ) - storesList = np.unique(storesList, axis=1) - op = makeAverageDir(inputParameters["abspath"]) - np.savetxt(os.path.join(op, "storesList.csv"), storesList, delimiter=",", fmt="%s") - pbMaxValue = 0 - for j in range(storesList.shape[1]): - if "control" in storesList[1, j].lower() or "signal" in storesList[1, j].lower(): - continue - else: - pbMaxValue += 1 - writeToFile(str((1 + pbMaxValue + 1) * 10) + "\n" + str(10) + "\n") - for k in range(storesList.shape[1]): - if "control" in storesList[1, k].lower() or "signal" in storesList[1, k].lower(): - continue - else: - averageForGroup(storesListPath, storesList[1, k], inputParameters) - writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") - inputParameters["step"] += 1 - - else: - logger.error("Not a single folder name is provided in folderNamesForAvg in inputParamters File.") - raise Exception("Not a single folder name is provided in folderNamesForAvg in inputParamters File.") + execute_average_for_group(inputParameters) # for individual analysis following else statement will be executed else: if combine_data == True: - storesListPath = [] - for i in range(len(folderNames)): - storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(folderNames[i], "*_output_*")))) - storesListPath = list(np.concatenate(storesListPath).flatten()) - op = get_all_stores_for_combining_data(storesListPath) - writeToFile(str((len(op) + len(op) + 1) * 10) + "\n" + str(10) + "\n") - for i in range(len(op)): - storesList = np.asarray([[], []]) - for j in range(len(op[i])): - storesList = np.concatenate( - ( - storesList, - np.genfromtxt(os.path.join(op[i][j], "storesList.csv"), dtype="str", 
delimiter=",").reshape( - 2, -1 - ), - ), - axis=1, - ) - storesList = np.unique(storesList, axis=1) - for k in range(storesList.shape[1]): - storenamePsth(op[i][0], storesList[1, k], inputParameters) - findPSTHPeakAndArea(op[i][0], storesList[1, k], inputParameters) - computeCrossCorrelation(op[i][0], storesList[1, k], inputParameters) - writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") - inputParameters["step"] += 1 + execute_psth_combined(inputParameters) else: - storesListPath = [] - for i in range(len(folderNames)): - storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(folderNames[i], "*_output_*")))) - storesListPath = np.concatenate(storesListPath) - writeToFile(str((storesListPath.shape[0] + storesListPath.shape[0] + 1) * 10) + "\n" + str(10) + "\n") - for i in range(len(folderNames)): - logger.debug(f"Computing PSTH, Peak and Area for each event in {folderNames[i]}") - storesListPath = takeOnlyDirs(glob.glob(os.path.join(folderNames[i], "*_output_*"))) - for j in range(len(storesListPath)): - filepath = storesListPath[j] - storesList = np.genfromtxt( - os.path.join(filepath, "storesList.csv"), dtype="str", delimiter="," - ).reshape(2, -1) - - with mp.Pool(numProcesses) as p: - p.starmap(storenamePsth, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) - - with mp.Pool(numProcesses) as pq: - pq.starmap( - findPSTHPeakAndArea, zip(repeat(filepath), storesList[1, :], repeat(inputParameters)) - ) - - with mp.Pool(numProcesses) as cr: - cr.starmap( - computeCrossCorrelation, zip(repeat(filepath), storesList[1, :], repeat(inputParameters)) - ) - - # for k in range(storesList.shape[1]): - # storenamePsth(filepath, storesList[1,k], inputParameters) - # findPSTHPeakAndArea(filepath, storesList[1,k], inputParameters) - - writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") - inputParameters["step"] += 1 - logger.info(f"PSTH, Area and Peak are computed for all events in {folderNames[i]}.") + execute_psth(inputParameters) logger.info("PSTH, Area and Peak are computed for all events.") return inputParameters +def execute_psth(inputParameters): + folderNames = inputParameters["folderNames"] + numProcesses = inputParameters["numberOfCores"] + storesListPath = [] + for i in range(len(folderNames)): + storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(folderNames[i], "*_output_*")))) + storesListPath = np.concatenate(storesListPath) + writeToFile(str((storesListPath.shape[0] + storesListPath.shape[0] + 1) * 10) + "\n" + str(10) + "\n") + for i in range(len(folderNames)): + logger.debug(f"Computing PSTH, Peak and Area for each event in {folderNames[i]}") + storesListPath = takeOnlyDirs(glob.glob(os.path.join(folderNames[i], "*_output_*"))) + for j in range(len(storesListPath)): + filepath = storesListPath[j] + storesList = np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape( + 2, -1 + ) + + with mp.Pool(numProcesses) as p: + p.starmap(storenamePsth, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) + + with mp.Pool(numProcesses) as pq: + pq.starmap(findPSTHPeakAndArea, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) + + with mp.Pool(numProcesses) as cr: + cr.starmap(computeCrossCorrelation, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) + + # for k in range(storesList.shape[1]): + # storenamePsth(filepath, storesList[1,k], inputParameters) + # findPSTHPeakAndArea(filepath, storesList[1,k], inputParameters) + + writeToFile(str(10 + 
((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + logger.info(f"PSTH, Area and Peak are computed for all events in {folderNames[i]}.") + + +def execute_psth_combined(inputParameters): + folderNames = inputParameters["folderNames"] + storesListPath = [] + for i in range(len(folderNames)): + storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(folderNames[i], "*_output_*")))) + storesListPath = list(np.concatenate(storesListPath).flatten()) + op = get_all_stores_for_combining_data(storesListPath) + writeToFile(str((len(op) + len(op) + 1) * 10) + "\n" + str(10) + "\n") + for i in range(len(op)): + storesList = np.asarray([[], []]) + for j in range(len(op[i])): + storesList = np.concatenate( + ( + storesList, + np.genfromtxt(os.path.join(op[i][j], "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1), + ), + axis=1, + ) + storesList = np.unique(storesList, axis=1) + for k in range(storesList.shape[1]): + storenamePsth(op[i][0], storesList[1, k], inputParameters) + findPSTHPeakAndArea(op[i][0], storesList[1, k], inputParameters) + computeCrossCorrelation(op[i][0], storesList[1, k], inputParameters) + writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + + +def execute_average_for_group(inputParameters): + folderNamesForAvg = inputParameters["folderNamesForAvg"] + if len(folderNamesForAvg) == 0: + logger.error("Not a single folder name is provided in folderNamesForAvg in inputParamters File.") + raise Exception("Not a single folder name is provided in folderNamesForAvg in inputParamters File.") + + storesListPath = [] + for i in range(len(folderNamesForAvg)): + filepath = folderNamesForAvg[i] + storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) + storesListPath = np.concatenate(storesListPath) + storesList = np.asarray([[], []]) + for i in range(storesListPath.shape[0]): + storesList = np.concatenate( + ( + storesList, + np.genfromtxt(os.path.join(storesListPath[i], "storesList.csv"), dtype="str", delimiter=",").reshape( + 2, -1 + ), + ), + axis=1, + ) + storesList = np.unique(storesList, axis=1) + op = makeAverageDir(inputParameters["abspath"]) + np.savetxt(os.path.join(op, "storesList.csv"), storesList, delimiter=",", fmt="%s") + pbMaxValue = 0 + for j in range(storesList.shape[1]): + if "control" in storesList[1, j].lower() or "signal" in storesList[1, j].lower(): + continue + else: + pbMaxValue += 1 + writeToFile(str((1 + pbMaxValue + 1) * 10) + "\n" + str(10) + "\n") + for k in range(storesList.shape[1]): + if "control" in storesList[1, k].lower() or "signal" in storesList[1, k].lower(): + continue + else: + averageForGroup(storesListPath, storesList[1, k], inputParameters) + writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + + def main(input_parameters): try: inputParameters = psthForEachStorename(input_parameters) From c835aa8b1d7572f140b7d5205eed7a5d66812bb8 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 15:49:25 -0800 Subject: [PATCH 132/150] Reorganzied execute_psth fns into separate modules --- .../cross_correlation.py} | 162 +++---- src/guppy/analysis/io_utils.py | 21 + src/guppy/analysis/psth_peak_and_area.py | 119 +++++ src/guppy/analysis/storename_psth.py | 323 +++++++++++++ src/guppy/combineDataFn.py | 341 -------------- src/guppy/computePsth.py | 442 +----------------- 6 files changed, 540 insertions(+), 868 deletions(-) rename src/guppy/{computeCorr.py => analysis/cross_correlation.py} 
(86%) create mode 100644 src/guppy/analysis/psth_peak_and_area.py create mode 100644 src/guppy/analysis/storename_psth.py delete mode 100755 src/guppy/combineDataFn.py diff --git a/src/guppy/computeCorr.py b/src/guppy/analysis/cross_correlation.py similarity index 86% rename from src/guppy/computeCorr.py rename to src/guppy/analysis/cross_correlation.py index 9070b43..43d0a10 100644 --- a/src/guppy/computeCorr.py +++ b/src/guppy/analysis/cross_correlation.py @@ -4,47 +4,85 @@ import os import re -import h5py import numpy as np import pandas as pd from scipy import signal -logger = logging.getLogger(__name__) +from .io_utils import make_dir_for_cross_correlation, read_Df, read_hdf5 +logger = logging.getLogger(__name__) -def make_dir(filepath): - op = os.path.join(filepath, "cross_correlation_output") - if not os.path.exists(op): - os.mkdir(op) - return op +def computeCrossCorrelation(filepath, event, inputParameters): + isCompute = inputParameters["computeCorr"] + removeArtifacts = inputParameters["removeArtifacts"] + artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] + if isCompute == True: + if removeArtifacts == True and artifactsRemovalMethod == "concatenate": + raise Exception( + "For cross-correlation, when removeArtifacts is True, artifacts removal method\ + should be replace with NaNs and not concatenate" + ) + corr_info, type = getCorrCombinations(filepath, inputParameters) + if "control" in event.lower() or "signal" in event.lower(): + return + else: + for i in range(1, len(corr_info)): + logger.debug(f"Computing cross-correlation for event {event}...") + for j in range(len(type)): + psth_a = read_Df(filepath, event + "_" + corr_info[i - 1], type[j] + "_" + corr_info[i - 1]) + psth_b = read_Df(filepath, event + "_" + corr_info[i], type[j] + "_" + corr_info[i]) + sample_rate = 1 / (psth_a["timestamps"][1] - psth_a["timestamps"][0]) + psth_a = psth_a.drop(columns=["timestamps", "err", "mean"]) + psth_b = psth_b.drop(columns=["timestamps", "err", "mean"]) + cols_a, cols_b = np.array(psth_a.columns), np.array(psth_b.columns) + if np.intersect1d(cols_a, cols_b).size > 0: + cols = list(np.intersect1d(cols_a, cols_b)) + else: + cols = list(cols_a) + arr_A, arr_B = np.array(psth_a).T, np.array(psth_b).T + cross_corr = helperCrossCorrelation(arr_A, arr_B, sample_rate) + cols.append("timestamps") + create_Df( + make_dir_for_cross_correlation(filepath), + "corr_" + event, + type[j] + "_" + corr_info[i - 1] + "_" + corr_info[i], + cross_corr, + cols, + ) + logger.info(f"Cross-correlation for event {event} computed.") -# function to read hdf5 file -def read_hdf5(event, filepath, key): - if event: - op = os.path.join(filepath, event + ".hdf5") - else: - op = filepath - if os.path.exists(op): - with h5py.File(op, "r") as f: - arr = np.asarray(f[key]) +def getCorrCombinations(filepath, inputParameters): + selectForComputePsth = inputParameters["selectForComputePsth"] + if selectForComputePsth == "z_score": + path = glob.glob(os.path.join(filepath, "z_score_*")) + elif selectForComputePsth == "dff": + path = glob.glob(os.path.join(filepath, "dff_*")) else: - logger.error(f"{event}.hdf5 file does not exist") - raise Exception("{}.hdf5 file does not exist".format(event)) + path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) - return arr + names = list() + type = list() + for i in range(len(path)): + basename = (os.path.basename(path[i])).split(".")[0] + names.append(basename.split("_")[-1]) + 
type.append((os.path.basename(path[i])).split(".")[0].split("_" + names[-1], 1)[0]) + names = list(np.unique(np.array(names))) + type = list(np.unique(np.array(type))) -# function to read h5 file and make a dataframe from it -def read_Df(filepath, event, name): - if name: - op = os.path.join(filepath, event + "_{}.h5".format(name)) + corr_info = list() + if len(names) <= 1: + logger.info("Cross-correlation cannot be computed because only one signal is present.") + return corr_info, type + elif len(names) == 2: + corr_info = names else: - op = os.path.join(filepath, event + ".h5") - df = pd.read_hdf(op, key="df", mode="r") + corr_info = names + corr_info.append(names[0]) - return df + return corr_info, type # same function used to store PSTH in computePsth file @@ -91,38 +129,6 @@ def create_Df(filepath, event, name, psth, columns=[]): df.to_hdf(op, key="df", mode="w") -def getCorrCombinations(filepath, inputParameters): - selectForComputePsth = inputParameters["selectForComputePsth"] - if selectForComputePsth == "z_score": - path = glob.glob(os.path.join(filepath, "z_score_*")) - elif selectForComputePsth == "dff": - path = glob.glob(os.path.join(filepath, "dff_*")) - else: - path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) - - names = list() - type = list() - for i in range(len(path)): - basename = (os.path.basename(path[i])).split(".")[0] - names.append(basename.split("_")[-1]) - type.append((os.path.basename(path[i])).split(".")[0].split("_" + names[-1], 1)[0]) - - names = list(np.unique(np.array(names))) - type = list(np.unique(np.array(type))) - - corr_info = list() - if len(names) <= 1: - logger.info("Cross-correlation cannot be computed because only one signal is present.") - return corr_info, type - elif len(names) == 2: - corr_info = names - else: - corr_info = names - corr_info.append(names[0]) - - return corr_info, type - - def helperCrossCorrelation(arr_A, arr_B, sample_rate): cross_corr = list() for a, b in zip(arr_A, arr_B): @@ -139,43 +145,3 @@ def helperCrossCorrelation(arr_A, arr_B, sample_rate): lag_msec = lag_msec.reshape(1, -1) cross_corr_arr = np.concatenate((cross_corr_arr, lag_msec), axis=0) return cross_corr_arr - - -def computeCrossCorrelation(filepath, event, inputParameters): - isCompute = inputParameters["computeCorr"] - removeArtifacts = inputParameters["removeArtifacts"] - artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] - if isCompute == True: - if removeArtifacts == True and artifactsRemovalMethod == "concatenate": - raise Exception( - "For cross-correlation, when removeArtifacts is True, artifacts removal method\ - should be replace with NaNs and not concatenate" - ) - corr_info, type = getCorrCombinations(filepath, inputParameters) - if "control" in event.lower() or "signal" in event.lower(): - return - else: - for i in range(1, len(corr_info)): - logger.debug(f"Computing cross-correlation for event {event}...") - for j in range(len(type)): - psth_a = read_Df(filepath, event + "_" + corr_info[i - 1], type[j] + "_" + corr_info[i - 1]) - psth_b = read_Df(filepath, event + "_" + corr_info[i], type[j] + "_" + corr_info[i]) - sample_rate = 1 / (psth_a["timestamps"][1] - psth_a["timestamps"][0]) - psth_a = psth_a.drop(columns=["timestamps", "err", "mean"]) - psth_b = psth_b.drop(columns=["timestamps", "err", "mean"]) - cols_a, cols_b = np.array(psth_a.columns), np.array(psth_b.columns) - if np.intersect1d(cols_a, cols_b).size > 0: - cols = list(np.intersect1d(cols_a, cols_b)) - else: - cols = 
list(cols_a) - arr_A, arr_B = np.array(psth_a).T, np.array(psth_b).T - cross_corr = helperCrossCorrelation(arr_A, arr_B, sample_rate) - cols.append("timestamps") - create_Df( - make_dir(filepath), - "corr_" + event, - type[j] + "_" + corr_info[i - 1] + "_" + corr_info[i], - cross_corr, - cols, - ) - logger.info(f"Cross-correlation for event {event} computed.") diff --git a/src/guppy/analysis/io_utils.py b/src/guppy/analysis/io_utils.py index b467c37..c1dd39f 100644 --- a/src/guppy/analysis/io_utils.py +++ b/src/guppy/analysis/io_utils.py @@ -6,6 +6,7 @@ import h5py import numpy as np +import pandas as pd logger = logging.getLogger(__name__) @@ -194,3 +195,23 @@ def get_control_and_signal_channel_names(storesList): raise Exception("Error in saving stores list file or spelling mistake for control or signal") return channels_arr + + +# function to read h5 file and make a dataframe from it +def read_Df(filepath, event, name): + event = event.replace("\\", "_") + event = event.replace("/", "_") + if name: + op = os.path.join(filepath, event + "_{}.h5".format(name)) + else: + op = os.path.join(filepath, event + ".h5") + df = pd.read_hdf(op, key="df", mode="r") + + return df + + +def make_dir_for_cross_correlation(filepath): + op = os.path.join(filepath, "cross_correlation_output") + if not os.path.exists(op): + os.mkdir(op) + return op diff --git a/src/guppy/analysis/psth_peak_and_area.py b/src/guppy/analysis/psth_peak_and_area.py new file mode 100644 index 0000000..849bd29 --- /dev/null +++ b/src/guppy/analysis/psth_peak_and_area.py @@ -0,0 +1,119 @@ +import glob +import logging +import os +import re +from collections import OrderedDict + +import numpy as np +import pandas as pd + +from .io_utils import read_Df, read_hdf5 + +logger = logging.getLogger(__name__) + + +# function to compute PSTH peak and area using the function helperPSTHPeakAndArea save the values to h5 and csv files. 
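Before the definition that follows, the core of the peak and area measurement is easy to state: inside each user-supplied [peak_startPoint, peak_endPoint] window on the PSTH time axis, the positive peak is the maximum of the trace, the negative peak is the minimum, and the area is a trapezoidal integral. A stripped-down, single-trace sketch with made-up numbers:

    import numpy as np

    timestamps = np.linspace(-5.0, 10.0, 1501)           # PSTH time axis (s)
    psth_mean = np.exp(-0.5 * (timestamps - 1.0) ** 2)   # made-up mean trace
    start_s, end_s = 0.0, 3.0                            # one peak window

    i0 = np.where(timestamps >= start_s)[0][0]
    i1 = np.where(timestamps >= end_s)[0][0]
    window = psth_mean[i0:i1]

    peak_pos = window.max()    # most positive excursion in the window
    peak_neg = window.min()    # most negative excursion in the window
    area = np.trapz(window)    # AUC with unit sample spacing, matching the np.trapz call below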
+def findPSTHPeakAndArea(filepath, event, inputParameters): + + event = event.replace("\\", "_") + event = event.replace("/", "_") + + # sampling_rate = read_hdf5(storesList[0,0], filepath, 'sampling_rate') + peak_startPoint = inputParameters["peak_startPoint"] + peak_endPoint = inputParameters["peak_endPoint"] + selectForComputePsth = inputParameters["selectForComputePsth"] + + if selectForComputePsth == "z_score": + path = glob.glob(os.path.join(filepath, "z_score_*")) + elif selectForComputePsth == "dff": + path = glob.glob(os.path.join(filepath, "dff_*")) + else: + path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) + + if "control" in event.lower() or "signal" in event.lower(): + return 0 + else: + for i in range(len(path)): + logger.info(f"Computing peak and area for PSTH mean signal for event {event}...") + basename = (os.path.basename(path[i])).split(".")[0] + name_1 = basename.split("_")[-1] + sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] + psth = read_Df(filepath, event + "_" + name_1, basename) + cols = list(psth.columns) + regex = re.compile("bin_[(]") + bin_names = [cols[i] for i in range(len(cols)) if regex.match(cols[i])] + regex_trials = re.compile("[+-]?([0-9]*[.])?[0-9]+") + trials_names = [cols[i] for i in range(len(cols)) if regex_trials.match(cols[i])] + psth_mean_bin_names = trials_names + bin_names + ["mean"] + psth_mean_bin_mean = np.asarray(psth[psth_mean_bin_names]) + timestamps = np.asarray(psth["timestamps"]).ravel() # np.asarray(read_Df(filepath, 'ts_psth', '')).ravel() + peak_area = helperPSTHPeakAndArea( + psth_mean_bin_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint + ) # peak, area = + # arr = np.array([[peak, area]]) + fileName = [os.path.basename(os.path.dirname(filepath))] + index = [fileName[0] + "_" + s for s in psth_mean_bin_names] + create_Df_area_peak( + filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index + ) # columns=['peak', 'area'] + create_csv_area_peak(filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index) + logger.info(f"Peak and Area for PSTH mean signal for event {event} computed.") + + +def helperPSTHPeakAndArea(psth_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint): + + peak_startPoint = np.asarray(peak_startPoint) + peak_endPoint = np.asarray(peak_endPoint) + + peak_startPoint = peak_startPoint[~np.isnan(peak_startPoint)] + peak_endPoint = peak_endPoint[~np.isnan(peak_endPoint)] + + if peak_startPoint.shape[0] != peak_endPoint.shape[0]: + logger.error("Number of Peak Start Time and Peak End Time are unequal.") + raise Exception("Number of Peak Start Time and Peak End Time are unequal.") + + if np.less_equal(peak_endPoint, peak_startPoint).any() == True: + logger.error( + "Peak End Time is lesser than or equal to Peak Start Time. Please check the Peak parameters window." + ) + raise Exception( + "Peak End Time is lesser than or equal to Peak Start Time. Please check the Peak parameters window." 
+ ) + + peak_area = OrderedDict() + + if peak_startPoint.shape[0] == 0 or peak_endPoint.shape[0] == 0: + peak_area["peak"] = np.nan + peak_area["area"] = np.nan + + for i in range(peak_startPoint.shape[0]): + startPtForPeak = np.where(timestamps >= peak_startPoint[i])[0] + endPtForPeak = np.where(timestamps >= peak_endPoint[i])[0] + if len(startPtForPeak) >= 1 and len(endPtForPeak) >= 1: + peakPoint_pos = startPtForPeak[0] + np.argmax(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) + peakPoint_neg = startPtForPeak[0] + np.argmin(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) + peak_area["peak_pos_" + str(i + 1)] = np.amax(psth_mean[peakPoint_pos], axis=0) + peak_area["peak_neg_" + str(i + 1)] = np.amin(psth_mean[peakPoint_neg], axis=0) + peak_area["area_" + str(i + 1)] = np.trapz(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) + else: + peak_area["peak_" + str(i + 1)] = np.nan + peak_area["area_" + str(i + 1)] = np.nan + + return peak_area + + +def create_Df_area_peak(filepath, arr, name, index=[]): + + op = os.path.join(filepath, "peak_AUC_" + name + ".h5") + dirname = os.path.dirname(filepath) + + df = pd.DataFrame(arr, index=index) + + df.to_hdf(op, key="df", mode="w") + + +def create_csv_area_peak(filepath, arr, name, index=[]): + op = os.path.join(filepath, "peak_AUC_" + name + ".csv") + df = pd.DataFrame(arr, index=index) + + df.to_csv(op) diff --git a/src/guppy/analysis/storename_psth.py b/src/guppy/analysis/storename_psth.py new file mode 100644 index 0000000..db99057 --- /dev/null +++ b/src/guppy/analysis/storename_psth.py @@ -0,0 +1,323 @@ +import glob +import logging +import math +import os +import re + +import numpy as np +import pandas as pd +from scipy import signal as ss + +from .io_utils import read_hdf5, write_hdf5 + +logger = logging.getLogger(__name__) + + +# function to create PSTH for each event using function helper_psth and save the PSTH to h5 file +def storenamePsth(filepath, event, inputParameters): + + event = event.replace("\\", "_") + event = event.replace("/", "_") + + selectForComputePsth = inputParameters["selectForComputePsth"] + bin_psth_trials = inputParameters["bin_psth_trials"] + use_time_or_trials = inputParameters["use_time_or_trials"] + + if selectForComputePsth == "z_score": + path = glob.glob(os.path.join(filepath, "z_score_*")) + elif selectForComputePsth == "dff": + path = glob.glob(os.path.join(filepath, "dff_*")) + else: + path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) + + b = np.divide(np.ones((100,)), 100) + a = 1 + + # storesList = storesList + # sampling_rate = read_hdf5(storesList[0,0], filepath, 'sampling_rate') + nSecPrev, nSecPost = inputParameters["nSecPrev"], inputParameters["nSecPost"] + baselineStart, baselineEnd = inputParameters["baselineCorrectionStart"], inputParameters["baselineCorrectionEnd"] + timeInterval = inputParameters["timeInterval"] + + if "control" in event.lower() or "signal" in event.lower(): + return 0 + else: + for i in range(len(path)): + logger.info(f"Computing PSTH for event {event}...") + basename = (os.path.basename(path[i])).split(".")[0] + name_1 = basename.split("_")[-1] + control = read_hdf5("control_" + name_1, os.path.dirname(path[i]), "data") + if (control == 0).all() == True: + signal = read_hdf5("signal_" + name_1, os.path.dirname(path[i]), "data") + z_score = ss.filtfilt(b, a, signal) + just_use_signal = True + else: + z_score = read_hdf5("", path[i], "data") + just_use_signal = False + psth, 
psth_baselineUncorrected, cols = helper_psth( + z_score, + event, + filepath, + nSecPrev, + nSecPost, + timeInterval, + bin_psth_trials, + use_time_or_trials, + baselineStart, + baselineEnd, + name_1, + just_use_signal, + ) + + create_Df( + filepath, + event + "_" + name_1 + "_baselineUncorrected", + basename, + psth_baselineUncorrected, + columns=cols, + ) # extra + create_Df(filepath, event + "_" + name_1, basename, psth, columns=cols) + logger.info(f"PSTH for event {event} computed.") + + +# *********************************** Functions used by storenamePsth *********************************** # + + +# helper function to make PSTH for each event +def helper_psth( + z_score, + event, + filepath, + nSecPrev, + nSecPost, + timeInterval, + bin_psth_trials, + use_time_or_trials, + baselineStart, + baselineEnd, + naming, + just_use_signal, +): + + event = event.replace("\\", "_") + event = event.replace("/", "_") + + sampling_rate = read_hdf5("timeCorrection_" + naming, filepath, "sampling_rate")[0] + + # calculate time before event timestamp and time after event timestamp + nTsPrev = int(round(nSecPrev * sampling_rate)) + nTsPost = int(round(nSecPost * sampling_rate)) + + totalTs = (-1 * nTsPrev) + nTsPost + increment = ((-1 * nSecPrev) + nSecPost) / totalTs + timeAxis = np.linspace(nSecPrev, nSecPost + increment, totalTs + 1) + timeAxisNew = np.concatenate((timeAxis, timeAxis[::-1])) + + # avoid writing same data to same file in multi-processing + # if not os.path.exists(os.path.join(filepath, 'ts_psth.h5')): + # logger.info('file not exists') + # create_Df(filepath, 'ts_psth', '', timeAxis) + # time.sleep(2) + + ts = read_hdf5(event + "_" + naming, filepath, "ts") + + # reject timestamps for which baseline cannot be calculated because of nan values + new_ts = [] + for i in range(ts.shape[0]): + thisTime = ts[i] # -1 not needed anymore + if thisTime < abs(baselineStart): + continue + else: + new_ts.append(ts[i]) + + # reject burst of timestamps + ts = np.asarray(new_ts) + # skip the event if there are no TTLs + if len(ts) == 0: + new_ts = np.array([]) + logger.info(f"Warning : No TTLs present for {event}. 
This will cause an error in Visualization step") + else: + new_ts = [ts[0]] + for i in range(1, ts.shape[0]): + thisTime = ts[i] + prevTime = new_ts[-1] + diff = thisTime - prevTime + if diff < timeInterval: + continue + else: + new_ts.append(ts[i]) + + # final timestamps + ts = np.asarray(new_ts) + nTs = ts.shape[0] + + # initialize PSTH vector + psth = np.full((nTs, totalTs + 1), np.nan) + psth_baselineUncorrected = np.full((nTs, totalTs + 1), np.nan) # extra + + # for each timestamp, create trial which will be saved in a PSTH vector + for i in range(nTs): + thisTime = ts[i] # -timeForLightsTurnOn + thisIndex = int(round(thisTime * sampling_rate)) + arr = rowFormation(z_score, thisIndex, -1 * nTsPrev, nTsPost) + if just_use_signal == True: + res = np.subtract(arr, np.nanmean(arr)) + z_score_arr = np.divide(res, np.nanstd(arr)) + arr = z_score_arr + else: + arr = arr + + psth_baselineUncorrected[i, :] = arr # extra + psth[i, :] = baselineCorrection(filepath, arr, timeAxis, baselineStart, baselineEnd) + + write_hdf5(ts, event + "_" + naming, filepath, "ts") + columns = list(ts) + + if use_time_or_trials == "Time (min)" and bin_psth_trials > 0: + timestamps = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") + timestamps = np.divide(timestamps, 60) + ts_min = np.divide(ts, 60) + bin_steps = np.arange(timestamps[0], timestamps[-1] + bin_psth_trials, bin_psth_trials) + indices_each_step = dict() + for i in range(1, bin_steps.shape[0]): + indices_each_step[f"{np.around(bin_steps[i-1],0)}-{np.around(bin_steps[i],0)}"] = np.where( + (ts_min >= bin_steps[i - 1]) & (ts_min <= bin_steps[i]) + )[0] + elif use_time_or_trials == "# of trials" and bin_psth_trials > 0: + bin_steps = np.arange(0, ts.shape[0], bin_psth_trials) + if bin_steps[-1] < ts.shape[0]: + bin_steps = np.concatenate((bin_steps, [ts.shape[0]]), axis=0) + indices_each_step = dict() + for i in range(1, bin_steps.shape[0]): + indices_each_step[f"{bin_steps[i-1]}-{bin_steps[i]}"] = np.arange(bin_steps[i - 1], bin_steps[i]) + else: + indices_each_step = dict() + + psth_bin, psth_bin_baselineUncorrected = [], [] + if indices_each_step: + keys = list(indices_each_step.keys()) + for k in keys: + # no trials in a given bin window, just put all the nan values + if indices_each_step[k].shape[0] == 0: + psth_bin.append(np.full(psth.shape[1], np.nan)) + psth_bin_baselineUncorrected.append(np.full(psth_baselineUncorrected.shape[1], np.nan)) + psth_bin.append(np.full(psth.shape[1], np.nan)) + psth_bin_baselineUncorrected.append(np.full(psth_baselineUncorrected.shape[1], np.nan)) + else: + index = indices_each_step[k] + arr = psth[index, :] + # mean of bins + psth_bin.append(np.nanmean(psth[index, :], axis=0)) + psth_bin_baselineUncorrected.append(np.nanmean(psth_baselineUncorrected[index, :], axis=0)) + psth_bin.append(np.nanstd(psth[index, :], axis=0) / math.sqrt(psth[index, :].shape[0])) + # error of bins + psth_bin_baselineUncorrected.append( + np.nanstd(psth_baselineUncorrected[index, :], axis=0) + / math.sqrt(psth_baselineUncorrected[index, :].shape[0]) + ) + + # adding column names + columns.append(f"bin_({k})") + columns.append(f"bin_err_({k})") + + psth = np.concatenate((psth, psth_bin), axis=0) + psth_baselineUncorrected = np.concatenate((psth_baselineUncorrected, psth_bin_baselineUncorrected), axis=0) + + timeAxis = timeAxis.reshape(1, -1) + psth = np.concatenate((psth, timeAxis), axis=0) + psth_baselineUncorrected = np.concatenate((psth_baselineUncorrected, timeAxis), axis=0) + columns.append("timestamps") + + return psth, 
psth_baselineUncorrected, columns + + +# function to create dataframe for each event PSTH and save it to h5 file +def create_Df(filepath, event, name, psth, columns=[]): + event = event.replace("\\", "_") + event = event.replace("/", "_") + if name: + op = os.path.join(filepath, event + "_{}.h5".format(name)) + else: + op = os.path.join(filepath, event + ".h5") + + # check if file already exists + # if os.path.exists(op): + # return 0 + + # removing psth binned trials + columns = np.array(columns, dtype="str") + regex = re.compile("bin_*") + single_trials = columns[[i for i in range(len(columns)) if not regex.match(columns[i])]] + single_trials_index = [i for i in range(len(single_trials)) if single_trials[i] != "timestamps"] + + psth = psth.T + if psth.ndim > 1: + mean = np.nanmean(psth[:, single_trials_index], axis=1).reshape(-1, 1) + err = np.nanstd(psth[:, single_trials_index], axis=1) / math.sqrt(psth[:, single_trials_index].shape[1]) + err = err.reshape(-1, 1) + psth = np.hstack((psth, mean)) + psth = np.hstack((psth, err)) + # timestamps = np.asarray(read_Df(filepath, 'ts_psth', '')) + # psth = np.hstack((psth, timestamps)) + try: + ts = read_hdf5(event, filepath, "ts") + ts = np.append(ts, ["mean", "err"]) + except: + ts = None + + if len(columns) == 0: + df = pd.DataFrame(psth, index=None, columns=ts, dtype="float32") + else: + columns = np.asarray(columns) + columns = np.append(columns, ["mean", "err"]) + df = pd.DataFrame(psth, index=None, columns=list(columns), dtype="float32") + + df.to_hdf(op, key="df", mode="w") + + +# ***************************** Functions used by helper_psth ***************************** # + + +# function to create PSTH trials corresponding to each event timestamp +def rowFormation(z_score, thisIndex, nTsPrev, nTsPost): + + if nTsPrev < thisIndex and z_score.shape[0] > (thisIndex + nTsPost): + res = z_score[thisIndex - nTsPrev - 1 : thisIndex + nTsPost] + elif nTsPrev >= thisIndex and z_score.shape[0] > (thisIndex + nTsPost): + mismatch = nTsPrev - thisIndex + 1 + res = np.zeros(nTsPrev + nTsPost + 1) + res[:mismatch] = np.nan + res[mismatch:] = z_score[: thisIndex + nTsPost] + elif nTsPrev >= thisIndex and z_score.shape[0] < (thisIndex + nTsPost): + mismatch1 = nTsPrev - thisIndex + 1 + mismatch2 = (thisIndex + nTsPost) - z_score.shape[0] + res1 = np.full(mismatch1, np.nan) + res2 = z_score + res3 = np.full(mismatch2, np.nan) + res = np.concatenate((res1, np.concatenate((res2, res3)))) + else: + mismatch = (thisIndex + nTsPost) - z_score.shape[0] + res1 = np.zeros(mismatch) + res1[:] = np.nan + res2 = z_score[thisIndex - nTsPrev - 1 : z_score.shape[0]] + res = np.concatenate((res2, res1)) + + return res + + +# function to calculate baseline for each PSTH trial and do baseline correction +def baselineCorrection(filepath, arr, timeAxis, baselineStart, baselineEnd): + + # timeAxis = read_Df(filepath, 'ts_psth', '') + # timeAxis = np.asarray(timeAxis).reshape(-1) + baselineStrtPt = np.where(timeAxis >= baselineStart)[0] + baselineEndPt = np.where(timeAxis >= baselineEnd)[0] + + # logger.info(baselineStrtPt[0], baselineEndPt[0]) + if baselineStart == 0 and baselineEnd == 0: + return arr + + baseline = np.nanmean(arr[baselineStrtPt[0] : baselineEndPt[0]]) + baselineSub = np.subtract(arr, baseline) + + return baselineSub diff --git a/src/guppy/combineDataFn.py b/src/guppy/combineDataFn.py deleted file mode 100755 index 51e2bd0..0000000 --- a/src/guppy/combineDataFn.py +++ /dev/null @@ -1,341 +0,0 @@ -import fnmatch -import logging -import os -import re - 
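A minimal standalone sketch of the baseline-correction step defined just above (baselineCorrection); the trial values, time axis, and baseline window below are toy assumptions for illustration, not GuPPy defaults.

import numpy as np

# Toy trial and time axis; the window bounds are assumed example values.
timeAxis = np.linspace(-5, 5, 101)                    # seconds around the event
trial = np.random.default_rng(0).normal(1.0, 0.1, timeAxis.size)
baselineStart, baselineEnd = -5, -2

# Same logic as baselineCorrection above: subtract the mean of the baseline window.
startPt = np.where(timeAxis >= baselineStart)[0][0]
endPt = np.where(timeAxis >= baselineEnd)[0][0]
baseline = np.nanmean(trial[startPt:endPt])
corrected = trial - baseline
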
-logger = logging.getLogger(__name__) - - -def find_files(path, glob_path, ignore_case=False): - rule = ( - re.compile(fnmatch.translate(glob_path), re.IGNORECASE) - if ignore_case - else re.compile(fnmatch.translate(glob_path)) - ) - no_bytes_path = os.listdir(os.path.expanduser(path)) - str_path = [] - - # converting byte object to string - for x in no_bytes_path: - try: - str_path.append(x.decode("utf-8")) - except: - str_path.append(x) - - return [os.path.join(path, n) for n in str_path if rule.match(n)] - - -def read_hdf5(event, filepath, key): - if event: - op = os.path.join(filepath, event + ".hdf5") - else: - op = filepath - - if os.path.exists(op): - with h5py.File(op, "r") as f: - arr = np.asarray(f[key]) - else: - raise Exception("{}.hdf5 file does not exist".format(event)) - - return arr - - -def write_hdf5(data, event, filepath, key): - op = os.path.join(filepath, event + ".hdf5") - - if not os.path.exists(op): - with h5py.File(op, "w") as f: - if type(data) is np.ndarray: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - else: - f.create_dataset(key, data=data) - else: - with h5py.File(op, "r+") as f: - if key in list(f.keys()): - if type(data) is np.ndarray: - f[key].resize(data.shape) - arr = f[key] - arr[:] = data - else: - arr = f[key] - arr = data - else: - f.create_dataset(key, data=data, maxshape=(None,), chunks=True) - - -def decide_naming_convention(filepath): - path_1 = find_files(filepath, "control*", ignore_case=True) # glob.glob(os.path.join(filepath, 'control*')) - - path_2 = find_files(filepath, "signal*", ignore_case=True) # glob.glob(os.path.join(filepath, 'signal*')) - - path = sorted(path_1 + path_2, key=str.casefold) - - if len(path) % 2 != 0: - raise Exception("There are not equal number of Control and Signal data") - - path = np.asarray(path).reshape(2, -1) - - return path - - -def eliminateData(filepath, timeForLightsTurnOn, event, sampling_rate, naming): - - arr = np.array([]) - ts_arr = np.array([]) - for i in range(len(filepath)): - ts = read_hdf5("timeCorrection_" + naming, filepath[i], "timestampNew") - data = read_hdf5(event, filepath[i], "data").reshape(-1) - - # index = np.where((ts>coords[i,0]) & (tscoords[i,0]) & (ts 1: - mean = np.nanmean(psth[:, single_trials_index], axis=1).reshape(-1, 1) - err = np.nanstd(psth[:, single_trials_index], axis=1) / math.sqrt(psth[:, single_trials_index].shape[1]) - err = err.reshape(-1, 1) - psth = np.hstack((psth, mean)) - psth = np.hstack((psth, err)) - # timestamps = np.asarray(read_Df(filepath, 'ts_psth', '')) - # psth = np.hstack((psth, timestamps)) - try: - ts = read_hdf5(event, filepath, "ts") - ts = np.append(ts, ["mean", "err"]) - except: - ts = None - - if len(columns) == 0: - df = pd.DataFrame(psth, index=None, columns=ts, dtype="float32") - else: - columns = np.asarray(columns) - columns = np.append(columns, ["mean", "err"]) - df = pd.DataFrame(psth, index=None, columns=list(columns), dtype="float32") - - df.to_hdf(op, key="df", mode="w") - - -# function to read h5 file and make a dataframe from it -def read_Df(filepath, event, name): - event = event.replace("\\", "_") - event = event.replace("/", "_") - if name: - op = os.path.join(filepath, event + "_{}.h5".format(name)) - else: - op = os.path.join(filepath, event + ".h5") - df = pd.read_hdf(op, key="df", mode="r") - - return df - - -# function to create PSTH trials corresponding to each event timestamp -def rowFormation(z_score, thisIndex, nTsPrev, nTsPost): - - if nTsPrev < thisIndex and z_score.shape[0] > (thisIndex + 
nTsPost): - res = z_score[thisIndex - nTsPrev - 1 : thisIndex + nTsPost] - elif nTsPrev >= thisIndex and z_score.shape[0] > (thisIndex + nTsPost): - mismatch = nTsPrev - thisIndex + 1 - res = np.zeros(nTsPrev + nTsPost + 1) - res[:mismatch] = np.nan - res[mismatch:] = z_score[: thisIndex + nTsPost] - elif nTsPrev >= thisIndex and z_score.shape[0] < (thisIndex + nTsPost): - mismatch1 = nTsPrev - thisIndex + 1 - mismatch2 = (thisIndex + nTsPost) - z_score.shape[0] - res1 = np.full(mismatch1, np.nan) - res2 = z_score - res3 = np.full(mismatch2, np.nan) - res = np.concatenate((res1, np.concatenate((res2, res3)))) - else: - mismatch = (thisIndex + nTsPost) - z_score.shape[0] - res1 = np.zeros(mismatch) - res1[:] = np.nan - res2 = z_score[thisIndex - nTsPrev - 1 : z_score.shape[0]] - res = np.concatenate((res2, res1)) - - return res - - -# function to calculate baseline for each PSTH trial and do baseline correction -def baselineCorrection(filepath, arr, timeAxis, baselineStart, baselineEnd): - - # timeAxis = read_Df(filepath, 'ts_psth', '') - # timeAxis = np.asarray(timeAxis).reshape(-1) - baselineStrtPt = np.where(timeAxis >= baselineStart)[0] - baselineEndPt = np.where(timeAxis >= baselineEnd)[0] - - # logger.info(baselineStrtPt[0], baselineEndPt[0]) - if baselineStart == 0 and baselineEnd == 0: - return arr - - baseline = np.nanmean(arr[baselineStrtPt[0] : baselineEndPt[0]]) - baselineSub = np.subtract(arr, baseline) - - return baselineSub - - -# helper function to make PSTH for each event -def helper_psth( - z_score, - event, - filepath, - nSecPrev, - nSecPost, - timeInterval, - bin_psth_trials, - use_time_or_trials, - baselineStart, - baselineEnd, - naming, - just_use_signal, -): - - event = event.replace("\\", "_") - event = event.replace("/", "_") - - sampling_rate = read_hdf5("timeCorrection_" + naming, filepath, "sampling_rate")[0] - - # calculate time before event timestamp and time after event timestamp - nTsPrev = int(round(nSecPrev * sampling_rate)) - nTsPost = int(round(nSecPost * sampling_rate)) - - totalTs = (-1 * nTsPrev) + nTsPost - increment = ((-1 * nSecPrev) + nSecPost) / totalTs - timeAxis = np.linspace(nSecPrev, nSecPost + increment, totalTs + 1) - timeAxisNew = np.concatenate((timeAxis, timeAxis[::-1])) - - # avoid writing same data to same file in multi-processing - # if not os.path.exists(os.path.join(filepath, 'ts_psth.h5')): - # logger.info('file not exists') - # create_Df(filepath, 'ts_psth', '', timeAxis) - # time.sleep(2) - - ts = read_hdf5(event + "_" + naming, filepath, "ts") - - # reject timestamps for which baseline cannot be calculated because of nan values - new_ts = [] - for i in range(ts.shape[0]): - thisTime = ts[i] # -1 not needed anymore - if thisTime < abs(baselineStart): - continue - else: - new_ts.append(ts[i]) - - # reject burst of timestamps - ts = np.asarray(new_ts) - # skip the event if there are no TTLs - if len(ts) == 0: - new_ts = np.array([]) - logger.info(f"Warning : No TTLs present for {event}. 
This will cause an error in Visualization step") - else: - new_ts = [ts[0]] - for i in range(1, ts.shape[0]): - thisTime = ts[i] - prevTime = new_ts[-1] - diff = thisTime - prevTime - if diff < timeInterval: - continue - else: - new_ts.append(ts[i]) - - # final timestamps - ts = np.asarray(new_ts) - nTs = ts.shape[0] - - # initialize PSTH vector - psth = np.full((nTs, totalTs + 1), np.nan) - psth_baselineUncorrected = np.full((nTs, totalTs + 1), np.nan) # extra - - # for each timestamp, create trial which will be saved in a PSTH vector - for i in range(nTs): - thisTime = ts[i] # -timeForLightsTurnOn - thisIndex = int(round(thisTime * sampling_rate)) - arr = rowFormation(z_score, thisIndex, -1 * nTsPrev, nTsPost) - if just_use_signal == True: - res = np.subtract(arr, np.nanmean(arr)) - z_score_arr = np.divide(res, np.nanstd(arr)) - arr = z_score_arr - else: - arr = arr - - psth_baselineUncorrected[i, :] = arr # extra - psth[i, :] = baselineCorrection(filepath, arr, timeAxis, baselineStart, baselineEnd) - - write_hdf5(ts, event + "_" + naming, filepath, "ts") - columns = list(ts) - - if use_time_or_trials == "Time (min)" and bin_psth_trials > 0: - timestamps = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - timestamps = np.divide(timestamps, 60) - ts_min = np.divide(ts, 60) - bin_steps = np.arange(timestamps[0], timestamps[-1] + bin_psth_trials, bin_psth_trials) - indices_each_step = dict() - for i in range(1, bin_steps.shape[0]): - indices_each_step[f"{np.around(bin_steps[i-1],0)}-{np.around(bin_steps[i],0)}"] = np.where( - (ts_min >= bin_steps[i - 1]) & (ts_min <= bin_steps[i]) - )[0] - elif use_time_or_trials == "# of trials" and bin_psth_trials > 0: - bin_steps = np.arange(0, ts.shape[0], bin_psth_trials) - if bin_steps[-1] < ts.shape[0]: - bin_steps = np.concatenate((bin_steps, [ts.shape[0]]), axis=0) - indices_each_step = dict() - for i in range(1, bin_steps.shape[0]): - indices_each_step[f"{bin_steps[i-1]}-{bin_steps[i]}"] = np.arange(bin_steps[i - 1], bin_steps[i]) - else: - indices_each_step = dict() - - psth_bin, psth_bin_baselineUncorrected = [], [] - if indices_each_step: - keys = list(indices_each_step.keys()) - for k in keys: - # no trials in a given bin window, just put all the nan values - if indices_each_step[k].shape[0] == 0: - psth_bin.append(np.full(psth.shape[1], np.nan)) - psth_bin_baselineUncorrected.append(np.full(psth_baselineUncorrected.shape[1], np.nan)) - psth_bin.append(np.full(psth.shape[1], np.nan)) - psth_bin_baselineUncorrected.append(np.full(psth_baselineUncorrected.shape[1], np.nan)) - else: - index = indices_each_step[k] - arr = psth[index, :] - # mean of bins - psth_bin.append(np.nanmean(psth[index, :], axis=0)) - psth_bin_baselineUncorrected.append(np.nanmean(psth_baselineUncorrected[index, :], axis=0)) - psth_bin.append(np.nanstd(psth[index, :], axis=0) / math.sqrt(psth[index, :].shape[0])) - # error of bins - psth_bin_baselineUncorrected.append( - np.nanstd(psth_baselineUncorrected[index, :], axis=0) - / math.sqrt(psth_baselineUncorrected[index, :].shape[0]) - ) - - # adding column names - columns.append(f"bin_({k})") - columns.append(f"bin_err_({k})") - - psth = np.concatenate((psth, psth_bin), axis=0) - psth_baselineUncorrected = np.concatenate((psth_baselineUncorrected, psth_bin_baselineUncorrected), axis=0) - - timeAxis = timeAxis.reshape(1, -1) - psth = np.concatenate((psth, timeAxis), axis=0) - psth_baselineUncorrected = np.concatenate((psth_baselineUncorrected, timeAxis), axis=0) - columns.append("timestamps") - - return psth, 
psth_baselineUncorrected, columns - - -# function to create PSTH for each event using function helper_psth and save the PSTH to h5 file -def storenamePsth(filepath, event, inputParameters): - - event = event.replace("\\", "_") - event = event.replace("/", "_") - - selectForComputePsth = inputParameters["selectForComputePsth"] - bin_psth_trials = inputParameters["bin_psth_trials"] - use_time_or_trials = inputParameters["use_time_or_trials"] - - if selectForComputePsth == "z_score": - path = glob.glob(os.path.join(filepath, "z_score_*")) - elif selectForComputePsth == "dff": - path = glob.glob(os.path.join(filepath, "dff_*")) - else: - path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) - - b = np.divide(np.ones((100,)), 100) - a = 1 - - # storesList = storesList - # sampling_rate = read_hdf5(storesList[0,0], filepath, 'sampling_rate') - nSecPrev, nSecPost = inputParameters["nSecPrev"], inputParameters["nSecPost"] - baselineStart, baselineEnd = inputParameters["baselineCorrectionStart"], inputParameters["baselineCorrectionEnd"] - timeInterval = inputParameters["timeInterval"] - - if "control" in event.lower() or "signal" in event.lower(): - return 0 - else: - for i in range(len(path)): - logger.info(f"Computing PSTH for event {event}...") - basename = (os.path.basename(path[i])).split(".")[0] - name_1 = basename.split("_")[-1] - control = read_hdf5("control_" + name_1, os.path.dirname(path[i]), "data") - if (control == 0).all() == True: - signal = read_hdf5("signal_" + name_1, os.path.dirname(path[i]), "data") - z_score = ss.filtfilt(b, a, signal) - just_use_signal = True - else: - z_score = read_hdf5("", path[i], "data") - just_use_signal = False - psth, psth_baselineUncorrected, cols = helper_psth( - z_score, - event, - filepath, - nSecPrev, - nSecPost, - timeInterval, - bin_psth_trials, - use_time_or_trials, - baselineStart, - baselineEnd, - name_1, - just_use_signal, - ) - - create_Df( - filepath, - event + "_" + name_1 + "_baselineUncorrected", - basename, - psth_baselineUncorrected, - columns=cols, - ) # extra - create_Df(filepath, event + "_" + name_1, basename, psth, columns=cols) - logger.info(f"PSTH for event {event} computed.") - - -def helperPSTHPeakAndArea(psth_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint): - - peak_startPoint = np.asarray(peak_startPoint) - peak_endPoint = np.asarray(peak_endPoint) - - peak_startPoint = peak_startPoint[~np.isnan(peak_startPoint)] - peak_endPoint = peak_endPoint[~np.isnan(peak_endPoint)] - - if peak_startPoint.shape[0] != peak_endPoint.shape[0]: - logger.error("Number of Peak Start Time and Peak End Time are unequal.") - raise Exception("Number of Peak Start Time and Peak End Time are unequal.") - - if np.less_equal(peak_endPoint, peak_startPoint).any() == True: - logger.error( - "Peak End Time is lesser than or equal to Peak Start Time. Please check the Peak parameters window." - ) - raise Exception( - "Peak End Time is lesser than or equal to Peak Start Time. Please check the Peak parameters window." 
- ) - - peak_area = OrderedDict() - - if peak_startPoint.shape[0] == 0 or peak_endPoint.shape[0] == 0: - peak_area["peak"] = np.nan - peak_area["area"] = np.nan - - for i in range(peak_startPoint.shape[0]): - startPtForPeak = np.where(timestamps >= peak_startPoint[i])[0] - endPtForPeak = np.where(timestamps >= peak_endPoint[i])[0] - if len(startPtForPeak) >= 1 and len(endPtForPeak) >= 1: - peakPoint_pos = startPtForPeak[0] + np.argmax(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) - peakPoint_neg = startPtForPeak[0] + np.argmin(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) - peak_area["peak_pos_" + str(i + 1)] = np.amax(psth_mean[peakPoint_pos], axis=0) - peak_area["peak_neg_" + str(i + 1)] = np.amin(psth_mean[peakPoint_neg], axis=0) - peak_area["area_" + str(i + 1)] = np.trapz(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) - else: - peak_area["peak_" + str(i + 1)] = np.nan - peak_area["area_" + str(i + 1)] = np.nan - - return peak_area - - -# function to compute PSTH peak and area using the function helperPSTHPeakAndArea save the values to h5 and csv files. -def findPSTHPeakAndArea(filepath, event, inputParameters): - - event = event.replace("\\", "_") - event = event.replace("/", "_") - - # sampling_rate = read_hdf5(storesList[0,0], filepath, 'sampling_rate') - peak_startPoint = inputParameters["peak_startPoint"] - peak_endPoint = inputParameters["peak_endPoint"] - selectForComputePsth = inputParameters["selectForComputePsth"] - - if selectForComputePsth == "z_score": - path = glob.glob(os.path.join(filepath, "z_score_*")) - elif selectForComputePsth == "dff": - path = glob.glob(os.path.join(filepath, "dff_*")) - else: - path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) - - if "control" in event.lower() or "signal" in event.lower(): - return 0 - else: - for i in range(len(path)): - logger.info(f"Computing peak and area for PSTH mean signal for event {event}...") - basename = (os.path.basename(path[i])).split(".")[0] - name_1 = basename.split("_")[-1] - sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] - psth = read_Df(filepath, event + "_" + name_1, basename) - cols = list(psth.columns) - regex = re.compile("bin_[(]") - bin_names = [cols[i] for i in range(len(cols)) if regex.match(cols[i])] - regex_trials = re.compile("[+-]?([0-9]*[.])?[0-9]+") - trials_names = [cols[i] for i in range(len(cols)) if regex_trials.match(cols[i])] - psth_mean_bin_names = trials_names + bin_names + ["mean"] - psth_mean_bin_mean = np.asarray(psth[psth_mean_bin_names]) - timestamps = np.asarray(psth["timestamps"]).ravel() # np.asarray(read_Df(filepath, 'ts_psth', '')).ravel() - peak_area = helperPSTHPeakAndArea( - psth_mean_bin_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint - ) # peak, area = - # arr = np.array([[peak, area]]) - fileName = [os.path.basename(os.path.dirname(filepath))] - index = [fileName[0] + "_" + s for s in psth_mean_bin_names] - create_Df_area_peak( - filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index - ) # columns=['peak', 'area'] - create_csv_area_peak(filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index) - logger.info(f"Peak and Area for PSTH mean signal for event {event} computed.") - - def makeAverageDir(filepath): op = os.path.join(filepath, "average") @@ -655,7 +235,11 @@ def averageForGroup(folderNames, event, inputParameters): corr = np.concatenate((corr, timestamps), axis=0) columns.append("timestamps") 
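For reference, a stripped-down sketch of the peak/area computation removed above (helperPSTHPeakAndArea), reduced to a single mean trace and one start/end window; the trace and window bounds are made-up example values.

import numpy as np

timestamps = np.linspace(-5, 5, 101)
psth_mean = np.exp(-((timestamps - 1.0) ** 2))        # toy PSTH mean trace
peak_start, peak_end = 0.0, 3.0                       # assumed window, cf. peak_startPoint/peak_endPoint

start = np.where(timestamps >= peak_start)[0][0]
end = np.where(timestamps >= peak_end)[0][0]
peak_pos = np.max(psth_mean[start:end])               # cf. the peak_pos_* entries
peak_neg = np.min(psth_mean[start:end])               # cf. the peak_neg_* entries
area = np.trapz(psth_mean[start:end])                 # cf. area_*; integrated over sample index, as above
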
create_Df( - make_dir(op), "corr_" + event, type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], corr, columns=columns + make_dir_for_cross_correlation(op), + "corr_" + event, + type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], + corr, + columns=columns, ) logger.info("Group of data averaged.") From d182dc206a4dae86700183410216d1a959055656 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 16:08:12 -0800 Subject: [PATCH 133/150] Reorganzied execute_psth fns into separate modules --- src/guppy/analysis/io_utils.py | 9 ++ src/guppy/analysis/storename_psth.py | 47 +----- src/guppy/computePsth.py | 218 +-------------------------- 3 files changed, 14 insertions(+), 260 deletions(-) diff --git a/src/guppy/analysis/io_utils.py b/src/guppy/analysis/io_utils.py index c1dd39f..742ab3b 100644 --- a/src/guppy/analysis/io_utils.py +++ b/src/guppy/analysis/io_utils.py @@ -215,3 +215,12 @@ def make_dir_for_cross_correlation(filepath): if not os.path.exists(op): os.mkdir(op) return op + + +def makeAverageDir(filepath): + + op = os.path.join(filepath, "average") + if not os.path.exists(op): + os.mkdir(op) + + return op diff --git a/src/guppy/analysis/storename_psth.py b/src/guppy/analysis/storename_psth.py index db99057..d582572 100644 --- a/src/guppy/analysis/storename_psth.py +++ b/src/guppy/analysis/storename_psth.py @@ -2,13 +2,12 @@ import logging import math import os -import re import numpy as np -import pandas as pd from scipy import signal as ss from .io_utils import read_hdf5, write_hdf5 +from .psth_utils import create_Df logger = logging.getLogger(__name__) @@ -231,50 +230,6 @@ def helper_psth( return psth, psth_baselineUncorrected, columns -# function to create dataframe for each event PSTH and save it to h5 file -def create_Df(filepath, event, name, psth, columns=[]): - event = event.replace("\\", "_") - event = event.replace("/", "_") - if name: - op = os.path.join(filepath, event + "_{}.h5".format(name)) - else: - op = os.path.join(filepath, event + ".h5") - - # check if file already exists - # if os.path.exists(op): - # return 0 - - # removing psth binned trials - columns = np.array(columns, dtype="str") - regex = re.compile("bin_*") - single_trials = columns[[i for i in range(len(columns)) if not regex.match(columns[i])]] - single_trials_index = [i for i in range(len(single_trials)) if single_trials[i] != "timestamps"] - - psth = psth.T - if psth.ndim > 1: - mean = np.nanmean(psth[:, single_trials_index], axis=1).reshape(-1, 1) - err = np.nanstd(psth[:, single_trials_index], axis=1) / math.sqrt(psth[:, single_trials_index].shape[1]) - err = err.reshape(-1, 1) - psth = np.hstack((psth, mean)) - psth = np.hstack((psth, err)) - # timestamps = np.asarray(read_Df(filepath, 'ts_psth', '')) - # psth = np.hstack((psth, timestamps)) - try: - ts = read_hdf5(event, filepath, "ts") - ts = np.append(ts, ["mean", "err"]) - except: - ts = None - - if len(columns) == 0: - df = pd.DataFrame(psth, index=None, columns=ts, dtype="float32") - else: - columns = np.asarray(columns) - columns = np.append(columns, ["mean", "err"]) - df = pd.DataFrame(psth, index=None, columns=list(columns), dtype="float32") - - df.to_hdf(op, key="df", mode="w") - - # ***************************** Functions used by helper_psth ***************************** # diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index a9b4415..4d12240 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -3,25 +3,21 @@ import glob import json import logging -import math import multiprocessing as mp 
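A small usage sketch for the helper relocated by this reorganization; the import path follows the new guppy.analysis layout shown in the diff headers, and the output directory is a placeholder, not a GuPPy convention.

from guppy.analysis.io_utils import makeAverageDir

# makeAverageDir (moved into io_utils above) creates an "average" subfolder on demand
# and returns its path.
average_dir = makeAverageDir("/data/session1")
print(average_dir)   # -> /data/session1/average
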
import os -import re import subprocess import sys from itertools import repeat import numpy as np -import pandas as pd -from .analysis.cross_correlation import computeCrossCorrelation, getCorrCombinations +from .analysis.cross_correlation import computeCrossCorrelation from .analysis.io_utils import ( get_all_stores_for_combining_data, - make_dir_for_cross_correlation, - write_hdf5, + makeAverageDir, ) -from .analysis.psth_peak_and_area import findPSTHPeakAndArea, read_Df -from .analysis.storename_psth import create_Df, storenamePsth +from .analysis.psth_peak_and_area import findPSTHPeakAndArea +from .analysis.storename_psth import storenamePsth logger = logging.getLogger(__name__) @@ -39,212 +35,6 @@ def writeToFile(value: str): file.write(value) -def read_Df_area_peak(filepath, name): - op = os.path.join(filepath, "peak_AUC_" + name + ".h5") - df = pd.read_hdf(op, key="df", mode="r") - - return df - - -def makeAverageDir(filepath): - - op = os.path.join(filepath, "average") - if not os.path.exists(op): - os.mkdir(op) - - return op - - -def psth_shape_check(psth): - - each_ln = [] - for i in range(len(psth)): - each_ln.append(psth[i].shape[0]) - - each_ln = np.asarray(each_ln) - keep_ln = each_ln[-1] - - for i in range(len(psth)): - if psth[i].shape[0] > keep_ln: - psth[i] = psth[i][:keep_ln] - elif psth[i].shape[0] < keep_ln: - psth[i] = np.append(psth[i], np.full(keep_ln - len(psth[i]), np.nan)) - else: - psth[i] = psth[i] - - return psth - - -# function to compute average of group of recordings -def averageForGroup(folderNames, event, inputParameters): - - event = event.replace("\\", "_") - event = event.replace("/", "_") - - logger.debug("Averaging group of data...") - path = [] - abspath = inputParameters["abspath"] - selectForComputePsth = inputParameters["selectForComputePsth"] - path_temp_len = [] - op = makeAverageDir(abspath) - - # combining paths to all the selected folders for doing average - for i in range(len(folderNames)): - if selectForComputePsth == "z_score": - path_temp = glob.glob(os.path.join(folderNames[i], "z_score_*")) - elif selectForComputePsth == "dff": - path_temp = glob.glob(os.path.join(folderNames[i], "dff_*")) - else: - path_temp = glob.glob(os.path.join(folderNames[i], "z_score_*")) + glob.glob( - os.path.join(folderNames[i], "dff_*") - ) - - path_temp_len.append(len(path_temp)) - # path_temp = glob.glob(os.path.join(folderNames[i], 'z_score_*')) - for j in range(len(path_temp)): - basename = (os.path.basename(path_temp[j])).split(".")[0] - write_hdf5(np.array([]), basename, op, "data") - name_1 = basename.split("_")[-1] - temp = [folderNames[i], event + "_" + name_1, basename] - path.append(temp) - - # processing of all the paths - path_temp_len = np.asarray(path_temp_len) - max_len = np.argmax(path_temp_len) - - naming = [] - for i in range(len(path)): - naming.append(path[i][2]) - naming = np.unique(np.asarray(naming)) - - new_path = [[] for _ in range(path_temp_len[max_len])] - for i in range(len(path)): - idx = np.where(naming == path[i][2])[0][0] - new_path[idx].append(path[i]) - - # read PSTH for each event and make the average of it. Save the final output to an average folder. 
- for i in range(len(new_path)): - psth, psth_bins = [], [] - columns = [] - bins_cols = [] - temp_path = new_path[i] - for j in range(len(temp_path)): - # logger.info(os.path.join(temp_path[j][0], temp_path[j][1]+'_{}.h5'.format(temp_path[j][2]))) - if not os.path.exists(os.path.join(temp_path[j][0], temp_path[j][1] + "_{}.h5".format(temp_path[j][2]))): - continue - else: - df = read_Df(temp_path[j][0], temp_path[j][1], temp_path[j][2]) # filepath, event, name - cols = list(df.columns) - regex = re.compile("bin_[(]") - bins_cols = [cols[i] for i in range(len(cols)) if regex.match(cols[i])] - psth.append(np.asarray(df["mean"])) - columns.append(os.path.basename(temp_path[j][0])) - if len(bins_cols) > 0: - psth_bins.append(df[bins_cols]) - - if len(psth) == 0: - logger.warning("Something is wrong with the file search pattern.") - continue - - if len(bins_cols) > 0: - df_bins = pd.concat(psth_bins, axis=1) - df_bins_mean = df_bins.groupby(by=df_bins.columns, axis=1).mean() - df_bins_err = df_bins.groupby(by=df_bins.columns, axis=1).std() / math.sqrt(df_bins.shape[1]) - cols_err = list(df_bins_err.columns) - dict_err = {} - for i in cols_err: - split = i.split("_") - dict_err[i] = "{}_err_{}".format(split[0], split[1]) - df_bins_err = df_bins_err.rename(columns=dict_err) - columns = columns + list(df_bins_mean.columns) + list(df_bins_err.columns) - df_bins_mean_err = pd.concat([df_bins_mean, df_bins_err], axis=1).T - psth, df_bins_mean_err = np.asarray(psth), np.asarray(df_bins_mean_err) - psth = np.concatenate((psth, df_bins_mean_err), axis=0) - else: - psth = psth_shape_check(psth) - psth = np.asarray(psth) - - timestamps = np.asarray(df["timestamps"]).reshape(1, -1) - psth = np.concatenate((psth, timestamps), axis=0) - columns = columns + ["timestamps"] - create_Df(op, temp_path[j][1], temp_path[j][2], psth, columns=columns) - - # read PSTH peak and area for each event and combine them. Save the final output to an average folder - for i in range(len(new_path)): - arr = [] - index = [] - temp_path = new_path[i] - for j in range(len(temp_path)): - if not os.path.exists( - os.path.join(temp_path[j][0], "peak_AUC_" + temp_path[j][1] + "_" + temp_path[j][2] + ".h5") - ): - continue - else: - df = read_Df_area_peak(temp_path[j][0], temp_path[j][1] + "_" + temp_path[j][2]) - arr.append(df) - index.append(list(df.index)) - - if len(arr) == 0: - logger.warning("Something is wrong with the file search pattern.") - continue - index = list(np.concatenate(index)) - new_df = pd.concat(arr, axis=0) # os.path.join(filepath, 'peak_AUC_'+name+'.csv') - new_df.to_csv(os.path.join(op, "peak_AUC_{}_{}.csv".format(temp_path[j][1], temp_path[j][2])), index=index) - new_df.to_hdf( - os.path.join(op, "peak_AUC_{}_{}.h5".format(temp_path[j][1], temp_path[j][2])), - key="df", - mode="w", - index=index, - ) - - # read cross-correlation files and combine them. 
Save the final output to an average folder - type = [] - for i in range(len(folderNames)): - _, temp_type = getCorrCombinations(folderNames[i], inputParameters) - type.append(temp_type) - - type = np.unique(np.array(type)) - for i in range(len(type)): - corr = [] - columns = [] - df = None - for j in range(len(folderNames)): - corr_info, _ = getCorrCombinations(folderNames[j], inputParameters) - for k in range(1, len(corr_info)): - path = os.path.join( - folderNames[j], - "cross_correlation_output", - "corr_" + event + "_" + type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], - ) - if not os.path.exists(path + ".h5"): - continue - else: - df = read_Df( - os.path.join(folderNames[j], "cross_correlation_output"), - "corr_" + event, - type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], - ) - corr.append(df["mean"]) - columns.append(os.path.basename(folderNames[j])) - - if not isinstance(df, pd.DataFrame): - break - - corr = np.array(corr) - timestamps = np.array(df["timestamps"]).reshape(1, -1) - corr = np.concatenate((corr, timestamps), axis=0) - columns.append("timestamps") - create_Df( - make_dir_for_cross_correlation(op), - "corr_" + event, - type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], - corr, - columns=columns, - ) - - logger.info("Group of data averaged.") - - def psthForEachStorename(inputParameters): logger.info("Computing PSTH, Peak and Area for each event...") From 8dd4042a127c132c127f099c428856cb26052de9 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 16:18:38 -0800 Subject: [PATCH 134/150] Reorganzied execute_psth fns into separate modules --- src/guppy/analysis/psth_average.py | 216 +++++++++++++++++++++++++++ src/guppy/analysis/psth_utils.py | 55 +++++++ src/guppy/analysis/storename_psth.py | 92 +++++------- 3 files changed, 312 insertions(+), 51 deletions(-) create mode 100644 src/guppy/analysis/psth_average.py create mode 100644 src/guppy/analysis/psth_utils.py diff --git a/src/guppy/analysis/psth_average.py b/src/guppy/analysis/psth_average.py new file mode 100644 index 0000000..b539419 --- /dev/null +++ b/src/guppy/analysis/psth_average.py @@ -0,0 +1,216 @@ +import glob +import logging +import math +import os +import re + +import numpy as np +import pandas as pd + +from .cross_correlation import getCorrCombinations +from .io_utils import ( + make_dir_for_cross_correlation, + makeAverageDir, + read_Df, + write_hdf5, +) +from .psth_utils import create_Df + +logger = logging.getLogger(__name__) + + +# function to compute average of group of recordings +def averageForGroup(folderNames, event, inputParameters): + + event = event.replace("\\", "_") + event = event.replace("/", "_") + + logger.debug("Averaging group of data...") + path = [] + abspath = inputParameters["abspath"] + selectForComputePsth = inputParameters["selectForComputePsth"] + path_temp_len = [] + op = makeAverageDir(abspath) + + # combining paths to all the selected folders for doing average + for i in range(len(folderNames)): + if selectForComputePsth == "z_score": + path_temp = glob.glob(os.path.join(folderNames[i], "z_score_*")) + elif selectForComputePsth == "dff": + path_temp = glob.glob(os.path.join(folderNames[i], "dff_*")) + else: + path_temp = glob.glob(os.path.join(folderNames[i], "z_score_*")) + glob.glob( + os.path.join(folderNames[i], "dff_*") + ) + + path_temp_len.append(len(path_temp)) + # path_temp = glob.glob(os.path.join(folderNames[i], 'z_score_*')) + for j in range(len(path_temp)): + basename = (os.path.basename(path_temp[j])).split(".")[0] + 
write_hdf5(np.array([]), basename, op, "data") + name_1 = basename.split("_")[-1] + temp = [folderNames[i], event + "_" + name_1, basename] + path.append(temp) + + # processing of all the paths + path_temp_len = np.asarray(path_temp_len) + max_len = np.argmax(path_temp_len) + + naming = [] + for i in range(len(path)): + naming.append(path[i][2]) + naming = np.unique(np.asarray(naming)) + + new_path = [[] for _ in range(path_temp_len[max_len])] + for i in range(len(path)): + idx = np.where(naming == path[i][2])[0][0] + new_path[idx].append(path[i]) + + # read PSTH for each event and make the average of it. Save the final output to an average folder. + for i in range(len(new_path)): + psth, psth_bins = [], [] + columns = [] + bins_cols = [] + temp_path = new_path[i] + for j in range(len(temp_path)): + # logger.info(os.path.join(temp_path[j][0], temp_path[j][1]+'_{}.h5'.format(temp_path[j][2]))) + if not os.path.exists(os.path.join(temp_path[j][0], temp_path[j][1] + "_{}.h5".format(temp_path[j][2]))): + continue + else: + df = read_Df(temp_path[j][0], temp_path[j][1], temp_path[j][2]) # filepath, event, name + cols = list(df.columns) + regex = re.compile("bin_[(]") + bins_cols = [cols[i] for i in range(len(cols)) if regex.match(cols[i])] + psth.append(np.asarray(df["mean"])) + columns.append(os.path.basename(temp_path[j][0])) + if len(bins_cols) > 0: + psth_bins.append(df[bins_cols]) + + if len(psth) == 0: + logger.warning("Something is wrong with the file search pattern.") + continue + + if len(bins_cols) > 0: + df_bins = pd.concat(psth_bins, axis=1) + df_bins_mean = df_bins.groupby(by=df_bins.columns, axis=1).mean() + df_bins_err = df_bins.groupby(by=df_bins.columns, axis=1).std() / math.sqrt(df_bins.shape[1]) + cols_err = list(df_bins_err.columns) + dict_err = {} + for i in cols_err: + split = i.split("_") + dict_err[i] = "{}_err_{}".format(split[0], split[1]) + df_bins_err = df_bins_err.rename(columns=dict_err) + columns = columns + list(df_bins_mean.columns) + list(df_bins_err.columns) + df_bins_mean_err = pd.concat([df_bins_mean, df_bins_err], axis=1).T + psth, df_bins_mean_err = np.asarray(psth), np.asarray(df_bins_mean_err) + psth = np.concatenate((psth, df_bins_mean_err), axis=0) + else: + psth = psth_shape_check(psth) + psth = np.asarray(psth) + + timestamps = np.asarray(df["timestamps"]).reshape(1, -1) + psth = np.concatenate((psth, timestamps), axis=0) + columns = columns + ["timestamps"] + create_Df(op, temp_path[j][1], temp_path[j][2], psth, columns=columns) + + # read PSTH peak and area for each event and combine them. Save the final output to an average folder + for i in range(len(new_path)): + arr = [] + index = [] + temp_path = new_path[i] + for j in range(len(temp_path)): + if not os.path.exists( + os.path.join(temp_path[j][0], "peak_AUC_" + temp_path[j][1] + "_" + temp_path[j][2] + ".h5") + ): + continue + else: + df = read_Df_area_peak(temp_path[j][0], temp_path[j][1] + "_" + temp_path[j][2]) + arr.append(df) + index.append(list(df.index)) + + if len(arr) == 0: + logger.warning("Something is wrong with the file search pattern.") + continue + index = list(np.concatenate(index)) + new_df = pd.concat(arr, axis=0) # os.path.join(filepath, 'peak_AUC_'+name+'.csv') + new_df.to_csv(os.path.join(op, "peak_AUC_{}_{}.csv".format(temp_path[j][1], temp_path[j][2])), index=index) + new_df.to_hdf( + os.path.join(op, "peak_AUC_{}_{}.h5".format(temp_path[j][1], temp_path[j][2])), + key="df", + mode="w", + index=index, + ) + + # read cross-correlation files and combine them. 
Save the final output to an average folder + type = [] + for i in range(len(folderNames)): + _, temp_type = getCorrCombinations(folderNames[i], inputParameters) + type.append(temp_type) + + type = np.unique(np.array(type)) + for i in range(len(type)): + corr = [] + columns = [] + df = None + for j in range(len(folderNames)): + corr_info, _ = getCorrCombinations(folderNames[j], inputParameters) + for k in range(1, len(corr_info)): + path = os.path.join( + folderNames[j], + "cross_correlation_output", + "corr_" + event + "_" + type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], + ) + if not os.path.exists(path + ".h5"): + continue + else: + df = read_Df( + os.path.join(folderNames[j], "cross_correlation_output"), + "corr_" + event, + type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], + ) + corr.append(df["mean"]) + columns.append(os.path.basename(folderNames[j])) + + if not isinstance(df, pd.DataFrame): + break + + corr = np.array(corr) + timestamps = np.array(df["timestamps"]).reshape(1, -1) + corr = np.concatenate((corr, timestamps), axis=0) + columns.append("timestamps") + create_Df( + make_dir_for_cross_correlation(op), + "corr_" + event, + type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], + corr, + columns=columns, + ) + + logger.info("Group of data averaged.") + + +def psth_shape_check(psth): + + each_ln = [] + for i in range(len(psth)): + each_ln.append(psth[i].shape[0]) + + each_ln = np.asarray(each_ln) + keep_ln = each_ln[-1] + + for i in range(len(psth)): + if psth[i].shape[0] > keep_ln: + psth[i] = psth[i][:keep_ln] + elif psth[i].shape[0] < keep_ln: + psth[i] = np.append(psth[i], np.full(keep_ln - len(psth[i]), np.nan)) + else: + psth[i] = psth[i] + + return psth + + +def read_Df_area_peak(filepath, name): + op = os.path.join(filepath, "peak_AUC_" + name + ".h5") + df = pd.read_hdf(op, key="df", mode="r") + + return df diff --git a/src/guppy/analysis/psth_utils.py b/src/guppy/analysis/psth_utils.py new file mode 100644 index 0000000..13b2479 --- /dev/null +++ b/src/guppy/analysis/psth_utils.py @@ -0,0 +1,55 @@ +import logging +import math +import os +import re + +import numpy as np +import pandas as pd + +from .io_utils import read_hdf5 + +logger = logging.getLogger(__name__) + + +# function to create dataframe for each event PSTH and save it to h5 file +def create_Df(filepath, event, name, psth, columns=[]): + event = event.replace("\\", "_") + event = event.replace("/", "_") + if name: + op = os.path.join(filepath, event + "_{}.h5".format(name)) + else: + op = os.path.join(filepath, event + ".h5") + + # check if file already exists + # if os.path.exists(op): + # return 0 + + # removing psth binned trials + columns = np.array(columns, dtype="str") + regex = re.compile("bin_*") + single_trials = columns[[i for i in range(len(columns)) if not regex.match(columns[i])]] + single_trials_index = [i for i in range(len(single_trials)) if single_trials[i] != "timestamps"] + + psth = psth.T + if psth.ndim > 1: + mean = np.nanmean(psth[:, single_trials_index], axis=1).reshape(-1, 1) + err = np.nanstd(psth[:, single_trials_index], axis=1) / math.sqrt(psth[:, single_trials_index].shape[1]) + err = err.reshape(-1, 1) + psth = np.hstack((psth, mean)) + psth = np.hstack((psth, err)) + # timestamps = np.asarray(read_Df(filepath, 'ts_psth', '')) + # psth = np.hstack((psth, timestamps)) + try: + ts = read_hdf5(event, filepath, "ts") + ts = np.append(ts, ["mean", "err"]) + except: + ts = None + + if len(columns) == 0: + df = pd.DataFrame(psth, index=None, columns=ts, dtype="float32") + 
else: + columns = np.asarray(columns) + columns = np.append(columns, ["mean", "err"]) + df = pd.DataFrame(psth, index=None, columns=list(columns), dtype="float32") + + df.to_hdf(op, key="df", mode="w") diff --git a/src/guppy/analysis/storename_psth.py b/src/guppy/analysis/storename_psth.py index d582572..33d5657 100644 --- a/src/guppy/analysis/storename_psth.py +++ b/src/guppy/analysis/storename_psth.py @@ -17,10 +17,15 @@ def storenamePsth(filepath, event, inputParameters): event = event.replace("\\", "_") event = event.replace("/", "_") + if "control" in event.lower() or "signal" in event.lower(): + return 0 selectForComputePsth = inputParameters["selectForComputePsth"] bin_psth_trials = inputParameters["bin_psth_trials"] use_time_or_trials = inputParameters["use_time_or_trials"] + nSecPrev, nSecPost = inputParameters["nSecPrev"], inputParameters["nSecPost"] + baselineStart, baselineEnd = inputParameters["baselineCorrectionStart"], inputParameters["baselineCorrectionEnd"] + timeInterval = inputParameters["timeInterval"] if selectForComputePsth == "z_score": path = glob.glob(os.path.join(filepath, "z_score_*")) @@ -32,54 +37,42 @@ def storenamePsth(filepath, event, inputParameters): b = np.divide(np.ones((100,)), 100) a = 1 - # storesList = storesList - # sampling_rate = read_hdf5(storesList[0,0], filepath, 'sampling_rate') - nSecPrev, nSecPost = inputParameters["nSecPrev"], inputParameters["nSecPost"] - baselineStart, baselineEnd = inputParameters["baselineCorrectionStart"], inputParameters["baselineCorrectionEnd"] - timeInterval = inputParameters["timeInterval"] - - if "control" in event.lower() or "signal" in event.lower(): - return 0 - else: - for i in range(len(path)): - logger.info(f"Computing PSTH for event {event}...") - basename = (os.path.basename(path[i])).split(".")[0] - name_1 = basename.split("_")[-1] - control = read_hdf5("control_" + name_1, os.path.dirname(path[i]), "data") - if (control == 0).all() == True: - signal = read_hdf5("signal_" + name_1, os.path.dirname(path[i]), "data") - z_score = ss.filtfilt(b, a, signal) - just_use_signal = True - else: - z_score = read_hdf5("", path[i], "data") - just_use_signal = False - psth, psth_baselineUncorrected, cols = helper_psth( - z_score, - event, - filepath, - nSecPrev, - nSecPost, - timeInterval, - bin_psth_trials, - use_time_or_trials, - baselineStart, - baselineEnd, - name_1, - just_use_signal, - ) - - create_Df( - filepath, - event + "_" + name_1 + "_baselineUncorrected", - basename, - psth_baselineUncorrected, - columns=cols, - ) # extra - create_Df(filepath, event + "_" + name_1, basename, psth, columns=cols) - logger.info(f"PSTH for event {event} computed.") - - -# *********************************** Functions used by storenamePsth *********************************** # + for i in range(len(path)): + logger.info(f"Computing PSTH for event {event}...") + basename = (os.path.basename(path[i])).split(".")[0] + name_1 = basename.split("_")[-1] + control = read_hdf5("control_" + name_1, os.path.dirname(path[i]), "data") + if (control == 0).all() == True: + signal = read_hdf5("signal_" + name_1, os.path.dirname(path[i]), "data") + z_score = ss.filtfilt(b, a, signal) + just_use_signal = True + else: + z_score = read_hdf5("", path[i], "data") + just_use_signal = False + psth, psth_baselineUncorrected, cols = helper_psth( + z_score, + event, + filepath, + nSecPrev, + nSecPost, + timeInterval, + bin_psth_trials, + use_time_or_trials, + baselineStart, + baselineEnd, + name_1, + just_use_signal, + ) + + create_Df( + filepath, + 
event + "_" + name_1 + "_baselineUncorrected", + basename, + psth_baselineUncorrected, + columns=cols, + ) # extra + create_Df(filepath, event + "_" + name_1, basename, psth, columns=cols) + logger.info(f"PSTH for event {event} computed.") # helper function to make PSTH for each event @@ -230,9 +223,6 @@ def helper_psth( return psth, psth_baselineUncorrected, columns -# ***************************** Functions used by helper_psth ***************************** # - - # function to create PSTH trials corresponding to each event timestamp def rowFormation(z_score, thisIndex, nTsPrev, nTsPost): From 2d2d4fa920ab50c92fa4bcb861b4e0314a653da4 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 16:29:20 -0800 Subject: [PATCH 135/150] Reorganzied execute_psth fns into separate modules --- .../{storename_psth.py => compute_psth.py} | 69 +---------------- src/guppy/computePsth.py | 77 +++++++++++++++++-- 2 files changed, 73 insertions(+), 73 deletions(-) rename src/guppy/analysis/{storename_psth.py => compute_psth.py} (75%) diff --git a/src/guppy/analysis/storename_psth.py b/src/guppy/analysis/compute_psth.py similarity index 75% rename from src/guppy/analysis/storename_psth.py rename to src/guppy/analysis/compute_psth.py index 33d5657..887081d 100644 --- a/src/guppy/analysis/storename_psth.py +++ b/src/guppy/analysis/compute_psth.py @@ -1,82 +1,15 @@ -import glob import logging import math -import os import numpy as np -from scipy import signal as ss from .io_utils import read_hdf5, write_hdf5 -from .psth_utils import create_Df logger = logging.getLogger(__name__) -# function to create PSTH for each event using function helper_psth and save the PSTH to h5 file -def storenamePsth(filepath, event, inputParameters): - - event = event.replace("\\", "_") - event = event.replace("/", "_") - if "control" in event.lower() or "signal" in event.lower(): - return 0 - - selectForComputePsth = inputParameters["selectForComputePsth"] - bin_psth_trials = inputParameters["bin_psth_trials"] - use_time_or_trials = inputParameters["use_time_or_trials"] - nSecPrev, nSecPost = inputParameters["nSecPrev"], inputParameters["nSecPost"] - baselineStart, baselineEnd = inputParameters["baselineCorrectionStart"], inputParameters["baselineCorrectionEnd"] - timeInterval = inputParameters["timeInterval"] - - if selectForComputePsth == "z_score": - path = glob.glob(os.path.join(filepath, "z_score_*")) - elif selectForComputePsth == "dff": - path = glob.glob(os.path.join(filepath, "dff_*")) - else: - path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) - - b = np.divide(np.ones((100,)), 100) - a = 1 - - for i in range(len(path)): - logger.info(f"Computing PSTH for event {event}...") - basename = (os.path.basename(path[i])).split(".")[0] - name_1 = basename.split("_")[-1] - control = read_hdf5("control_" + name_1, os.path.dirname(path[i]), "data") - if (control == 0).all() == True: - signal = read_hdf5("signal_" + name_1, os.path.dirname(path[i]), "data") - z_score = ss.filtfilt(b, a, signal) - just_use_signal = True - else: - z_score = read_hdf5("", path[i], "data") - just_use_signal = False - psth, psth_baselineUncorrected, cols = helper_psth( - z_score, - event, - filepath, - nSecPrev, - nSecPost, - timeInterval, - bin_psth_trials, - use_time_or_trials, - baselineStart, - baselineEnd, - name_1, - just_use_signal, - ) - - create_Df( - filepath, - event + "_" + name_1 + "_baselineUncorrected", - basename, - psth_baselineUncorrected, - columns=cols, - ) # extra - 
create_Df(filepath, event + "_" + name_1, basename, psth, columns=cols) - logger.info(f"PSTH for event {event} computed.") - - # helper function to make PSTH for each event -def helper_psth( +def compute_psth( z_score, event, filepath, diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 4d12240..19f0b40 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -10,14 +10,18 @@ from itertools import repeat import numpy as np +from scipy import signal as ss +from .analysis.compute_psth import compute_psth from .analysis.cross_correlation import computeCrossCorrelation from .analysis.io_utils import ( get_all_stores_for_combining_data, makeAverageDir, + read_hdf5, ) +from .analysis.psth_average import averageForGroup from .analysis.psth_peak_and_area import findPSTHPeakAndArea -from .analysis.storename_psth import storenamePsth +from .analysis.psth_utils import create_Df logger = logging.getLogger(__name__) @@ -66,12 +70,75 @@ def psthForEachStorename(inputParameters): if combine_data == True: execute_psth_combined(inputParameters) else: - execute_psth(inputParameters) + orchestrate_psth(inputParameters) logger.info("PSTH, Area and Peak are computed for all events.") return inputParameters -def execute_psth(inputParameters): +# function to create PSTH for each event using function helper_psth and save the PSTH to h5 file +def execute_compute_psth(filepath, event, inputParameters): + + event = event.replace("\\", "_") + event = event.replace("/", "_") + if "control" in event.lower() or "signal" in event.lower(): + return 0 + + selectForComputePsth = inputParameters["selectForComputePsth"] + bin_psth_trials = inputParameters["bin_psth_trials"] + use_time_or_trials = inputParameters["use_time_or_trials"] + nSecPrev, nSecPost = inputParameters["nSecPrev"], inputParameters["nSecPost"] + baselineStart, baselineEnd = inputParameters["baselineCorrectionStart"], inputParameters["baselineCorrectionEnd"] + timeInterval = inputParameters["timeInterval"] + + if selectForComputePsth == "z_score": + path = glob.glob(os.path.join(filepath, "z_score_*")) + elif selectForComputePsth == "dff": + path = glob.glob(os.path.join(filepath, "dff_*")) + else: + path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) + + b = np.divide(np.ones((100,)), 100) + a = 1 + + for i in range(len(path)): + logger.info(f"Computing PSTH for event {event}...") + basename = (os.path.basename(path[i])).split(".")[0] + name_1 = basename.split("_")[-1] + control = read_hdf5("control_" + name_1, os.path.dirname(path[i]), "data") + if (control == 0).all() == True: + signal = read_hdf5("signal_" + name_1, os.path.dirname(path[i]), "data") + z_score = ss.filtfilt(b, a, signal) + just_use_signal = True + else: + z_score = read_hdf5("", path[i], "data") + just_use_signal = False + psth, psth_baselineUncorrected, cols = compute_psth( + z_score, + event, + filepath, + nSecPrev, + nSecPost, + timeInterval, + bin_psth_trials, + use_time_or_trials, + baselineStart, + baselineEnd, + name_1, + just_use_signal, + ) + + create_Df( + filepath, + event + "_" + name_1 + "_baselineUncorrected", + basename, + psth_baselineUncorrected, + columns=cols, + ) # extra + create_Df(filepath, event + "_" + name_1, basename, psth, columns=cols) + logger.info(f"PSTH for event {event} computed.") + + +def orchestrate_psth(inputParameters): folderNames = inputParameters["folderNames"] numProcesses = inputParameters["numberOfCores"] storesListPath = [] @@ -89,7 +156,7 @@ def 
execute_psth(inputParameters): ) with mp.Pool(numProcesses) as p: - p.starmap(storenamePsth, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) + p.starmap(execute_compute_psth, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) with mp.Pool(numProcesses) as pq: pq.starmap(findPSTHPeakAndArea, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) @@ -126,7 +193,7 @@ def execute_psth_combined(inputParameters): ) storesList = np.unique(storesList, axis=1) for k in range(storesList.shape[1]): - storenamePsth(op[i][0], storesList[1, k], inputParameters) + execute_compute_psth(op[i][0], storesList[1, k], inputParameters) findPSTHPeakAndArea(op[i][0], storesList[1, k], inputParameters) computeCrossCorrelation(op[i][0], storesList[1, k], inputParameters) writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") From 7929ec11f04fbe6f24aaaeebfaf51ea2806fd49e Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 16:41:35 -0800 Subject: [PATCH 136/150] pulled read out of compute_psth --- src/guppy/analysis/compute_psth.py | 28 ++++++++-------------------- src/guppy/computePsth.py | 10 ++++++++++ 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/guppy/analysis/compute_psth.py b/src/guppy/analysis/compute_psth.py index 887081d..fd848ac 100644 --- a/src/guppy/analysis/compute_psth.py +++ b/src/guppy/analysis/compute_psth.py @@ -3,7 +3,7 @@ import numpy as np -from .io_utils import read_hdf5, write_hdf5 +from .io_utils import write_hdf5 logger = logging.getLogger(__name__) @@ -22,13 +22,14 @@ def compute_psth( baselineEnd, naming, just_use_signal, + sampling_rate, + ts, + corrected_timestamps, ): event = event.replace("\\", "_") event = event.replace("/", "_") - sampling_rate = read_hdf5("timeCorrection_" + naming, filepath, "sampling_rate")[0] - # calculate time before event timestamp and time after event timestamp nTsPrev = int(round(nSecPrev * sampling_rate)) nTsPost = int(round(nSecPost * sampling_rate)) @@ -38,14 +39,6 @@ def compute_psth( timeAxis = np.linspace(nSecPrev, nSecPost + increment, totalTs + 1) timeAxisNew = np.concatenate((timeAxis, timeAxis[::-1])) - # avoid writing same data to same file in multi-processing - # if not os.path.exists(os.path.join(filepath, 'ts_psth.h5')): - # logger.info('file not exists') - # create_Df(filepath, 'ts_psth', '', timeAxis) - # time.sleep(2) - - ts = read_hdf5(event + "_" + naming, filepath, "ts") - # reject timestamps for which baseline cannot be calculated because of nan values new_ts = [] for i in range(ts.shape[0]): @@ -93,16 +86,15 @@ def compute_psth( arr = arr psth_baselineUncorrected[i, :] = arr # extra - psth[i, :] = baselineCorrection(filepath, arr, timeAxis, baselineStart, baselineEnd) + psth[i, :] = baselineCorrection(arr, timeAxis, baselineStart, baselineEnd) write_hdf5(ts, event + "_" + naming, filepath, "ts") columns = list(ts) if use_time_or_trials == "Time (min)" and bin_psth_trials > 0: - timestamps = read_hdf5("timeCorrection_" + naming, filepath, "timestampNew") - timestamps = np.divide(timestamps, 60) + corrected_timestamps = np.divide(corrected_timestamps, 60) ts_min = np.divide(ts, 60) - bin_steps = np.arange(timestamps[0], timestamps[-1] + bin_psth_trials, bin_psth_trials) + bin_steps = np.arange(corrected_timestamps[0], corrected_timestamps[-1] + bin_psth_trials, bin_psth_trials) indices_each_step = dict() for i in range(1, bin_steps.shape[0]): indices_each_step[f"{np.around(bin_steps[i-1],0)}-{np.around(bin_steps[i],0)}"] = np.where( @@ -184,14 
+176,10 @@ def rowFormation(z_score, thisIndex, nTsPrev, nTsPost): # function to calculate baseline for each PSTH trial and do baseline correction -def baselineCorrection(filepath, arr, timeAxis, baselineStart, baselineEnd): - - # timeAxis = read_Df(filepath, 'ts_psth', '') - # timeAxis = np.asarray(timeAxis).reshape(-1) +def baselineCorrection(arr, timeAxis, baselineStart, baselineEnd): baselineStrtPt = np.where(timeAxis >= baselineStart)[0] baselineEndPt = np.where(timeAxis >= baselineEnd)[0] - # logger.info(baselineStrtPt[0], baselineEndPt[0]) if baselineStart == 0 and baselineEnd == 0: return arr diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 19f0b40..717fdba 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -112,6 +112,13 @@ def execute_compute_psth(filepath, event, inputParameters): else: z_score = read_hdf5("", path[i], "data") just_use_signal = False + + sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] + ts = read_hdf5(event + "_" + name_1, filepath, "ts") + if use_time_or_trials == "Time (min)" and bin_psth_trials > 0: + corrected_timestamps = read_hdf5("timeCorrection_" + name_1, filepath, "timestampNew") + else: + corrected_timestamps = None psth, psth_baselineUncorrected, cols = compute_psth( z_score, event, @@ -125,6 +132,9 @@ def execute_compute_psth(filepath, event, inputParameters): baselineEnd, name_1, just_use_signal, + sampling_rate, + ts, + corrected_timestamps, ) create_Df( From fa74a4df089e22dcd87d539f8cf7b3cb1ec83b33 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Wed, 21 Jan 2026 16:50:12 -0800 Subject: [PATCH 137/150] pulled write out of compute_psth --- src/guppy/analysis/compute_psth.py | 5 +---- src/guppy/computePsth.py | 4 +++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/guppy/analysis/compute_psth.py b/src/guppy/analysis/compute_psth.py index fd848ac..80aa8a7 100644 --- a/src/guppy/analysis/compute_psth.py +++ b/src/guppy/analysis/compute_psth.py @@ -3,8 +3,6 @@ import numpy as np -from .io_utils import write_hdf5 - logger = logging.getLogger(__name__) @@ -88,7 +86,6 @@ def compute_psth( psth_baselineUncorrected[i, :] = arr # extra psth[i, :] = baselineCorrection(arr, timeAxis, baselineStart, baselineEnd) - write_hdf5(ts, event + "_" + naming, filepath, "ts") columns = list(ts) if use_time_or_trials == "Time (min)" and bin_psth_trials > 0: @@ -145,7 +142,7 @@ def compute_psth( psth_baselineUncorrected = np.concatenate((psth_baselineUncorrected, timeAxis), axis=0) columns.append("timestamps") - return psth, psth_baselineUncorrected, columns + return psth, psth_baselineUncorrected, columns, ts # function to create PSTH trials corresponding to each event timestamp diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 717fdba..38b04df 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -18,6 +18,7 @@ get_all_stores_for_combining_data, makeAverageDir, read_hdf5, + write_hdf5, ) from .analysis.psth_average import averageForGroup from .analysis.psth_peak_and_area import findPSTHPeakAndArea @@ -119,7 +120,7 @@ def execute_compute_psth(filepath, event, inputParameters): corrected_timestamps = read_hdf5("timeCorrection_" + name_1, filepath, "timestampNew") else: corrected_timestamps = None - psth, psth_baselineUncorrected, cols = compute_psth( + psth, psth_baselineUncorrected, cols, ts = compute_psth( z_score, event, filepath, @@ -136,6 +137,7 @@ def execute_compute_psth(filepath, event, inputParameters): ts, corrected_timestamps, 
) + write_hdf5(ts, event + "_" + name_1, filepath, "ts") create_Df( filepath, From efab4476278104cb7489694f4aa20d0e59932912 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 22 Jan 2026 16:45:47 -0800 Subject: [PATCH 138/150] refactored findPSTHPeakAndArea --- src/guppy/analysis/psth_peak_and_area.py | 55 ++++++++++++------------ 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/src/guppy/analysis/psth_peak_and_area.py b/src/guppy/analysis/psth_peak_and_area.py index 849bd29..11b290a 100644 --- a/src/guppy/analysis/psth_peak_and_area.py +++ b/src/guppy/analysis/psth_peak_and_area.py @@ -17,6 +17,8 @@ def findPSTHPeakAndArea(filepath, event, inputParameters): event = event.replace("\\", "_") event = event.replace("/", "_") + if "control" in event.lower() or "signal" in event.lower(): + return 0 # sampling_rate = read_hdf5(storesList[0,0], filepath, 'sampling_rate') peak_startPoint = inputParameters["peak_startPoint"] @@ -30,34 +32,31 @@ def findPSTHPeakAndArea(filepath, event, inputParameters): else: path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) - if "control" in event.lower() or "signal" in event.lower(): - return 0 - else: - for i in range(len(path)): - logger.info(f"Computing peak and area for PSTH mean signal for event {event}...") - basename = (os.path.basename(path[i])).split(".")[0] - name_1 = basename.split("_")[-1] - sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] - psth = read_Df(filepath, event + "_" + name_1, basename) - cols = list(psth.columns) - regex = re.compile("bin_[(]") - bin_names = [cols[i] for i in range(len(cols)) if regex.match(cols[i])] - regex_trials = re.compile("[+-]?([0-9]*[.])?[0-9]+") - trials_names = [cols[i] for i in range(len(cols)) if regex_trials.match(cols[i])] - psth_mean_bin_names = trials_names + bin_names + ["mean"] - psth_mean_bin_mean = np.asarray(psth[psth_mean_bin_names]) - timestamps = np.asarray(psth["timestamps"]).ravel() # np.asarray(read_Df(filepath, 'ts_psth', '')).ravel() - peak_area = helperPSTHPeakAndArea( - psth_mean_bin_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint - ) # peak, area = - # arr = np.array([[peak, area]]) - fileName = [os.path.basename(os.path.dirname(filepath))] - index = [fileName[0] + "_" + s for s in psth_mean_bin_names] - create_Df_area_peak( - filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index - ) # columns=['peak', 'area'] - create_csv_area_peak(filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index) - logger.info(f"Peak and Area for PSTH mean signal for event {event} computed.") + for i in range(len(path)): + logger.info(f"Computing peak and area for PSTH mean signal for event {event}...") + basename = (os.path.basename(path[i])).split(".")[0] + name_1 = basename.split("_")[-1] + sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] + psth = read_Df(filepath, event + "_" + name_1, basename) + cols = list(psth.columns) + regex = re.compile("bin_[(]") + bin_names = [cols[i] for i in range(len(cols)) if regex.match(cols[i])] + regex_trials = re.compile("[+-]?([0-9]*[.])?[0-9]+") + trials_names = [cols[i] for i in range(len(cols)) if regex_trials.match(cols[i])] + psth_mean_bin_names = trials_names + bin_names + ["mean"] + psth_mean_bin_mean = np.asarray(psth[psth_mean_bin_names]) + timestamps = np.asarray(psth["timestamps"]).ravel() # np.asarray(read_Df(filepath, 'ts_psth', '')).ravel() + peak_area = helperPSTHPeakAndArea( + 
psth_mean_bin_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint + ) # peak, area = + # arr = np.array([[peak, area]]) + fileName = [os.path.basename(os.path.dirname(filepath))] + index = [fileName[0] + "_" + s for s in psth_mean_bin_names] + create_Df_area_peak( + filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index + ) # columns=['peak', 'area'] + create_csv_area_peak(filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index) + logger.info(f"Peak and Area for PSTH mean signal for event {event} computed.") def helperPSTHPeakAndArea(psth_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint): From 0a004eded2d5e71038aed966a205756696c138f5 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 22 Jan 2026 16:58:27 -0800 Subject: [PATCH 139/150] reorganized findPSTHPeakAndArea --- src/guppy/analysis/psth_peak_and_area.py | 90 +++--------------------- src/guppy/analysis/standard_io.py | 18 +++++ src/guppy/computePsth.py | 61 +++++++++++++++- 3 files changed, 86 insertions(+), 83 deletions(-) diff --git a/src/guppy/analysis/psth_peak_and_area.py b/src/guppy/analysis/psth_peak_and_area.py index 11b290a..2c2c421 100644 --- a/src/guppy/analysis/psth_peak_and_area.py +++ b/src/guppy/analysis/psth_peak_and_area.py @@ -1,65 +1,12 @@ -import glob import logging -import os -import re from collections import OrderedDict import numpy as np -import pandas as pd - -from .io_utils import read_Df, read_hdf5 logger = logging.getLogger(__name__) -# function to compute PSTH peak and area using the function helperPSTHPeakAndArea save the values to h5 and csv files. -def findPSTHPeakAndArea(filepath, event, inputParameters): - - event = event.replace("\\", "_") - event = event.replace("/", "_") - if "control" in event.lower() or "signal" in event.lower(): - return 0 - - # sampling_rate = read_hdf5(storesList[0,0], filepath, 'sampling_rate') - peak_startPoint = inputParameters["peak_startPoint"] - peak_endPoint = inputParameters["peak_endPoint"] - selectForComputePsth = inputParameters["selectForComputePsth"] - - if selectForComputePsth == "z_score": - path = glob.glob(os.path.join(filepath, "z_score_*")) - elif selectForComputePsth == "dff": - path = glob.glob(os.path.join(filepath, "dff_*")) - else: - path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) - - for i in range(len(path)): - logger.info(f"Computing peak and area for PSTH mean signal for event {event}...") - basename = (os.path.basename(path[i])).split(".")[0] - name_1 = basename.split("_")[-1] - sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] - psth = read_Df(filepath, event + "_" + name_1, basename) - cols = list(psth.columns) - regex = re.compile("bin_[(]") - bin_names = [cols[i] for i in range(len(cols)) if regex.match(cols[i])] - regex_trials = re.compile("[+-]?([0-9]*[.])?[0-9]+") - trials_names = [cols[i] for i in range(len(cols)) if regex_trials.match(cols[i])] - psth_mean_bin_names = trials_names + bin_names + ["mean"] - psth_mean_bin_mean = np.asarray(psth[psth_mean_bin_names]) - timestamps = np.asarray(psth["timestamps"]).ravel() # np.asarray(read_Df(filepath, 'ts_psth', '')).ravel() - peak_area = helperPSTHPeakAndArea( - psth_mean_bin_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint - ) # peak, area = - # arr = np.array([[peak, area]]) - fileName = [os.path.basename(os.path.dirname(filepath))] - index = [fileName[0] + "_" + s for s in psth_mean_bin_names] - create_Df_area_peak( - filepath, 
peak_area, event + "_" + name_1 + "_" + basename, index=index - ) # columns=['peak', 'area'] - create_csv_area_peak(filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index) - logger.info(f"Peak and Area for PSTH mean signal for event {event} computed.") - - -def helperPSTHPeakAndArea(psth_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint): +def compute_psth_peak_and_area(psth_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint): peak_startPoint = np.asarray(peak_startPoint) peak_endPoint = np.asarray(peak_endPoint) @@ -79,11 +26,11 @@ def helperPSTHPeakAndArea(psth_mean, timestamps, sampling_rate, peak_startPoint, "Peak End Time is lesser than or equal to Peak Start Time. Please check the Peak parameters window." ) - peak_area = OrderedDict() + peak_and_area = OrderedDict() if peak_startPoint.shape[0] == 0 or peak_endPoint.shape[0] == 0: - peak_area["peak"] = np.nan - peak_area["area"] = np.nan + peak_and_area["peak"] = np.nan + peak_and_area["area"] = np.nan for i in range(peak_startPoint.shape[0]): startPtForPeak = np.where(timestamps >= peak_startPoint[i])[0] @@ -91,28 +38,11 @@ def helperPSTHPeakAndArea(psth_mean, timestamps, sampling_rate, peak_startPoint, if len(startPtForPeak) >= 1 and len(endPtForPeak) >= 1: peakPoint_pos = startPtForPeak[0] + np.argmax(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) peakPoint_neg = startPtForPeak[0] + np.argmin(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) - peak_area["peak_pos_" + str(i + 1)] = np.amax(psth_mean[peakPoint_pos], axis=0) - peak_area["peak_neg_" + str(i + 1)] = np.amin(psth_mean[peakPoint_neg], axis=0) - peak_area["area_" + str(i + 1)] = np.trapz(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) + peak_and_area["peak_pos_" + str(i + 1)] = np.amax(psth_mean[peakPoint_pos], axis=0) + peak_and_area["peak_neg_" + str(i + 1)] = np.amin(psth_mean[peakPoint_neg], axis=0) + peak_and_area["area_" + str(i + 1)] = np.trapz(psth_mean[startPtForPeak[0] : endPtForPeak[0], :], axis=0) else: - peak_area["peak_" + str(i + 1)] = np.nan - peak_area["area_" + str(i + 1)] = np.nan - - return peak_area - - -def create_Df_area_peak(filepath, arr, name, index=[]): - - op = os.path.join(filepath, "peak_AUC_" + name + ".h5") - dirname = os.path.dirname(filepath) - - df = pd.DataFrame(arr, index=index) - - df.to_hdf(op, key="df", mode="w") - - -def create_csv_area_peak(filepath, arr, name, index=[]): - op = os.path.join(filepath, "peak_AUC_" + name + ".csv") - df = pd.DataFrame(arr, index=index) + peak_and_area["peak_" + str(i + 1)] = np.nan + peak_and_area["area_" + str(i + 1)] = np.nan - df.to_csv(op) + return peak_and_area diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index 2baefca..c3b323c 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -2,6 +2,7 @@ import os import numpy as np +import pandas as pd from .io_utils import ( decide_naming_convention, @@ -290,3 +291,20 @@ def write_combined_data(output_filepath, pair_name_to_tsNew, display_name_to_dat write_hdf5(data, display_name, output_filepath, "data") for compound_name, ts in compound_name_to_ttl_timestamps.items(): write_hdf5(ts, compound_name, output_filepath, "ts") + + +def write_peak_and_area_to_hdf5(filepath, arr, name, index=[]): + + op = os.path.join(filepath, "peak_AUC_" + name + ".h5") + dirname = os.path.dirname(filepath) + + df = pd.DataFrame(arr, index=index) + + df.to_hdf(op, key="df", mode="w") + + +def write_peak_and_area_to_csv(filepath, 
arr, name, index=[]): + op = os.path.join(filepath, "peak_AUC_" + name + ".csv") + df = pd.DataFrame(arr, index=index) + + df.to_csv(op) diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 38b04df..65b5d21 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -5,6 +5,7 @@ import logging import multiprocessing as mp import os +import re import subprocess import sys from itertools import repeat @@ -17,12 +18,17 @@ from .analysis.io_utils import ( get_all_stores_for_combining_data, makeAverageDir, + read_Df, read_hdf5, write_hdf5, ) from .analysis.psth_average import averageForGroup -from .analysis.psth_peak_and_area import findPSTHPeakAndArea +from .analysis.psth_peak_and_area import compute_psth_peak_and_area from .analysis.psth_utils import create_Df +from .analysis.standard_io import ( + write_peak_and_area_to_csv, + write_peak_and_area_to_hdf5, +) logger = logging.getLogger(__name__) @@ -150,6 +156,53 @@ def execute_compute_psth(filepath, event, inputParameters): logger.info(f"PSTH for event {event} computed.") +# function to compute PSTH peak and area using the function helperPSTHPeakAndArea save the values to h5 and csv files. +def execute_compute_psth_peak_and_area(filepath, event, inputParameters): + + event = event.replace("\\", "_") + event = event.replace("/", "_") + if "control" in event.lower() or "signal" in event.lower(): + return 0 + + # sampling_rate = read_hdf5(storesList[0,0], filepath, 'sampling_rate') + peak_startPoint = inputParameters["peak_startPoint"] + peak_endPoint = inputParameters["peak_endPoint"] + selectForComputePsth = inputParameters["selectForComputePsth"] + + if selectForComputePsth == "z_score": + path = glob.glob(os.path.join(filepath, "z_score_*")) + elif selectForComputePsth == "dff": + path = glob.glob(os.path.join(filepath, "dff_*")) + else: + path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) + + for i in range(len(path)): + logger.info(f"Computing peak and area for PSTH mean signal for event {event}...") + basename = (os.path.basename(path[i])).split(".")[0] + name_1 = basename.split("_")[-1] + sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] + psth = read_Df(filepath, event + "_" + name_1, basename) + cols = list(psth.columns) + regex = re.compile("bin_[(]") + bin_names = [cols[i] for i in range(len(cols)) if regex.match(cols[i])] + regex_trials = re.compile("[+-]?([0-9]*[.])?[0-9]+") + trials_names = [cols[i] for i in range(len(cols)) if regex_trials.match(cols[i])] + psth_mean_bin_names = trials_names + bin_names + ["mean"] + psth_mean_bin_mean = np.asarray(psth[psth_mean_bin_names]) + timestamps = np.asarray(psth["timestamps"]).ravel() # np.asarray(read_Df(filepath, 'ts_psth', '')).ravel() + peak_area = compute_psth_peak_and_area( + psth_mean_bin_mean, timestamps, sampling_rate, peak_startPoint, peak_endPoint + ) # peak, area = + # arr = np.array([[peak, area]]) + fileName = [os.path.basename(os.path.dirname(filepath))] + index = [fileName[0] + "_" + s for s in psth_mean_bin_names] + write_peak_and_area_to_hdf5( + filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index + ) # columns=['peak', 'area'] + write_peak_and_area_to_csv(filepath, peak_area, event + "_" + name_1 + "_" + basename, index=index) + logger.info(f"Peak and Area for PSTH mean signal for event {event} computed.") + + def orchestrate_psth(inputParameters): folderNames = inputParameters["folderNames"] numProcesses = inputParameters["numberOfCores"] 
@@ -171,7 +224,9 @@ def orchestrate_psth(inputParameters): p.starmap(execute_compute_psth, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) with mp.Pool(numProcesses) as pq: - pq.starmap(findPSTHPeakAndArea, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) + pq.starmap( + execute_compute_psth_peak_and_area, zip(repeat(filepath), storesList[1, :], repeat(inputParameters)) + ) with mp.Pool(numProcesses) as cr: cr.starmap(computeCrossCorrelation, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) @@ -206,7 +261,7 @@ def execute_psth_combined(inputParameters): storesList = np.unique(storesList, axis=1) for k in range(storesList.shape[1]): execute_compute_psth(op[i][0], storesList[1, k], inputParameters) - findPSTHPeakAndArea(op[i][0], storesList[1, k], inputParameters) + execute_compute_psth_peak_and_area(op[i][0], storesList[1, k], inputParameters) computeCrossCorrelation(op[i][0], storesList[1, k], inputParameters) writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") inputParameters["step"] += 1 From 1d212556552c464775232b22a00b41e580c5d7b3 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 10:04:37 -0800 Subject: [PATCH 140/150] reorganized x-corr --- src/guppy/analysis/cross_correlation.py | 91 +------------------------ src/guppy/analysis/psth_average.py | 6 +- src/guppy/analysis/psth_utils.py | 46 ++++++++++++- src/guppy/computePsth.py | 55 +++++++++++++-- 4 files changed, 98 insertions(+), 100 deletions(-) diff --git a/src/guppy/analysis/cross_correlation.py b/src/guppy/analysis/cross_correlation.py index 43d0a10..3d69136 100644 --- a/src/guppy/analysis/cross_correlation.py +++ b/src/guppy/analysis/cross_correlation.py @@ -1,58 +1,13 @@ import glob import logging -import math import os -import re import numpy as np -import pandas as pd from scipy import signal -from .io_utils import make_dir_for_cross_correlation, read_Df, read_hdf5 - logger = logging.getLogger(__name__) -def computeCrossCorrelation(filepath, event, inputParameters): - isCompute = inputParameters["computeCorr"] - removeArtifacts = inputParameters["removeArtifacts"] - artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] - if isCompute == True: - if removeArtifacts == True and artifactsRemovalMethod == "concatenate": - raise Exception( - "For cross-correlation, when removeArtifacts is True, artifacts removal method\ - should be replace with NaNs and not concatenate" - ) - corr_info, type = getCorrCombinations(filepath, inputParameters) - if "control" in event.lower() or "signal" in event.lower(): - return - else: - for i in range(1, len(corr_info)): - logger.debug(f"Computing cross-correlation for event {event}...") - for j in range(len(type)): - psth_a = read_Df(filepath, event + "_" + corr_info[i - 1], type[j] + "_" + corr_info[i - 1]) - psth_b = read_Df(filepath, event + "_" + corr_info[i], type[j] + "_" + corr_info[i]) - sample_rate = 1 / (psth_a["timestamps"][1] - psth_a["timestamps"][0]) - psth_a = psth_a.drop(columns=["timestamps", "err", "mean"]) - psth_b = psth_b.drop(columns=["timestamps", "err", "mean"]) - cols_a, cols_b = np.array(psth_a.columns), np.array(psth_b.columns) - if np.intersect1d(cols_a, cols_b).size > 0: - cols = list(np.intersect1d(cols_a, cols_b)) - else: - cols = list(cols_a) - arr_A, arr_B = np.array(psth_a).T, np.array(psth_b).T - cross_corr = helperCrossCorrelation(arr_A, arr_B, sample_rate) - cols.append("timestamps") - create_Df( - make_dir_for_cross_correlation(filepath), - "corr_" + event, - 
type[j] + "_" + corr_info[i - 1] + "_" + corr_info[i], - cross_corr, - cols, - ) - logger.info(f"Cross-correlation for event {event} computed.") - - def getCorrCombinations(filepath, inputParameters): selectForComputePsth = inputParameters["selectForComputePsth"] if selectForComputePsth == "z_score": @@ -85,51 +40,7 @@ def getCorrCombinations(filepath, inputParameters): return corr_info, type -# same function used to store PSTH in computePsth file -# Here, cross correlation dataframe is saved instead of PSTH -# cross correlation dataframe has the same structure as PSTH file -def create_Df(filepath, event, name, psth, columns=[]): - if name: - op = os.path.join(filepath, event + "_{}.h5".format(name)) - else: - op = os.path.join(filepath, event + ".h5") - - # check if file already exists - # if os.path.exists(op): - # return 0 - - # removing psth binned trials - columns = list(np.array(columns, dtype="str")) - regex = re.compile("bin_*") - single_trials_index = [i for i in range(len(columns)) if not regex.match(columns[i])] - single_trials_index = [i for i in range(len(columns)) if columns[i] != "timestamps"] - - psth = psth.T - if psth.ndim > 1: - mean = np.nanmean(psth[:, single_trials_index], axis=1).reshape(-1, 1) - err = np.nanstd(psth[:, single_trials_index], axis=1) / math.sqrt(psth[:, single_trials_index].shape[1]) - err = err.reshape(-1, 1) - psth = np.hstack((psth, mean)) - psth = np.hstack((psth, err)) - # timestamps = np.asarray(read_Df(filepath, 'ts_psth', '')) - # psth = np.hstack((psth, timestamps)) - try: - ts = read_hdf5(event, filepath, "ts") - ts = np.append(ts, ["mean", "err"]) - except: - ts = None - - if len(columns) == 0: - df = pd.DataFrame(psth, index=None, columns=ts, dtype="float32") - else: - columns = np.asarray(columns) - columns = np.append(columns, ["mean", "err"]) - df = pd.DataFrame(psth, index=None, columns=columns, dtype="float32") - - df.to_hdf(op, key="df", mode="w") - - -def helperCrossCorrelation(arr_A, arr_B, sample_rate): +def compute_cross_correlation(arr_A, arr_B, sample_rate): cross_corr = list() for a, b in zip(arr_A, arr_B): if np.isnan(a).any() or np.isnan(b).any(): diff --git a/src/guppy/analysis/psth_average.py b/src/guppy/analysis/psth_average.py index b539419..4f3c589 100644 --- a/src/guppy/analysis/psth_average.py +++ b/src/guppy/analysis/psth_average.py @@ -14,7 +14,7 @@ read_Df, write_hdf5, ) -from .psth_utils import create_Df +from .psth_utils import create_Df_for_psth logger = logging.getLogger(__name__) @@ -111,7 +111,7 @@ def averageForGroup(folderNames, event, inputParameters): timestamps = np.asarray(df["timestamps"]).reshape(1, -1) psth = np.concatenate((psth, timestamps), axis=0) columns = columns + ["timestamps"] - create_Df(op, temp_path[j][1], temp_path[j][2], psth, columns=columns) + create_Df_for_psth(op, temp_path[j][1], temp_path[j][2], psth, columns=columns) # read PSTH peak and area for each event and combine them. 
Save the final output to an average folder for i in range(len(new_path)): @@ -178,7 +178,7 @@ def averageForGroup(folderNames, event, inputParameters): timestamps = np.array(df["timestamps"]).reshape(1, -1) corr = np.concatenate((corr, timestamps), axis=0) columns.append("timestamps") - create_Df( + create_Df_for_psth( make_dir_for_cross_correlation(op), "corr_" + event, type[i] + "_" + corr_info[k - 1] + "_" + corr_info[k], diff --git a/src/guppy/analysis/psth_utils.py b/src/guppy/analysis/psth_utils.py index 13b2479..45bc2c7 100644 --- a/src/guppy/analysis/psth_utils.py +++ b/src/guppy/analysis/psth_utils.py @@ -12,7 +12,7 @@ # function to create dataframe for each event PSTH and save it to h5 file -def create_Df(filepath, event, name, psth, columns=[]): +def create_Df_for_psth(filepath, event, name, psth, columns=[]): event = event.replace("\\", "_") event = event.replace("/", "_") if name: @@ -53,3 +53,47 @@ def create_Df(filepath, event, name, psth, columns=[]): df = pd.DataFrame(psth, index=None, columns=list(columns), dtype="float32") df.to_hdf(op, key="df", mode="w") + + +# same function used to store PSTH in computePsth file +# Here, cross correlation dataframe is saved instead of PSTH +# cross correlation dataframe has the same structure as PSTH file +def create_Df_for_cross_correlation(filepath, event, name, psth, columns=[]): + if name: + op = os.path.join(filepath, event + "_{}.h5".format(name)) + else: + op = os.path.join(filepath, event + ".h5") + + # check if file already exists + # if os.path.exists(op): + # return 0 + + # removing psth binned trials + columns = list(np.array(columns, dtype="str")) + regex = re.compile("bin_*") + single_trials_index = [i for i in range(len(columns)) if not regex.match(columns[i])] + single_trials_index = [i for i in range(len(columns)) if columns[i] != "timestamps"] + + psth = psth.T + if psth.ndim > 1: + mean = np.nanmean(psth[:, single_trials_index], axis=1).reshape(-1, 1) + err = np.nanstd(psth[:, single_trials_index], axis=1) / math.sqrt(psth[:, single_trials_index].shape[1]) + err = err.reshape(-1, 1) + psth = np.hstack((psth, mean)) + psth = np.hstack((psth, err)) + # timestamps = np.asarray(read_Df(filepath, 'ts_psth', '')) + # psth = np.hstack((psth, timestamps)) + try: + ts = read_hdf5(event, filepath, "ts") + ts = np.append(ts, ["mean", "err"]) + except: + ts = None + + if len(columns) == 0: + df = pd.DataFrame(psth, index=None, columns=ts, dtype="float32") + else: + columns = np.asarray(columns) + columns = np.append(columns, ["mean", "err"]) + df = pd.DataFrame(psth, index=None, columns=columns, dtype="float32") + + df.to_hdf(op, key="df", mode="w") diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 65b5d21..04a5f2d 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -14,9 +14,10 @@ from scipy import signal as ss from .analysis.compute_psth import compute_psth -from .analysis.cross_correlation import computeCrossCorrelation +from .analysis.cross_correlation import compute_cross_correlation, getCorrCombinations from .analysis.io_utils import ( get_all_stores_for_combining_data, + make_dir_for_cross_correlation, makeAverageDir, read_Df, read_hdf5, @@ -24,7 +25,7 @@ ) from .analysis.psth_average import averageForGroup from .analysis.psth_peak_and_area import compute_psth_peak_and_area -from .analysis.psth_utils import create_Df +from .analysis.psth_utils import create_Df_for_cross_correlation, create_Df_for_psth from .analysis.standard_io import ( write_peak_and_area_to_csv, 
write_peak_and_area_to_hdf5, @@ -145,14 +146,14 @@ def execute_compute_psth(filepath, event, inputParameters): ) write_hdf5(ts, event + "_" + name_1, filepath, "ts") - create_Df( + create_Df_for_psth( filepath, event + "_" + name_1 + "_baselineUncorrected", basename, psth_baselineUncorrected, columns=cols, ) # extra - create_Df(filepath, event + "_" + name_1, basename, psth, columns=cols) + create_Df_for_psth(filepath, event + "_" + name_1, basename, psth, columns=cols) logger.info(f"PSTH for event {event} computed.") @@ -203,6 +204,46 @@ def execute_compute_psth_peak_and_area(filepath, event, inputParameters): logger.info(f"Peak and Area for PSTH mean signal for event {event} computed.") +def execute_compute_cross_correlation(filepath, event, inputParameters): + isCompute = inputParameters["computeCorr"] + removeArtifacts = inputParameters["removeArtifacts"] + artifactsRemovalMethod = inputParameters["artifactsRemovalMethod"] + if isCompute == True: + if removeArtifacts == True and artifactsRemovalMethod == "concatenate": + raise Exception( + "For cross-correlation, when removeArtifacts is True, artifacts removal method\ + should be replace with NaNs and not concatenate" + ) + corr_info, type = getCorrCombinations(filepath, inputParameters) + if "control" in event.lower() or "signal" in event.lower(): + return + else: + for i in range(1, len(corr_info)): + logger.debug(f"Computing cross-correlation for event {event}...") + for j in range(len(type)): + psth_a = read_Df(filepath, event + "_" + corr_info[i - 1], type[j] + "_" + corr_info[i - 1]) + psth_b = read_Df(filepath, event + "_" + corr_info[i], type[j] + "_" + corr_info[i]) + sample_rate = 1 / (psth_a["timestamps"][1] - psth_a["timestamps"][0]) + psth_a = psth_a.drop(columns=["timestamps", "err", "mean"]) + psth_b = psth_b.drop(columns=["timestamps", "err", "mean"]) + cols_a, cols_b = np.array(psth_a.columns), np.array(psth_b.columns) + if np.intersect1d(cols_a, cols_b).size > 0: + cols = list(np.intersect1d(cols_a, cols_b)) + else: + cols = list(cols_a) + arr_A, arr_B = np.array(psth_a).T, np.array(psth_b).T + cross_corr = compute_cross_correlation(arr_A, arr_B, sample_rate) + cols.append("timestamps") + create_Df_for_cross_correlation( + make_dir_for_cross_correlation(filepath), + "corr_" + event, + type[j] + "_" + corr_info[i - 1] + "_" + corr_info[i], + cross_corr, + cols, + ) + logger.info(f"Cross-correlation for event {event} computed.") + + def orchestrate_psth(inputParameters): folderNames = inputParameters["folderNames"] numProcesses = inputParameters["numberOfCores"] @@ -229,7 +270,9 @@ def orchestrate_psth(inputParameters): ) with mp.Pool(numProcesses) as cr: - cr.starmap(computeCrossCorrelation, zip(repeat(filepath), storesList[1, :], repeat(inputParameters))) + cr.starmap( + execute_compute_cross_correlation, zip(repeat(filepath), storesList[1, :], repeat(inputParameters)) + ) # for k in range(storesList.shape[1]): # storenamePsth(filepath, storesList[1,k], inputParameters) @@ -262,7 +305,7 @@ def execute_psth_combined(inputParameters): for k in range(storesList.shape[1]): execute_compute_psth(op[i][0], storesList[1, k], inputParameters) execute_compute_psth_peak_and_area(op[i][0], storesList[1, k], inputParameters) - computeCrossCorrelation(op[i][0], storesList[1, k], inputParameters) + execute_compute_cross_correlation(op[i][0], storesList[1, k], inputParameters) writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") inputParameters["step"] += 1 From 707fa186a1c4c21350748f50262c7065a2d61309 Mon Sep 17 
00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 10:08:14 -0800 Subject: [PATCH 141/150] reorganized x-corr --- src/guppy/analysis/cross_correlation.py | 34 ------------------------- src/guppy/analysis/psth_average.py | 3 +-- src/guppy/analysis/psth_utils.py | 33 ++++++++++++++++++++++++ src/guppy/computePsth.py | 8 ++++-- 4 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/guppy/analysis/cross_correlation.py b/src/guppy/analysis/cross_correlation.py index 3d69136..726943d 100644 --- a/src/guppy/analysis/cross_correlation.py +++ b/src/guppy/analysis/cross_correlation.py @@ -1,6 +1,4 @@ -import glob import logging -import os import numpy as np from scipy import signal @@ -8,38 +6,6 @@ logger = logging.getLogger(__name__) -def getCorrCombinations(filepath, inputParameters): - selectForComputePsth = inputParameters["selectForComputePsth"] - if selectForComputePsth == "z_score": - path = glob.glob(os.path.join(filepath, "z_score_*")) - elif selectForComputePsth == "dff": - path = glob.glob(os.path.join(filepath, "dff_*")) - else: - path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) - - names = list() - type = list() - for i in range(len(path)): - basename = (os.path.basename(path[i])).split(".")[0] - names.append(basename.split("_")[-1]) - type.append((os.path.basename(path[i])).split(".")[0].split("_" + names[-1], 1)[0]) - - names = list(np.unique(np.array(names))) - type = list(np.unique(np.array(type))) - - corr_info = list() - if len(names) <= 1: - logger.info("Cross-correlation cannot be computed because only one signal is present.") - return corr_info, type - elif len(names) == 2: - corr_info = names - else: - corr_info = names - corr_info.append(names[0]) - - return corr_info, type - - def compute_cross_correlation(arr_A, arr_B, sample_rate): cross_corr = list() for a, b in zip(arr_A, arr_B): diff --git a/src/guppy/analysis/psth_average.py b/src/guppy/analysis/psth_average.py index 4f3c589..664cc3d 100644 --- a/src/guppy/analysis/psth_average.py +++ b/src/guppy/analysis/psth_average.py @@ -7,14 +7,13 @@ import numpy as np import pandas as pd -from .cross_correlation import getCorrCombinations from .io_utils import ( make_dir_for_cross_correlation, makeAverageDir, read_Df, write_hdf5, ) -from .psth_utils import create_Df_for_psth +from .psth_utils import create_Df_for_psth, getCorrCombinations logger = logging.getLogger(__name__) diff --git a/src/guppy/analysis/psth_utils.py b/src/guppy/analysis/psth_utils.py index 45bc2c7..c351511 100644 --- a/src/guppy/analysis/psth_utils.py +++ b/src/guppy/analysis/psth_utils.py @@ -1,3 +1,4 @@ +import glob import logging import math import os @@ -97,3 +98,35 @@ def create_Df_for_cross_correlation(filepath, event, name, psth, columns=[]): df = pd.DataFrame(psth, index=None, columns=columns, dtype="float32") df.to_hdf(op, key="df", mode="w") + + +def getCorrCombinations(filepath, inputParameters): + selectForComputePsth = inputParameters["selectForComputePsth"] + if selectForComputePsth == "z_score": + path = glob.glob(os.path.join(filepath, "z_score_*")) + elif selectForComputePsth == "dff": + path = glob.glob(os.path.join(filepath, "dff_*")) + else: + path = glob.glob(os.path.join(filepath, "z_score_*")) + glob.glob(os.path.join(filepath, "dff_*")) + + names = list() + type = list() + for i in range(len(path)): + basename = (os.path.basename(path[i])).split(".")[0] + names.append(basename.split("_")[-1]) + type.append((os.path.basename(path[i])).split(".")[0].split("_" + 
names[-1], 1)[0]) + + names = list(np.unique(np.array(names))) + type = list(np.unique(np.array(type))) + + corr_info = list() + if len(names) <= 1: + logger.info("Cross-correlation cannot be computed because only one signal is present.") + return corr_info, type + elif len(names) == 2: + corr_info = names + else: + corr_info = names + corr_info.append(names[0]) + + return corr_info, type diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 04a5f2d..654f299 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -14,7 +14,7 @@ from scipy import signal as ss from .analysis.compute_psth import compute_psth -from .analysis.cross_correlation import compute_cross_correlation, getCorrCombinations +from .analysis.cross_correlation import compute_cross_correlation from .analysis.io_utils import ( get_all_stores_for_combining_data, make_dir_for_cross_correlation, @@ -25,7 +25,11 @@ ) from .analysis.psth_average import averageForGroup from .analysis.psth_peak_and_area import compute_psth_peak_and_area -from .analysis.psth_utils import create_Df_for_cross_correlation, create_Df_for_psth +from .analysis.psth_utils import ( + create_Df_for_cross_correlation, + create_Df_for_psth, + getCorrCombinations, +) from .analysis.standard_io import ( write_peak_and_area_to_csv, write_peak_and_area_to_hdf5, From 0ae733307ecfcf0908c9196804a9388101fa2361 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 10:24:32 -0800 Subject: [PATCH 142/150] updated imports --- src/guppy/findTransientsFreqAndAmp.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/guppy/findTransientsFreqAndAmp.py b/src/guppy/findTransientsFreqAndAmp.py index a31980a..c8cc4de 100755 --- a/src/guppy/findTransientsFreqAndAmp.py +++ b/src/guppy/findTransientsFreqAndAmp.py @@ -7,15 +7,12 @@ import sys from itertools import repeat -import h5py import matplotlib.pyplot as plt import numpy as np import pandas as pd from scipy.signal import argrelextrema -from .preprocess import get_all_stores_for_combining_data - -logger = logging.getLogger(__name__) +from .analysis.io_utils import get_all_stores_for_combining_data, read_hdf5 logger = logging.getLogger(__name__) @@ -33,22 +30,6 @@ def writeToFile(value: str): file.write(value) -def read_hdf5(event, filepath, key): - if event: - op = os.path.join(filepath, event + ".hdf5") - else: - op = filepath - - if os.path.exists(op): - with h5py.File(op, "r") as f: - arr = np.asarray(f[key]) - else: - logger.error(f"{event}.hdf5 file does not exist") - raise Exception("{}.hdf5 file does not exist".format(event)) - - return arr - - def processChunks(arrValues, arrIndexes, highAmpFilt, transientsThresh): arrValues = arrValues[~np.isnan(arrValues)] From 43f3289f47a4707f9836d2761b94b7baeacf9b7b Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 10:32:54 -0800 Subject: [PATCH 143/150] reordered fns bottom --> up --- src/guppy/computePsth.py | 72 ++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/guppy/computePsth.py b/src/guppy/computePsth.py index 654f299..32d9be1 100755 --- a/src/guppy/computePsth.py +++ b/src/guppy/computePsth.py @@ -51,42 +51,6 @@ def writeToFile(value: str): file.write(value) -def psthForEachStorename(inputParameters): - - logger.info("Computing PSTH, Peak and Area for each event...") - inputParameters = inputParameters - - # storesList = np.genfromtxt(inputParameters['storesListPath'], dtype='str', delimiter=',') - - average = 
inputParameters["averageForGroup"] - combine_data = inputParameters["combine_data"] - numProcesses = inputParameters["numberOfCores"] - inputParameters["step"] = 0 - if numProcesses == 0: - numProcesses = mp.cpu_count() - elif numProcesses > mp.cpu_count(): - logger.warning( - "Warning : # of cores parameter set is greater than the cores available \ - available in your machine" - ) - numProcesses = mp.cpu_count() - 1 - - logger.info(f"Average for group : {average}") - - # for average following if statement will be executed - if average == True: - execute_average_for_group(inputParameters) - - # for individual analysis following else statement will be executed - else: - if combine_data == True: - execute_psth_combined(inputParameters) - else: - orchestrate_psth(inputParameters) - logger.info("PSTH, Area and Peak are computed for all events.") - return inputParameters - - # function to create PSTH for each event using function helper_psth and save the PSTH to h5 file def execute_compute_psth(filepath, event, inputParameters): @@ -355,6 +319,42 @@ def execute_average_for_group(inputParameters): inputParameters["step"] += 1 +def psthForEachStorename(inputParameters): + + logger.info("Computing PSTH, Peak and Area for each event...") + inputParameters = inputParameters + + # storesList = np.genfromtxt(inputParameters['storesListPath'], dtype='str', delimiter=',') + + average = inputParameters["averageForGroup"] + combine_data = inputParameters["combine_data"] + numProcesses = inputParameters["numberOfCores"] + inputParameters["step"] = 0 + if numProcesses == 0: + numProcesses = mp.cpu_count() + elif numProcesses > mp.cpu_count(): + logger.warning( + "Warning : # of cores parameter set is greater than the cores available \ + available in your machine" + ) + numProcesses = mp.cpu_count() - 1 + + logger.info(f"Average for group : {average}") + + # for average following if statement will be executed + if average == True: + execute_average_for_group(inputParameters) + + # for individual analysis following else statement will be executed + else: + if combine_data == True: + execute_psth_combined(inputParameters) + else: + orchestrate_psth(inputParameters) + logger.info("PSTH, Area and Peak are computed for all events.") + return inputParameters + + def main(input_parameters): try: inputParameters = psthForEachStorename(input_parameters) From b3ec696c68ced7e9d943260a4465cf53875044f5 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 16:36:46 -0800 Subject: [PATCH 144/150] reorganized findTransientsFreqAndAmp --- src/guppy/findTransientsFreqAndAmp.py | 201 +++++++------------------- 1 file changed, 54 insertions(+), 147 deletions(-) diff --git a/src/guppy/findTransientsFreqAndAmp.py b/src/guppy/findTransientsFreqAndAmp.py index c8cc4de..71b9060 100755 --- a/src/guppy/findTransientsFreqAndAmp.py +++ b/src/guppy/findTransientsFreqAndAmp.py @@ -1,18 +1,16 @@ import glob import json import logging -import math import multiprocessing as mp import os import sys -from itertools import repeat import matplotlib.pyplot as plt import numpy as np import pandas as pd -from scipy.signal import argrelextrema from .analysis.io_utils import get_all_stores_for_combining_data, read_hdf5 +from .analysis.transients import analyze_transients logger = logging.getLogger(__name__) @@ -30,91 +28,6 @@ def writeToFile(value: str): file.write(value) -def processChunks(arrValues, arrIndexes, highAmpFilt, transientsThresh): - - arrValues = arrValues[~np.isnan(arrValues)] - median = np.median(arrValues) - - mad = 
np.median(np.abs(arrValues - median)) - - firstThreshold = median + (highAmpFilt * mad) - - greaterThanMad = np.where(arrValues > firstThreshold)[0] - - arr = np.arange(arrValues.shape[0]) - lowerThanMad = np.isin(arr, greaterThanMad, invert=True) - filteredOut = arrValues[np.where(lowerThanMad == True)[0]] - - filteredOutMedian = np.median(filteredOut) - filteredOutMad = np.median(np.abs(filteredOut - np.median(filteredOut))) - secondThreshold = filteredOutMedian + (transientsThresh * filteredOutMad) - - greaterThanThreshIndex = np.where(arrValues > secondThreshold)[0] - greaterThanThreshValues = arrValues[greaterThanThreshIndex] - temp = np.zeros(arrValues.shape[0]) - temp[greaterThanThreshIndex] = greaterThanThreshValues - peaks = argrelextrema(temp, np.greater)[0] - - firstThresholdY = np.full(arrValues.shape[0], firstThreshold) - secondThresholdY = np.full(arrValues.shape[0], secondThreshold) - - newPeaks = np.full(arrValues.shape[0], np.nan) - newPeaks[peaks] = peaks + arrIndexes[0] - - # madY = np.full(arrValues.shape[0], mad) - medianY = np.full(arrValues.shape[0], median) - filteredOutMedianY = np.full(arrValues.shape[0], filteredOutMedian) - - return peaks, mad, filteredOutMad, medianY, filteredOutMedianY, firstThresholdY, secondThresholdY - - -def createChunks(z_score, sampling_rate, window): - - logger.debug("Creating chunks for multiprocessing...") - windowPoints = math.ceil(sampling_rate * window) - remainderPoints = math.ceil((sampling_rate * window) - (z_score.shape[0] % windowPoints)) - - if remainderPoints == windowPoints: - padded_z_score = z_score - z_score_index = np.arange(padded_z_score.shape[0]) - else: - padding = np.full(remainderPoints, np.nan) - padded_z_score = np.concatenate((z_score, padding)) - z_score_index = np.arange(padded_z_score.shape[0]) - - reshape = padded_z_score.shape[0] / windowPoints - - if reshape.is_integer() == True: - z_score_chunks = padded_z_score.reshape(int(reshape), -1) - z_score_chunks_index = z_score_index.reshape(int(reshape), -1) - else: - logger.error("Reshaping values should be integer.") - raise Exception("Reshaping values should be integer.") - logger.info("Chunks are created for multiprocessing.") - return z_score_chunks, z_score_chunks_index - - -def calculate_freq_amp(arr, z_score, z_score_chunks_index, timestamps): - peaks = arr[:, 0] - filteredOutMedian = arr[:, 4] - count = 0 - peaksAmp = np.array([]) - peaksInd = np.array([]) - for i in range(z_score_chunks_index.shape[0]): - count += peaks[i].shape[0] - peaksIndexes = peaks[i] + z_score_chunks_index[i][0] - peaksInd = np.concatenate((peaksInd, peaksIndexes)) - amps = z_score[peaksIndexes] - filteredOutMedian[i][0] - peaksAmp = np.concatenate((peaksAmp, amps)) - - peaksInd = peaksInd.ravel() - peaksInd = peaksInd.astype(int) - # logger.info(timestamps) - freq = peaksAmp.shape[0] / ((timestamps[-1] - timestamps[0]) / 60) - - return freq, peaksAmp, peaksInd - - def create_Df(filepath, arr, name, index=[], columns=[]): op = os.path.join(filepath, "freqAndAmp_" + name + ".h5") @@ -170,21 +83,9 @@ def findFreqAndAmp(filepath, inputParameters, window=15, numProcesses=mp.cpu_cou name_1 = basename.split("_")[-1] sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] z_score = read_hdf5("", path[i], "data") - not_nan_indices = ~np.isnan(z_score) - z_score = z_score[not_nan_indices] - z_score_chunks, z_score_chunks_index = createChunks(z_score, sampling_rate, window) - - with mp.Pool(numProcesses) as p: - result = p.starmap( - processChunks, 
zip(z_score_chunks, z_score_chunks_index, repeat(highAmpFilt), repeat(transientsThresh)) - ) - - result = np.asarray(result, dtype=object) - ts = read_hdf5("timeCorrection_" + name_1, filepath, "timestampNew") - ts = ts[not_nan_indices] - freq, peaksAmp, peaksInd = calculate_freq_amp(result, z_score, z_score_chunks_index, ts) - peaks_occurrences = np.array([ts[peaksInd], peaksAmp]).T - arr = np.array([[freq, np.mean(peaksAmp)]]) + z_score, ts, peaksInd, peaks_occurrences, arr = analyze_transients( + filepath, window, numProcesses, highAmpFilt, transientsThresh, name_1, sampling_rate, z_score + ) fileName = [os.path.basename(os.path.dirname(filepath))] create_Df(filepath, arr, basename, index=fileName, columns=["freq (events/min)", "amplitude"]) create_csv( @@ -297,57 +198,63 @@ def executeFindFreqAndAmp(inputParameters): numProcesses = mp.cpu_count() - 1 if average == True: - if len(folderNamesForAvg) > 0: - storesListPath = [] - for i in range(len(folderNamesForAvg)): - filepath = folderNamesForAvg[i] - storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) - storesListPath = np.concatenate(storesListPath) - averageForGroup(storesListPath, inputParameters) - writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") - inputParameters["step"] += 1 - else: - logger.error("Not a single folder name is provided in folderNamesForAvg in inputParamters File.") - raise Exception("Not a single folder name is provided in folderNamesForAvg in inputParamters File.") - + execute_average_for_group(inputParameters, folderNamesForAvg) else: if combine_data == True: - storesListPath = [] - for i in range(len(folderNames)): - filepath = folderNames[i] - storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) - storesListPath = list(np.concatenate(storesListPath).flatten()) - op = get_all_stores_for_combining_data(storesListPath) - for i in range(len(op)): - filepath = op[i][0] - storesList = np.genfromtxt( - os.path.join(filepath, "storesList.csv"), dtype="str", delimiter="," - ).reshape(2, -1) - findFreqAndAmp(filepath, inputParameters, window=moving_window, numProcesses=numProcesses) - writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") - inputParameters["step"] += 1 - plt.show() + execute_find_freq_and_amp_combined(inputParameters, folderNames, moving_window, numProcesses) else: - for i in range(len(folderNames)): - logger.debug( - f"Finding transients in z-score data of {folderNames[i]} and calculating frequency and amplitude." 
- ) - filepath = folderNames[i] - storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) - for j in range(len(storesListPath)): - filepath = storesListPath[j] - storesList = np.genfromtxt( - os.path.join(filepath, "storesList.csv"), dtype="str", delimiter="," - ).reshape(2, -1) - findFreqAndAmp(filepath, inputParameters, window=moving_window, numProcesses=numProcesses) - writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") - inputParameters["step"] += 1 - logger.info("Transients in z-score data found and frequency and amplitude are calculated.") - plt.show() + execute_find_freq_and_amp(inputParameters, folderNames, moving_window, numProcesses) logger.info("Transients in z-score data found and frequency and amplitude are calculated.") +def execute_find_freq_and_amp(inputParameters, folderNames, moving_window, numProcesses): + for i in range(len(folderNames)): + logger.debug(f"Finding transients in z-score data of {folderNames[i]} and calculating frequency and amplitude.") + filepath = folderNames[i] + storesListPath = takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*"))) + for j in range(len(storesListPath)): + filepath = storesListPath[j] + storesList = np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape( + 2, -1 + ) + findFreqAndAmp(filepath, inputParameters, window=moving_window, numProcesses=numProcesses) + writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + logger.info("Transients in z-score data found and frequency and amplitude are calculated.") + plt.show() + + +def execute_find_freq_and_amp_combined(inputParameters, folderNames, moving_window, numProcesses): + storesListPath = [] + for i in range(len(folderNames)): + filepath = folderNames[i] + storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) + storesListPath = list(np.concatenate(storesListPath).flatten()) + op = get_all_stores_for_combining_data(storesListPath) + for i in range(len(op)): + filepath = op[i][0] + storesList = np.genfromtxt(os.path.join(filepath, "storesList.csv"), dtype="str", delimiter=",").reshape(2, -1) + findFreqAndAmp(filepath, inputParameters, window=moving_window, numProcesses=numProcesses) + writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + plt.show() + + +def execute_average_for_group(inputParameters, folderNamesForAvg): + if len(folderNamesForAvg) == 0: + logger.error("Not a single folder name is provided in folderNamesForAvg in inputParamters File.") + raise Exception("Not a single folder name is provided in folderNamesForAvg in inputParamters File.") + storesListPath = [] + for i in range(len(folderNamesForAvg)): + filepath = folderNamesForAvg[i] + storesListPath.append(takeOnlyDirs(glob.glob(os.path.join(filepath, "*_output_*")))) + storesListPath = np.concatenate(storesListPath) + averageForGroup(storesListPath, inputParameters) + writeToFile(str(10 + ((inputParameters["step"] + 1) * 10)) + "\n") + inputParameters["step"] += 1 + + if __name__ == "__main__": try: executeFindFreqAndAmp(json.loads(sys.argv[1])) From 74816400519bd38dd8c1c69b45d8bea10f4ea9c3 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 16:38:03 -0800 Subject: [PATCH 145/150] reorganized findTransientsFreqAndAmp --- src/guppy/analysis/transients.py | 115 +++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 src/guppy/analysis/transients.py diff --git 
a/src/guppy/analysis/transients.py b/src/guppy/analysis/transients.py new file mode 100644 index 0000000..2e6c189 --- /dev/null +++ b/src/guppy/analysis/transients.py @@ -0,0 +1,115 @@ +import logging +import math +import multiprocessing as mp +from itertools import repeat + +import numpy as np +from scipy.signal import argrelextrema + +from .io_utils import read_hdf5 + +logger = logging.getLogger(__name__) + + +def analyze_transients(filepath, window, numProcesses, highAmpFilt, transientsThresh, name_1, sampling_rate, z_score): + not_nan_indices = ~np.isnan(z_score) + z_score = z_score[not_nan_indices] + z_score_chunks, z_score_chunks_index = createChunks(z_score, sampling_rate, window) + + with mp.Pool(numProcesses) as p: + result = p.starmap( + processChunks, zip(z_score_chunks, z_score_chunks_index, repeat(highAmpFilt), repeat(transientsThresh)) + ) + + result = np.asarray(result, dtype=object) + ts = read_hdf5("timeCorrection_" + name_1, filepath, "timestampNew") + ts = ts[not_nan_indices] + freq, peaksAmp, peaksInd = calculate_freq_amp(result, z_score, z_score_chunks_index, ts) + peaks_occurrences = np.array([ts[peaksInd], peaksAmp]).T + arr = np.array([[freq, np.mean(peaksAmp)]]) + return z_score, ts, peaksInd, peaks_occurrences, arr + + +def processChunks(arrValues, arrIndexes, highAmpFilt, transientsThresh): + + arrValues = arrValues[~np.isnan(arrValues)] + median = np.median(arrValues) + + mad = np.median(np.abs(arrValues - median)) + + firstThreshold = median + (highAmpFilt * mad) + + greaterThanMad = np.where(arrValues > firstThreshold)[0] + + arr = np.arange(arrValues.shape[0]) + lowerThanMad = np.isin(arr, greaterThanMad, invert=True) + filteredOut = arrValues[np.where(lowerThanMad == True)[0]] + + filteredOutMedian = np.median(filteredOut) + filteredOutMad = np.median(np.abs(filteredOut - np.median(filteredOut))) + secondThreshold = filteredOutMedian + (transientsThresh * filteredOutMad) + + greaterThanThreshIndex = np.where(arrValues > secondThreshold)[0] + greaterThanThreshValues = arrValues[greaterThanThreshIndex] + temp = np.zeros(arrValues.shape[0]) + temp[greaterThanThreshIndex] = greaterThanThreshValues + peaks = argrelextrema(temp, np.greater)[0] + + firstThresholdY = np.full(arrValues.shape[0], firstThreshold) + secondThresholdY = np.full(arrValues.shape[0], secondThreshold) + + newPeaks = np.full(arrValues.shape[0], np.nan) + newPeaks[peaks] = peaks + arrIndexes[0] + + # madY = np.full(arrValues.shape[0], mad) + medianY = np.full(arrValues.shape[0], median) + filteredOutMedianY = np.full(arrValues.shape[0], filteredOutMedian) + + return peaks, mad, filteredOutMad, medianY, filteredOutMedianY, firstThresholdY, secondThresholdY + + +def createChunks(z_score, sampling_rate, window): + + logger.debug("Creating chunks for multiprocessing...") + windowPoints = math.ceil(sampling_rate * window) + remainderPoints = math.ceil((sampling_rate * window) - (z_score.shape[0] % windowPoints)) + + if remainderPoints == windowPoints: + padded_z_score = z_score + z_score_index = np.arange(padded_z_score.shape[0]) + else: + padding = np.full(remainderPoints, np.nan) + padded_z_score = np.concatenate((z_score, padding)) + z_score_index = np.arange(padded_z_score.shape[0]) + + reshape = padded_z_score.shape[0] / windowPoints + + if reshape.is_integer() == True: + z_score_chunks = padded_z_score.reshape(int(reshape), -1) + z_score_chunks_index = z_score_index.reshape(int(reshape), -1) + else: + logger.error("Reshaping values should be integer.") + raise Exception("Reshaping values 
should be integer.") + logger.info("Chunks are created for multiprocessing.") + return z_score_chunks, z_score_chunks_index + + +def calculate_freq_amp(arr, z_score, z_score_chunks_index, timestamps): + peaks = arr[:, 0] + filteredOutMedian = arr[:, 4] + count = 0 + peaksAmp = np.array([]) + peaksInd = np.array([]) + for i in range(z_score_chunks_index.shape[0]): + count += peaks[i].shape[0] + peaksIndexes = peaks[i] + z_score_chunks_index[i][0] + peaksInd = np.concatenate((peaksInd, peaksIndexes)) + amps = z_score[peaksIndexes] - filteredOutMedian[i][0] + peaksAmp = np.concatenate((peaksAmp, amps)) + + peaksInd = peaksInd.ravel() + peaksInd = peaksInd.astype(int) + # logger.info(timestamps) + freq = peaksAmp.shape[0] / ((timestamps[-1] - timestamps[0]) / 60) + + return freq, peaksAmp, peaksInd From 4a87c885c77431f81cce3c5552536b756b693105 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 16:39:52 -0800 Subject: [PATCH 146/150] pulled read out of analyze_transients --- src/guppy/analysis/transients.py | 5 +---- src/guppy/findTransientsFreqAndAmp.py | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/guppy/analysis/transients.py b/src/guppy/analysis/transients.py index 2e6c189..5fd8645 100644 --- a/src/guppy/analysis/transients.py +++ b/src/guppy/analysis/transients.py @@ -6,12 +6,10 @@ import numpy as np from scipy.signal import argrelextrema -from .io_utils import read_hdf5 - logger = logging.getLogger(__name__) -def analyze_transients(filepath, window, numProcesses, highAmpFilt, transientsThresh, name_1, sampling_rate, z_score): +def analyze_transients(ts, window, numProcesses, highAmpFilt, transientsThresh, sampling_rate, z_score): not_nan_indices = ~np.isnan(z_score) z_score = z_score[not_nan_indices] z_score_chunks, z_score_chunks_index = createChunks(z_score, sampling_rate, window) @@ -22,7 +20,6 @@ def analyze_transients(filepath, window, numProcesses, highAmpFilt, transientsTh ) result = np.asarray(result, dtype=object) - ts = read_hdf5("timeCorrection_" + name_1, filepath, "timestampNew") ts = ts[not_nan_indices] freq, peaksAmp, peaksInd = calculate_freq_amp(result, z_score, z_score_chunks_index, ts) peaks_occurrences = np.array([ts[peaksInd], peaksAmp]).T diff --git a/src/guppy/findTransientsFreqAndAmp.py b/src/guppy/findTransientsFreqAndAmp.py index 71b9060..2111332 100755 --- a/src/guppy/findTransientsFreqAndAmp.py +++ b/src/guppy/findTransientsFreqAndAmp.py @@ -83,8 +83,9 @@ def findFreqAndAmp(filepath, inputParameters, window=15, numProcesses=mp.cpu_cou name_1 = basename.split("_")[-1] sampling_rate = read_hdf5("timeCorrection_" + name_1, filepath, "sampling_rate")[0] z_score = read_hdf5("", path[i], "data") + ts = read_hdf5("timeCorrection_" + name_1, filepath, "timestampNew") z_score, ts, peaksInd, peaks_occurrences, arr = analyze_transients( - filepath, window, numProcesses, highAmpFilt, transientsThresh, name_1, sampling_rate, z_score + ts, window, numProcesses, highAmpFilt, transientsThresh, sampling_rate, z_score ) fileName = [os.path.basename(os.path.dirname(filepath))] create_Df(filepath, arr, basename, index=fileName, columns=["freq (events/min)", "amplitude"]) From 4eb229b215171bc589d8f2962bd602dced34f0ee Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 16:50:05 -0800 Subject: [PATCH 147/150] Moved some read-write operations to io_utils and standard_io --- src/guppy/analysis/standard_io.py | 23 ++++++++++ src/guppy/findTransientsFreqAndAmp.py | 65 +++++++-------------------- 2 files changed, 40 insertions(+), 
48 deletions(-) diff --git a/src/guppy/analysis/standard_io.py b/src/guppy/analysis/standard_io.py index c3b323c..d6dd9af 100644 --- a/src/guppy/analysis/standard_io.py +++ b/src/guppy/analysis/standard_io.py @@ -308,3 +308,26 @@ def write_peak_and_area_to_csv(filepath, arr, name, index=[]): df = pd.DataFrame(arr, index=index) df.to_csv(op) + + +def write_freq_and_amp_to_hdf5(filepath, arr, name, index=[], columns=[]): + + op = os.path.join(filepath, "freqAndAmp_" + name + ".h5") + dirname = os.path.dirname(filepath) + + df = pd.DataFrame(arr, index=index, columns=columns) + + df.to_hdf(op, key="df", mode="w") + + +def write_freq_and_amp_to_csv(filepath, arr, name, index=[], columns=[]): + op = os.path.join(filepath, name) + df = pd.DataFrame(arr, index=index, columns=columns) + df.to_csv(op) + + +def read_freq_and_amp_from_hdf5(filepath, name): + op = os.path.join(filepath, "freqAndAmp_" + name + ".h5") + df = pd.read_hdf(op, key="df", mode="r") + + return df diff --git a/src/guppy/findTransientsFreqAndAmp.py b/src/guppy/findTransientsFreqAndAmp.py index 2111332..3970c24 100755 --- a/src/guppy/findTransientsFreqAndAmp.py +++ b/src/guppy/findTransientsFreqAndAmp.py @@ -7,50 +7,28 @@ import matplotlib.pyplot as plt import numpy as np -import pandas as pd -from .analysis.io_utils import get_all_stores_for_combining_data, read_hdf5 +from .analysis.io_utils import ( + get_all_stores_for_combining_data, + makeAverageDir, + read_hdf5, + takeOnlyDirs, +) +from .analysis.standard_io import ( + read_freq_and_amp_from_hdf5, + write_freq_and_amp_to_csv, + write_freq_and_amp_to_hdf5, +) from .analysis.transients import analyze_transients logger = logging.getLogger(__name__) -def takeOnlyDirs(paths): - removePaths = [] - for p in paths: - if os.path.isfile(p): - removePaths.append(p) - return list(set(paths) - set(removePaths)) - - def writeToFile(value: str): with open(os.path.join(os.path.expanduser("~"), "pbSteps.txt"), "a") as file: file.write(value) -def create_Df(filepath, arr, name, index=[], columns=[]): - - op = os.path.join(filepath, "freqAndAmp_" + name + ".h5") - dirname = os.path.dirname(filepath) - - df = pd.DataFrame(arr, index=index, columns=columns) - - df.to_hdf(op, key="df", mode="w") - - -def create_csv(filepath, arr, name, index=[], columns=[]): - op = os.path.join(filepath, name) - df = pd.DataFrame(arr, index=index, columns=columns) - df.to_csv(op) - - -def read_Df(filepath, name): - op = os.path.join(filepath, "freqAndAmp_" + name + ".h5") - df = pd.read_hdf(op, key="df", mode="r") - - return df - - def visuzlize_peaks(filepath, z_score, timestamps, peaksIndex): dirname = os.path.dirname(filepath) @@ -88,11 +66,11 @@ def findFreqAndAmp(filepath, inputParameters, window=15, numProcesses=mp.cpu_cou ts, window, numProcesses, highAmpFilt, transientsThresh, sampling_rate, z_score ) fileName = [os.path.basename(os.path.dirname(filepath))] - create_Df(filepath, arr, basename, index=fileName, columns=["freq (events/min)", "amplitude"]) - create_csv( + write_freq_and_amp_to_hdf5(filepath, arr, basename, index=fileName, columns=["freq (events/min)", "amplitude"]) + write_freq_and_amp_to_csv( filepath, arr, "freqAndAmp_" + basename + ".csv", index=fileName, columns=["freq (events/min)", "amplitude"] ) - create_csv( + write_freq_and_amp_to_csv( filepath, peaks_occurrences, "transientsOccurrences_" + basename + ".csv", @@ -103,15 +81,6 @@ def findFreqAndAmp(filepath, inputParameters, window=15, numProcesses=mp.cpu_cou logger.info("Frequency and amplitude of transients in z_score data are 
calculated.") -def makeAverageDir(filepath): - - op = os.path.join(filepath, "average") - if not os.path.exists(op): - os.mkdir(op) - - return op - - def averageForGroup(folderNames, inputParameters): logger.debug("Combining results for frequency and amplitude of transients in z-score data...") @@ -161,13 +130,13 @@ def averageForGroup(folderNames, inputParameters): if not os.path.exists(os.path.join(temp_path[j][0], "freqAndAmp_" + temp_path[j][1] + ".h5")): continue else: - df = read_Df(temp_path[j][0], temp_path[j][1]) + df = read_freq_and_amp_from_hdf5(temp_path[j][0], temp_path[j][1]) arr.append(np.array([df["freq (events/min)"][0], df["amplitude"][0]])) fileName.append(os.path.basename(temp_path[j][0])) arr = np.asarray(arr) - create_Df(op, arr, temp_path[j][1], index=fileName, columns=["freq (events/min)", "amplitude"]) - create_csv( + write_freq_and_amp_to_hdf5(op, arr, temp_path[j][1], index=fileName, columns=["freq (events/min)", "amplitude"]) + write_freq_and_amp_to_csv( op, arr, "freqAndAmp_" + temp_path[j][1] + ".csv", From bfe7c71ad443988abcdab00e6175d6a97158f643 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Fri, 23 Jan 2026 16:56:01 -0800 Subject: [PATCH 148/150] Reorganized AverageForGroup --- src/guppy/analysis/transients_average.py | 81 ++++++++++++++++++++++++ src/guppy/findTransientsFreqAndAmp.py | 68 +------------------- 2 files changed, 82 insertions(+), 67 deletions(-) create mode 100644 src/guppy/analysis/transients_average.py diff --git a/src/guppy/analysis/transients_average.py b/src/guppy/analysis/transients_average.py new file mode 100644 index 0000000..3b8dd79 --- /dev/null +++ b/src/guppy/analysis/transients_average.py @@ -0,0 +1,81 @@ +import glob +import logging +import os + +import numpy as np + +from .io_utils import ( + makeAverageDir, +) +from .standard_io import ( + read_freq_and_amp_from_hdf5, + write_freq_and_amp_to_csv, + write_freq_and_amp_to_hdf5, +) + +logger = logging.getLogger(__name__) + + +def averageForGroup(folderNames, inputParameters): + + logger.debug("Combining results for frequency and amplitude of transients in z-score data...") + path = [] + abspath = inputParameters["abspath"] + selectForTransientsComputation = inputParameters["selectForTransientsComputation"] + path_temp_len = [] + + for i in range(len(folderNames)): + if selectForTransientsComputation == "z_score": + path_temp = glob.glob(os.path.join(folderNames[i], "z_score_*")) + elif selectForTransientsComputation == "dff": + path_temp = glob.glob(os.path.join(folderNames[i], "dff_*")) + else: + path_temp = glob.glob(os.path.join(folderNames[i], "z_score_*")) + glob.glob( + os.path.join(folderNames[i], "dff_*") + ) + + path_temp_len.append(len(path_temp)) + + for j in range(len(path_temp)): + basename = (os.path.basename(path_temp[j])).split(".")[0] + # name = name[0] + temp = [folderNames[i], basename] + path.append(temp) + + path_temp_len = np.asarray(path_temp_len) + max_len = np.argmax(path_temp_len) + + naming = [] + for i in range(len(path)): + naming.append(path[i][1]) + naming = np.unique(np.asarray(naming)) + + new_path = [[] for _ in range(path_temp_len[max_len])] + for i in range(len(path)): + idx = np.where(naming == path[i][1])[0][0] + new_path[idx].append(path[i]) + + op = makeAverageDir(abspath) + + for i in range(len(new_path)): + arr = [] # np.zeros((len(new_path[i]), 2)) + fileName = [] + temp_path = new_path[i] + for j in range(len(temp_path)): + if not os.path.exists(os.path.join(temp_path[j][0], "freqAndAmp_" + temp_path[j][1] + ".h5")): + continue + 
else: + df = read_freq_and_amp_from_hdf5(temp_path[j][0], temp_path[j][1]) + arr.append(np.array([df["freq (events/min)"][0], df["amplitude"][0]])) + fileName.append(os.path.basename(temp_path[j][0])) + + arr = np.asarray(arr) + write_freq_and_amp_to_hdf5(op, arr, temp_path[j][1], index=fileName, columns=["freq (events/min)", "amplitude"]) + write_freq_and_amp_to_csv( + op, + arr, + "freqAndAmp_" + temp_path[j][1] + ".csv", + index=fileName, + columns=["freq (events/min)", "amplitude"], + ) + logger.info("Results for frequency and amplitude of transients in z-score data are combined.") diff --git a/src/guppy/findTransientsFreqAndAmp.py b/src/guppy/findTransientsFreqAndAmp.py index 3970c24..f6c3d6e 100755 --- a/src/guppy/findTransientsFreqAndAmp.py +++ b/src/guppy/findTransientsFreqAndAmp.py @@ -10,16 +10,15 @@ from .analysis.io_utils import ( get_all_stores_for_combining_data, - makeAverageDir, read_hdf5, takeOnlyDirs, ) from .analysis.standard_io import ( - read_freq_and_amp_from_hdf5, write_freq_and_amp_to_csv, write_freq_and_amp_to_hdf5, ) from .analysis.transients import analyze_transients +from .analysis.transients_average import averageForGroup logger = logging.getLogger(__name__) @@ -81,71 +80,6 @@ def findFreqAndAmp(filepath, inputParameters, window=15, numProcesses=mp.cpu_cou logger.info("Frequency and amplitude of transients in z_score data are calculated.") -def averageForGroup(folderNames, inputParameters): - - logger.debug("Combining results for frequency and amplitude of transients in z-score data...") - path = [] - abspath = inputParameters["abspath"] - selectForTransientsComputation = inputParameters["selectForTransientsComputation"] - path_temp_len = [] - - for i in range(len(folderNames)): - if selectForTransientsComputation == "z_score": - path_temp = glob.glob(os.path.join(folderNames[i], "z_score_*")) - elif selectForTransientsComputation == "dff": - path_temp = glob.glob(os.path.join(folderNames[i], "dff_*")) - else: - path_temp = glob.glob(os.path.join(folderNames[i], "z_score_*")) + glob.glob( - os.path.join(folderNames[i], "dff_*") - ) - - path_temp_len.append(len(path_temp)) - - for j in range(len(path_temp)): - basename = (os.path.basename(path_temp[j])).split(".")[0] - # name = name[0] - temp = [folderNames[i], basename] - path.append(temp) - - path_temp_len = np.asarray(path_temp_len) - max_len = np.argmax(path_temp_len) - - naming = [] - for i in range(len(path)): - naming.append(path[i][1]) - naming = np.unique(np.asarray(naming)) - - new_path = [[] for _ in range(path_temp_len[max_len])] - for i in range(len(path)): - idx = np.where(naming == path[i][1])[0][0] - new_path[idx].append(path[i]) - - op = makeAverageDir(abspath) - - for i in range(len(new_path)): - arr = [] # np.zeros((len(new_path[i]), 2)) - fileName = [] - temp_path = new_path[i] - for j in range(len(temp_path)): - if not os.path.exists(os.path.join(temp_path[j][0], "freqAndAmp_" + temp_path[j][1] + ".h5")): - continue - else: - df = read_freq_and_amp_from_hdf5(temp_path[j][0], temp_path[j][1]) - arr.append(np.array([df["freq (events/min)"][0], df["amplitude"][0]])) - fileName.append(os.path.basename(temp_path[j][0])) - - arr = np.asarray(arr) - write_freq_and_amp_to_hdf5(op, arr, temp_path[j][1], index=fileName, columns=["freq (events/min)", "amplitude"]) - write_freq_and_amp_to_csv( - op, - arr, - "freqAndAmp_" + temp_path[j][1] + ".csv", - index=fileName, - columns=["freq (events/min)", "amplitude"], - ) - logger.info("Results for frequency and amplitude of transients in z-score data are 
combined.") - - def executeFindFreqAndAmp(inputParameters): logger.info("Finding transients in z-score data and calculating frequency and amplitude....") From 33a7054fd42f3ac44038550baded4a1bae97889c Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Mon, 26 Jan 2026 12:43:36 -0800 Subject: [PATCH 149/150] deleted temp markdowns --- step4_data_flow_analysis.md | 348 --------------- timestamp_correction_analysis.md | 723 ------------------------------- 2 files changed, 1071 deletions(-) delete mode 100644 step4_data_flow_analysis.md delete mode 100644 timestamp_correction_analysis.md diff --git a/step4_data_flow_analysis.md b/step4_data_flow_analysis.md deleted file mode 100644 index d86e938..0000000 --- a/step4_data_flow_analysis.md +++ /dev/null @@ -1,348 +0,0 @@ -# Step 4 (preprocess.py) Data Flow Analysis - -## Overview - -Step 4 processes timestamp-corrected photometry data and computes normalized signals (ΔF/F and z-scores). It handles artifact removal, data combination from multiple sessions, and generates quality control visualizations. - -## High-Level Data Flow - -```mermaid -flowchart TD - A[Entry: extractTsAndSignal] --> B{combine_data?} - - B -->|False| C[execute_timestamp_correction] - B -->|True| D[execute_timestamp_correction] - - C --> E[execute_zscore] - - D --> F[check_storeslistfile] - F --> G[combineData] - G --> H[execute_zscore] - - E --> I[Output: z_score, dff, cntrl_sig_fit HDF5 files] - H --> I - - style A fill:#e1f5ff - style I fill:#d4edda -``` - -## Main Processing Paths - -### Entry Point -**`extractTsAndSignal(inputParameters)`** (line 1178) is the main entry point called by the GUI or API. - -### Path 1: Normal Processing (combine_data = False) -1. `execute_timestamp_correction()` → Correct timestamps and align data -2. `execute_zscore()` → Compute z-scores and ΔF/F - -### Path 2: Combined Data Processing (combine_data = True) -1. `execute_timestamp_correction()` → Correct timestamps for each file -2. `check_storeslistfile()` → Merge store lists from multiple files -3. `combineData()` → Combine data from multiple recording sessions -4. `execute_zscore()` → Compute z-scores and ΔF/F on combined data - -## Detailed Processing Stages - -### Stage 1: Timestamp Correction - -```mermaid -flowchart LR - A[Raw HDF5 files] --> B[Read storesList.csv] - B --> C{isosbestic_control?} - C -->|No| D[add_control_channel] - C -->|Yes| E[timestampCorrection_tdt/csv] - D --> E - E --> F[Eliminate first N seconds] - F --> G[decide_naming_convention_and_applyCorrection] - G --> H[applyCorrection for each store] - H --> I{isosbestic_control?} - I -->|No| J[create_control_channel via curve fitting] - I -->|Yes| K[timeCorrection_*.hdf5 files] - J --> K - - style A fill:#e1f5ff - style K fill:#d4edda -``` - -#### Function: `execute_timestamp_correction(folderNames, inputParameters)` - -**Input:** -- Raw HDF5 files from extractors: `control_*.hdf5`, `signal_*.hdf5`, `event_*.hdf5` - -**Process:** -1. 
For each session folder: - - Read `storesList.csv` (mapping of raw names to semantic names) - - If no isosbestic control: `add_control_channel()` creates placeholder control files - - **`timestampCorrection_tdt()`** or **`timestampCorrection_csv()`**: - - Eliminates first N seconds (`timeForLightsTurnOn`) - - For TDT: expands timestamps from block timestamps + sampling rate - - For CSV: uses timestamps as-is - - Writes `timeCorrection_*.hdf5` with keys: `timestampNew`, `correctionIndex`, `sampling_rate` - - **`decide_naming_convention_and_applyCorrection()`**: - - For each store, calls `applyCorrection()` to crop data using `correctionIndex` - - For control/signal channels: crops data arrays - - For event channels: subtracts time offset from timestamps - - If no isosbestic control: **`create_control_channel()`** generates synthetic control via curve fitting - -**Output:** -- Timestamp-corrected HDF5 files with trimmed data -- `timeCorrection_*.hdf5` files containing corrected timestamps - -### Stage 2: Z-Score Computation - -```mermaid -flowchart TD - A[Timestamp-corrected HDF5] --> B[compute_z_score] - B --> C{removeArtifacts?} - - C -->|No| D[helper_z_score: full data] - C -->|Yes| E[helper_z_score: chunk-by-chunk] - - D --> F[filterSignal] - E --> F - - F --> G[controlFit: linear regression] - G --> H[deltaFF: compute ΔF/F] - H --> I[z_score_computation] - - I --> J{removeArtifacts?} - - J -->|No| K[Write z_score, dff, cntrl_sig_fit] - J -->|Yes| L{artifactsRemovalMethod?} - - L -->|concatenate| M[processTimestampsForArtifacts] - L -->|NaN| N[addingNaNtoChunksWithArtifacts] - - M --> K - N --> K - - K --> O[visualizeControlAndSignal] - - style A fill:#e1f5ff - style K fill:#d4edda - style O fill:#fff3cd -``` - -#### Function: `execute_zscore(folderNames, inputParameters)` - -**Input:** -- Timestamp-corrected HDF5 files - -**Process:** -1. 
For each output folder: - - **`compute_z_score(filepath, inputParameters)`**: - - For each control/signal pair: - - **`helper_z_score(control, signal, filepath, name, inputParameters)`**: - - **Without artifacts removal:** - - `execute_controlFit_dff()`: Filter signals → fit control to signal → compute ΔF/F - - `z_score_computation()`: Compute z-score from ΔF/F - - **With artifacts removal:** - - For each user-selected chunk (from `coordsForPreProcessing_*.npy`): - - If no isosbestic: `helper_create_control_channel()` creates synthetic control - - `execute_controlFit_dff()` on chunk - - Concatenate or NaN-fill between chunks - - `z_score_computation()` on processed data - - - Writes: `z_score_*.hdf5`, `dff_*.hdf5`, `cntrl_sig_fit_*.hdf5` - - **If artifacts removal with concatenate method:** - - **`processTimestampsForArtifacts()`**: - - `eliminateData()`: Concatenates good chunks, adjusts timestamps to be continuous - - `eliminateTs()`: Aligns event timestamps with new timeline - - Overwrites data files with concatenated versions - - **If artifacts removal with NaN method:** - - **`addingNaNtoChunksWithArtifacts()`**: - - `addingNaNValues()`: Replaces bad chunks with NaN - - `removeTTLs()`: Filters event timestamps to keep only valid times - - - **`visualizeControlAndSignal()`**: Plots control, signal, cntrl_sig_fit for QC - -**Output:** -- `z_score_*.hdf5` (z-scored signal) -- `dff_*.hdf5` (ΔF/F) -- `cntrl_sig_fit_*.hdf5` (fitted control channel) - -## Key Data Transformations - -### Signal Processing Pipeline - -```mermaid -flowchart LR - A[Raw Signal] --> B[filterSignal: Moving Average] - C[Raw Control] --> D[filterSignal: Moving Average] - - B --> E[controlFit: Linear Regression] - D --> E - - E --> F[control_fit = p0*control + p1] - F --> G[deltaFF] - - B --> G - - G --> H[ΔF/F = signal - control_fit / control_fit * 100] - H --> I[z_score_computation] - - I --> J{zscore_method?} - J -->|standard| K[z = ΔF/F - mean / std] - J -->|baseline| L[z = ΔF/F - baseline_mean / baseline_std] - J -->|robust| M[z = 0.6745 * ΔF/F - median / MAD] - - K --> N[Z-Score Output] - L --> N - M --> N - - style A fill:#e1f5ff - style C fill:#e1f5ff - style N fill:#d4edda -``` - -### Transformation Functions - -1. **`filterSignal(filter_window, signal)`** (line 822) - - Applies moving average filter with configurable window - - Uses `scipy.signal.filtfilt` for zero-phase filtering - -2. **`controlFit(control, signal)`** (line 815) - - Linear regression: fits control to signal - - Returns: `fitted_control = p[0] * control + p[1]` - -3. **`deltaFF(signal, control)`** (line 804) - - Formula: `((signal - control) / control) * 100` - - Computes normalized fluorescence change - -4. 
**`z_score_computation(dff, timestamps, inputParameters)`** (line 853) - - **Standard z-score:** `(ΔF/F - mean(ΔF/F)) / std(ΔF/F)` - - **Baseline z-score:** `(ΔF/F - mean(baseline)) / std(baseline)` - - **Robust z-score:** `0.6745 * (ΔF/F - median) / MAD` - -## Artifact Removal Workflow - -### Interactive Artifact Selection - -The `visualize()` function (line 469) provides an interactive matplotlib plot: -- **Space key:** Mark artifact boundary (vertical line drawn) -- **'d' key:** Delete last marked boundary -- **Close plot:** Save coordinates to `coordsForPreProcessing_*.npy` - -### Two Removal Methods - -**Concatenate Method:** -- Removes artifact chunks completely -- Concatenates good chunks end-to-end -- Adjusts timestamps to be continuous -- Event timestamps realigned to new timeline - -**NaN Method:** -- Replaces artifact chunks with NaN values -- Preserves original timeline -- Filters out event timestamps in artifact regions - -## Supporting Functions - -### Control Channel Creation - -**`helper_create_control_channel(signal, timestamps, window)`** (line 69) -- Used when no isosbestic control is available -- Applies Savitzky-Golay filter to signal -- Fits to exponential function: `f(x) = a + b * exp(-(1/c) * x)` -- Returns synthetic control channel - -### Data Combination - -**`combineData(folderNames, inputParameters, storesList)`** (line 1084) -- Merges data from multiple recording sessions -- Validates that sampling rates match across sessions -- Calls `processTimestampsForCombiningData()` to align timelines -- Saves combined data to first output folder - -### Coordinate Fetching - -**`fetchCoords(filepath, naming, data)`** (line 610) -- Reads `coordsForPreProcessing_*.npy` (artifact boundary coordinates) -- If file doesn't exist: uses `[0, data[-1]]` (entire recording) -- Validates even number of coordinates (pairs of boundaries) -- Returns reshaped array of coordinate pairs - -## File I/O Summary - -### Files Read - -| File Pattern | Content | Source | -|-------------|---------|--------| -| `control_*.hdf5` | Control channel data | Extractors (Step 3) | -| `signal_*.hdf5` | Signal channel data | Extractors (Step 3) | -| `event_*.hdf5` | Event timestamps | Extractors (Step 3) | -| `storesList.csv` | Channel name mapping | Step 2 | -| `coordsForPreProcessing_*.npy` | Artifact boundaries | User selection (optional) | - -### Files Written - -| File Pattern | Content | Keys | -|-------------|---------|------| -| `timeCorrection_*.hdf5` | Corrected timestamps | `timestampNew`, `correctionIndex`, `sampling_rate`, `timeRecStart` (TDT only) | -| `z_score_*.hdf5` | Z-scored signal | `data` | -| `dff_*.hdf5` | ΔF/F signal | `data` | -| `cntrl_sig_fit_*.hdf5` | Fitted control | `data` | -| `event_*_*.hdf5` | Corrected event timestamps | `ts` | - -## Key Parameters from inputParameters - -| Parameter | Purpose | Default/Options | -|-----------|---------|-----------------| -| `timeForLightsTurnOn` | Seconds to eliminate from start | 1 | -| `filter_window` | Moving average window size | 100 | -| `isosbestic_control` | Use isosbestic control channel? | True/False | -| `removeArtifacts` | Enable artifact removal? | True/False | -| `artifactsRemovalMethod` | How to handle artifacts | "concatenate" / "NaN" | -| `zscore_method` | Z-score computation method | "standard z-score" / "baseline z-score" / "robust z-score" | -| `baselineWindowStart` | Baseline window start (seconds) | 0 | -| `baselineWindowEnd` | Baseline window end (seconds) | 0 | -| `combine_data` | Combine multiple recordings? 
| True/False | - -## Architecture Notes for Refactoring - -### Current Coupling Issues - -1. **GUI Progress Tracking:** `writeToFile()` writes to `~/pbSteps.txt` for progress bar updates (lines 36-38, 1042, 1171, 1203, 1208, 1220) -2. **Interactive Plotting:** `visualize()` requires user interaction (matplotlib event handlers) -3. **File Path Assumptions:** Hard-coded path patterns (`*_output_*`, naming conventions) -4. **Mixed Responsibilities:** Single functions handle both computation and I/O - -### Recommended Separation Points - -**Backend Analysis Layer Should Include:** -- `filterSignal()` - pure signal processing -- `controlFit()` - pure regression -- `deltaFF()` - pure computation -- `z_score_computation()` - pure statistical computation -- `helper_create_control_channel()` - algorithmic control generation -- Core timestamp correction logic (separated from I/O) -- Core artifact removal logic (separated from I/O) - -**Data I/O Layer Should Include:** -- `read_hdf5()`, `write_hdf5()` - file operations -- Store list reading/writing -- Coordinate file handling -- HDF5 file discovery and path management - -**Frontend Visualization Layer Should Include:** -- `visualize()` - interactive artifact selection -- `visualizeControlAndSignal()` - QC plots -- `visualize_z_score()`, `visualize_dff()` - result visualization -- Progress tracking callbacks (replace `writeToFile()`) - -### Potential Refactoring Strategy - -1. **Extract pure computation functions** into a `signal_processing` module -2. **Create data models** (dataclasses) for: - - TimeCorrectionResult - - ProcessedSignal (with z_score, dff, control_fit) - - ArtifactRegions -3. **Separate I/O operations** into `io_utils` module with consistent interfaces -4. **Create processing pipelines** that accept data objects, return data objects -5. **Move visualization to separate module** with callbacks for progress/interaction -6. **Use dependency injection** for progress callbacks instead of hard-coded file writes diff --git a/timestamp_correction_analysis.md b/timestamp_correction_analysis.md deleted file mode 100644 index 121aa3f..0000000 --- a/timestamp_correction_analysis.md +++ /dev/null @@ -1,723 +0,0 @@ -# Timestamp Correction Module Analysis - -## Overview - -The `timestamp_correction.py` module handles the correction of timestamps for photometry data, including: -- Eliminating the first N seconds of recording (light stabilization period) -- Expanding TDT block timestamps into continuous timestamps -- Creating synthetic control channels when no isosbestic control is present -- Applying corrections to both data channels and event markers - -## Module Structure - -### Entry Point from preprocess.py - -```python -execute_timestamp_correction(folderNames, inputParameters) # preprocess.py:212 -``` - -This orchestrator loops through all session folders and calls functions in this module. - -## Two-Phase Control Channel Creation Pattern - -### Understanding add_control_channel vs create_control_channel - -These two functions work together in a **two-phase process** to handle synthetic control channel generation. They are **not redundant** but serve distinct purposes: - -#### Phase 1: `add_control_channel` (Called BEFORE timestamp correction) - -**Execution:** Line 229 in `execute_timestamp_correction` - -**Purpose:** Create **PLACEHOLDER** control files to satisfy workflow requirements - -**What it does:** -1. Validates that if `isosbestic_control=False`, no real control channels exist -2. 
For each signal channel without a matching control: - - Copies the raw signal HDF5 file to `cntrl{i}.hdf5` (placeholder) - - Adds entry to storesList: `[["cntrl{i}"], ["control_{region}"]]` -3. Saves updated `storesList.csv` - -**Files created:** -- `cntrl0.hdf5`, `cntrl1.hdf5`, etc. (copies of **RAW** signal data) -- Updated `storesList.csv` with placeholder entries - -**Why it's needed:** -- Timestamp correction workflow expects **paired** control/signal channels in storesList -- Without placeholders, the pairing logic in `timestampCorrection_xxx` and `check_cntrl_sig_length` would fail -- The placeholder **data is never actually used** - it just satisfies structural requirements - -#### Phase 2: `create_control_channel` (Called AFTER timestamp correction) - -**Execution:** Line 243 in `execute_timestamp_correction` - -**Purpose:** Generate **ACTUAL** synthetic control via curve fitting and overwrite placeholders - -**What it does:** -1. Looks for placeholder files (checks: `"control" in event_name.lower() and "cntrl" in event.lower()`) -2. Reads the **CORRECTED** signal data: `signal_{region}.hdf5` (after timestamp correction) -3. Calls `helper_create_control_channel()` to: - - Apply Savitzky-Golay filter to cleaned signal - - Fit to exponential function: `f(x) = a + b * exp(-(1/c) * x)` -4. **OVERWRITES** the placeholder `control_{region}.hdf5` with real synthetic control -5. Also exports to CSV format (legacy) - -**Files written:** -- `control_{region}.hdf5` → `data` (replaces placeholder with curve-fitted control) -- `{raw_name}.csv` (timestamps, data, sampling_rate columns) - -**Why it's separate:** -- Requires **timestamp-corrected** signal data (doesn't exist until after lines 232-239) -- Curve fitting algorithm needs clean timestamps (first N seconds eliminated) -- Cannot be done before timestamp correction without re-correcting the synthetic control - -#### Execution Timeline - -```python -# When isosbestic_control == False: - -# ========== PHASE 1: BEFORE TIMESTAMP CORRECTION ========== -# Line 229: Create placeholders (just file copies) -storesList = add_control_channel(filepath, storesList) -# Result: storesList now has paired structure -# [["Dv1A", "cntrl0"], ["signal_dms", "control_dms"]] -# Files: cntrl0.hdf5 (copy of raw signal, never used) - -# ========== TIMESTAMP CORRECTION PHASE ========== -# Lines 232-234: Process both signal AND placeholder control -timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) -# Result: Creates timeCorrection_dms.hdf5 with correctionIndex - -# Lines 236-239: Apply corrections to all channels -decide_naming_convention_and_applyCorrection(...) -# Result: signal_dms.hdf5 now contains corrected signal data -# control_dms.hdf5 still contains uncorrected placeholder copy - -# ========== PHASE 2: AFTER TIMESTAMP CORRECTION ========== -# Line 243: Generate REAL synthetic controls -create_control_channel(filepath, storesList, window=101) -# Result: control_dms.hdf5 OVERWRITTEN with curve-fitted synthetic control -# Now contains valid control data derived from corrected signal -``` - -#### Why This Design Exists - -This is a **chicken-and-egg problem solved with placeholders:** - -1. **Requirement:** Timestamp correction expects paired control/signal channels -2. **Constraint:** Synthetic control generation requires timestamp-corrected signal data -3. 
**Solution:** Create dummy placeholders → correct everything → replace placeholders with real data - -#### Visual Flow - -```mermaid -flowchart TD - A[isosbestic_control = False] --> B[add_control_channel] - B --> C[Copy signal.hdf5 to cntrl0.hdf5] - C --> D[Update storesList.csv] - - D --> E[timestampCorrection_xxx] - E --> F[Creates timeCorrection_dms.hdf5] - - F --> G[decide_naming_convention_and_applyCorrection] - G --> H[Corrects signal_dms.hdf5] - G --> I[Corrects control_dms.hdf5
still contains placeholder] - - I --> J[create_control_channel] - J --> K[Read corrected signal_dms.hdf5] - K --> L[helper_create_control_channel
curve fit] - L --> M[OVERWRITE control_dms.hdf5
with synthetic control] - - style C fill:#fff3cd - style I fill:#fff3cd - style M fill:#d4edda -``` - -#### Refactoring Opportunity - -This placeholder pattern is a **code smell** indicating potential design improvements: - -**Issues:** -1. **Unnecessary I/O:** Placeholder files are written and then overwritten -2. **Confusing flow:** Hard to understand that placeholders are temporary -3. **Tight coupling:** Timestamp correction assumes paired files exist -4. **Wasted computation:** Placeholder controls get timestamp-corrected unnecessarily - -**Potential Improvements:** - -**Option 1: Lazy Control Creation** -- Modify timestamp correction to handle missing controls gracefully -- Only create synthetic controls after all corrections complete -- Remove placeholder file creation entirely - -**Option 2: Data Structure Refactoring** -- Use a data structure that doesn't require physical paired files upfront -- Track "needs synthetic control" as metadata rather than file presence -- Generate and write controls only once at the end - -**Option 3: Two-Pass Workflow** -- First pass: Correct only signal channels -- Second pass: Generate synthetic controls from corrected signals -- Would require refactoring `check_cntrl_sig_length` and pairing logic - -## Function Catalog - -### 1. add_control_channel -**Location:** `timestamp_correction.py:20` -**Purpose:** Create placeholder control channel files when no isosbestic control exists - -```python -def add_control_channel(filepath, arr) -> arr -``` - -**Input:** -- `filepath`: Path to session output folder -- `arr`: 2D array `[[storenames], [storesList]]` from storesList.csv - -**Process:** -1. Validates that control/signal pairs match (raises error if mismatched) -2. For each signal channel without a matching control: - - Copies signal HDF5 file to `cntrl{i}.hdf5` (placeholder) - - Adds entry to storesList array: `[["cntrl{i}"], ["control_{region}"]]` -3. Writes updated storesList.csv - -**Output:** -- Updated `arr` with new control channel entries -- **Files Written:** Updated `storesList.csv`, copied `cntrl*.hdf5` files - -**I/O Summary:** -- **Reads:** Signal HDF5 files (via shutil.copyfile) -- **Writes:** `storesList.csv`, placeholder `cntrl*.hdf5` files - ---- - -### 2. timestampCorrection_csv -**Location:** `timestamp_correction.py:65` -**Purpose:** Correct timestamps for CSV-format data (Doric, NPM, custom CSV) - -```python -def timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList) -``` - -**Input:** -- `filepath`: Path to session output folder -- `timeForLightsTurnOn`: Seconds to eliminate from start (default: 1) -- `storesList`: 2D array `[[storenames], [storesList]]` - -**Process:** -1. Filters storesList to control/signal channels only -2. Pairs control/signal channels, validates naming matches -3. Calls `check_cntrl_sig_length()` to determine which channel to use (shorter one) -4. For each control/signal pair: - - **Reads:** `timestamps` and `sampling_rate` from raw HDF5 - - **Computes:** `correctionIndex = np.where(timestamp >= timeForLightsTurnOn)` - - **Writes:** `timeCorrection_{region}.hdf5` with keys: - - `timestampNew`: Corrected timestamps - - `correctionIndex`: Indices to keep - - `sampling_rate`: Sampling rate - -**Output:** -- **Files Written:** `timeCorrection_{region}.hdf5` for each control/signal pair - -**I/O Summary:** -- **Reads:** `{storename}.hdf5` → `timestamps`, `sampling_rate` -- **Writes:** `timeCorrection_{region}.hdf5` → `timestampNew`, `correctionIndex`, `sampling_rate` - ---- - -### 3. 
timestampCorrection_tdt -**Location:** `timestamp_correction.py:115` -**Purpose:** Correct timestamps for TDT-format data (expands block timestamps) - -```python -def timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) -``` - -**Input:** Same as `timestampCorrection_csv` - -**Process:** -1. Filters storesList to control/signal channels only -2. Pairs control/signal channels, validates naming matches -3. Calls `check_cntrl_sig_length()` to determine which channel to use -4. For each control/signal pair: - - **Reads:** `timestamps`, `npoints`, `sampling_rate` from raw HDF5 - - **TDT-specific expansion algorithm:** - ```python - timeRecStart = timestamp[0] - timestamps = np.subtract(timestamp, timeRecStart) # Zero-base - adder = np.arange(npoints) / sampling_rate # Within-block offsets - # Expand: for each block timestamp, add within-block offsets - timestampNew = np.zeros((len(timestamps), lengthAdder)) - for i in range(lengthAdder): - timestampNew[:, i] = np.add(timestamps, adder[i]) - timestampNew = (timestampNew.T).reshape(-1, order="F") # Flatten - correctionIndex = np.where(timestampNew >= timeForLightsTurnOn) - timestampNew = timestampNew[correctionIndex] - ``` - - **Writes:** `timeCorrection_{region}.hdf5` with keys: - - `timeRecStart`: Recording start time (TDT-specific) - - `timestampNew`: Expanded, corrected timestamps - - `correctionIndex`: Indices to keep - - `sampling_rate`: Sampling rate - -**Output:** -- **Files Written:** `timeCorrection_{region}.hdf5` with TDT-specific `timeRecStart` key - -**I/O Summary:** -- **Reads:** `{storename}.hdf5` → `timestamps`, `npoints`, `sampling_rate` -- **Writes:** `timeCorrection_{region}.hdf5` → `timeRecStart`, `timestampNew`, `correctionIndex`, `sampling_rate` - ---- - -### 4. check_cntrl_sig_length -**Location:** `timestamp_correction.py:273` -**Purpose:** Determine which channel (control or signal) to use as reference based on length - -```python -def check_cntrl_sig_length(filepath, channels_arr, storenames, storesList) -> indices -``` - -**Input:** -- `filepath`: Path to session output folder -- `channels_arr`: Paired control/signal array `[["control_A", "control_B"], ["signal_A", "signal_B"]]` -- `storenames`: Raw HDF5 filenames -- `storesList`: Semantic channel names - -**Process:** -1. For each control/signal pair: - - **Reads:** `data` from both control and signal HDF5 - - Compares lengths: `control.shape[0]` vs `signal.shape[0]` - - Returns the shorter one's storename (or signal if equal) - -**Output:** -- List of storenames to use for timestamp correction (one per pair) - -**I/O Summary:** -- **Reads:** `{control_storename}.hdf5` → `data`, `{signal_storename}.hdf5` → `data` - -**Note:** This is a pure analysis function but performs I/O to determine which data to use. - ---- - -### 5. decide_naming_convention_and_applyCorrection -**Location:** `timestamp_correction.py:178` -**Purpose:** Loop through all channels and apply timestamp corrections - -```python -def decide_naming_convention_and_applyCorrection(filepath, timeForLightsTurnOn, event, displayName, storesList) -``` - -**Input:** -- `filepath`: Path to session output folder -- `timeForLightsTurnOn`: Seconds eliminated from start -- `event`: Raw storename (e.g., "Dv1A") -- `displayName`: Semantic name (e.g., "control_DMS") -- `storesList`: Full storesList array - -**Process:** -1. Filters storesList to control/signal channels -2. Pairs channels and validates naming conventions -3. 
For each pair, calls `applyCorrection(filepath, timeForLightsTurnOn, event, displayName, region)` - -**Output:** -- Delegates to `applyCorrection()` (no direct I/O) - ---- - -### 6. applyCorrection -**Location:** `timestamp_correction.py:205` -**Purpose:** Apply timestamp corrections to data channels or event markers - -```python -def applyCorrection(filepath, timeForLightsTurnOn, event, displayName, naming) -``` - -**Input:** -- `filepath`: Path to session output folder -- `timeForLightsTurnOn`: Seconds eliminated from start -- `event`: Raw storename -- `displayName`: Semantic display name -- `naming`: Region identifier (e.g., "dms") - -**Process:** - -**For Control/Signal Channels:** -1. **Reads:** `timeCorrection_{naming}.hdf5` → `correctionIndex` -2. **Reads:** `{event}.hdf5` → `data` -3. **Applies:** `arr = arr[correctionIndex]` (crops data) -4. **Writes:** `{displayName}.hdf5` → `data` (overwrites with corrected data) - -**For Event Channels:** -1. Detects TDT format: `check_TDT(os.path.dirname(filepath))` -2. **Reads:** `timeCorrection_{naming}.hdf5` → `timeRecStart` (if TDT) -3. **Reads:** `{event}.hdf5` → `timestamps` -4. **Applies corrections:** - - If TDT and timestamps >= timeRecStart: subtract both `timeRecStart` and `timeForLightsTurnOn` - - Otherwise: subtract only `timeForLightsTurnOn` -5. **Writes:** `{event}_{naming}.hdf5` → `ts` (corrected event timestamps) - -**Output:** -- **Files Written:** - - `{displayName}.hdf5` → `data` (for control/signal) - - `{event}_{naming}.hdf5` → `ts` (for events) - -**I/O Summary:** -- **Reads:** `timeCorrection_{naming}.hdf5`, `{event}.hdf5` -- **Writes:** `{displayName}.hdf5` or `{event}_{naming}.hdf5` - ---- - -### 7. create_control_channel -**Location:** `timestamp_correction.py:247` -**Purpose:** Generate synthetic control channel using curve fitting (when no isosbestic control exists) - -```python -def create_control_channel(filepath, arr, window=5001) -``` - -**Input:** -- `filepath`: Path to session output folder -- `arr`: storesList array `[[storenames], [storesList]]` -- `window`: Savitzky-Golay filter window (default: 5001) - -**Process:** -1. Loops through storesList to find placeholder control channels (`cntrl` in storename) -2. 
For each placeholder: - - **Reads:** `signal_{region}.hdf5` → `data` (corrected signal) - - **Reads:** `timeCorrection_{region}.hdf5` → `timestampNew`, `sampling_rate` - - **Calls:** `helper_create_control_channel(signal, timestampNew, window)` from `control_channel.py` - - Applies Savitzky-Golay filter - - Fits to exponential: `f(x) = a + b * exp(-(1/c) * x)` - - **Writes:** `{control_name}.hdf5` → `data` (synthetic control) - - **Writes:** `{event_name}.csv` with columns: `timestamps`, `data`, `sampling_rate` - -**Output:** -- **Files Written:** - - `control_{region}.hdf5` → `data` (replaces placeholder) - - `{raw_name}.csv` (legacy format export) - -**I/O Summary:** -- **Reads:** `signal_{region}.hdf5` → `data`, `timeCorrection_{region}.hdf5` → `timestampNew`, `sampling_rate` -- **Writes:** `control_{region}.hdf5` → `data`, `{raw_name}.csv` - ---- - -## Data Flow Diagram - -### High-Level Flow (called from execute_timestamp_correction) - -```mermaid -flowchart TD - A[execute_timestamp_correction] --> B[Read storesList.csv] - B --> C{isosbestic_control?} - - C -->|False| D[add_control_channel] - C -->|True| E{Check format} - D --> E - - E -->|TDT| F[timestampCorrection_tdt] - E -->|CSV/Doric/NPM| G[timestampCorrection_csv] - - F --> H[Loop: decide_naming_convention_and_applyCorrection] - G --> H - - H --> I[For each store: applyCorrection] - - I --> J{isosbestic_control?} - J -->|False| K[create_control_channel] - J -->|True| L[Done] - K --> L - - style A fill:#e1f5ff - style L fill:#d4edda -``` - -### Detailed Flow: timestampCorrection Functions - -```mermaid -flowchart LR - A[Raw HDF5 files] --> B[check_cntrl_sig_length] - B --> C[Read control & signal data] - C --> D[Return shorter channel name] - - D --> E{Format?} - E -->|CSV| F[timestampCorrection_csv] - E -->|TDT| G[timestampCorrection_tdt] - - F --> H[Read timestamps from selected channel] - G --> I[Read timestamps, npoints, sampling_rate] - - H --> J[correctionIndex = where >= timeForLightsTurnOn] - I --> K[Expand block timestamps] - K --> J - - J --> L[Write timeCorrection_{region}.hdf5] - - style A fill:#e1f5ff - style L fill:#d4edda -``` - -### Detailed Flow: applyCorrection - -```mermaid -flowchart TD - A[applyCorrection called] --> B{Channel type?} - - B -->|control/signal| C[Read correctionIndex] - B -->|event| D[Read event timestamps] - - C --> E[Read raw data] - E --> F[data = data correctionIndex] - F --> G[Write displayName.hdf5] - - D --> H{TDT format?} - H -->|Yes| I[Read timeRecStart] - H -->|No| J[ts -= timeForLightsTurnOn] - - I --> K[ts -= timeRecStart] - K --> J - J --> L[Write event_region.hdf5] - - style A fill:#e1f5ff - style G fill:#d4edda - style L fill:#d4edda -``` - -### Detailed Flow: Control Channel Creation - -```mermaid -flowchart LR - A[add_control_channel] --> B[For each signal without control] - B --> C[Copy signal.hdf5 to cntrl_i.hdf5] - C --> D[Update storesList.csv] - - D --> E[... timestamp correction ...] 
- - E --> F[create_control_channel] - F --> G[For each cntrl_i placeholder] - G --> H[Read signal_{region}.hdf5] - H --> I[helper_create_control_channel] - I --> J[Savitzky-Golay filter] - J --> K[Curve fit to exponential] - K --> L[Write control_{region}.hdf5] - L --> M[Export to CSV] - - style A fill:#fff3cd - style M fill:#d4edda -``` - -## Execution Order in execute_timestamp_correction - -```python -# preprocess.py:212-247 -for each session in folderNames: - for each output_folder in session: - # Step 1: Read metadata - storesList = np.genfromtxt("storesList.csv") - - # Step 2: Add placeholder controls if needed - if isosbestic_control == False: - storesList = add_control_channel(filepath, storesList) - - # Step 3: Compute correctionIndex and timestampNew - if check_TDT(folderName): - timestampCorrection_tdt(filepath, timeForLightsTurnOn, storesList) - else: - timestampCorrection_csv(filepath, timeForLightsTurnOn, storesList) - - # Step 4: Apply corrections to all channels/events - for each store in storesList: - decide_naming_convention_and_applyCorrection( - filepath, timeForLightsTurnOn, storename, displayName, storesList - ) - # ^ This calls applyCorrection for each channel - - # Step 5: Generate synthetic controls via curve fitting - if isosbestic_control == False: - create_control_channel(filepath, storesList, window=101) -``` - -## File I/O Summary - -### Files Read - -| Function | Files Read | Keys | -|----------|-----------|------| -| `add_control_channel` | `signal_*.hdf5` (for copying) | - | -| `timestampCorrection_csv` | `{storename}.hdf5` | `timestamps`, `sampling_rate` | -| `timestampCorrection_tdt` | `{storename}.hdf5` | `timestamps`, `npoints`, `sampling_rate` | -| `check_cntrl_sig_length` | `control_*.hdf5`, `signal_*.hdf5` | `data` | -| `applyCorrection` | `timeCorrection_{region}.hdf5`
`{event}.hdf5` | `correctionIndex`, `timeRecStart` (TDT)
`data` or `timestamps` | -| `create_control_channel` | `signal_{region}.hdf5`
`timeCorrection_{region}.hdf5` | `data`
`timestampNew`, `sampling_rate` | - -### Files Written - -| Function | Files Written | Keys | Notes | -|----------|--------------|------|-------| -| `add_control_channel` | `storesList.csv`
`cntrl{i}.hdf5` | -
(copy of signal) | Placeholder files | -| `timestampCorrection_csv` | `timeCorrection_{region}.hdf5` | `timestampNew`, `correctionIndex`, `sampling_rate` | One per region | -| `timestampCorrection_tdt` | `timeCorrection_{region}.hdf5` | `timeRecStart`, `timestampNew`, `correctionIndex`, `sampling_rate` | TDT-specific | -| `applyCorrection` | `{displayName}.hdf5`
`{event}_{region}.hdf5` | `data`
`ts` | Overwrites with corrected data | -| `create_control_channel` | `control_{region}.hdf5`
`{raw_name}.csv` | `data`
timestamps, data, sampling_rate | Replaces placeholder | - -## Key Transformations - -### 1. Timestamp Expansion (TDT only) - -**Input:** Block timestamps (one per acquisition block) -**Algorithm:** -```python -timeRecStart = timestamp[0] -timestamps = timestamp - timeRecStart # Zero-base -adder = np.arange(npoints) / sampling_rate # Within-block offsets [0, 1/fs, 2/fs, ...] -# Matrix multiplication to expand: -timestampNew = zeros((n_blocks, npoints)) -for i in range(npoints): - timestampNew[:, i] = timestamps + adder[i] -timestampNew = timestampNew.T.reshape(-1, order='F') # Column-major flatten -``` -**Output:** Continuous timestamps at full sampling rate - -### 2. Correction Index Computation - -**Input:** Timestamps array, `timeForLightsTurnOn` -**Algorithm:** -```python -correctionIndex = np.where(timestamp >= timeForLightsTurnOn)[0] -``` -**Output:** Indices of timestamps to keep (after eliminating first N seconds) - -### 3. Data Cropping - -**Applied to:** Control/signal data channels -**Algorithm:** -```python -data_corrected = data[correctionIndex] -``` - -### 4. Event Timestamp Adjustment - -**Applied to:** Event markers (TTL pulses) -**Algorithm:** -```python -# CSV format: -ts_corrected = ts - timeForLightsTurnOn - -# TDT format (if ts >= timeRecStart): -ts_corrected = ts - timeRecStart - timeForLightsTurnOn -``` - -### 5. Synthetic Control Generation - -**Input:** Signal channel (already corrected) -**Algorithm:** -1. Apply Savitzky-Golay filter: `filtered_signal = savgol_filter(signal, window, polyorder=3)` -2. Curve fit to exponential: `control = a + b * exp(-(1/c) * t)` -3. Return fitted curve as synthetic control - -## Analysis for I/O Separation - -### Pure Analysis Functions (Minimal I/O) -These could be extracted with I/O injected: -- ❌ None - all functions perform substantial I/O - -### Orchestration Functions (Heavy I/O, Light Analysis) -These coordinate reading/writing and delegate computation: -- `add_control_channel` - File copying and CSV writing -- `decide_naming_convention_and_applyCorrection` - Loops and delegates -- `create_control_channel` - Orchestrates read → process → write - -### Mixed Functions (I/O + Analysis) -These perform both I/O and computation inline: -- `timestampCorrection_csv` - Reads data, computes correctionIndex, writes results -- `timestampCorrection_tdt` - Reads data, expands timestamps, computes correctionIndex, writes -- `applyCorrection` - Reads multiple files, applies transformations, writes -- `check_cntrl_sig_length` - Reads data just to compare lengths - -## Refactoring Recommendations for I/O Separation - -### Option 1: Extract Pure Computation Functions - -Create new pure functions: -```python -# Pure analysis (no I/O) -def compute_correction_index(timestamps, timeForLightsTurnOn): - return np.where(timestamps >= timeForLightsTurnOn)[0] - -def expand_tdt_timestamps(block_timestamps, npoints, sampling_rate): - # TDT expansion algorithm - ... - return expanded_timestamps - -def crop_data_by_index(data, correctionIndex): - return data[correctionIndex] - -def adjust_event_timestamps(ts, timeRecStart, timeForLightsTurnOn, is_tdt): - # Event adjustment logic - ... - return adjusted_ts -``` - -Then modify existing functions to use these pure functions, keeping I/O separate. - -### Option 2: Reader/Writer Pattern - -Create dedicated I/O classes: -```python -class TimestampCorrectionReader: - def read_raw_timestamps(self, filepath, storename): - ... - - def read_correction_data(self, filepath, region): - ... 
- -class TimestampCorrectionWriter: - def write_correction_file(self, filepath, region, data): - ... - - def write_corrected_data(self, filepath, displayName, data): - ... -``` - -### Option 3: Data Class Pattern - -Return data objects instead of writing directly: -```python -@dataclass -class TimestampCorrection: - timestampNew: np.ndarray - correctionIndex: np.ndarray - sampling_rate: float - timeRecStart: Optional[float] = None # TDT only - -def timestampCorrection_tdt(...) -> TimestampCorrection: - # Compute all values - return TimestampCorrection( - timestampNew=..., - correctionIndex=..., - sampling_rate=..., - timeRecStart=... - ) - -# Separate writer function -def write_timestamp_correction(filepath, region, correction: TimestampCorrection): - write_hdf5(correction.timestampNew, f"timeCorrection_{region}", filepath, "timestampNew") - # ... etc -``` - -## Current I/O Patterns to Refactor - -1. **Inline writes in computation functions:** - - `timestampCorrection_csv` and `timestampCorrection_tdt` compute AND write - - Should separate: compute → return data → write in caller - -2. **Reading for validation only:** - - `check_cntrl_sig_length` reads full data arrays just to compare shapes - - Could be optimized to read only array metadata/shapes - -3. **Side-effect file creation:** - - `add_control_channel` creates files as side effect - - `create_control_channel` both generates data AND writes multiple formats (HDF5 + CSV) - -4. **Mixed responsibilities in applyCorrection:** - - Handles both control/signal cropping AND event timestamp adjustment - - Could be split into two separate functions From 543ddfded2023a43ac9c38601321135b76d495b3 Mon Sep 17 00:00:00 2001 From: pauladkisson Date: Thu, 5 Feb 2026 15:00:52 -0800 Subject: [PATCH 150/150] propagate iloc fix --- src/guppy/analysis/transients_average.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/guppy/analysis/transients_average.py b/src/guppy/analysis/transients_average.py index 3b8dd79..9e6d372 100644 --- a/src/guppy/analysis/transients_average.py +++ b/src/guppy/analysis/transients_average.py @@ -66,7 +66,7 @@ def averageForGroup(folderNames, inputParameters): continue else: df = read_freq_and_amp_from_hdf5(temp_path[j][0], temp_path[j][1]) - arr.append(np.array([df["freq (events/min)"][0], df["amplitude"][0]])) + arr.append(np.array([df["freq (events/min)"].iloc[0], df["amplitude"].iloc[0]])) fileName.append(os.path.basename(temp_path[j][0])) arr = np.asarray(arr)
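A note on the `.iloc` change in [PATCH 150/150] above: the frames written by `write_freq_and_amp_to_hdf5` and returned by `read_freq_and_amp_from_hdf5` are indexed by session folder name (`index=fileName`), so `df["freq (events/min)"][0]` asks pandas for the *label* 0 and only succeeds through the deprecated integer-position fallback. Switching to `.iloc[0]` makes the positional intent explicit. A minimal sketch of the difference, with an invented index value and invented numbers:

```python
import pandas as pd

# One-row frame of the shape read_freq_and_amp_from_hdf5 returns: indexed by
# the session folder name, not by integers (index value and numbers made up).
df = pd.DataFrame(
    [[3.2, 1.7]],
    index=["Animal1_output_1"],
    columns=["freq (events/min)", "amplitude"],
)

# df["amplitude"][0] would look up the *label* 0; on a string index that only
# works via pandas' deprecated positional fallback (FutureWarning, and a
# KeyError once the fallback is removed).
freq = df["freq (events/min)"].iloc[0]  # explicit positional lookup
amp = df["amplitude"].iloc[0]
print(freq, amp)  # 3.2 1.7
```

`.iloc` stays positional no matter how the frame is indexed, which is what this aggregation loop needs when it pulls the single row out of each per-session file.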