From cfcd20c6a6c5091ebbf6303ba9f65d44fcac338a Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Mon, 29 Apr 2024 21:56:37 -0400 Subject: [PATCH 01/15] add FileNotFoundError to load_TS --- pydfc/data_loader.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pydfc/data_loader.py b/pydfc/data_loader.py index 84e7713..e14b2fc 100644 --- a/pydfc/data_loader.py +++ b/pydfc/data_loader.py @@ -363,9 +363,15 @@ def load_TS( if "{run}" in file_name: assert run is not None, "run must be provided" TS_file = TS_file.replace("{run}", run) - time_series = np.load( - f"{data_root}/{subj_fldr}/{TS_file}", allow_pickle="True" - ).item() + + try: + time_series = np.load( + f"{data_root}/{subj_fldr}/{TS_file}", allow_pickle="True" + ).item() + except FileNotFoundError: + print(f"File {TS_file} not found for {subj}") + continue + if TS[session] is None: TS[session] = time_series else: From a63fc97253a61763a0d41d1ce695040b05c2f4a0 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Fri, 17 May 2024 22:58:17 -0400 Subject: [PATCH 02/15] change subj_lvl_dFC_assess --- pydfc/multi_analysis.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pydfc/multi_analysis.py b/pydfc/multi_analysis.py index 3a28a6b..437aa2c 100644 --- a/pydfc/multi_analysis.py +++ b/pydfc/multi_analysis.py @@ -231,30 +231,34 @@ def group_dFC_assess(self, time_series_dict): return OUT - def subj_lvl_dFC_assess(self, time_series_dict): + def subj_lvl_dFC_assess(self, time_series): + """ + time_series can be a dict of time_series or a single time_series + if it is a dict, the time_series with key `measure.params["session"]` + will be used + """ - # time_series_dict is a dict of time_series + if isinstance(time_series, dict): + if not measure.params["session"] in time_series: + raise ValueError( + f"session {measure.params['session']} is not in time_series" + ) + else: + time_series = time_series[measure.params["session"]] dFC_dict = {} - # dFC_corr_assess_dict = {} if self.params["n_jobs"] is None: dFC_lst = list() for measure in self.MEASURES_fit_lst_: - dFC_lst.append( - measure.estimate_dFC( - time_series=time_series_dict[measure.params["session"]] - ) - ) + dFC_lst.append(measure.estimate_dFC(time_series=time_series)) else: dFC_lst = Parallel( n_jobs=self.params["n_jobs"], verbose=self.params["verbose"], backend=self.params["backend"], )( - delayed(measure.estimate_dFC)( - time_series=time_series_dict[measure.params["session"]] - ) + delayed(measure.estimate_dFC)(time_series=time_series) for measure in self.MEASURES_fit_lst_ ) From 7323961b8b01bd429ea9e7b4533029f5d76225f1 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Sun, 9 Jun 2024 22:05:29 -0400 Subject: [PATCH 03/15] minor fix in TS visualize --- pydfc/time_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydfc/time_series.py b/pydfc/time_series.py index 0d0e3ac..add9b71 100644 --- a/pydfc/time_series.py +++ b/pydfc/time_series.py @@ -424,7 +424,7 @@ def visualize( interval = list(range(start, end)) if nodes_lst is None: - nodes_lst = self.nodes_lst + nodes_lst = self.nodes_lst[:, np.newaxis] else: nodes_lst = np.array(nodes_lst)[:, np.newaxis] From 223a0c497ec713ddd29ce7a16396653a3dc21fcb Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Thu, 20 Jun 2024 20:19:29 -0400 Subject: [PATCH 04/15] minor change in dFC class --- pydfc/dfc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydfc/dfc.py b/pydfc/dfc.py index b5ba597..ee49cf0 100644 --- a/pydfc/dfc.py +++ b/pydfc/dfc.py @@ -287,7 +287,7 @@ def visualize_dFC( threshold=0.0, fix_lim=False, save_image=False, - fig_name=None, + output_root=None, ): assert not self.measure is None, "Measure is not provided." @@ -321,5 +321,5 @@ def visualize_dFC( cmap=cmap, center_0=center_0, save_image=save_image, - output_root=fig_name, + output_root=output_root, ) From 42dfe8f9f52814dc589ad20ae166098b3e182a76 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Wed, 17 Jul 2024 10:00:59 -0400 Subject: [PATCH 05/15] fix time array handling in TS --- pydfc/time_series.py | 112 +++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 53 deletions(-) diff --git a/pydfc/time_series.py b/pydfc/time_series.py index add9b71..7eab737 100644 --- a/pydfc/time_series.py +++ b/pydfc/time_series.py @@ -6,11 +6,11 @@ """ import os +import warnings from copy import deepcopy import matplotlib.pyplot as plt import numpy as np -from requests import session from scipy import signal from .dfc_utils import print_dict @@ -49,9 +49,8 @@ def __init__( """ subj_id is an id to identify the subjects all properties are applied to every subject separately - for instance interval applies to TS of each subj separately - time_array of all subjects must be equal + It is recommended that the time_array of all subjects is equal """ assert ( @@ -76,22 +75,17 @@ def __init__( self.TS_name_ = TS_name self.session_name_ = session_name self.n_regions_ = data.shape[0] - self.n_time_ = data.shape[1] - - # assert self.n_regions_ < self.n_time_, \ - # "Probably you have to transpose the time_series." if time_array is None: - self.time_array_ = 1 / self.Fs_ + np.arange( + self.data_dict_[subj_id]["time_array"] = 1 / self.Fs_ + np.arange( 0, data.shape[1] / self.Fs_, 1 / self.Fs_ ) else: - self.time_array_ = time_array + self.data_dict_[subj_id]["time_array"] = time_array self.locs_ = locs self.node_labels_ = node_labels - self.interval_ = np.arange(0, self.n_time_, dtype=int) self.nodes_selection_ = list(range(self.n_regions_)) @property @@ -110,7 +104,6 @@ def info_dict(self): info_dict["node_labels"] = self.node_labels info_dict["nodes_locs"] = self.locs info_dict["subj_id_lst"] = self.subj_id_lst - info_dict["interval"] = self.interval info_dict["time"] = self.time return info_dict @@ -146,11 +139,6 @@ def nodes_lst(self): # output shape is (n_region,) return np.array(self.nodes_selection_) - @property - def interval(self): - # output shape is (n_time,) - return self.interval_ - @property def locs(self): """ @@ -174,7 +162,11 @@ def Fs(self): @property def n_time(self): - return len(self.time) + # if there are multiple subjects, the time_array is ambiguous, so None is returned + if len(self.subj_id_lst) > 1: + return None + else: + return len(self.time) @property def n_regions(self): @@ -182,7 +174,11 @@ def n_regions(self): @property def time(self): - return self.time_array_[self.interval] + # if there are multiple subjects, the time_array is ambiguous, so None is returned + if len(self.subj_id_lst) > 1: + return None + else: + return self.data_dict[self.subj_id_lst[0]]["time_array"] @property def TS_name(self): @@ -231,7 +227,11 @@ def append_ts(self, new_time_series, time_array=None, subj_id=None): self.data_dict_[subj_id] = {} if not time_array is None: - assert self.time_array_ == time_array, "time array mismatch!" + self.data_dict_[subj_id]["time_array"] = time_array + else: + self.data_dict_[subj_id]["time_array"] = 1 / self.Fs_ + np.arange( + 0, new_time_series.shape[1] / self.Fs_, 1 / self.Fs_ + ) self.data_dict_[subj_id]["data"] = new_time_series @@ -241,12 +241,14 @@ def concat_ts(self, new_TS): """ concatenate another Time Series obj to the current one. + It assumes that the new TS has only one subject """ assert self.Fs == new_TS.Fs, "Fs mismatch!" assert len(self.node_labels) == len(new_TS.node_labels), "node_labels mismatch!" for i in range(len(self.node_labels)): assert self.node_labels[i] == new_TS.node_labels[i], "node_labels mismatch!" assert np.all(self.locs[i, :] == new_TS.locs[i, :]), "locs mismatch!" + assert len(new_TS.subj_id_lst) == 1, "new_TS must have only one subject." self.append_ts(new_time_series=new_TS.data, subj_id=new_TS.subj_id_lst[0]) @@ -255,34 +257,36 @@ def truncate(self, start_time=None, end_time=None, start_point=None, end_point=N # truncates TS of every subj separately # based on either time or samples # if all None -> whole time_series - # check if not out of total interval - start = 0 - end = self.n_time + for subj_id in self.data_dict: - if not start_point is None: - start = start_point + time_array = self.data_dict[subj_id]["time_array"] + n_time = self.data_dict[subj_id]["data"].shape[1] + assert n_time == len(time_array), "time_array and data mismatch." - if not end_point is None: - end = end_point + 1 + start = 0 + end = n_time - if not start_time is None: - start = np.argwhere(self.time_array_ >= start_time)[0, 0] + if not start_point is None: + start = start_point - if not end_time is None: - end = np.argwhere(self.time_array_ <= end_time)[-1, 0] + 1 + if not end_point is None: + end = end_point + 1 - if start > self.interval_[0] or end < self.interval_[-1]: - # make sure the interval is not out of range - start = max(start, 0) - end = min(end, self.n_time) + if not start_time is None: + start = np.argwhere(time_array >= start_time)[0, 0] + + if not end_time is None: + end = np.argwhere(time_array <= end_time)[-1, 0] + 1 - self.interval_ = np.arange(start, end, dtype=int) + start = max(start, 0) + end = min(end, n_time) + assert start < end, "start must be less than end." - for subj in self.data_dict_: - self.data_dict_[subj]["data"] = self.data_dict_[subj]["data"][ - :, self.interval - ] + self.data_dict_[subj_id]["data"] = self.data_dict[subj_id]["data"][ + :, start:end + ] + self.data_dict_[subj_id]["time_array"] = time_array[start:end] self.data_ = None @@ -336,21 +340,16 @@ def Fs_resample(self, Fs_ratio=None): new_time_series[n, :], int(new_time_series.shape[1] * Fs_ratio) ) self.data_dict_[subj_id]["data"] = downsampled_time_series - self.Fs_ratio_ = Fs_ratio - - start_time = self.time_array_[self.interval_[0]] - end_time = self.time_array_[self.interval_[-1]] - _, self.time_array_ = signal.resample( - new_time_series[n, :], - int(new_time_series.shape[1] * Fs_ratio), - t=self.time_array_, - ) - - start = np.argwhere(self.time_array_ >= start_time)[0, 0] - end = np.argwhere(self.time_array_ <= end_time)[-1, 0] + 1 - self.interval_ = np.arange(start, end, dtype=int) + # find the new time_array after resampling + _, new_time_array = signal.resample( + new_time_series[n, :], + int(new_time_series.shape[1] * Fs_ratio), + t=self.data_dict[subj_id]["time_array"], + ) + self.data_dict_[subj_id]["time_array"] = new_time_array + self.Fs_ratio_ = Fs_ratio self.data_ = None def add_noise(self, noise_ratio, mean_noise=0): @@ -373,7 +372,7 @@ def add_noise(self, noise_ratio, mean_noise=0): def select_subjs(self, num_subj): # selects the first num_subj subjects in self.subj_id_lst - SUBJECTS = [subj_id for subj_id in self.data_dict_] + SUBJECTS = self.subj_id_lst if num_subj < len(SUBJECTS): for subj_id in SUBJECTS: if not subj_id in SUBJECTS[:num_subj]: @@ -412,6 +411,13 @@ def visualize( nodes_lst is a list of indices """ + if len(self.subj_id_lst) > 1: + # raise a user warning + warnings.warn( + "Multiple subjects are not supported in visualization.", UserWarning + ) + return + start = 0 end = self.n_time From bfb675b356008d17bd0f0acc6477f9e803eed7bc Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Wed, 17 Jul 2024 10:27:29 -0400 Subject: [PATCH 06/15] add test_time_series --- tests/test_time_series.py | 256 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 tests/test_time_series.py diff --git a/tests/test_time_series.py b/tests/test_time_series.py new file mode 100644 index 0000000..0c8af67 --- /dev/null +++ b/tests/test_time_series.py @@ -0,0 +1,256 @@ +import numpy as np +import pytest + +from pydfc import TIME_SERIES + + +def test_create_time_series(): + # create a random data with 100 regions and 1000 time points + data = np.random.rand(100, 1000) + # create a random locs with 100 regions and 3 coordinates + locs = np.random.rand(100, 3) + # create a random node_labels list with 100 regions + node_labels = [f"Region {i}" for i in range(100)] + time_series = TIME_SERIES( + data=data, + subj_id="sub-0001", + Fs=2, + locs=locs, + node_labels=node_labels, + ) + + assert time_series.data.shape == (100, 1000) + assert time_series.subj_id_lst == ["sub-0001"] + assert time_series.Fs == 2 + assert time_series.n_time == 1000 + assert time_series.n_regions == 100 + assert np.all(time_series.nodes_lst == np.arange(0, time_series.n_regions, dtype=int)) + assert np.all(time_series.time == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2)) + assert time_series.data_dict.keys() == {"sub-0001"} + assert np.all(time_series.data_dict["sub-0001"]["data"] == data) + + +def test_append_ts(): + # create a random data with 100 regions and 1000 time points + data_1 = np.random.rand(100, 1000) + # create a random locs with 100 regions and 3 coordinates + locs = np.random.rand(100, 3) + # create a random node_labels list with 100 regions + node_labels = [f"Region {i}" for i in range(100)] + time_series = TIME_SERIES( + data=data_1, + subj_id="sub-0001", + Fs=2, + locs=locs, + node_labels=node_labels, + ) + + # create a random data with 100 regions and 1000 time points + data_2 = np.random.rand(100, 1000) + time_series.append_ts( + new_time_series=data_2, + subj_id="sub-0002", + ) + + assert time_series.data.shape == (100, 2000) + assert time_series.subj_id_lst == ["sub-0001", "sub-0002"] + assert time_series.Fs == 2 + assert time_series.n_time is None + assert time_series.n_regions == 100 + assert np.all(time_series.nodes_lst == np.arange(0, time_series.n_regions, dtype=int)) + assert time_series.time is None + assert time_series.data_dict.keys() == {"sub-0001", "sub-0002"} + assert np.all(time_series.data_dict["sub-0001"]["data"] == data_1) + assert np.all(time_series.data_dict["sub-0002"]["data"] == data_2) + assert np.all( + time_series.data_dict["sub-0001"]["time_array"] + == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2) + ) + assert np.all( + time_series.data_dict["sub-0002"]["time_array"] + == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2) + ) + + # check if visualization will raise warning correctly + with pytest.warns( + UserWarning, match="Multiple subjects are not supported in visualization." + ): + time_series.visualize() + + +def test_concat_ts(): + # create a random data with 100 regions and 1000 time points + data_1 = np.random.rand(100, 1000) + # create a random locs with 100 regions and 3 coordinates + locs = np.random.rand(100, 3) + # create a random node_labels list with 100 regions + node_labels = [f"Region {i}" for i in range(100)] + time_series = TIME_SERIES( + data=data_1, + subj_id="sub-0001", + Fs=2, + locs=locs, + node_labels=node_labels, + ) + + # create a random data with 100 regions and 1000 time points + data_2 = np.random.rand(100, 1000) + locs_2 = np.random.rand(100, 3) + node_labels_2 = [f"Parcel {i}" for i in range(100)] + + time_series_2 = TIME_SERIES( + data=data_2, + subj_id="sub-0002", + Fs=1, + locs=locs, + node_labels=node_labels, + ) + + # check Fs mismatch assert error + with pytest.raises(AssertionError, match="Fs mismatch!"): + time_series.concat_ts(time_series_2) + + time_series_2 = TIME_SERIES( + data=data_2, + subj_id="sub-0002", + Fs=2, + locs=locs_2, + node_labels=node_labels, + ) + # check locs mismatch assert error + with pytest.raises(AssertionError, match="locs mismatch!"): + time_series.concat_ts(time_series_2) + + time_series_2 = TIME_SERIES( + data=data_2, + subj_id="sub-0002", + Fs=2, + locs=locs, + node_labels=node_labels_2, + ) + # check node_labels mismatch assert error + with pytest.raises(AssertionError, match="node_labels mismatch!"): + time_series.concat_ts(time_series_2) + + time_series_2 = TIME_SERIES( + data=data_2, + subj_id="sub-0002", + Fs=2, + locs=locs, + node_labels=node_labels, + ) + time_series.concat_ts(time_series_2) + + assert time_series.data.shape == (100, 2000) + assert time_series.subj_id_lst == ["sub-0001", "sub-0002"] + assert time_series.Fs == 2 + assert time_series.n_time is None + assert time_series.n_regions == 100 + assert np.all(time_series.nodes_lst == np.arange(0, 100, dtype=int)) + assert time_series.time is None + assert time_series.data_dict.keys() == {"sub-0001", "sub-0002"} + assert np.all(time_series.data_dict["sub-0001"]["data"] == data_1) + assert np.all(time_series.data_dict["sub-0002"]["data"] == data_2) + assert np.all( + time_series.data_dict["sub-0001"]["time_array"] + == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2) + ) + assert np.all( + time_series.data_dict["sub-0002"]["time_array"] + == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2) + ) + + +def test_get_subj_ts(): + # create a random data with 100 regions and 1000 time points + data_1 = np.random.rand(100, 1000) + # create a random locs with 100 regions and 3 coordinates + locs = np.random.rand(100, 3) + # create a random node_labels list with 100 regions + node_labels = [f"Region {i}" for i in range(100)] + time_series = TIME_SERIES( + data=data_1, + subj_id="sub-0001", + Fs=2, + locs=locs, + node_labels=node_labels, + ) + # create a random data with 100 regions and 1000 time points + data_2 = np.random.rand(100, 1000) + time_series_2 = TIME_SERIES( + data=data_2, + subj_id="sub-0002", + Fs=2, + locs=locs, + node_labels=node_labels, + ) + time_series.concat_ts(time_series_2) + + subj_ts = time_series.get_subj_ts(subjs_id="sub-0001") + assert subj_ts.data.shape == (100, 1000) + assert subj_ts.subj_id_lst == ["sub-0001"] + assert subj_ts.Fs == 2 + assert subj_ts.n_time == 1000 + assert subj_ts.n_regions == 100 + assert np.all(subj_ts.nodes_lst == np.arange(0, 100, dtype=int)) + assert np.all(subj_ts.time == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2)) + assert subj_ts.data_dict.keys() == {"sub-0001"} + assert np.all(subj_ts.data_dict["sub-0001"]["data"] == data_1) + + subj_ts = time_series.get_subj_ts(subjs_id="sub-0002") + assert subj_ts.data.shape == (100, 1000) + assert subj_ts.subj_id_lst == ["sub-0002"] + assert subj_ts.Fs == 2 + assert subj_ts.n_time == 1000 + assert subj_ts.n_regions == 100 + assert np.all(subj_ts.nodes_lst == np.arange(0, 100, dtype=int)) + assert np.all(subj_ts.time == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2)) + assert subj_ts.data_dict.keys() == {"sub-0002"} + assert np.all(subj_ts.data_dict["sub-0002"]["data"] == data_2) + + # check that the original time_series is not changed + assert time_series.data.shape == (100, 2000) + assert time_series.subj_id_lst == ["sub-0001", "sub-0002"] + assert time_series.Fs == 2 + assert time_series.n_time is None + assert time_series.n_regions == 100 + assert np.all(time_series.nodes_lst == np.arange(0, 100, dtype=int)) + assert time_series.time is None + assert time_series.data_dict.keys() == {"sub-0001", "sub-0002"} + assert np.all(time_series.data_dict["sub-0001"]["data"] == data_1) + assert np.all(time_series.data_dict["sub-0002"]["data"] == data_2) + assert np.all( + time_series.data_dict["sub-0001"]["time_array"] + == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2) + ) + assert np.all( + time_series.data_dict["sub-0002"]["time_array"] + == 1 / 2 + np.arange(0, 1000 / 2, 1 / 2) + ) + + +def test_select_nodes(): + # create a random data with 100 regions and 1000 time points + data = np.random.rand(100, 1000) + # create a random locs with 100 regions and 3 coordinates + locs = np.random.rand(100, 3) + # create a random node_labels list with 100 regions + node_labels = [f"Region {i}" for i in range(100)] + time_series = TIME_SERIES( + data=data, + subj_id="sub-0001", + Fs=2, + locs=locs, + node_labels=node_labels, + ) + + # select 5 nodes + nodes_idx = np.arange(0, 100, 20, dtype=int) + time_series.select_nodes(nodes_idx=nodes_idx) + assert time_series.data.shape == (5, 1000) + assert np.all(time_series.data == data[nodes_idx, :]) + assert time_series.n_regions == 5 + assert np.all(time_series.nodes_lst == nodes_idx) + assert np.all(time_series.locs == locs[nodes_idx]) + assert np.all(time_series.node_labels == [f"Region {i}" for i in nodes_idx]) + assert np.all(time_series.nodes_selection_ == nodes_idx) From 5ed44c82b8a08e85b8b9096bceba7e7adf64d799 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Wed, 17 Jul 2024 10:30:25 -0400 Subject: [PATCH 07/15] minor change --- tests/test_time_series.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_time_series.py b/tests/test_time_series.py index 0c8af67..4d7fd37 100644 --- a/tests/test_time_series.py +++ b/tests/test_time_series.py @@ -245,12 +245,15 @@ def test_select_nodes(): ) # select 5 nodes - nodes_idx = np.arange(0, 100, 20, dtype=int) + nodes_idx = np.array([0, 1, 23, 43, 87]) time_series.select_nodes(nodes_idx=nodes_idx) assert time_series.data.shape == (5, 1000) assert np.all(time_series.data == data[nodes_idx, :]) assert time_series.n_regions == 5 assert np.all(time_series.nodes_lst == nodes_idx) assert np.all(time_series.locs == locs[nodes_idx]) - assert np.all(time_series.node_labels == [f"Region {i}" for i in nodes_idx]) + assert np.all( + time_series.node_labels + == ["Region 0", "Region 1", "Region 23", "Region 43", "Region 87"] + ) assert np.all(time_series.nodes_selection_ == nodes_idx) From 2620c37b5f41478b3505ec637582802f3f6269ba Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Wed, 17 Jul 2024 12:10:29 -0400 Subject: [PATCH 08/15] add test_dfc --- pydfc/dfc.py | 12 ++- tests/test_dfc.py | 215 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 tests/test_dfc.py diff --git a/pydfc/dfc.py b/pydfc/dfc.py index ee49cf0..e02e662 100644 --- a/pydfc/dfc.py +++ b/pydfc/dfc.py @@ -170,7 +170,7 @@ def dFC2dict(self, TRs=None): dFC_mat = self.get_dFC_mat(TRs=TRs) dFC_dict = {} for k, TR in enumerate(TRs): - dFC_dict["TR" + str(TR)] = dFC_mat[k, :, :] + dFC_dict[f"TR{TR}"] = dFC_mat[k, :, :] return dFC_dict # test this @@ -199,7 +199,7 @@ def get_dFC_mat(self, TRs=None, num_samples=None): dFC_mat = list() for TR in TRs: - dFC_mat.append(self.FCSs[self.FCS_idx["TR" + str(TR)]]) + dFC_mat.append(self.FCSs[self.FCS_idx[f"TR{TR}"]]) dFC_mat = np.array(dFC_mat) @@ -231,6 +231,10 @@ def SWed_dFC_mat(self, W=None, n_overlap=None, tapered_window=False): return dFC_mat_new def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None): + """ + FCSs: a 3D numpy array of FC matrices with shape (n_time, n_regions, n_regions) + FCS_idx: a list of indices that correspond to each FC matrix in FCSs over time + """ if len(FCSs.shape) == 2: FCSs = np.expand_dims(FCSs, axis=0) @@ -267,11 +271,11 @@ def set_dFC(self, FCSs, FCS_idx=None, TS_info=None, TR_array=None): # the input FCS_idx is ranged from 0 to len(FCS)-1 but we shift it to 1 to len(FCS) self.FCSs_ = {} for i, FCS in enumerate(FCSs): - self.FCSs_["FCS" + str(i + 1)] = FCS + self.FCSs_[f"FCS{i + 1}"] = FCS self.FCS_idx_ = {} for i, idx in enumerate(FCS_idx): - self.FCS_idx_["TR" + str(TR_array[i])] = "FCS" + str(idx + 1) + self.FCS_idx_[f"TR{TR_array[i]}"] = f"FCS{idx + 1}" # "FCS" + str(idx + 1) self.TS_info_ = TS_info self.n_regions_ = FCSs.shape[1] diff --git a/tests/test_dfc.py b/tests/test_dfc.py new file mode 100644 index 0000000..b3a7761 --- /dev/null +++ b/tests/test_dfc.py @@ -0,0 +1,215 @@ +import numpy as np + +from pydfc import DFC + + +def test_create_dFC_state_based(): + ## STATE BASED ## + # create 5 FCSs with 3 regions representing 5 states + FCSs = np.array( + [ + [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], + [[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]], + [[0.3, 0.4, 0.5], [0.6, 0.7, 0.8], [-0.9, 0.1, -0.2]], + [[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]], + [[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]], + ] + ) + # create a random FCS_idx with 10 time points and 5 states + FCS_idx = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) + + dfc = DFC() + dfc.set_dFC(FCSs=FCSs, FCS_idx=FCS_idx) + + assert dfc.FCSs.keys() == {"FCS1", "FCS2", "FCS3", "FCS4", "FCS5"} + assert np.all( + dfc.FCSs["FCS1"] == np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]) + ) + assert np.all( + dfc.FCSs["FCS2"] + == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dfc.FCSs["FCS3"] + == np.array([[0.3, 0.4, 0.5], [0.6, 0.7, 0.8], [-0.9, 0.1, -0.2]]) + ) + assert np.all( + dfc.FCSs["FCS4"] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dfc.FCSs["FCS5"] == np.array([[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]]) + ) + + assert dfc.FCS_idx.keys() == { + "TR0", + "TR1", + "TR2", + "TR3", + "TR4", + "TR5", + "TR6", + "TR7", + "TR8", + "TR9", + } + assert dfc.FCS_idx["TR0"] == "FCS1" + assert dfc.FCS_idx["TR1"] == "FCS2" + assert dfc.FCS_idx["TR2"] == "FCS3" + assert dfc.FCS_idx["TR3"] == "FCS4" + assert dfc.FCS_idx["TR4"] == "FCS5" + assert dfc.FCS_idx["TR5"] == "FCS1" + assert dfc.FCS_idx["TR6"] == "FCS2" + assert dfc.FCS_idx["TR7"] == "FCS3" + assert dfc.FCS_idx["TR8"] == "FCS4" + assert dfc.FCS_idx["TR9"] == "FCS5" + + assert dfc.n_regions == 3 + assert dfc.n_time == 10 + assert np.all(dfc.state_TC() == np.array([1, 2, 3, 4, 5, 1, 2, 3, 4, 5])) + + dFC_mat = dfc.get_dFC_mat() + assert dFC_mat.shape == (10, 3, 3) + assert np.all( + dFC_mat[0] == np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]) + ) + assert np.all( + dFC_mat[1] == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dFC_mat[2] == np.array([[0.3, 0.4, 0.5], [0.6, 0.7, 0.8], [-0.9, 0.1, -0.2]]) + ) + assert np.all( + dFC_mat[3] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dFC_mat[4] == np.array([[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]]) + ) + assert np.all( + dFC_mat[5] == np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]) + ) + assert np.all( + dFC_mat[6] == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dFC_mat[7] == np.array([[0.3, 0.4, 0.5], [0.6, 0.7, 0.8], [-0.9, 0.1, -0.2]]) + ) + assert np.all( + dFC_mat[8] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dFC_mat[9] == np.array([[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]]) + ) + + dFC_mat = dfc.get_dFC_mat(TRs=[1, 3, 8]) + assert dFC_mat.shape == (3, 3, 3) + assert np.all( + dFC_mat[0] == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dFC_mat[1] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dFC_mat[2] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + + dFC_dict = dfc.dFC2dict(TRs=[1, 3, 8]) + assert dFC_dict.keys() == {"TR1", "TR3", "TR8"} + assert np.all( + dFC_dict["TR1"] + == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dFC_dict["TR3"] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dFC_dict["TR8"] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + + +def test_create_dFC_state_free(): + ## STATE FREE ## + # create 5 FCSs with 3 regions corresponding to 5 time points + FCSs = np.array( + [ + [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], + [[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]], + [[0.3, 0.4, 0.5], [0.6, 0.7, 0.8], [-0.9, 0.1, -0.2]], + [[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]], + [[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]], + ] + ) + + dfc = DFC() + dfc.set_dFC(FCSs=FCSs) + + assert dfc.FCSs.keys() == {"FCS1", "FCS2", "FCS3", "FCS4", "FCS5"} + assert np.all( + dfc.FCSs["FCS1"] == np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]) + ) + assert np.all( + dfc.FCSs["FCS2"] + == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dfc.FCSs["FCS3"] + == np.array([[0.3, 0.4, 0.5], [0.6, 0.7, 0.8], [-0.9, 0.1, -0.2]]) + ) + assert np.all( + dfc.FCSs["FCS4"] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dfc.FCSs["FCS5"] == np.array([[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]]) + ) + + assert dfc.FCS_idx.keys() == {"TR0", "TR1", "TR2", "TR3", "TR4"} + assert dfc.FCS_idx["TR0"] == "FCS1" + assert dfc.FCS_idx["TR1"] == "FCS2" + assert dfc.FCS_idx["TR2"] == "FCS3" + assert dfc.FCS_idx["TR3"] == "FCS4" + assert dfc.FCS_idx["TR4"] == "FCS5" + + assert dfc.n_regions == 3 + assert dfc.n_time == 5 + + dFC_mat = dfc.get_dFC_mat() + assert dFC_mat.shape == (5, 3, 3) + assert np.all( + dFC_mat[0] == np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]) + ) + assert np.all( + dFC_mat[1] == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dFC_mat[2] == np.array([[0.3, 0.4, 0.5], [0.6, 0.7, 0.8], [-0.9, 0.1, -0.2]]) + ) + assert np.all( + dFC_mat[3] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dFC_mat[4] == np.array([[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]]) + ) + + dFC_mat = dfc.get_dFC_mat(TRs=[1, 3, 4]) + assert dFC_mat.shape == (3, 3, 3) + assert np.all( + dFC_mat[0] == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dFC_mat[1] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dFC_mat[2] == np.array([[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]]) + ) + + dFC_dict = dfc.dFC2dict(TRs=[1, 3, 4]) + assert dFC_dict.keys() == {"TR1", "TR3", "TR4"} + assert np.all( + dFC_dict["TR1"] + == np.array([[0.2, 0.3, 0.4], [-0.5, -0.6, -0.7], [0.8, 0.9, 0.1]]) + ) + assert np.all( + dFC_dict["TR3"] == np.array([[0.4, 0.5, 0.6], [-0.7, 0.8, 0.9], [0.1, 0.2, 0.3]]) + ) + assert np.all( + dFC_dict["TR4"] == np.array([[0.5, 0.6, 0.7], [0.8, 0.9, 0.1], [0.2, 0.3, 0.4]]) + ) From badef6100463cd1f86894202323407f0a465da2c Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Tue, 24 Sep 2024 13:34:54 -0400 Subject: [PATCH 09/15] kmeans dtype issue --- pydfc/dfc_methods/cap.py | 4 ++-- pydfc/dfc_methods/sliding_window_clustr.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pydfc/dfc_methods/cap.py b/pydfc/dfc_methods/cap.py index 408e6d4..210f0a9 100644 --- a/pydfc/dfc_methods/cap.py +++ b/pydfc/dfc_methods/cap.py @@ -122,7 +122,7 @@ def estimate_FCS(self, time_series): act_vecs=act_center_1st_level, n_clusters=self.params["n_states"] ) self.FCS_ = self.act_vec2FCS(group_act_centroids) - self.Z = self.kmeans_.predict(time_series.data.T) + self.Z = self.kmeans_.predict(time_series.data.T.astype(np.float32)) # mean activation of states self.set_mean_activity(time_series) @@ -149,7 +149,7 @@ def estimate_dFC(self, time_series): act_vecs = time_series.data.T - Z = self.kmeans_.predict(act_vecs) + Z = self.kmeans_.predict(act_vecs.astype(np.float32)) # record time self.set_dFC_assess_time(time.time() - tic) diff --git a/pydfc/dfc_methods/sliding_window_clustr.py b/pydfc/dfc_methods/sliding_window_clustr.py index 6fd31c4..34036fd 100644 --- a/pydfc/dfc_methods/sliding_window_clustr.py +++ b/pydfc/dfc_methods/sliding_window_clustr.py @@ -228,7 +228,7 @@ def estimate_FCS(self, time_series): n_clusters=self.params["n_states"], n_regions=dFC_raw.n_regions, ) - self.Z = self.kmeans_.predict(self.dFC_mat2vec(SW_dFC)) + self.Z = self.kmeans_.predict(self.dFC_mat2vec(SW_dFC).astype(np.float32)) # mean activation of states self.set_mean_activity(time_series) @@ -270,7 +270,7 @@ def estimate_dFC(self, time_series): # Z = self.clusters_lst2idx(self.kmeans_.get_clusters()) else: ########### Euclidean Clustering ############## - Z = self.kmeans_.predict(F) + Z = self.kmeans_.predict(F.astype(np.float32)) # record time self.set_dFC_assess_time(time.time() - tic) From 92c300eeafbb055dad59dcf36378564aa754d293 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Mon, 7 Oct 2024 15:05:13 -0400 Subject: [PATCH 10/15] solve the kmeans Buffer dtype mismatch issue --- pydfc/dfc_methods/cap.py | 1 + pydfc/dfc_methods/sliding_window_clustr.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pydfc/dfc_methods/cap.py b/pydfc/dfc_methods/cap.py index 210f0a9..0ee51bf 100644 --- a/pydfc/dfc_methods/cap.py +++ b/pydfc/dfc_methods/cap.py @@ -78,6 +78,7 @@ def act_vec2FCS(self, act_vecs): def cluster_act_vec(self, act_vecs, n_clusters): kmeans_ = KMeans(n_clusters=n_clusters, n_init=500).fit(act_vecs) + kmeans_.cluster_centers_ = kmeans_.cluster_centers_.astype(np.float32) act_centroids = kmeans_.cluster_centers_ return act_centroids, kmeans_ diff --git a/pydfc/dfc_methods/sliding_window_clustr.py b/pydfc/dfc_methods/sliding_window_clustr.py index 34036fd..c19e883 100644 --- a/pydfc/dfc_methods/sliding_window_clustr.py +++ b/pydfc/dfc_methods/sliding_window_clustr.py @@ -156,6 +156,7 @@ def cluster_FC(self, FCS_raw, n_clusters, n_regions): else: ########### Euclidean Clustering ############## kmeans_ = KMeans(n_clusters=n_clusters, n_init=500).fit(F) + kmeans_.cluster_centers_ = kmeans_.cluster_centers_.astype(np.float32) F_cent = kmeans_.cluster_centers_ FCS_ = self.dFC_vec2mat(F_cent, N=n_regions) From 18ae4ac407f09cbf46cfc89d0eb6fc6ec9303dd4 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Tue, 3 Dec 2024 13:57:27 -0500 Subject: [PATCH 11/15] refactor multi_analysis --- pydfc/data_loader.py | 2 +- pydfc/multi_analysis_utils.py | 205 ++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 pydfc/multi_analysis_utils.py diff --git a/pydfc/data_loader.py b/pydfc/data_loader.py index e14b2fc..21584eb 100644 --- a/pydfc/data_loader.py +++ b/pydfc/data_loader.py @@ -1,5 +1,5 @@ """ -Implementation of dFC methods. +Implementation of functions for leading fmri data. Created on Jun 29 2023 @author: Mohammad Torabi diff --git a/pydfc/multi_analysis_utils.py b/pydfc/multi_analysis_utils.py new file mode 100644 index 0000000..2c6b0a4 --- /dev/null +++ b/pydfc/multi_analysis_utils.py @@ -0,0 +1,205 @@ +""" +Implementation of functions for Multi Analysis of dFC. + +Created on Dec 3 2024 +@author: Mohammad Torabi +""" + +from copy import deepcopy + +from joblib import Parallel, delayed + +from .dfc_methods import * + +################################# DATA_LOADER functions ###################################### + + +def create_measure_obj(MEASURES_name_lst, **params): + + MEASURES_lst = list() + for MEASURES_name in MEASURES_name_lst: + + ###### CAP ###### + if MEASURES_name == "CAP": + measure = CAP(**params) + + ###### CONTINUOUS HMM ###### + if MEASURES_name == "ContinuousHMM": + measure = HMM_CONT(**params) + + ###### WINDOW_LESS ###### + if MEASURES_name == "Windowless": + measure = WINDOWLESS(**params) + + ###### SLIDING WINDOW ###### + if MEASURES_name == "SlidingWindow": + measure = SLIDING_WINDOW(**params) + + ###### TIME FREQUENCY ###### + if MEASURES_name == "Time-Freq": + measure = TIME_FREQ(**params) + + ###### SLIDING WINDOW + CLUSTERING ###### + if MEASURES_name == "Clustering": + measure = SLIDING_WINDOW_CLUSTR(**params) + + ###### DISCRETE HMM ###### + if MEASURES_name == "DiscreteHMM": + measure = HMM_DISC(**params) + + MEASURES_lst.append(measure) + + return MEASURES_lst + + +def measures_initializer(MEASURES_name_lst, params_methods, alter_hparams): + """ + - this will test values in alter_hparams other than + values already in params_methods. values in params_methods + will be considered the reference + sample: + hyper_params = { \ + 'n_states': [6, 12, 16], \ + 'normalization': [True], \ + 'num_subj': [50, 100, 395], \ + 'num_select_nodes': [50, 100, 333], \ + 'num_time_point': [500, 800, 1200], \ + 'Fs_ratio': [0.50, 1.00, 1.50], \ + 'noise_ratio': [0.00, 0.50, 1.00], \ + 'num_realization': [1, 2, 3], \ + } + + MEASURES_name_lst = ( \ + 'SlidingWindow', \ + 'Time-Freq', \ + 'CAP', \ + 'ContinuousHMM', \ + 'Windowless', \ + 'Clustering', \ + 'DiscreteHMM' \ + ) + """ + + # a list of MEASURES with default parameter values + MEASURES_lst = create_measure_obj( + MEASURES_name_lst=MEASURES_name_lst, **params_methods + ) + + # adding MEASURES with alternative parameter values + hyper_param_info = {} + hyper_param_info["default_values"] = params_methods + for hyper_param in alter_hparams: + for value in alter_hparams[hyper_param]: + params = deepcopy(params_methods) + params[hyper_param] = value + hyper_param_info[hyper_param + "_" + str(value)] = deepcopy(params) + new_MEASURES = create_measure_obj( + MEASURES_name_lst=MEASURES_name_lst, **params + ) + for new_measure in new_MEASURES: + flag = 0 + for MEASURE in MEASURES_lst: + if new_measure.issame(MEASURE): + flag = 1 + if flag == 0: + MEASURES_lst.append(new_measure) + + return MEASURES_lst, hyper_param_info + + +def get_SB_MEASURES_lst(MEASURES_lst): + """returns state_based measures""" + SB_MEASURES = list() + for measure in MEASURES_lst: + if measure.is_state_based: + SB_MEASURES.append(measure) + return SB_MEASURES + + +def get_DD_MEASURES_lst(MEASURES_lst): + """returns data_driven measures""" + DD_MEASURES = list() + for measure in MEASURES_lst: + if not measure.is_state_based: + DD_MEASURES.append(measure) + return DD_MEASURES + + +def estimate_group_FCS(time_series, MEASURES_lst, n_jobs=None, verbose=0, backend="loky"): + + SB_MEASURES_lst = get_SB_MEASURES_lst(MEASURES_lst) + if n_jobs is None: + SB_MEASURES_lst_NEW = list() + for measure in SB_MEASURES_lst: + SB_MEASURES_lst_NEW.append(measure.estimate_FCS(time_series=time_series)) + else: + SB_MEASURES_lst_NEW = Parallel( + n_jobs=n_jobs, + verbose=verbose, + backend=backend, + )( + delayed(measure.estimate_FCS)(time_series=time_series) + for measure in SB_MEASURES_lst + ) + MEASURES_fit_lst = get_DD_MEASURES_lst(MEASURES_lst) + SB_MEASURES_lst_NEW + + return MEASURES_fit_lst + + +##################### dFC ASSESSMENT ###################### + + +def group_dFC_assess( + time_series, MEASURES_fit_lst, n_jobs=None, verbose=0, backend="loky" +): + """ + assess dFC for all subjects using all measures + and a single time_series + """ + SUBJECTs = time_series.subj_id_lst + + OUT = list() + for subject in SUBJECTs: + OUT.append( + subj_lvl_dFC_assess( + time_series=time_series.get_subj_ts(subjs_id=subject), + MEASURES_fit_lst=MEASURES_fit_lst, + n_jobs=n_jobs, + verbose=verbose, + backend=backend, + ) + ) + + return OUT + + +def subj_lvl_dFC_assess( + time_series, MEASURES_fit_lst, n_jobs=None, verbose=0, backend="loky" +): + """ + assess dFC for a single subject using all measures and a + single time_series + """ + + dFC_dict = {} + + if n_jobs is None: + dFC_lst = list() + for measure in MEASURES_fit_lst: + dFC_lst.append(measure.estimate_dFC(time_series=time_series)) + else: + dFC_lst = Parallel( + n_jobs=n_jobs, + verbose=verbose, + backend=backend, + )( + delayed(measure.estimate_dFC)(time_series=time_series) + for measure in MEASURES_fit_lst + ) + + dFC_dict["dFC_lst"] = dFC_lst + + return dFC_dict + + +############################################################################################################## From 778ddf913ef100c2aae5c9168e1010a068aaae8f Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Tue, 3 Dec 2024 14:03:50 -0500 Subject: [PATCH 12/15] minor --- pydfc/__init__.py | 4 ++++ pydfc/data_loader.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pydfc/__init__.py b/pydfc/__init__.py index d5ac722..b62481f 100644 --- a/pydfc/__init__.py +++ b/pydfc/__init__.py @@ -9,6 +9,9 @@ time_series --- time series class dfc --- dfc class data_loader --- load data +multi_analysis_utils --- utility functions for multi analysis + to implement multiple dFC methods + simultaneously dfc_utils --- functions used for dFC analysis comparison --- functions used for dFC results comparison @@ -24,6 +27,7 @@ "TIME_SERIES", "DFC", "data_loader", + "multi_analysis_utils", "dfc_methods", "dfc_utils", "comparison", diff --git a/pydfc/data_loader.py b/pydfc/data_loader.py index 21584eb..303b8bd 100644 --- a/pydfc/data_loader.py +++ b/pydfc/data_loader.py @@ -1,5 +1,5 @@ """ -Implementation of functions for leading fmri data. +Implementation of functions for loading fmri data. Created on Jun 29 2023 @author: Mohammad Torabi From f8dd111cde04a76cb54b3ec17cfd4394be8eece0 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Tue, 3 Dec 2024 14:22:31 -0500 Subject: [PATCH 13/15] add estimate_FCS for state free methods --- pydfc/dfc_methods/sliding_window.py | 4 ++++ pydfc/dfc_methods/time_freq.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/pydfc/dfc_methods/sliding_window.py b/pydfc/dfc_methods/sliding_window.py index b7c7063..680edd9 100644 --- a/pydfc/dfc_methods/sliding_window.py +++ b/pydfc/dfc_methods/sliding_window.py @@ -161,6 +161,10 @@ def dFC(self, time_series, W=None, n_overlap=None, tapered_window=False): return np.array(FCSs), np.array(TR_array) + def estimate_FCS(self, time_series): + + return self + def estimate_dFC(self, time_series): """ we assume calc is applied on subjects separately diff --git a/pydfc/dfc_methods/time_freq.py b/pydfc/dfc_methods/time_freq.py index 4d7c701..320a253 100644 --- a/pydfc/dfc_methods/time_freq.py +++ b/pydfc/dfc_methods/time_freq.py @@ -183,6 +183,10 @@ def WT_dFC(self, Y1, Y2, Fs, J, s0, dj): return wt + def estimate_FCS(self, time_series): + + return self + def estimate_dFC(self, time_series): """ we assume calc is applied on subjects separately From d9b1ed25cfe66295e3a7b776e12f7a3124594a18 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Tue, 3 Dec 2024 14:29:41 -0500 Subject: [PATCH 14/15] minor --- pydfc/multi_analysis_utils.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pydfc/multi_analysis_utils.py b/pydfc/multi_analysis_utils.py index 2c6b0a4..93e1c6a 100644 --- a/pydfc/multi_analysis_utils.py +++ b/pydfc/multi_analysis_utils.py @@ -127,21 +127,19 @@ def get_DD_MEASURES_lst(MEASURES_lst): def estimate_group_FCS(time_series, MEASURES_lst, n_jobs=None, verbose=0, backend="loky"): - SB_MEASURES_lst = get_SB_MEASURES_lst(MEASURES_lst) if n_jobs is None: - SB_MEASURES_lst_NEW = list() - for measure in SB_MEASURES_lst: - SB_MEASURES_lst_NEW.append(measure.estimate_FCS(time_series=time_series)) + MEASURES_fit_lst = list() + for measure in MEASURES_lst: + MEASURES_fit_lst.append(measure.estimate_FCS(time_series=time_series)) else: - SB_MEASURES_lst_NEW = Parallel( + MEASURES_fit_lst = Parallel( n_jobs=n_jobs, verbose=verbose, backend=backend, )( delayed(measure.estimate_FCS)(time_series=time_series) - for measure in SB_MEASURES_lst + for measure in MEASURES_lst ) - MEASURES_fit_lst = get_DD_MEASURES_lst(MEASURES_lst) + SB_MEASURES_lst_NEW return MEASURES_fit_lst From 93dade744d02a1da3b3ea177b9deccbd21a133f8 Mon Sep 17 00:00:00 2001 From: mtorabi59 Date: Wed, 5 Feb 2025 16:03:42 -0500 Subject: [PATCH 15/15] improve GraphLasso in SW --- pydfc/dfc_methods/sliding_window.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pydfc/dfc_methods/sliding_window.py b/pydfc/dfc_methods/sliding_window.py index 680edd9..17ec5e6 100644 --- a/pydfc/dfc_methods/sliding_window.py +++ b/pydfc/dfc_methods/sliding_window.py @@ -9,7 +9,7 @@ import numpy as np from scipy import signal -from sklearn.covariance import GraphicalLassoCV, graphical_lasso +from sklearn.covariance import GraphicalLasso, GraphicalLassoCV from ..dfc import DFC from ..time_series import TIME_SERIES @@ -38,6 +38,7 @@ def __init__(self, **params): self.FCS_ = [] self.FCS_fit_time_ = None self.dFC_assess_time_ = None + self.graphical_lasso_alpha_ = None self.params_name_lst = [ "measure_name", @@ -96,8 +97,13 @@ def calc_MI(self, X, Y): def FC(self, time_series): if self.params["sw_method"] == "GraphLasso": - model = GraphicalLassoCV() - model.fit(time_series.T) + # Standardize the data (zero mean, unit variance for each feature) + mean = np.mean(time_series, axis=1, keepdims=True) + std = np.std(time_series, axis=1, keepdims=True) + time_series_standardized = np.where(std != 0, (time_series - mean) / std, 0) + model = GraphicalLasso(alpha=self.graphical_lasso_alpha_) + model.fit(time_series_standardized.T) + # the covariance matrix will equal the correlation matrix C = model.covariance_ else: C = np.zeros((time_series.shape[0], time_series.shape[0])) @@ -129,6 +135,12 @@ def dFC(self, time_series, W=None, n_overlap=None, tapered_window=False): if step == 0: step = 1 + # find the L1 penalty for GraphLasso + if self.params["sw_method"] == "GraphLasso": + model = GraphicalLassoCV() + model.fit(time_series.T) + self.graphical_lasso_alpha_ = model.alpha_ + window_taper = signal.windows.gaussian(W, std=3 * W / 22) # C = DFC(measure=self) FCSs = list()