FeTS-AI
diff --git a/‎Task_1/FeTS_Challenge.py‎
Lines changed: 16 additions & 24 deletions b/‎Task_1/FeTS_Challenge.py‎
Lines changed: 16 additions & 24 deletions
diff --git a/‎Task_1/fets_challenge/checkpoint_utils.py‎
Lines changed: 1 addition & 2 deletions b/‎Task_1/fets_challenge/checkpoint_utils.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎Task_1/fets_challenge/config/gandlf_config.yaml‎
Lines changed: 2 additions & 1 deletion b/‎Task_1/fets_challenge/config/gandlf_config.yaml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎Task_1/fets_challenge/experiment.py‎
Lines changed: 35 additions & 35 deletions b/‎Task_1/fets_challenge/experiment.py‎
Lines changed: 35 additions & 35 deletions
@@ -336,26 +336,19 @@ def clipped_aggregation(local_tensors,
     clip_to_percentile = 80
 
     # first, we need to determine how much each local update has changed the tensor from the previous value
-    # we'll use the tensor_db search function to find the 
-    previous_tensor_value = tensor_db.search(tensor_name=tensor_name, fl_round=fl_round, tags=('trained',), origin='aggregator')
-    logger.info(f"Tensor Values {previous_tensor_value}")
-    logger.info(f"Tensor Values Shape {previous_tensor_value.shape[0]}")
+    # we'll use the tensor_db retrieve function to find the previous tensor value
+    previous_tensor_value = tensor_db.retrieve(tensor_name=tensor_name, origin='aggregator', fl_round=fl_round - 1, tags=('aggregated',))
 
-    if previous_tensor_value.shape[0] > 1:
-        logger.info(previous_tensor_value)
-        raise ValueError(f'found multiple matching tensors for {tensor_name}, tags=(model,), origin=aggregator')
-
-    if previous_tensor_value.shape[0] < 1:
+    if previous_tensor_value is None:
         # no previous tensor, so just return the weighted average
+        logger.info(f"previous_tensor_value is None")
         return weighted_average_aggregation(local_tensors,
                                             tensor_db,
                                             tensor_name,
                                             fl_round,
                                             collaborators_chosen_each_round,
                                             collaborator_times_per_round)
 
-    previous_tensor_value = previous_tensor_value.nparray.iloc[0]
-
     # compute the deltas for each collaborator
     deltas = [t.tensor - previous_tensor_value for t in local_tensors]
 
@@ -428,21 +421,20 @@ def FedAvgM_Selection(local_tensors,
             if tensor_name not in tensor_db.search(tags=('weight_speeds',))['tensor_name']:    
                 #weight_speeds[tensor_name] = np.zeros_like(local_tensors[0].tensor) # weight_speeds[tensor_name] = np.zeros(local_tensors[0].tensor.shape)
                 tensor_db.store(
-                    tensor_name=tensor_name, 
+                    tensor_name=tensor_name,
                     tags=('weight_speeds',), 
                     nparray=np.zeros_like(local_tensors[0].tensor),
                 )
+
             return new_tensor_weight        
         else:
             if tensor_name.endswith("weight") or tensor_name.endswith("bias"):
                 # Calculate aggregator's last value
                 previous_tensor_value = None
                 for _, record in tensor_db.iterrows():
-                    print(f'record tags {record["tags"]} record round {record["round"]} record tensor_name {record["tensor_name"]}')
-                    print(f'fl_round {fl_round} tensor_name {tensor_name}')
-                    if (record['round'] == fl_round 
+                    if (record['round'] == fl_round - 1 # Fetching aggregated value for previous round
                         and record["tensor_name"] == tensor_name
-                        and record["tags"] == ("aggregated",)): 
+                        and record["tags"] == ('aggregated',)):
                         previous_tensor_value = record['nparray']
                         break
 
@@ -457,7 +449,7 @@ def FedAvgM_Selection(local_tensors,
 
                     if tensor_name not in tensor_db.search(tags=('weight_speeds',))['tensor_name']:    
                         tensor_db.store(
-                            tensor_name=tensor_name, 
+                            tensor_name=tensor_name,
                             tags=('weight_speeds',), 
                             nparray=np.zeros_like(local_tensors[0].tensor),
                         )
@@ -481,7 +473,7 @@ def FedAvgM_Selection(local_tensors,
                     new_tensor_weight_speed = momentum * tensor_weight_speed + average_deltas # fix delete (1-momentum)
 
                     tensor_db.store(
-                        tensor_name=tensor_name, 
+                        tensor_name=tensor_name,
                         tags=('weight_speeds',), 
                         nparray=new_tensor_weight_speed
                     )
@@ -516,7 +508,7 @@ def FedAvgM_Selection(local_tensors,
 
 
 # change any of these you wish to your custom functions. You may leave defaults if you wish.
-aggregation_function = FedAvgM_Selection
+aggregation_function = weighted_average_aggregation
 choose_training_collaborators = all_collaborators_train
 training_hyper_parameters_for_round = constant_hyper_parameters
 
@@ -525,26 +517,26 @@ def FedAvgM_Selection(local_tensors,
 # to those you specify immediately above. Changing the below value to False will change 
 # this fact, excluding the three hausdorff measurements. As hausdorff distance is 
 # expensive to compute, excluding them will speed up your experiments.
-include_validation_with_hausdorff=True #TODO change it to True
+include_validation_with_hausdorff=True
 
 # We encourage participants to experiment with partitioning_1 and partitioning_2, as well as to create
 # other partitionings to test your changes for generalization to multiple partitionings.
 #institution_split_csv_filename = 'partitioning_1.csv'
 institution_split_csv_filename = 'small_split.csv'
 
 # change this to point to the parent directory of the data
-brats_training_data_parent_dir = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_TrainingData'
+brats_training_data_parent_dir = '/raid/datasets/FeTS22/MICCAI_FeTS2022_TrainingData'
 
 # increase this if you need a longer history for your algorithms
 # decrease this if you need to reduce system RAM consumption
-db_store_rounds = 1 #TODO store the tensor db for these many rounds
+db_store_rounds = 1
 
 # this is passed to PyTorch, so set it accordingly for your system
 device = 'cpu'
 
 # you'll want to increase this most likely. You can set it as high as you like, 
 # however, the experiment will exit once the simulated time exceeds one week. 
-rounds_to_train = 2 #TODO change it to 5 before merging
+rounds_to_train = 5
 
 # (bool) Determines whether checkpoints should be saved during the experiment. 
 # The checkpoints can grow quite large (5-10GB) so only the latest will be saved when this parameter is enabled
@@ -612,7 +604,7 @@ def FedAvgM_Selection(local_tensors,
 # the data you want to run inference over (assumed to be the experiment that just completed)
 
 #data_path = </PATH/TO/CHALLENGE_VALIDATION_DATA>
-data_path = '/home/ad_kagrawa2/Data/MICCAI_FeTS2022_ValidationData'
+data_path = '/raid/datasets/FeTS22/MICCAI_FeTS2022_ValidationData'
 validation_csv_filename = 'validation.csv'
 
 # you can keep these the same if you wish
 
@@ -28,7 +28,6 @@ def save_checkpoint(checkpoint_folder, agg_tensor_db,
                     best_dice_over_time_auc, 
                     collaborators_chosen_each_round, 
                     collaborator_times_per_round,
-                    tensor_keys_per_col,
                     experiment_results,
                     summary):
     """
@@ -39,7 +38,7 @@ def save_checkpoint(checkpoint_folder, agg_tensor_db,
     with open(f'checkpoint/{checkpoint_folder}/state.pkl', 'wb') as f:
         pickle.dump([collaborator_names, round_num, collaborator_time_stats, total_simulated_time, 
                      best_dice, best_dice_over_time_auc, collaborators_chosen_each_round, 
-                     collaborator_times_per_round, tensor_keys_per_col, experiment_results, summary], f)
+                     collaborator_times_per_round, experiment_results, summary], f)
 
 def load_checkpoint(checkpoint_folder):
     """
 
@@ -31,6 +31,7 @@ model:
   final_layer: softmax
   ignore_label_validation: null
   norm_type: instance
+  num_channels: 4
 nested_training:
   testing: 1
   validation: -5
@@ -56,7 +57,7 @@ scaling_factor: 1
 scheduler:
   type: triangle_modified
 track_memory_usage: false
-verbose: True
+verbose: False
 version:
   maximum: 0.1.0
   minimum: 0.0.14
 
@@ -20,27 +20,28 @@
 from openfl.experimental.workflow.interface import Aggregator, Collaborator
 from openfl.experimental.workflow.runtime import LocalRuntime
 
+from GANDLF.config_manager import ConfigManager
+
 logger = getLogger(__name__)
 # This catches PyTorch UserWarnings for CPU
 warnings.filterwarnings("ignore", category=UserWarning)
 
-def aggregator_private_attributes(
-       aggregation_type, collaborator_names, db_store_rounds):
-    return {"aggregation_type" : aggregation_type,
-            "collaborator_names": collaborator_names,
-            "checkpoint_folder":None,
-            "db_store_rounds":db_store_rounds
-}
- 
-
-def collaborator_private_attributes(
-        index, gandlf_config, train_csv_path, val_csv_path):
-        return {
-            "index": index,
-            "gandlf_config": gandlf_config,
-            "train_csv_path": train_csv_path,
-            "val_csv_path": val_csv_path
-        }
+def aggregator_private_attributes(aggregation_type, collaborator_names, db_store_rounds):  # builds the dict of private attributes for the aggregator from the given arguments
+    return {
+        "aggregation_type" : aggregation_type,  # stored as given; the caller passes the wrapped aggregation function here
+        "collaborator_names": collaborator_names,  # stored as given
+        "checkpoint_folder":None,  # starts as None; run_challenge_experiment returns this entry after the flow has run (presumably set during the run - confirm)
+        "db_store_rounds":db_store_rounds,  # stored as given
+        "agg_tensor_dict":{}  # starts as an empty dict; a new dict is created on every call
+    }
+
+
+def collaborator_private_attributes(index, train_csv_path, val_csv_path):  # builds the dict of private attributes for one collaborator from the given arguments
+    return {
+        "index": index,  # stored as given; the caller passes the collaborator's position in collaborator_names
+        "train_csv_path": train_csv_path,  # stored as given
+        "val_csv_path": val_csv_path  # stored as given
+    }
 
 
 def run_challenge_experiment(aggregation_function,
@@ -70,12 +71,20 @@ def run_challenge_experiment(aggregation_function,
                                               0.8,
                                               gandlf_csv_path)
 
-    print(f'Collaborator names for experiment : {collaborator_names}')
+    logger.info(f'Collaborator names for experiment : {collaborator_names}')
 
     aggregation_wrapper = CustomAggregationWrapper(aggregation_function)
 
     transformed_csv_dict = extract_csv_partitions(os.path.join(work, 'gandlf_paths.csv'))
 
+    gandlf_conf = {}
+    if isinstance(gandlf_config_path, str) and os.path.exists(gandlf_config_path):
+        gandlf_conf = ConfigManager(gandlf_config_path)
+    elif isinstance(gandlf_config_path, dict):
+        gandlf_conf = gandlf_config_path
+    else:
+        exit("GANDLF config file not found. Exiting...")
+
     collaborators = []
     for idx, col in enumerate(collaborator_names):
         col_dir = os.path.join(work, 'data', str(col))
@@ -96,9 +105,8 @@ def run_challenge_experiment(aggregation_function,
                 # with ray backend with 2 collaborators
                 num_cpus=4.0,
                 num_gpus=0.0,
-                # arguments required to pass to callable
+                # keyword arguments that are passed on to the private-attributes callable
                 index=idx,
-                gandlf_config=gandlf_config_path,
                 train_csv_path=train_csv_path,
                 val_csv_path=val_csv_path
             )
@@ -108,6 +116,7 @@ def run_challenge_experiment(aggregation_function,
                             private_attributes_callable=aggregator_private_attributes,
                             num_cpus=4.0,
                             num_gpus=0.0,
+                            # keyword arguments that are passed on to the private-attributes callable
                             collaborator_names=collaborator_names,
                             aggregation_type=aggregation_wrapper,
                             db_store_rounds=db_store_rounds)
@@ -119,10 +128,12 @@ def run_challenge_experiment(aggregation_function,
     logger.info(f"Local runtime collaborators = {local_runtime.collaborators}")
 
     params_dict = {"include_validation_with_hausdorff": include_validation_with_hausdorff,
-              "choose_training_collaborators": choose_training_collaborators,
-              "training_hyper_parameters_for_round": training_hyper_parameters_for_round,
-              "restore_from_checkpoint_folder": restore_from_checkpoint_folder,
-              "save_checkpoints": save_checkpoints}
+                   "use_pretrained_model": use_pretrained_model,
+                   "gandlf_config": gandlf_conf,
+                    "choose_training_collaborators": choose_training_collaborators,
+                    "training_hyper_parameters_for_round": training_hyper_parameters_for_round,
+                    "restore_from_checkpoint_folder": restore_from_checkpoint_folder,
+                    "save_checkpoints": save_checkpoints}
 
     model = FeTSChallengeModel()
     flflow = FeTSFederatedFlow(
@@ -134,15 +145,4 @@ def run_challenge_experiment(aggregation_function,
 
     flflow.runtime = local_runtime
     flflow.run()
-
-    # #TODO [Workflow - API] -> Commenting as pretrained model is not used.
-    # if use_pretrained_model:
-    #     if device == 'cpu':
-    #         checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth',map_location=torch.device('cpu'))
-    #         task_runner.model.load_state_dict(checkpoint['model_state_dict'])
-    #         task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
-    #     else:
-    #         checkpoint = torch.load(f'{root}/pretrained_model/resunet_pretrained.pth')
-    #         task_runner.model.load_state_dict(checkpoint['model_state_dict'])
-    #         task_runner.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
     return aggregator.private_attributes["checkpoint_folder"]