diff --git a/.gitignore b/.gitignore index 78a0cc4e..bb10ee7d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,21 @@ +# Ignore dataset files that are not Python /datasets/*[!.py] + +# Ignore Jupyter notebook checkpoints .ipynb_checkpoints +*.idea/ +# Ignore Python cache files __pycache__ + +# Ignore everything in the training folder except .py files training/* +!training/*.py + +# Ignore specific nohup files in the scripts folder scripts/_nohup + +# Ignore rsync filter file .rsync-filter + +# Ignore the ts2vec_env directory (virtual environment) +ts2vec_env/ \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..feccba82 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,10 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/ts2vec.iml b/.idea/ts2vec.iml new file mode 100644 index 00000000..ec63674c --- /dev/null +++ b/.idea/ts2vec.iml @@ -0,0 +1,7 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..35eb1ddf --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 00000000..ef09f3fa --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,138 @@ + + + + + + + + + + + + + + + + { + "lastFilter": { + "state": "OPEN", + "assignee": "jumairamiller" + } +} + { + "selectedUrlAndAccountId": { + "url": "https://github.com/jumairamiller/ts2vec.git", + "accountId": "1c182f61-b702-458b-8f9a-03f9dbad0d94" + }, + "recentNewPullRequestHead": { + "server": { + "useHttp": false, + "host": "github.com", + "port": null, + "suffix": null + }, + "owner": "jumairamiller", + "repository": "ts2vec" + } +} + + + { + "associatedIndex": 3 +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/datasets/stage1_online_retail_pre_processing.py b/datasets/stage1_online_retail_pre_processing.py new file mode 100644 index 00000000..eee5d36d --- /dev/null +++ b/datasets/stage1_online_retail_pre_processing.py @@ -0,0 +1,80 @@ +import pandas as pd +import numpy as np +from sklearn.preprocessing import MinMaxScaler +import logging + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + +# Step 1: Load and Combine Data from Excel Sheets +def load_and_combine_sheets(file_path, sheets): + """ + Load data from multiple Excel sheets and combine into a single DataFrame. + """ + combined_data = pd.DataFrame() + for sheet in sheets: + logging.info(f"Loading data from sheet: {sheet}") + sheet_data = pd.read_excel(file_path, sheet_name=sheet) + combined_data = pd.concat([combined_data, sheet_data], ignore_index=True) + logging.info("Data successfully loaded and combined.") + return combined_data + +# Step 2: Preprocess Data +def preprocess_data(data): + """ + Preprocess data by converting dates, normalizing numeric columns, and handling missing values. 
+ """ + # Convert 'InvoiceDate' to datetime and ensure numerical consistency in key columns + logging.info("Preprocessing data: converting dates and normalizing numeric columns.") + data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'], errors='coerce') + data['Quantity'] = pd.to_numeric(data['Quantity'], errors='coerce') + data['Price'] = pd.to_numeric(data['Price'], errors='coerce') + data['Customer ID'] = pd.to_numeric(data['Customer ID'], errors='coerce') + + # Remove rows where the Invoice starts with 'C' (canceled orders) + data = data[~data['Invoice'].astype(str).str.startswith('C')] + + # Drop rows with missing critical data + data = data.dropna(subset=['InvoiceDate', 'Customer ID', 'Quantity', 'Price']) + + # Normalize 'Quantity' and 'Price' using Min-Max scaling to insure values are positive + scaler = MinMaxScaler() + data[['Quantity', 'Price']] = scaler.fit_transform(data[['Quantity', 'Price']]) + logging.info("Data normalized and missing values handled.") + + return data + +# Step 3: Aggregate Data +def aggregate_data(data): + """ + Aggregate data by summing 'Quantity' and averaging 'Price' daily. + """ + logging.info("Aggregating data by Date.") + # Group by Date, aggregating Quantity and Price + data_agg = data.groupby(pd.Grouper(key='InvoiceDate', freq='D')).agg({ + 'Quantity': 'sum', + 'Price': 'mean' + }).reset_index() + + logging.info("Data aggregation complete.") + return data_agg + +# Main Function to Run All Steps +def main(): + # File path and sheets to load + file_path = 'online_retail_II.xlsx' + sheets = ['Year 2009-2010', 'Year 2010-2011'] + + # Load and preprocess the data + combined_data = load_and_combine_sheets(file_path, sheets) + cleaned_data = preprocess_data(combined_data) + + # Aggregate the data + aggregated_data = aggregate_data(cleaned_data) + + # Save the final reshaped and adjusted data to CSV + aggregated_data.to_csv('ts2vec_online_retail_II_data.csv', index=False) + logging.info("Final data saved successfully.") + +if __name__ == "__main__": + main() diff --git a/datasets/stage2_online_retail_pre_processing_.py b/datasets/stage2_online_retail_pre_processing_.py new file mode 100644 index 00000000..f33962ef --- /dev/null +++ b/datasets/stage2_online_retail_pre_processing_.py @@ -0,0 +1,67 @@ +import pandas as pd +import logging + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + +# Step 1: Load the original Online Retail II dataset +def load_data(file_path): + logging.info(f"Loading data from: {file_path}") + data = pd.read_excel(file_path) + logging.info(f"Data successfully loaded with {len(data)} records.") + return data + +# Step 2: Clean and preprocess the dataset +def preprocess_data(data): + logging.info("Preprocessing data: cleaning and handling missing values.") + # Convert 'InvoiceDate' to datetime and ensure numerical consistency + data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'], errors='coerce') + data['Quantity'] = pd.to_numeric(data['Quantity'], errors='coerce') + data['Price'] = pd.to_numeric(data['Price'], errors='coerce') + data['Customer ID'] = pd.to_numeric(data['Customer ID'], errors='coerce') + + # Remove cancelled orders (invoices starting with 'C') + data = data[~data['Invoice'].str.startswith('C', na=False)] + + # Drop rows with missing values in key columns + data = data.dropna(subset=['InvoiceDate', 'Customer ID', 'Quantity', 'Price']) + + logging.info(f"Data cleaned. 
Remaining records: {len(data)}.") + return data + +# Step 3: Group by CustomerID and InvoiceNo +def group_by_customer_invoice(data): + logging.info("Grouping by Customer ID and Invoice Number.") + # Group by CustomerID and InvoiceNo to represent each invoice as a time series record + grouped = data.groupby(['Customer ID', 'Invoice']).agg({ + 'InvoiceDate': 'first', # First date of the invoice + 'Quantity': 'sum', # Sum of quantities in the invoice + 'Price': 'mean' # Average price in the invoice + }).reset_index() + + logging.info(f"Grouped data created with {len(grouped)} records.") + return grouped + +# Step 4: Save the restructured dataset +def save_data(grouped_data, output_file): + logging.info(f"Saving restructured data to {output_file}.") + grouped_data.to_csv(output_file, index=False) + logging.info("Data successfully saved.") + +# Main function to run the entire preprocessing pipeline +def main(): + file_path = 'online_retail_II.xlsx' + output_file = ('restructured_ts2vec_online_retail.csv') + + # Load and preprocess the data + data = load_data(file_path) + cleaned_data = preprocess_data(data) + + # Group data by CustomerID and InvoiceNo + grouped_data = group_by_customer_invoice(cleaned_data) + + # Save the restructured dataset + save_data(grouped_data, output_file) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/datautils.py b/datautils.py index f8cfddd7..a31ec3d5 100644 --- a/datautils.py +++ b/datautils.py @@ -1,3 +1,4 @@ +import logging import os import numpy as np import pandas as pd @@ -5,6 +6,9 @@ import random from datetime import datetime import pickle + +import torch + from utils import pkl_load, pad_nan_to_target from scipy.io.arff import loadarff from sklearn.preprocessing import StandardScaler, MinMaxScaler @@ -68,7 +72,7 @@ def load_UCR(dataset): 'UMD' ]: return train[..., np.newaxis], train_labels, test[..., np.newaxis], test_labels - + mean = np.nanmean(train) std = np.nanstd(train) train = (train - mean) / std @@ -79,7 +83,7 @@ def load_UCR(dataset): def load_UEA(dataset): train_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TRAIN.arff')[0] test_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TEST.arff')[0] - + def extract_data(data): res_data = [] res_labels = [] @@ -89,31 +93,31 @@ def extract_data(data): res_data.append(t_data) res_labels.append(t_label) return np.array(res_data).swapaxes(1, 2), np.array(res_labels) - + train_X, train_y = extract_data(train_data) test_X, test_y = extract_data(test_data) - + scaler = StandardScaler() scaler.fit(train_X.reshape(-1, train_X.shape[-1])) train_X = scaler.transform(train_X.reshape(-1, train_X.shape[-1])).reshape(train_X.shape) test_X = scaler.transform(test_X.reshape(-1, test_X.shape[-1])).reshape(test_X.shape) - + labels = np.unique(train_y) transform = { k : i for i, k in enumerate(labels)} train_y = np.vectorize(transform.get)(train_y) test_y = np.vectorize(transform.get)(test_y) return train_X, train_y, test_X, test_y - - + + def load_forecast_npy(name, univar=False): - data = np.load(f'datasets/{name}.npy') + data = np.load(f'datasets/{name}.npy') if univar: data = data[: -1:] - + train_slice = slice(None, int(0.6 * len(data))) valid_slice = slice(int(0.6 * len(data)), int(0.8 * len(data))) test_slice = slice(int(0.8 * len(data)), None) - + scaler = StandardScaler().fit(data[train_slice]) data = scaler.transform(data) data = np.expand_dims(data, 0) @@ -135,10 +139,32 @@ def _get_time_features(dt): def load_forecast_csv(name, univar=False): - data = 
pd.read_csv(f'datasets/{name}.csv', index_col='date', parse_dates=True) + """ + Loads and processes time series data for forecasting tasks, supporting both the pre-processed + Online Retail II dataset and existing datasets like ETTh1, ETTm1, and electricity. + + Parameters: + name (str): The name of the dataset file (without the .csv extension). + univar (bool): Whether to load the univariate version of the data. + + Returns: + tuple: data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols + """ + # Try loading with 'date' as index, if it fails, try with 'InvoiceDate' to load online retail data + try: + data = pd.read_csv(f'datasets/{name}.csv', index_col='date', parse_dates=True) + except ValueError: + data = pd.read_csv(f'datasets/{name}.csv', index_col='InvoiceDate', parse_dates=True) + + # Ensure index is parsed as datetime + if not pd.api.types.is_datetime64_any_dtype(data.index): + data.index = pd.to_datetime(data.index) + + # Extract time features for the date/index column dt_embed = _get_time_features(data.index) n_covariate_cols = dt_embed.shape[-1] - + + # Handle univariate or multivariate cases if univar: if name in ('ETTh1', 'ETTh2', 'ETTm1', 'ETTm2'): data = data[['OT']] @@ -146,8 +172,11 @@ def load_forecast_csv(name, univar=False): data = data[['MT_001']] else: data = data.iloc[:, -1:] - + + # Convert data to numpy array data = data.to_numpy() + + # Define train, validation, and test splits based on dataset if name == 'ETTh1' or name == 'ETTh2': train_slice = slice(None, 12*30*24) valid_slice = slice(12*30*24, 16*30*24) @@ -157,35 +186,54 @@ def load_forecast_csv(name, univar=False): valid_slice = slice(12*30*24*4, 16*30*24*4) test_slice = slice(16*30*24*4, 20*30*24*4) else: - train_slice = slice(None, int(0.6 * len(data))) + # Default case for other or custom datasets + train_slice = slice(0, int(0.6 * len(data))) valid_slice = slice(int(0.6 * len(data)), int(0.8 * len(data))) - test_slice = slice(int(0.8 * len(data)), None) - - scaler = StandardScaler().fit(data[train_slice]) + test_slice = slice(int(0.8 * len(data)), len(data)) + + # Normalise data + scaler = None + if name == 'ts2vec_online_retail_II_data' or name == 'restructured_ts2vec_online_retail': + scaler = MinMaxScaler().fit(data[train_slice]) + else: + scaler = StandardScaler().fit(data[train_slice]) + data = scaler.transform(data) - if name in ('electricity'): + + # Reshape data based on dataset structure + if name in 'electricity': data = np.expand_dims(data.T, -1) # Each variable is an instance rather than a feature else: - data = np.expand_dims(data, 0) - + data = np.expand_dims(data, 0) # Single instance case + if n_covariate_cols > 0: - dt_scaler = StandardScaler().fit(dt_embed[train_slice]) + if name == 'ts2vec_online_retail_II_data' or name == 'restructured_ts2vec_online_retail': + dt_scaler = MinMaxScaler().fit(dt_embed[train_slice]) + else: + dt_scaler = StandardScaler().fit(dt_embed[train_slice]) dt_embed = np.expand_dims(dt_scaler.transform(dt_embed), 0) + # Concatenating the time embeddings to the data + ''' NOTE: + The np.repeat(dt_embed, data.shape[0], axis=0) function is used to repeat the time embeddings + for each instance only in the case of the 'electricity' dataset. 
This ensures that the time + embeddings are correctly aligned with the data instances''' data = np.concatenate([np.repeat(dt_embed, data.shape[0], axis=0), data], axis=-1) - + if name in ('ETTh1', 'ETTh2', 'electricity'): pred_lens = [24, 48, 168, 336, 720] + elif name == 'ts2vec_online_retail_II_data': + pred_lens = [1, 2, 3, 4, 5] else: pred_lens = [24, 48, 96, 288, 672] - + return data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols def load_anomaly(name): res = pkl_load(f'datasets/{name}.pkl') return res['all_train_data'], res['all_train_labels'], res['all_train_timestamps'], \ - res['all_test_data'], res['all_test_labels'], res['all_test_timestamps'], \ - res['delay'] + res['all_test_data'], res['all_test_labels'], res['all_test_timestamps'], \ + res['delay'] def gen_ano_train_data(all_train_data): @@ -196,3 +244,134 @@ def gen_ano_train_data(all_train_data): pretrain_data.append(train_data) pretrain_data = np.expand_dims(np.stack(pretrain_data), 2) return pretrain_data + +#----------------------------------------------------------------------------- +def _get_time_features(dt): + return np.stack([ + dt.dayofweek.to_numpy(), # Day of the week + dt.day.to_numpy(), # Day of the month + dt.dayofyear.to_numpy(), # Day of the year + dt.month.to_numpy(), # Month + dt.to_series().apply(lambda x: x.strftime("%V")).astype(int).to_numpy(), # Week of the year + ], axis=1).astype(np.float) + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +def load_online_retail(name): +# """ +# Loads and preprocesses the Online Retail dataset for forecasting tasks. +# Ensures both Price, Quantity, and customer embeddings are included throughout. """ +# +# Returns: +# train_data: Dictionary mapping customer_id to their training data (DataFrame). +# valid_data: Dictionary mapping customer_id to their validation data (DataFrame). +# test_data: Dictionary mapping customer_id to their test data (DataFrame). +# customer_embeddings: Fixed embeddings for each customer ID. +# customer_id_to_index: Mapping from customer IDs to embedding indices. +# scaler: Fitted scaler for data normalization. 
+# """ + + # Load data + data = pd.read_csv(f'datasets/{name}.csv', index_col='InvoiceDate', parse_dates=True) + + # Convert 'InvoiceDate' to datetime if not already done + if not pd.api.types.is_datetime64_any_dtype(data.index): + data.index = pd.to_datetime(data.index) + + # Remove rows with any None or NaN values + data.dropna(inplace=True) + + # Remove rows with negative 'Quantity' or 'Price' values + data = data[(data['Quantity'] > 0) & (data['Price'] > 0)] + logging.info(f"Data shape after removing negative values: {data.shape}") + + # Remove Invoice column + data.drop(columns=['Invoice'], inplace=True) + + # Rename 'Customer ID' to 'CustomerID' + if 'Customer ID' in data.columns: + data.rename(columns={'Customer ID': 'CustomerID'}, inplace=True) + + if 'CustomerID' not in data.columns: + raise KeyError("The 'CustomerID' column is missing from the dataset.") + logging.info(f"Data columns and shape: {data.columns, data.shape}") + + # Extract customerIDs and create numerical mapping + customer_ids = data['CustomerID'].unique() + logging.info(f"Unique customer ID count: {customer_ids.shape[0]}") + customer_id_to_index = {cid: idx for idx, cid in enumerate(customer_ids)} + logging.info(f"Unique customer ID to index count: {len(customer_id_to_index)}") + + # Create fixed embeddings for each unique customer ID + customer_embeddings = torch.nn.Embedding(len(customer_ids), 4) + customer_embeddings.weight.requires_grad = False # Fixed embeddings + + # Map customer IDs to their embeddings + customer_embeddings_tensor = customer_embeddings(torch.tensor(data['CustomerID'].map(customer_id_to_index).values)) + logging.info(f"Customer embeddings shape: {customer_embeddings_tensor.shape}") + logging.info(f"Customer embeddings: {customer_embeddings_tensor}") + + # Replace 'CustomerID' column with the corresponding customer embedding + customer_embeddings_expanded = customer_embeddings_tensor.detach().numpy() + logging.info(f"Customer embeddings expanded shape: {customer_embeddings_expanded.shape}") + + # Drop the original 'CustomerID' column + data = data.drop(columns=['CustomerID']) + + # Extract time features for the date/index column + dt_embed = _get_time_features(data.index) + n_covariate_cols = dt_embed.shape[-1] + customer_embeddings_expanded.shape[-1] + logging.info(f"Number of covariate columns: {n_covariate_cols}") + logging.info(f"Time features shape: {dt_embed.shape}") + + # Convert the DataFrame to a NumPy array + data = data.to_numpy() + + train_slice = slice(0, int(0.6 * len(data))) + valid_slice = slice(int(0.6 * len(data)), int(0.8 * len(data))) + test_slice = slice(int(0.8 * len(data)), len(data)) + + # Normalise data + scaler = MinMaxScaler().fit(data[train_slice]) + data = scaler.transform(data) + + # Reshape data based on dataset structure + data = np.expand_dims(data, 0) # Single instance case + + if n_covariate_cols > 0: + dt_scaler = MinMaxScaler().fit(dt_embed[train_slice]) + dt_embed = np.expand_dims(dt_scaler.transform(dt_embed), 0) + # Concatenating the time embeddings to the data + data = np.concatenate([np.repeat(dt_embed, data.shape[0], axis=0), data], axis=-1) + + cid_scaler = MinMaxScaler().fit(customer_embeddings_expanded[train_slice]) + cid_embed = np.expand_dims(cid_scaler.transform(customer_embeddings_expanded), 0) + # Concatenating the customer ID embeddings to the data + data = np.concatenate([np.repeat(cid_embed, data.shape[0], axis=0), data], axis=-1) + + pred_lens = [1, 2, 3, 4, 5] + + return data, train_slice, valid_slice, test_slice, scaler, pred_lens, 
n_covariate_cols + + + + # # Filter customerIDs with at least 5 records + # customer_counts = data['CustomerID'].value_counts() + # valid_customers = customer_counts[customer_counts >= 5].index + # data = data[data['CustomerID'].isin(valid_customers)] + # + # # Group by CustomerID and sort by InvoiceDate + # grouped = data.groupby('CustomerID').apply(lambda x: x.sort_values('InvoiceDate')).reset_index(drop=True) + # + # # Create time series data for each CustomerID + # customer_data = {} + # for customer_id, group in grouped.groupby('CustomerID'): + # group.set_index('InvoiceDate', inplace=True) + # customer_data[customer_id] = group[['CustomerID', 'Quantity', 'Price']] + + # Set other forecasting parameters + # scaler = MinMaxScaler() # alternative to StandardScaler which avoid negative values + # pred_lens = [1, 2, 3] + # n_covariate_cols = 2 + # + # return customer_data diff --git a/models/encoder.py b/models/encoder.py index cedb13b0..c4aad5d5 100644 --- a/models/encoder.py +++ b/models/encoder.py @@ -41,7 +41,7 @@ def __init__(self, input_dims, output_dims, hidden_dims=64, depth=10, mask_mode= def forward(self, x, mask=None): # x: B x T x input_dims nan_mask = ~x.isnan().any(axis=-1) x[~nan_mask] = 0 - x = self.input_fc(x) # B x T x Ch + x = self.input_fc(x) # B x T x Ch NOTE: Ch = output_dims (I think...) # generate & apply mask if mask is None: diff --git a/requirements.txt b/requirements.txt index 9cc27049..0173de5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,7 @@ numpy==1.19.2 statsmodels==0.12.2 pandas==1.0.1 scikit_learn==0.24.2 +xlrd==1.2.0 + +matplotlib==3.3.4 +scikit-learn~=0.24.2 \ No newline at end of file diff --git a/tasks/__init__.py b/tasks/__init__.py index 5e38a07c..3a09b083 100644 --- a/tasks/__init__.py +++ b/tasks/__init__.py @@ -1,3 +1,3 @@ from .classification import eval_classification -from .forecasting import eval_forecasting +from .forecasting import eval_forecasting, eval_forecasting_customer_embed from .anomaly_detection import eval_anomaly_detection, eval_anomaly_detection_coldstart diff --git a/tasks/_eval_protocols.py b/tasks/_eval_protocols.py index e413bcf4..d0e9af8b 100644 --- a/tasks/_eval_protocols.py +++ b/tasks/_eval_protocols.py @@ -79,6 +79,18 @@ def fit_knn(features, y): return pipe def fit_ridge(train_features, train_y, valid_features, valid_y, MAX_SAMPLES=100000): + # Replace NaN and infinite values with 0 + train_features = np.nan_to_num(train_features, nan=0.0, posinf=0.0, neginf=0.0) + train_y = np.nan_to_num(train_y, nan=0.0, posinf=0.0, neginf=0.0) + valid_features = np.nan_to_num(valid_features, nan=0.0, posinf=0.0, neginf=0.0) + valid_y = np.nan_to_num(valid_y, nan=0.0, posinf=0.0, neginf=0.0) + + # Ensure values are within the range of float64 + train_features = np.clip(train_features, -1e308, 1e308) + train_y = np.clip(train_y, -1e308, 1e308) + valid_features = np.clip(valid_features, -1e308, 1e308) + valid_y = np.clip(valid_y, -1e308, 1e308) + # If the training set is too large, subsample MAX_SAMPLES examples if train_features.shape[0] > MAX_SAMPLES: split = train_test_split( diff --git a/tasks/forecasting.py b/tasks/forecasting.py index b3493d83..c5b0276c 100644 --- a/tasks/forecasting.py +++ b/tasks/forecasting.py @@ -9,17 +9,17 @@ def generate_pred_samples(features, data, pred_len, drop=0): features = features[:, drop:] labels = labels[:, drop:] return features.reshape(-1, features.shape[-1]), \ - labels.reshape(-1, labels.shape[2]*labels.shape[3]) + labels.reshape(-1, labels.shape[2]*labels.shape[3]) def 
cal_metrics(pred, target): return { 'MSE': ((pred - target) ** 2).mean(), 'MAE': np.abs(pred - target).mean() } - -def eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols): + +def eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols, dataset_name = None): padding = 200 - + t = time.time() all_repr = model.encode( data, @@ -29,15 +29,15 @@ def eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, batch_size=256 ) ts2vec_infer_time = time.time() - t - + train_repr = all_repr[:, train_slice] valid_repr = all_repr[:, valid_slice] test_repr = all_repr[:, test_slice] - + train_data = data[:, train_slice, n_covariate_cols:] valid_data = data[:, valid_slice, n_covariate_cols:] test_data = data[:, test_slice, n_covariate_cols:] - + ours_result = {} lr_train_time = {} lr_infer_time = {} @@ -46,26 +46,48 @@ def eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, train_features, train_labels = generate_pred_samples(train_repr, train_data, pred_len, drop=padding) valid_features, valid_labels = generate_pred_samples(valid_repr, valid_data, pred_len) test_features, test_labels = generate_pred_samples(test_repr, test_data, pred_len) - + t = time.time() lr = eval_protocols.fit_ridge(train_features, train_labels, valid_features, valid_labels) lr_train_time[pred_len] = time.time() - t - + t = time.time() test_pred = lr.predict(test_features) lr_infer_time[pred_len] = time.time() - t - ori_shape = test_data.shape[0], -1, pred_len, test_data.shape[2] - test_pred = test_pred.reshape(ori_shape) - test_labels = test_labels.reshape(ori_shape) - + if dataset_name == 'ts2vec_online_retail_II_data': + test_pred = test_pred.reshape(-1, 2) + test_labels = test_labels.reshape(-1, 2) + elif dataset_name == 'restructured_ts2vec_online_retail': + test_pred = test_pred.reshape(-1, 4) + test_labels = test_labels.reshape(-1, 4) + else: + ori_shape = test_data.shape[0], -1, pred_len, test_data.shape[2] + test_pred = test_pred.reshape(ori_shape) + test_labels = test_labels.reshape(ori_shape) + if test_data.shape[0] > 1: test_pred_inv = scaler.inverse_transform(test_pred.swapaxes(0, 3)).swapaxes(0, 3) test_labels_inv = scaler.inverse_transform(test_labels.swapaxes(0, 3)).swapaxes(0, 3) + elif dataset_name in ['ts2vec_online_retail_II_data','restructured_ts2vec_online_retail']: + if dataset_name == 'ts2vec_online_retail_II_data': + test_pred_inv = scaler.inverse_transform(test_pred.reshape(-1, 2)).reshape(test_pred.shape) + test_labels_inv = scaler.inverse_transform(test_labels.reshape(-1, 2)).reshape(test_labels.shape) + else: # restructured_ts2vec_online_retail + test_pred_inv = scaler.inverse_transform(test_pred.reshape(-1, 4)).reshape(test_pred.shape) + test_labels_inv = scaler.inverse_transform(test_labels.reshape(-1, 4)).reshape(test_labels.shape) + + # Remove NaN values from all datasets + valid_indices = ~np.isnan(test_pred).any(axis=1) & ~np.isnan(test_pred_inv).any(axis=1) & ~np.isnan(test_labels).any(axis=1) & ~np.isnan(test_labels_inv).any(axis=1) + test_pred = test_pred[valid_indices] + test_pred_inv = test_pred_inv[valid_indices] + test_labels = test_labels[valid_indices] + test_labels_inv = test_labels_inv[valid_indices] + else: test_pred_inv = scaler.inverse_transform(test_pred) test_labels_inv = scaler.inverse_transform(test_labels) - + out_log[pred_len] = { 'norm': test_pred, 'raw': test_pred_inv, @@ -76,11 +98,80 @@ def eval_forecasting(model, data, train_slice, 
valid_slice, test_slice, scaler, 'norm': cal_metrics(test_pred, test_labels), 'raw': cal_metrics(test_pred_inv, test_labels_inv) } - - eval_res = { - 'ours': ours_result, - 'ts2vec_infer_time': ts2vec_infer_time, - 'lr_train_time': lr_train_time, - 'lr_infer_time': lr_infer_time - } + + eval_res = { + 'ours': ours_result, + 'ts2vec_infer_time': ts2vec_infer_time, + 'lr_train_time': lr_train_time, + 'lr_infer_time': lr_infer_time + } return out_log, eval_res + +def eval_forecasting_customer_embed(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols): + padding = 200 + + t = time.time() + all_repr = model.encode( + data, + causal=True, + sliding_length=1, + sliding_padding=padding, + batch_size=256 + ) + ts2vec_infer_time = time.time() - t + + # Extract representations (embeddings) for each time slice + train_repr = all_repr[:, train_slice] + valid_repr = all_repr[:, valid_slice] + test_repr = all_repr[:, test_slice] + + # Extract data for each time slice + train_data = data[:, train_slice, n_covariate_cols:] + valid_data = data[:, valid_slice, n_covariate_cols:] + test_data = data[:, test_slice, n_covariate_cols:] + + ours_result = {} + lr_train_time = {} + lr_infer_time = {} + out_log = {} + for pred_len in pred_lens: + train_features, train_labels = generate_pred_samples(train_repr, train_data, pred_len, drop=padding) + valid_features, valid_labels = generate_pred_samples(valid_repr, valid_data, pred_len) + test_features, test_labels = generate_pred_samples(test_repr, test_data, pred_len) + + t = time.time() + lr = eval_protocols.fit_ridge(train_features, train_labels, valid_features, valid_labels) + lr_train_time[pred_len] = time.time() - t + + t = time.time() + test_pred = lr.predict(test_features) + lr_infer_time[pred_len] = time.time() - t + + test_pred = test_pred.reshape(-1, 2) + test_labels = test_labels.reshape(-1, 2) + + test_pred_inv = scaler.inverse_transform(test_pred.reshape(-1, 2)).reshape(test_pred.shape) + test_labels_inv = scaler.inverse_transform(test_labels.reshape(-1, 2)).reshape(test_labels.shape) + + # NOTE: You can now use train_repr, valid_repr, and test_repr for unsupervised tasks (e.g., clustering, visualization) + + out_log[pred_len] = { + 'norm': test_pred, + 'raw': test_pred_inv, + 'norm_gt': test_labels, + 'raw_gt': test_labels_inv + } + ours_result[pred_len] = { + 'norm': cal_metrics(test_pred, test_labels), + 'raw': cal_metrics(test_pred_inv, test_labels_inv) + } + + eval_res = { + 'ours': ours_result, + 'ts2vec_infer_time': ts2vec_infer_time, + 'lr_train_time': lr_train_time, + 'lr_infer_time': lr_infer_time + } + + return out_log, eval_res + diff --git a/train.py b/train.py index d0ade884..fb8346c2 100644 --- a/train.py +++ b/train.py @@ -1,18 +1,27 @@ +import config +import device +import pandas as pd import torch +from matplotlib import pyplot as plt +from scipy.ndimage import label +from torch import nn import numpy as np import argparse import os import sys import time import datetime + +from sklearn.preprocessing import MinMaxScaler + from ts2vec import TS2Vec import tasks import datautils from utils import init_dl_program, name_with_datetime, pkl_save, data_dropout def save_checkpoint_callback( - save_every=1, - unit='epoch' + save_every=1, + unit='epoch' ): assert unit in ('epoch', 'iter') def callback(model, loss): @@ -32,62 +41,72 @@ def callback(model, loss): parser.add_argument('--repr-dims', type=int, default=320, help='The representation dimension (defaults to 320)') parser.add_argument('--max-train-length', 
type=int, default=3000, help='For sequence with a length greater than , it would be cropped into some sequences, each of which has a length less than (defaults to 3000)') parser.add_argument('--iters', type=int, default=None, help='The number of iterations') - parser.add_argument('--epochs', type=int, default=None, help='The number of epochs') + parser.add_argument('--epochs', type=int, default=8, help='The number of epochs') parser.add_argument('--save-every', type=int, default=None, help='Save the checkpoint every iterations/epochs') parser.add_argument('--seed', type=int, default=None, help='The random seed') parser.add_argument('--max-threads', type=int, default=None, help='The maximum allowed number of threads used by this process') parser.add_argument('--eval', action="store_true", help='Whether to perform evaluation after training') parser.add_argument('--irregular', type=float, default=0, help='The ratio of missing observations (defaults to 0)') args = parser.parse_args() - + print("Dataset:", args.dataset) print("Arguments:", str(args)) - + device = init_dl_program(args.gpu, seed=args.seed, max_threads=args.max_threads) - + + # Define variables outside the conditional block for online retail dataset + customer_embedding_layer = None + customer_id_to_index = None + + print('Loading data... ', end='') if args.loader == 'UCR': task_type = 'classification' train_data, train_labels, test_data, test_labels = datautils.load_UCR(args.dataset) - + elif args.loader == 'UEA': task_type = 'classification' train_data, train_labels, test_data, test_labels = datautils.load_UEA(args.dataset) - + elif args.loader == 'forecast_csv': task_type = 'forecasting' data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_csv(args.dataset) train_data = data[:, train_slice] - + elif args.loader == 'forecast_csv_univar': task_type = 'forecasting' data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_csv(args.dataset, univar=True) train_data = data[:, train_slice] - + elif args.loader == 'forecast_npy': task_type = 'forecasting' data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_npy(args.dataset) train_data = data[:, train_slice] - + elif args.loader == 'forecast_npy_univar': task_type = 'forecasting' data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_npy(args.dataset, univar=True) train_data = data[:, train_slice] - + elif args.loader == 'anomaly': task_type = 'anomaly_detection' all_train_data, all_train_labels, all_train_timestamps, all_test_data, all_test_labels, all_test_timestamps, delay = datautils.load_anomaly(args.dataset) train_data = datautils.gen_ano_train_data(all_train_data) - + elif args.loader == 'anomaly_coldstart': task_type = 'anomaly_detection_coldstart' all_train_data, all_train_labels, all_train_timestamps, all_test_data, all_test_labels, all_test_timestamps, delay = datautils.load_anomaly(args.dataset) train_data, _, _, _ = datautils.load_UCR('FordA') - + + elif args.loader == 'retail': + task_type = 'forecasting' + # Load the data + data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_online_retail(args.dataset) + train_data = data[:, train_slice] + else: raise ValueError(f"Unknown loader {args.loader}.") - - + if args.irregular > 0: if task_type == 'classification': train_data = data_dropout(train_data, args.irregular) @@ -95,52 
+114,96 @@ def callback(model, loss): else: raise ValueError(f"Task type {task_type} is not supported when irregular>0.") print('done') - + config = dict( batch_size=args.batch_size, lr=args.lr, output_dims=args.repr_dims, max_train_length=args.max_train_length ) - + if args.save_every is not None: unit = 'epoch' if args.epochs is not None else 'iter' config[f'after_{unit}_callback'] = save_checkpoint_callback(args.save_every, unit) run_dir = 'training/' + args.dataset + '__' + name_with_datetime(args.run_name) os.makedirs(run_dir, exist_ok=True) - + t = time.time() - + + # Define the TS2Vec model + + model = TS2Vec( input_dims=train_data.shape[-1], device=device, **config ) + # Training code loss_log = model.fit( train_data, n_epochs=args.epochs, n_iters=args.iters, verbose=True ) + model.save(f'{run_dir}/model.pkl') + # Plot the training loss log + plt.plot(loss_log, label='Training Loss') + plt.xlabel('Epoch') + plt.ylabel('Loss') + plt.title('Training Loss Over Epochs') + plt.grid(True) + plt.savefig(f'{run_dir}/loss_plot.png') + # Draw a straight line to show the general trend + z = np.polyfit(range(len(loss_log)), loss_log, 1) + p = np.poly1d(z) + plt.plot(range(len(loss_log)), p(range(len(loss_log))), "r--", label='Training Loss Trend') + plt.legend() + # Save the plot + plt.savefig(f'{run_dir}/train_loss_plot.png') + plt.show() + t = time.time() - t print(f"\nTraining time: {datetime.timedelta(seconds=t)}\n") if args.eval: + out = None # Initialise 'out' as None to avoid the NameError in the case of unsupervised tasks + if task_type == 'classification': out, eval_res = tasks.eval_classification(model, train_data, train_labels, test_data, test_labels, eval_protocol='svm') elif task_type == 'forecasting': - out, eval_res = tasks.eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols) + # add case for unsupervised evaluation on Online Retail dataset + if args.dataset in 'ts2vec_online_retail_II_data' or 'restructured_ts2vec_online_retail': + # print data shapes and check for NaN or infinite values + print("Data shape:", data.shape) + print("Train slice:", train_slice) + print("Valid slice:", valid_slice) + print("Test slice:", test_slice) + print("Scaler:", scaler) + print("Prediction lengths:", pred_lens) + print("Number of covariate columns:", n_covariate_cols) + if args.loader == 'retail': + out, eval_res = tasks.eval_forecasting_customer_embed(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols) + else: + out, eval_res = tasks.eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols, args.dataset) + + else: + out, eval_res = tasks.eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols) elif task_type == 'anomaly_detection': out, eval_res = tasks.eval_anomaly_detection(model, all_train_data, all_train_labels, all_train_timestamps, all_test_data, all_test_labels, all_test_timestamps, delay) elif task_type == 'anomaly_detection_coldstart': out, eval_res = tasks.eval_anomaly_detection_coldstart(model, all_train_data, all_train_labels, all_train_timestamps, all_test_data, all_test_labels, all_test_timestamps, delay) else: assert False - pkl_save(f'{run_dir}/out.pkl', out) + + # Save only eval_res when 'out' is None (i.e., for unsupervised tasks) + if out is not None: + pkl_save(f'{run_dir}/out.pkl', out) + pkl_save(f'{run_dir}/eval_res.pkl', eval_res) print('Evaluation result:', eval_res) - print("Finished.") 
+ print("Finished.") + diff --git a/ts2vec.py b/ts2vec.py index de909bd5..76dc227d 100644 --- a/ts2vec.py +++ b/ts2vec.py @@ -5,7 +5,6 @@ from models import TSEncoder from models.losses import hierarchical_contrastive_loss from utils import take_per_row, split_with_nan, centerize_vary_length_series, torch_pad_nan -import math class TS2Vec: '''The TS2Vec model'''