From d03fd631b112893515a7cf07fd6a0b42a23947ee Mon Sep 17 00:00:00 2001
From: Tianshun Gao <tgao13@illinois.edu>
Date: Sat, 15 Feb 2025 04:33:14 -0800
Subject: [PATCH 1/2] interactive analyzer for csv files

---
 GEMstack/utils/analysis.py | 126 +++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 GEMstack/utils/analysis.py

diff --git a/GEMstack/utils/analysis.py b/GEMstack/utils/analysis.py
new file mode 100644
index 000000000..cc550a556
--- /dev/null
+++ b/GEMstack/utils/analysis.py
@@ -0,0 +1,126 @@
+import os
+import pandas as pd
+import numpy as np
+
+def list_log_directories():
+    logs_path = "./logs"
+    # Return the sorted names of the sub-directories of ./logs, or [] when ./logs is not a directory.
+    if not os.path.isdir(logs_path):
+        print("Logs directory does not exist.")
+        return []
+    # os.listdir order is arbitrary; sort so the numbered menu is stable between runs.
+    return sorted(d for d in os.listdir(logs_path) if os.path.isdir(os.path.join(logs_path, d)))
+
+def choose_directory(directories):
+    """Show a numbered menu of `directories`; return the picked path under ./logs, or None."""
+    if not directories:
+        print("No log directories found.")
+        return None
+    print("Available log directories:")
+    print("\n".join(f"{i}. {name}" for i, name in enumerate(directories, start=1)))
+    try:
+        choice = int(input("Select a directory by number: ")) - 1
+    except ValueError:
+        choice = -1  # non-numeric input is reported as an invalid choice instead of crashing
+    if 0 <= choice < len(directories):
+        return os.path.join("./logs", directories[choice])
+    print("Invalid choice.")
+    return None
+
+def choose_csv_file(log_dir):
+    """List the .csv entries of `log_dir` as a numbered menu; return the picked path, or None."""
+    files = sorted(f for f in os.listdir(log_dir) if f.endswith(".csv"))  # listdir order is arbitrary
+    if not files:
+        print("No CSV files found in the selected directory.")
+        return None
+    print("Available CSV files:")
+    print("\n".join(f"{i}. {name}" for i, name in enumerate(files, start=1)))
+    try:
+        choice = int(input("Select a CSV file by number: ")) - 1
+    except ValueError:
+        choice = -1  # non-numeric input is reported as an invalid choice instead of crashing
+    if 0 <= choice < len(files):
+        return os.path.join(log_dir, files[choice])
+    print("Invalid choice.")
+    return None
+
+def load_csv(csv_path):
+    """Read `csv_path` into a DataFrame and print its column names; return None if the path is missing."""
+    if os.path.exists(csv_path):
+        table = pd.read_csv(csv_path)
+        print("CSV file loaded successfully.")
+        print("Available columns:", list(table.columns))
+        return table
+    print("CSV file not found.")
+    return None
+
+def choose_columns(df):
+    """Prompt for comma-separated column names; return `df` limited to the known ones, or None."""
+    typed = input("Enter column names to analyze (comma separated): ").split(',')
+    wanted = [name for name in map(str.strip, typed) if name in df.columns]
+    if not wanted:
+        print("No valid columns selected.")
+    return df[wanted] if wanted else None
+
+def analyze_data(df, log_dir):
+    """Prompt for a method, apply it to each column of `df`, print the result and save it as CSV."""
+    methods = {
+        "1": ("Mean Squared Error (MSE)", lambda x: np.mean(np.square(x))),
+        "2": ("Root Mean Square (RMS)", lambda x: np.sqrt(np.mean(np.square(x)))),
+        "3": ("Mean", np.mean),
+        "4": ("Standard Deviation", np.std),
+        "5": ("Custom Lambda Function", None)
+    }
+    print("Available analysis methods:")
+    for key, (name, _) in methods.items():
+        print(f"{key}. {name}")
+    choice = input("Select a method by number: ")
+    if choice in methods:
+        method_name, method_func = methods[choice]
+        try:
+            if method_func is None:
+                function_str = input("Enter a lambda function (e.g., lambda x: np.max(x) - np.min(x)): ").strip()
+                method_func = eval(function_str, {"np": np})  # SECURITY: runs typed code; trusted local use only
+            else:
+                print(f"Applying {method_name}...")
+            result = df.apply(method_func)
+            if not isinstance(result, pd.Series):
+                raise TypeError("the function must return a single value per column")
+        except Exception as e:
+            # Built-in methods on non-numeric columns used to crash here with an uncaught traceback.
+            print(f"Invalid function: {e}" if choice == "5" else f"Analysis failed: {e}")
+            return
+        print("Analysis result:")
+        print(result.to_string(header=True, index=True))
+        
+        save_path = input("Enter a path to save the result (press Enter to save in the chosen log directory): ")
+        if not save_path:
+            save_path = os.path.join(log_dir, "analysis_result.csv")
+        
+        result.to_frame().T.to_csv(save_path, index=False, header=True)
+        print(f"Analysis result saved to {save_path}")
+    else:
+        print("Invalid choice.")
+
+def main():
+    """Run the interactive session: pick a log directory, a CSV file and columns, then analyze.
+
+    Returns early, without analyzing, as soon as one of the selection steps yields nothing.
+    """
+    log_dir = choose_directory(list_log_directories())
+    if not log_dir:
+        return
+    csv_path = choose_csv_file(log_dir)
+    if not csv_path:
+        return
+    table = load_csv(csv_path)
+    if table is None:
+        return
+    subset = choose_columns(table)
+    if subset is None:
+        return
+    # Every selection step succeeded; analyze the chosen columns.
+    analyze_data(subset, log_dir)
+
+if __name__ == "__main__":  # start the prompts only when run as a script, not on import
+    main()

From 822493a25d819c0991d9a4795a65c0ca30b991c2 Mon Sep 17 00:00:00 2001
From: Tianshun Gao <tgao13@illinois.edu>
Date: Sat, 15 Feb 2025 21:02:28 -0800
Subject: [PATCH 2/2] add documentation and append timestamps to saved file
 names

---
 GEMstack/utils/analysis.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/GEMstack/utils/analysis.py b/GEMstack/utils/analysis.py
index cc550a556..0a354a6ca 100644
--- a/GEMstack/utils/analysis.py
+++ b/GEMstack/utils/analysis.py
@@ -1,6 +1,32 @@
+"""
+Log Analysis Tool
+
+This program allows users to analyze CSV files stored in log directories. Users can:
+- Select a log directory from the `./logs` folder.
+- Choose a CSV file within the selected directory.
+- Select specific columns to analyze.
+- Choose an analysis method such as MSE, RMS, Mean, or Standard Deviation.
+- Define a custom lambda function for analysis.
+- Save the results in the selected log directory (timestamped file name) or at a custom path.
+
+Usage:
+1. Run the script: `python ./GEMstack/utils/analysis.py`
+2. Follow the prompts to:
+   - Select a log directory.
+   - Choose a CSV file to analyze.
+   - Select the columns for analysis.
+   - Pick a predefined analysis method or define a custom lambda function.
+   - Save the results.
+
+Output:
+- The analysis results are saved in CSV format, with column names included, in the chosen directory.
+
+"""
+
 import os
 import pandas as pd
 import numpy as np
+from datetime import datetime
 
 def list_log_directories():
     logs_path = "./logs"
@@ -95,7 +121,8 @@ def analyze_data(df, log_dir):
         
         save_path = input("Enter a path to save the result (press Enter to save in the chosen log directory): ")
         if not save_path:
-            save_path = os.path.join(log_dir, "analysis_result.csv")
+            timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+            save_path = os.path.join(log_dir, f"analysis_result_{timestamp}.csv")
         
         result.to_frame().T.to_csv(save_path, index=False, header=True)
         print(f"Analysis result saved to {save_path}")