"""
Log Analysis Tool

Interactive command-line helper for analyzing CSV files stored in log
directories. The user can:
- Select a log directory from the `./logs` folder.
- Choose a CSV file within the selected directory.
- Select specific columns to analyze.
- Choose an analysis method such as MSE, RMS, Mean, or Standard Deviation.
- Define a custom lambda function for analysis.
- Save the results in the chosen log directory or at a custom path.

Usage:
1. Run the script: `python ./GEMstack/utils/analysis.py`
2. Follow the prompts to:
   - Select a log directory.
   - Choose a CSV file to analyze.
   - Select the columns for analysis.
   - Pick a predefined analysis method or define a custom lambda function.
   - Save the results.

Output:
- The analysis results are saved in CSV format, with column names included.
  When no path is given, the file is written to the chosen log directory as
  `analysis_result_<timestamp>.csv`.

Security note:
- The "Custom Lambda Function" option evaluates text typed at the prompt with
  `eval`. Only run this tool interactively with input you trust.
"""

import os
from datetime import datetime
from typing import List, Optional

import numpy as np
import pandas as pd

# Default location of the log directories, relative to the working directory.
LOGS_PATH = "./logs"


def _prompt_for_index(prompt: str, count: int) -> Optional[int]:
    """Ask for a 1-based menu number and return it as a 0-based index.

    Args:
        prompt: Text shown to the user.
        count: Number of menu entries; valid answers are 1..count.

    Returns:
        The zero-based index, or None (after printing "Invalid choice.") when
        the answer is not an integer or is out of range.
    """
    raw = input(prompt).strip()
    try:
        index = int(raw) - 1
    except ValueError:
        # Non-numeric input is a user mistake, not a reason to crash.
        print("Invalid choice.")
        return None
    if 0 <= index < count:
        return index
    print("Invalid choice.")
    return None


def list_log_directories(logs_path: str = LOGS_PATH) -> List[str]:
    """Return the names of the sub-directories of ``logs_path``, sorted.

    Args:
        logs_path: Directory that holds the log directories.

    Returns:
        Sorted list of sub-directory names; an empty list (after printing a
        message) when ``logs_path`` is not an existing directory.
    """
    # isdir (rather than exists) so that a plain file named "logs" is
    # reported cleanly instead of making os.listdir raise.
    if not os.path.isdir(logs_path):
        print("Logs directory does not exist.")
        return []

    # Sorted so that the menu numbering is stable between runs.
    return sorted(
        entry
        for entry in os.listdir(logs_path)
        if os.path.isdir(os.path.join(logs_path, entry))
    )


def choose_directory(directories: List[str], logs_path: str = LOGS_PATH) -> Optional[str]:
    """Let the user pick one of ``directories`` from a numbered menu.

    Args:
        directories: Directory names, as returned by ``list_log_directories``.
        logs_path: Parent directory the names are relative to.

    Returns:
        The path of the chosen directory joined onto ``logs_path``, or None
        when there is nothing to choose from or the answer is invalid.
    """
    if not directories:
        print("No log directories found.")
        return None

    print("Available log directories:")
    for number, dir_name in enumerate(directories, start=1):
        print(f"{number}. {dir_name}")

    index = _prompt_for_index("Select a directory by number: ", len(directories))
    if index is None:
        return None
    return os.path.join(logs_path, directories[index])


def choose_csv_file(log_dir: str) -> Optional[str]:
    """Let the user pick one of the CSV files directly inside ``log_dir``.

    Args:
        log_dir: Directory to search (not recursive).

    Returns:
        Path of the chosen CSV file, or None when the directory contains no
        CSV files or the answer is invalid.
    """
    files = sorted(
        entry
        for entry in os.listdir(log_dir)
        if entry.lower().endswith(".csv")
        and os.path.isfile(os.path.join(log_dir, entry))
    )
    if not files:
        print("No CSV files found in the selected directory.")
        return None

    print("Available CSV files:")
    for number, file_name in enumerate(files, start=1):
        print(f"{number}. {file_name}")

    index = _prompt_for_index("Select a CSV file by number: ", len(files))
    if index is None:
        return None
    return os.path.join(log_dir, files[index])


def load_csv(csv_path: str) -> Optional[pd.DataFrame]:
    """Read ``csv_path`` into a DataFrame and print its column names.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame, or None (after printing a message) when the
        file does not exist or cannot be parsed.
    """
    if not os.path.exists(csv_path):
        print("CSV file not found.")
        return None

    try:
        df = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError, OSError) as e:
        # Report unreadable/empty/malformed files the same way as every other
        # failure in this tool: a message and a None result.
        print(f"Failed to load CSV file: {e}")
        return None

    print("CSV file loaded successfully.")
    print("Available columns:", list(df.columns))
    return df


def choose_columns(df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """Ask for a comma-separated list of column names and select them.

    Names are stripped of surrounding whitespace; names that are not columns
    of ``df`` are reported and ignored.

    Args:
        df: DataFrame to select columns from.

    Returns:
        A DataFrame restricted to the valid requested columns (in the order
        typed), or None when no valid column was named.
    """
    answer = input("Enter column names to analyze (comma separated): ")
    requested = [name.strip() for name in answer.split(",")]
    requested = [name for name in requested if name]

    selected_columns = [name for name in requested if name in df.columns]
    unknown_columns = [name for name in requested if name not in df.columns]
    if unknown_columns:
        print(f"Ignoring unknown columns: {unknown_columns}")

    if not selected_columns:
        print("No valid columns selected.")
        return None
    return df[selected_columns]


def analyze_data(df: pd.DataFrame, log_dir: str) -> None:
    """Apply a user-chosen analysis to every column of ``df`` and save it.

    The user picks a predefined method or types a custom function; the
    function is applied column-wise with ``DataFrame.apply``, the result is
    printed and then written as CSV.

    Args:
        df: Columns to analyze.
        log_dir: Directory used for the default output file
            ``analysis_result_<timestamp>.csv`` when no path is entered.

    Returns:
        None. Failures are reported on stdout and end the analysis early.
    """
    # Option 1 is the mean of the squared column values, i.e. it is an MSE
    # only when the column already holds an error signal.
    methods = {
        "1": ("Mean Squared Error (MSE)", lambda x: np.mean(np.square(x))),
        "2": ("Root Mean Square (RMS)", lambda x: np.sqrt(np.mean(np.square(x)))),
        "3": ("Mean", np.mean),
        "4": ("Standard Deviation", np.std),
        "5": ("Custom Lambda Function", None),
    }

    print("Available analysis methods:")
    for key, (name, _) in methods.items():
        print(f"{key}. {name}")

    choice = input("Select a method by number: ").strip()
    if choice not in methods:
        print("Invalid choice.")
        return

    method_name, method_func = methods[choice]
    if method_func is None:
        function_str = input(
            "Enter a lambda function (e.g., lambda x: np.max(x) - np.min(x)): "
        ).strip()
        # SECURITY: eval executes arbitrary Python typed at the prompt (the
        # builtins remain reachable). Acceptable only for an interactive,
        # local tool; never feed this from untrusted or non-interactive input.
        try:
            method_func = eval(function_str, {"np": np})
        except Exception as e:  # user-supplied code may raise anything
            print(f"Invalid function: {e}")
            return
        failure_label = "Invalid function"
    else:
        print(f"Applying {method_name}...")
        failure_label = "Analysis failed"

    try:
        result = df.apply(method_func)
    except Exception as e:  # e.g. non-numeric columns or a faulty custom function
        print(f"{failure_label}: {e}")
        return

    print("Analysis result:")
    print(result.to_string(header=True, index=True))

    save_path = input(
        "Enter a path to save the result (press Enter to save in the chosen log directory): "
    ).strip()
    if not save_path:
        # Timestamped name so repeated runs do not overwrite earlier results.
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        save_path = os.path.join(log_dir, f"analysis_result_{timestamp}.csv")

    # A scalar-per-column function yields a Series, written as a single row
    # with the column names as header. A function that returns a Series per
    # column yields a DataFrame, which is written as-is.
    output = result.to_frame().T if isinstance(result, pd.Series) else result
    try:
        output.to_csv(save_path, index=False, header=True)
    except OSError as e:
        print(f"Failed to save analysis result: {e}")
        return
    print(f"Analysis result saved to {save_path}")


def main() -> None:
    """Run the interactive flow: directory, CSV file, columns, analysis."""
    directories = list_log_directories()
    log_dir = choose_directory(directories)
    if not log_dir:
        return

    csv_path = choose_csv_file(log_dir)
    if not csv_path:
        return

    df = load_csv(csv_path)
    if df is None:
        return

    df_selected = choose_columns(df)
    if df_selected is None:
        return

    analyze_data(df_selected, log_dir)


if __name__ == "__main__":
    main()