From 3ab658d05c13e6bf44eebf226ac0e8cb732b21f7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:38:28 +0000 Subject: [PATCH 1/9] Initial plan From ed78b3067ebd60503521e8f45204d0107b94aef9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:48:43 +0000 Subject: [PATCH 2/9] Add TDR hash verification functionality - Create tdr_hash_utils.jl with functions to compute and compare file hashes - Modify time_domain_reduced_files_exist to check if input files changed - Update cluster_inputs to save hash file after TDR - Use SHA256 for file hashing and YAML for storage Co-authored-by: gschivley <10373332+gschivley@users.noreply.github.com> --- src/GenX.jl | 1 + src/case_runners/case_runner.jl | 24 +- src/time_domain_reduction/tdr_hash_utils.jl | 289 ++++++++++++++++++ .../time_domain_reduction.jl | 25 ++ 4 files changed, 335 insertions(+), 4 deletions(-) create mode 100644 src/time_domain_reduction/tdr_hash_utils.jl diff --git a/src/GenX.jl b/src/GenX.jl index 0eba8586f5..0439fb093e 100644 --- a/src/GenX.jl +++ b/src/GenX.jl @@ -72,6 +72,7 @@ include_all_in_folder("load_inputs") include_all_in_folder("model") include_all_in_folder("write_outputs") +include("time_domain_reduction/tdr_hash_utils.jl") include("time_domain_reduction/time_domain_reduction.jl") include("time_domain_reduction/precluster.jl") include("time_domain_reduction/full_time_series_reconstruction.jl") diff --git a/src/case_runners/case_runner.jl b/src/case_runners/case_runner.jl index 504cd8c378..52a2bcdb48 100644 --- a/src/case_runners/case_runner.jl +++ b/src/case_runners/case_runner.jl @@ -41,11 +41,27 @@ function run_genx_case!(case::AbstractString, optimizer::Any = HiGHS.Optimizer) end end -function time_domain_reduced_files_exist(tdrpath) +function time_domain_reduced_files_exist(tdrpath, case_path=nothing, setup=nothing) tdr_demand = file_exists(tdrpath, ["Demand_data.csv", "Load_data.csv"]) tdr_genvar = isfile(joinpath(tdrpath, "Generators_variability.csv")) tdr_fuels = isfile(joinpath(tdrpath, "Fuels_data.csv")) - return (tdr_demand && tdr_genvar && tdr_fuels) + files_exist = (tdr_demand && tdr_genvar && tdr_fuels) + + # If files don't exist, return false + if !files_exist + return false + end + + # If case_path and setup are provided, also check if input files have changed + if !isnothing(case_path) && !isnothing(setup) + inputs_changed = tdr_inputs_have_changed(case_path, tdrpath, setup) + if inputs_changed + println("TDR input files or settings have changed since last clustering.") + return false + end + end + + return true end function run_genx_case_simple!(case::AbstractString, mysetup::Dict, optimizer::Any) @@ -56,7 +72,7 @@ function run_genx_case_simple!(case::AbstractString, mysetup::Dict, optimizer::A TDRpath = joinpath(case, mysetup["TimeDomainReductionFolder"]) system_path = joinpath(case, mysetup["SystemFolder"]) prevent_doubled_timedomainreduction(system_path) - if !time_domain_reduced_files_exist(TDRpath) + if !time_domain_reduced_files_exist(TDRpath, case, mysetup) println("Clustering Time Series Data (Grouped)...") cluster_inputs(case, settings_path, mysetup) else @@ -121,7 +137,7 @@ function run_genx_case_multistage!(case::AbstractString, mysetup::Dict, optimize TDRpath = joinpath(first_stage_path, mysetup["TimeDomainReductionFolder"]) system_path = joinpath(first_stage_path, mysetup["SystemFolder"]) prevent_doubled_timedomainreduction(system_path) - if !time_domain_reduced_files_exist(TDRpath) + if !time_domain_reduced_files_exist(TDRpath, case, mysetup) if (mysetup["MultiStage"] == 1) && (TDRSettingsDict["MultiStageConcatenate"] == 0) println("Clustering Time Series Data (Individually)...") diff --git a/src/time_domain_reduction/tdr_hash_utils.jl b/src/time_domain_reduction/tdr_hash_utils.jl new file mode 100644 index 0000000000..6921340e3b --- /dev/null +++ b/src/time_domain_reduction/tdr_hash_utils.jl @@ -0,0 +1,289 @@ +""" + tdr_hash_utils.jl + +Utilities for computing and verifying hashes of time series input files +used in Time Domain Reduction (TDR) to detect when input data has changed. +""" + +using SHA + +@doc raw""" + compute_file_hash(filepath::AbstractString) + +Compute SHA256 hash of a file. + +# Arguments +- `filepath::AbstractString`: Path to the file to hash + +# Returns +- String: Hexadecimal representation of the SHA256 hash + +# Example +```julia +hash = compute_file_hash("path/to/file.csv") +``` +""" +function compute_file_hash(filepath::AbstractString) + if !isfile(filepath) + return nothing + end + open(filepath, "r") do file + return bytes2hex(sha256(file)) + end +end + +@doc raw""" + get_tdr_input_files(case_path::AbstractString, setup::Dict) + +Get list of time series input files that should be hashed for TDR validation. + +# Arguments +- `case_path::AbstractString`: Path to the case directory +- `setup::Dict`: GenX settings dictionary + +# Returns +- Dict{String, String}: Dictionary mapping file keys to their full paths + +# Example +```julia +files = get_tdr_input_files("/path/to/case", setup) +``` +""" +function get_tdr_input_files(case_path::AbstractString, setup::Dict) + system_path = joinpath(case_path, setup["SystemFolder"]) + settings_path = joinpath(case_path, "settings") + + files = Dict{String, String}() + + # Add demand data file (either Demand_data.csv or Load_data.csv) + demand_file = joinpath(system_path, "Demand_data.csv") + if isfile(demand_file) + files["Demand_data"] = demand_file + else + load_file = joinpath(system_path, "Load_data.csv") + if isfile(load_file) + files["Load_data"] = load_file + end + end + + # Add generator variability file + genvar_file = joinpath(system_path, "Generators_variability.csv") + if isfile(genvar_file) + files["Generators_variability"] = genvar_file + end + + # Add fuels data file + fuels_file = joinpath(system_path, "Fuels_data.csv") + if isfile(fuels_file) + files["Fuels_data"] = fuels_file + end + + # Add TDR settings file + tdr_settings_file = joinpath(settings_path, "time_domain_reduction_settings.yml") + if isfile(tdr_settings_file) + files["TDR_settings"] = tdr_settings_file + end + + return files +end + +@doc raw""" + get_tdr_input_files_multistage(case_path::AbstractString, setup::Dict, stage_id::Int) + +Get list of time series input files for a specific multi-stage planning stage. + +# Arguments +- `case_path::AbstractString`: Path to the case directory +- `setup::Dict`: GenX settings dictionary +- `stage_id::Int`: Stage identifier for multi-stage problems + +# Returns +- Dict{String, String}: Dictionary mapping file keys to their full paths +""" +function get_tdr_input_files_multistage(case_path::AbstractString, + setup::Dict, + stage_id::Int) + input_stage_path = joinpath(case_path, "inputs", "inputs_p$(stage_id)") + system_path = joinpath(input_stage_path, setup["SystemFolder"]) + settings_path = joinpath(case_path, "settings") + + files = Dict{String, String}() + + # Add demand data file + demand_file = joinpath(system_path, "Demand_data.csv") + if isfile(demand_file) + files["Demand_data_p$(stage_id)"] = demand_file + else + load_file = joinpath(system_path, "Load_data.csv") + if isfile(load_file) + files["Load_data_p$(stage_id)"] = load_file + end + end + + # Add generator variability file + genvar_file = joinpath(system_path, "Generators_variability.csv") + if isfile(genvar_file) + files["Generators_variability_p$(stage_id)"] = genvar_file + end + + # Add fuels data file + fuels_file = joinpath(system_path, "Fuels_data.csv") + if isfile(fuels_file) + files["Fuels_data_p$(stage_id)"] = fuels_file + end + + # Add TDR settings file (shared across stages) + tdr_settings_file = joinpath(settings_path, "time_domain_reduction_settings.yml") + if isfile(tdr_settings_file) + files["TDR_settings"] = tdr_settings_file + end + + return files +end + +@doc raw""" + compute_tdr_input_hashes(case_path::AbstractString, setup::Dict) + +Compute hashes for all TDR input files. + +# Arguments +- `case_path::AbstractString`: Path to the case directory +- `setup::Dict`: GenX settings dictionary + +# Returns +- Dict{String, String}: Dictionary mapping file keys to their hash values + +# Example +```julia +hashes = compute_tdr_input_hashes("/path/to/case", setup) +``` +""" +function compute_tdr_input_hashes(case_path::AbstractString, setup::Dict) + files = get_tdr_input_files(case_path, setup) + hashes = Dict{String, String}() + + for (key, filepath) in files + hash = compute_file_hash(filepath) + if !isnothing(hash) + hashes[key] = hash + end + end + + return hashes +end + +@doc raw""" + save_tdr_hash_file(tdr_results_path::AbstractString, hashes::Dict) + +Save computed hashes to a YAML file in the TDR_Results folder. + +# Arguments +- `tdr_results_path::AbstractString`: Path to the TDR_Results directory +- `hashes::Dict`: Dictionary of file hashes to save + +# Example +```julia +save_tdr_hash_file("/path/to/TDR_Results", hashes) +``` +""" +function save_tdr_hash_file(tdr_results_path::AbstractString, hashes::Dict) + # Ensure the directory exists + if !isdir(tdr_results_path) + mkpath(tdr_results_path) + end + + hash_file_path = joinpath(tdr_results_path, "tdr_input_hashes.yml") + + # Add metadata + hash_data = Dict( + "created_at" => string(Dates.now()), + "file_hashes" => hashes + ) + + YAML.write_file(hash_file_path, hash_data) + + return hash_file_path +end + +@doc raw""" + load_tdr_hash_file(tdr_results_path::AbstractString) + +Load previously saved hashes from the TDR_Results folder. + +# Arguments +- `tdr_results_path::AbstractString`: Path to the TDR_Results directory + +# Returns +- Union{Dict, Nothing}: Dictionary of file hashes, or nothing if file doesn't exist + +# Example +```julia +stored_hashes = load_tdr_hash_file("/path/to/TDR_Results") +``` +""" +function load_tdr_hash_file(tdr_results_path::AbstractString) + hash_file_path = joinpath(tdr_results_path, "tdr_input_hashes.yml") + + if !isfile(hash_file_path) + return nothing + end + + try + hash_data = YAML.load_file(hash_file_path) + return hash_data["file_hashes"] + catch e + @warn "Failed to load TDR hash file: $e" + return nothing + end +end + +@doc raw""" + tdr_inputs_have_changed(case_path::AbstractString, tdr_results_path::AbstractString, setup::Dict) + +Check if TDR input files have changed since the last clustering. + +# Arguments +- `case_path::AbstractString`: Path to the case directory +- `tdr_results_path::AbstractString`: Path to the TDR_Results directory +- `setup::Dict`: GenX settings dictionary + +# Returns +- Bool: true if inputs have changed or hash file doesn't exist, false otherwise + +# Example +```julia +if tdr_inputs_have_changed(case, tdr_path, setup) + println("Input files have changed, need to re-run TDR") +end +``` +""" +function tdr_inputs_have_changed(case_path::AbstractString, + tdr_results_path::AbstractString, + setup::Dict) + # Load stored hashes + stored_hashes = load_tdr_hash_file(tdr_results_path) + + # If no hash file exists, assume files have changed + if isnothing(stored_hashes) + return true + end + + # Compute current hashes + current_hashes = compute_tdr_input_hashes(case_path, setup) + + # Check if any hash has changed + for (key, current_hash) in current_hashes + if !haskey(stored_hashes, key) || stored_hashes[key] != current_hash + return true + end + end + + # Check if any stored file is missing from current files + for key in keys(stored_hashes) + if !haskey(current_hashes, key) + return true + end + end + + return false +end diff --git a/src/time_domain_reduction/time_domain_reduction.jl b/src/time_domain_reduction/time_domain_reduction.jl index 36aa7ecb6d..3229dd3cd1 100644 --- a/src/time_domain_reduction/time_domain_reduction.jl +++ b/src/time_domain_reduction/time_domain_reduction.jl @@ -1364,6 +1364,15 @@ function cluster_inputs(inpath, end YAML.write_file(joinpath(inpath, "inputs", Stage_Outfiles[per]["YAML"]), myTDRsetup) + + ### Save hash file for input validation (once per stage) + if v + println("Writing hash file for input validation...") + end + stage_case_path = joinpath(inpath, "inputs", "inputs_p$per") + input_hashes = compute_tdr_input_hashes(stage_case_path, mysetup) + stage_tdr_path = joinpath(stage_case_path, TimeDomainReductionFolder) + save_tdr_hash_file(stage_tdr_path, input_hashes) end else @@ -1511,6 +1520,15 @@ function cluster_inputs(inpath, YAML.write_file( joinpath(inpath, "inputs", input_stage_directory, YAML_Outfile), myTDRsetup) + + ### Save hash file for input validation + if v + println("Writing hash file for input validation...") + end + stage_case_path = joinpath(inpath, "inputs", input_stage_directory) + input_hashes = compute_tdr_input_hashes(stage_case_path, mysetup) + stage_tdr_path = joinpath(stage_case_path, TimeDomainReductionFolder) + save_tdr_hash_file(stage_tdr_path, input_hashes) end else if v @@ -1623,6 +1641,13 @@ function cluster_inputs(inpath, println("Writing .yml settings...") end YAML.write_file(joinpath(inpath, YAML_Outfile), myTDRsetup) + + ### Save hash file for input validation + if v + println("Writing hash file for input validation...") + end + input_hashes = compute_tdr_input_hashes(inpath, mysetup) + save_tdr_hash_file(joinpath(inpath, TimeDomainReductionFolder), input_hashes) end return Dict("OutputDF" => FinalOutputData, From 292ca58499d25eefd5b7f3608f186ace215b0725 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:50:16 +0000 Subject: [PATCH 3/9] Add comprehensive tests for TDR hash verification - Create test_tdr_hash_verification.jl with unit tests - Test hash computation, storage, loading, and change detection - Add test to main test suite Co-authored-by: gschivley <10373332+gschivley@users.noreply.github.com> --- src/GenX.jl | 1 + test/runtests.jl | 4 + test/test_tdr_hash_verification.jl | 146 +++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 test/test_tdr_hash_verification.jl diff --git a/src/GenX.jl b/src/GenX.jl index 0439fb093e..511f9cd96a 100644 --- a/src/GenX.jl +++ b/src/GenX.jl @@ -38,6 +38,7 @@ using RecursiveArrayTools using Statistics using HiGHS using Logging +using SHA using PrecompileTools: @compile_workload diff --git a/test/runtests.jl b/test/runtests.jl index 57b4d68454..9324228128 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -24,6 +24,10 @@ end include("test_time_domain_reduction.jl") end + @testset "TDR Hash Verification" begin + include("test_tdr_hash_verification.jl") + end + @testset "Piecewise Fuel" begin include("test_piecewisefuel.jl") end diff --git a/test/test_tdr_hash_verification.jl b/test/test_tdr_hash_verification.jl new file mode 100644 index 0000000000..32c4898610 --- /dev/null +++ b/test/test_tdr_hash_verification.jl @@ -0,0 +1,146 @@ +module TestTDRHashVerification + +import GenX +import Test +import YAML + +include(joinpath(@__DIR__, "utilities.jl")) + +# suppress printing +console_out = stdout +redirect_stdout(devnull) + +# Test setup +test_folder = "TDR" +test_system_path = joinpath(test_folder, "system") +test_settings_path = test_folder + +# Create a temporary test case directory +temp_test_dir = mktempdir() +temp_system_dir = joinpath(temp_test_dir, "system") +temp_settings_dir = joinpath(temp_test_dir, "settings") +temp_tdr_results = joinpath(temp_test_dir, "TDR_Results") + +# Setup test environment +mkpath(temp_system_dir) +mkpath(temp_settings_dir) +mkpath(temp_tdr_results) + +# Copy test files +cp(joinpath(test_system_path, "Demand_data.csv"), joinpath(temp_system_dir, "Demand_data.csv")) +cp(joinpath(test_system_path, "Generators_variability.csv"), joinpath(temp_system_dir, "Generators_variability.csv")) +cp(joinpath(test_system_path, "Fuels_data.csv"), joinpath(temp_system_dir, "Fuels_data.csv")) +cp(joinpath(test_settings_path, "time_domain_reduction_settings.yml"), joinpath(temp_settings_dir, "time_domain_reduction_settings.yml")) + +# Create test setup dictionary +test_setup = Dict( + "SystemFolder" => "system", + "TimeDomainReductionFolder" => "TDR_Results" +) + +# restore printing +redirect_stdout(console_out) + +# Test 1: compute_file_hash function +Test.@testset "compute_file_hash" begin + demand_file = joinpath(temp_system_dir, "Demand_data.csv") + hash1 = GenX.compute_file_hash(demand_file) + + # Hash should be a 64-character hex string (SHA256) + Test.@test length(hash1) == 64 + Test.@test all(c -> c in "0123456789abcdef", hash1) + + # Computing hash twice should give same result + hash2 = GenX.compute_file_hash(demand_file) + Test.@test hash1 == hash2 + + # Non-existent file should return nothing + Test.@test isnothing(GenX.compute_file_hash("nonexistent_file.csv")) +end + +# Test 2: get_tdr_input_files function +Test.@testset "get_tdr_input_files" begin + files = GenX.get_tdr_input_files(temp_test_dir, test_setup) + + # Should find all required files + Test.@test haskey(files, "Demand_data") + Test.@test haskey(files, "Generators_variability") + Test.@test haskey(files, "Fuels_data") + Test.@test haskey(files, "TDR_settings") + + # File paths should exist + Test.@test isfile(files["Demand_data"]) + Test.@test isfile(files["Generators_variability"]) + Test.@test isfile(files["Fuels_data"]) + Test.@test isfile(files["TDR_settings"]) +end + +# Test 3: compute_tdr_input_hashes function +Test.@testset "compute_tdr_input_hashes" begin + hashes = GenX.compute_tdr_input_hashes(temp_test_dir, test_setup) + + # Should have hashes for all files + Test.@test haskey(hashes, "Demand_data") + Test.@test haskey(hashes, "Generators_variability") + Test.@test haskey(hashes, "Fuels_data") + Test.@test haskey(hashes, "TDR_settings") + + # All hashes should be 64-character hex strings + for (key, hash) in hashes + Test.@test length(hash) == 64 + Test.@test all(c -> c in "0123456789abcdef", hash) + end +end + +# Test 4: save_tdr_hash_file and load_tdr_hash_file functions +Test.@testset "save and load hash file" begin + # Compute and save hashes + hashes = GenX.compute_tdr_input_hashes(temp_test_dir, test_setup) + hash_file_path = GenX.save_tdr_hash_file(temp_tdr_results, hashes) + + # Hash file should exist + Test.@test isfile(hash_file_path) + Test.@test isfile(joinpath(temp_tdr_results, "tdr_input_hashes.yml")) + + # Load hashes back + loaded_hashes = GenX.load_tdr_hash_file(temp_tdr_results) + Test.@test !isnothing(loaded_hashes) + + # Loaded hashes should match saved hashes + for (key, value) in hashes + Test.@test haskey(loaded_hashes, key) + Test.@test loaded_hashes[key] == value + end + + # Test loading from non-existent directory + Test.@test isnothing(GenX.load_tdr_hash_file("/nonexistent/path")) +end + +# Test 5: tdr_inputs_have_changed function +Test.@testset "tdr_inputs_have_changed" begin + # First time: no hash file exists, should return true + Test.@test GenX.tdr_inputs_have_changed(temp_test_dir, temp_tdr_results, test_setup) == true + + # Save hashes + hashes = GenX.compute_tdr_input_hashes(temp_test_dir, test_setup) + GenX.save_tdr_hash_file(temp_tdr_results, hashes) + + # Now hash file exists and matches, should return false + Test.@test GenX.tdr_inputs_have_changed(temp_test_dir, temp_tdr_results, test_setup) == false + + # Modify a file + demand_file = joinpath(temp_system_dir, "Demand_data.csv") + open(demand_file, "a") do f + write(f, "\n# Modified for testing") + end + + # Should now detect change + Test.@test GenX.tdr_inputs_have_changed(temp_test_dir, temp_tdr_results, test_setup) == true +end + +# Clean up +rm(temp_test_dir, recursive=true) + +println("All TDR hash verification tests passed!") + +end # module TestTDRHashVerification From b1dee7e13755889d8e9fcc90e58d18c8bf512e60 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:52:12 +0000 Subject: [PATCH 4/9] Add documentation and manual testing script for TDR hash verification - Update TDR_input.md with section on input file hash verification - Add note to Tutorial 3 about automatic validation - Create manual_test_tdr_hash.jl for manual testing demonstrations Co-authored-by: gschivley <10373332+gschivley@users.noreply.github.com> --- ...utorial_3_K-means_time_domain_reduction.md | 3 + docs/src/User_Guide/TDR_input.md | 20 +++ test/manual_test_tdr_hash.jl | 156 ++++++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 test/manual_test_tdr_hash.jl diff --git a/docs/src/Tutorials/Tutorial_3_K-means_time_domain_reduction.md b/docs/src/Tutorials/Tutorial_3_K-means_time_domain_reduction.md index b2f47d6440..416bc3f45b 100644 --- a/docs/src/Tutorials/Tutorial_3_K-means_time_domain_reduction.md +++ b/docs/src/Tutorials/Tutorial_3_K-means_time_domain_reduction.md @@ -5,6 +5,9 @@ A good tool to reduce computation time of GenX is to use [Time-domain reduction](@ref). Time-domain Reduction is a method that selects a smaller set of time steps from the data in a way that reduces computation time while still capturing the main information of the model. In this tutorial, we go over how TDR works in GenX and how it uses K-means clustering to choose the optimal time steps. For more information on TDR in capacity expansion models, see [Mallapragada et al](https://www.sciencedirect.com/science/article/pii/S0360544218315238). +!!! note "Automatic Input File Validation" + GenX automatically tracks changes to time-series input files and TDR settings. When time-domain reduction is performed, GenX saves SHA256 hashes of the input files. On subsequent runs, if any input file or TDR setting has changed, GenX will automatically re-run the time-domain reduction to ensure clustered data remains valid. This prevents accidentally using stale clustered data when inputs have been modified. For more details, see the [Time-domain reduction](@ref) documentation. + ### Table of Contents * [Time Domain Reduction](#TDR) * [K-Means Clustering](#Kmeans) diff --git a/docs/src/User_Guide/TDR_input.md b/docs/src/User_Guide/TDR_input.md index 0b36a9de1f..e9e290350c 100644 --- a/docs/src/User_Guide/TDR_input.md +++ b/docs/src/User_Guide/TDR_input.md @@ -24,3 +24,23 @@ It's also possible for GenX perform clustering separately from the optimization |DemandWeight| Default = 1, a multiplier on demand columns to optionally prioritize better fits for demand profiles over resource capacity factor or fuel price profiles.| |WeightTotal |Default = 8760, the sum to which the relative weights of representative periods will be scaled.| |ClusterFuelPrices| Either 1 or 0, whether or not to use the fuel price time series in `Fuels_data.csv` in the clustering process. If 'no', this function will still write `Fuels_data.csv` in the TimeDomainReductionFolder with reshaped fuel prices based on the number and size of the representative periods but will not use the fuel price time series for selection of representative periods.| + +## Input File Hash Verification + +GenX automatically tracks changes to time-series input files and TDR settings to ensure that clustered data remains valid. When time-domain reduction is performed, GenX computes SHA256 hashes of the following files and stores them in `tdr_input_hashes.yml` within the TDR results folder: + +- `Demand_data.csv` (or `Load_data.csv`) +- `Generators_variability.csv` +- `Fuels_data.csv` +- `time_domain_reduction_settings.yml` + +On subsequent runs, GenX checks these hashes against the current input files. If any file has changed, GenX will automatically re-run the time-domain reduction to ensure the clustered data reflects the updated inputs. This prevents users from accidentally using stale clustered data when input files or TDR settings have been modified. + +**Behavior:** +- If the TDR results folder doesn't exist, GenX performs time-domain reduction +- If the TDR results folder exists but the hash file is missing, GenX performs time-domain reduction +- If the TDR results folder and hash file exist, GenX compares stored hashes with current input files: + - If hashes match, GenX uses the existing clustered data + - If any hash differs, GenX prints a message and re-runs time-domain reduction + +This feature is automatic and requires no user configuration. To force a re-run of time-domain reduction, simply delete the TDR results folder as before. diff --git a/test/manual_test_tdr_hash.jl b/test/manual_test_tdr_hash.jl new file mode 100644 index 0000000000..29176b9b32 --- /dev/null +++ b/test/manual_test_tdr_hash.jl @@ -0,0 +1,156 @@ +#!/usr/bin/env julia +""" +Manual test script for TDR hash verification feature. + +This script demonstrates: +1. Running TDR for the first time (no hash file exists) +2. Running again with unchanged inputs (hash check passes, skips TDR) +3. Modifying an input file +4. Running again (hash check fails, re-runs TDR) +""" + +using Pkg +Pkg.activate(".") + +using GenX +using YAML + +println("=".repeat(70)) +println("TDR Hash Verification Feature - Manual Test") +println("=".repeat(70)) + +# Use example system 1 +case_path = joinpath(@__DIR__, "..", "example_systems", "1_three_zones") +settings_path = joinpath(case_path, "settings") +tdr_folder = "TDR_Results" +tdr_path = joinpath(case_path, tdr_folder) + +# Load settings +genx_settings_file = joinpath(settings_path, "genx_settings.yml") +output_settings_file = joinpath(settings_path, "output_settings.yml") +setup = GenX.configure_settings(genx_settings_file, output_settings_file) + +println("\n1. Testing initial state...") +println("-".repeat(70)) + +# Clean up any existing TDR results +if isdir(tdr_path) + println("Removing existing TDR results...") + rm(tdr_path, recursive=true) +end + +println("✓ TDR results directory cleared") + +# Check if hash file exists +hash_file = joinpath(tdr_path, "tdr_input_hashes.yml") +println("Hash file exists: ", isfile(hash_file)) + +# Check if TDR files exist +tdr_exists = GenX.time_domain_reduced_files_exist(tdr_path, case_path, setup) +println("TDR files exist (with hash check): ", tdr_exists) + +println("\n2. Running TDR for the first time...") +println("-".repeat(70)) + +# Run TDR clustering +println("Running cluster_inputs...") +result = GenX.cluster_inputs(case_path, settings_path, setup, random=false) +println("✓ TDR clustering completed") + +# Check if hash file was created +println("Hash file created: ", isfile(hash_file)) + +# Display hash file contents +if isfile(hash_file) + hash_data = YAML.load_file(hash_file) + println("\nHash file contents:") + println(" Created at: ", hash_data["created_at"]) + println(" Number of files hashed: ", length(hash_data["file_hashes"])) + for (key, value) in hash_data["file_hashes"] + println(" $key: $(value[1:16])...") + end +end + +println("\n3. Testing with unchanged inputs...") +println("-".repeat(70)) + +# Check if inputs have changed +inputs_changed = GenX.tdr_inputs_have_changed(case_path, tdr_path, setup) +println("Input files changed: ", inputs_changed) + +# Check if TDR files exist (should be true now) +tdr_exists = GenX.time_domain_reduced_files_exist(tdr_path, case_path, setup) +println("TDR files exist (with hash check): ", tdr_exists) + +if !inputs_changed + println("✓ Hash verification passed - TDR would be skipped") +else + println("✗ Hash verification failed unexpectedly") +end + +println("\n4. Testing with modified input...") +println("-".repeat(70)) + +# Backup original demand file +demand_file = joinpath(case_path, setup["SystemFolder"], "Demand_data.csv") +backup_file = demand_file * ".backup" +cp(demand_file, backup_file, force=true) +println("Created backup of Demand_data.csv") + +# Modify the file slightly (add a comment at the end) +open(demand_file, "a") do f + write(f, "\n# Test modification to trigger hash change\n") +end +println("Modified Demand_data.csv") + +# Check if inputs have changed +inputs_changed = GenX.tdr_inputs_have_changed(case_path, tdr_path, setup) +println("Input files changed: ", inputs_changed) + +# Check if TDR files exist (should be false due to hash mismatch) +tdr_exists = GenX.time_domain_reduced_files_exist(tdr_path, case_path, setup) +println("TDR files exist (with hash check): ", tdr_exists) + +if inputs_changed && !tdr_exists + println("✓ Hash verification correctly detected file change") +else + println("✗ Hash verification failed to detect change") +end + +# Restore original file +mv(backup_file, demand_file, force=true) +println("Restored original Demand_data.csv") + +println("\n5. Testing TDR settings change...") +println("-".repeat(70)) + +# Backup TDR settings +tdr_settings_file = joinpath(settings_path, "time_domain_reduction_settings.yml") +tdr_backup = tdr_settings_file * ".backup" +cp(tdr_settings_file, tdr_backup, force=true) +println("Created backup of time_domain_reduction_settings.yml") + +# Modify TDR settings +tdr_settings = YAML.load_file(tdr_settings_file) +original_minperiods = tdr_settings["MinPeriods"] +tdr_settings["MinPeriods"] = original_minperiods + 1 +YAML.write_file(tdr_settings_file, tdr_settings) +println("Modified TDR settings (changed MinPeriods)") + +# Check if inputs have changed +inputs_changed = GenX.tdr_inputs_have_changed(case_path, tdr_path, setup) +println("Input files changed: ", inputs_changed) + +if inputs_changed + println("✓ Hash verification correctly detected TDR settings change") +else + println("✗ Hash verification failed to detect TDR settings change") +end + +# Restore original settings +mv(tdr_backup, tdr_settings_file, force=true) +println("Restored original time_domain_reduction_settings.yml") + +println("\n" * "=".repeat(70)) +println("Manual test completed successfully!") +println("=".repeat(70)) From 0673facd5e10fc90efa5b0f1012e52537685124c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:53:12 +0000 Subject: [PATCH 5/9] Fix missing imports in tdr_hash_utils.jl Add YAML and Dates imports that are used but were not declared Co-authored-by: gschivley <10373332+gschivley@users.noreply.github.com> --- src/time_domain_reduction/tdr_hash_utils.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/time_domain_reduction/tdr_hash_utils.jl b/src/time_domain_reduction/tdr_hash_utils.jl index 6921340e3b..db40b4a4e7 100644 --- a/src/time_domain_reduction/tdr_hash_utils.jl +++ b/src/time_domain_reduction/tdr_hash_utils.jl @@ -6,6 +6,8 @@ used in Time Domain Reduction (TDR) to detect when input data has changed. """ using SHA +using YAML +using Dates @doc raw""" compute_file_hash(filepath::AbstractString) From 56ed2f3b137666f1bb1d42f1a71799f1a13aa4f6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:54:30 +0000 Subject: [PATCH 6/9] Add safety check for hash file structure Check for 'file_hashes' key existence before accessing to prevent KeyError Co-authored-by: gschivley <10373332+gschivley@users.noreply.github.com> --- src/time_domain_reduction/tdr_hash_utils.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/time_domain_reduction/tdr_hash_utils.jl b/src/time_domain_reduction/tdr_hash_utils.jl index db40b4a4e7..6284e811c5 100644 --- a/src/time_domain_reduction/tdr_hash_utils.jl +++ b/src/time_domain_reduction/tdr_hash_utils.jl @@ -232,6 +232,10 @@ function load_tdr_hash_file(tdr_results_path::AbstractString) try hash_data = YAML.load_file(hash_file_path) + if !haskey(hash_data, "file_hashes") + @warn "TDR hash file missing 'file_hashes' key" + return nothing + end return hash_data["file_hashes"] catch e @warn "Failed to load TDR hash file: $e" From 1104d1b3f075290879a58db55c70d604f883aeb4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 9 Jan 2026 17:56:38 +0000 Subject: [PATCH 7/9] Fix multi-stage hash computation to use correct file paths - Add compute_tdr_input_hashes_multistage function - Use multistage-specific functions for multi-stage TDR - Fixes incorrect path handling in multi-stage scenarios Co-authored-by: gschivley <10373332+gschivley@users.noreply.github.com> --- src/time_domain_reduction/tdr_hash_utils.jl | 34 +++++++++++++++++++ .../time_domain_reduction.jl | 10 +++--- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/src/time_domain_reduction/tdr_hash_utils.jl b/src/time_domain_reduction/tdr_hash_utils.jl index 6284e811c5..d90bae9ff6 100644 --- a/src/time_domain_reduction/tdr_hash_utils.jl +++ b/src/time_domain_reduction/tdr_hash_utils.jl @@ -174,6 +174,40 @@ function compute_tdr_input_hashes(case_path::AbstractString, setup::Dict) return hashes end +@doc raw""" + compute_tdr_input_hashes_multistage(case_path::AbstractString, setup::Dict, stage_id::Int) + +Compute hashes for all TDR input files in a multi-stage planning stage. + +# Arguments +- `case_path::AbstractString`: Path to the case directory +- `setup::Dict`: GenX settings dictionary +- `stage_id::Int`: Stage identifier for multi-stage problems + +# Returns +- Dict{String, String}: Dictionary mapping file keys to their hash values + +# Example +```julia +hashes = compute_tdr_input_hashes_multistage("/path/to/case", setup, 1) +``` +""" +function compute_tdr_input_hashes_multistage(case_path::AbstractString, + setup::Dict, + stage_id::Int) + files = get_tdr_input_files_multistage(case_path, setup, stage_id) + hashes = Dict{String, String}() + + for (key, filepath) in files + hash = compute_file_hash(filepath) + if !isnothing(hash) + hashes[key] = hash + end + end + + return hashes +end + @doc raw""" save_tdr_hash_file(tdr_results_path::AbstractString, hashes::Dict) diff --git a/src/time_domain_reduction/time_domain_reduction.jl b/src/time_domain_reduction/time_domain_reduction.jl index 3229dd3cd1..f81484f61d 100644 --- a/src/time_domain_reduction/time_domain_reduction.jl +++ b/src/time_domain_reduction/time_domain_reduction.jl @@ -1369,9 +1369,8 @@ function cluster_inputs(inpath, if v println("Writing hash file for input validation...") end - stage_case_path = joinpath(inpath, "inputs", "inputs_p$per") - input_hashes = compute_tdr_input_hashes(stage_case_path, mysetup) - stage_tdr_path = joinpath(stage_case_path, TimeDomainReductionFolder) + input_hashes = compute_tdr_input_hashes_multistage(inpath, mysetup, per) + stage_tdr_path = joinpath(inpath, "inputs", "inputs_p$per", TimeDomainReductionFolder) save_tdr_hash_file(stage_tdr_path, input_hashes) end @@ -1525,9 +1524,8 @@ function cluster_inputs(inpath, if v println("Writing hash file for input validation...") end - stage_case_path = joinpath(inpath, "inputs", input_stage_directory) - input_hashes = compute_tdr_input_hashes(stage_case_path, mysetup) - stage_tdr_path = joinpath(stage_case_path, TimeDomainReductionFolder) + input_hashes = compute_tdr_input_hashes_multistage(inpath, mysetup, stage_id) + stage_tdr_path = joinpath(inpath, "inputs", input_stage_directory, TimeDomainReductionFolder) save_tdr_hash_file(stage_tdr_path, input_hashes) end else From ef484601e8fc1078b12716f4aa84d280149138d0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 9 Jan 2026 19:40:34 +0000 Subject: [PATCH 8/9] Update compute_file_hash to handle binary files (parquet, gzip) - Use binary read mode instead of text mode to support both text and binary files - Update documentation with examples for parquet and gzipped files - Simplify implementation using read() directly Co-authored-by: gschivley <10373332+gschivley@users.noreply.github.com> --- src/time_domain_reduction/tdr_hash_utils.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/time_domain_reduction/tdr_hash_utils.jl b/src/time_domain_reduction/tdr_hash_utils.jl index d90bae9ff6..a5802249d8 100644 --- a/src/time_domain_reduction/tdr_hash_utils.jl +++ b/src/time_domain_reduction/tdr_hash_utils.jl @@ -12,7 +12,8 @@ using Dates @doc raw""" compute_file_hash(filepath::AbstractString) -Compute SHA256 hash of a file. +Compute SHA256 hash of a file. Works with both text files (CSV, YAML) +and binary files (Parquet, gzipped files). # Arguments - `filepath::AbstractString`: Path to the file to hash @@ -23,15 +24,16 @@ Compute SHA256 hash of a file. # Example ```julia hash = compute_file_hash("path/to/file.csv") +hash_parquet = compute_file_hash("path/to/file.parquet") +hash_gzip = compute_file_hash("path/to/file.csv.gz") ``` """ function compute_file_hash(filepath::AbstractString) if !isfile(filepath) return nothing end - open(filepath, "r") do file - return bytes2hex(sha256(file)) - end + # Read file in binary mode to handle both text and binary files (parquet, gzip, etc.) + return bytes2hex(sha256(read(filepath))) end @doc raw""" From d0c4f42385b6f1112a644d8d334d4b39379055fb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 18:11:36 +0000 Subject: [PATCH 9/9] Add SHA to dependencies and update CHANGELOG - Add SHA package to Project.toml dependencies - Update CHANGELOG.md with TDR hash verification feature in keepachangelog format Co-authored-by: gschivley <10373332+gschivley@users.noreply.github.com> --- CHANGELOG.md | 3 +++ Project.toml | 1 + 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9a117e329..812c26c3dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added +- Hash-based validation of Time Domain Reduction (TDR) input files to automatically detect changes. GenX now stores SHA256 hashes of time series input files (`Demand_data.csv`, `Generators_variability.csv`, `Fuels_data.csv`) and TDR settings in `tdr_input_hashes.yml`. When TDR results exist, GenX compares stored hashes with current files and automatically re-runs TDR if any changes are detected, eliminating the need to manually delete the TDR_Results folder (#844). + ## [0.4.5] - 2025-07-07 ### Added diff --git a/Project.toml b/Project.toml index 732858a675..e878db1b92 100644 --- a/Project.toml +++ b/Project.toml @@ -19,6 +19,7 @@ MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" +SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"