"""
Helper functions for deriving variables that are absent from CAM history
files but can be computed from other ("constituent") variables.

This module is called from ``adf_diag.py`` during time series generation:

* ``check_derive``    - decide whether a missing variable can be derived and
                        collect its list of constituents.
* ``derive_variable`` - build the derived variable's time series file from
                        the constituents' time series files.
"""

import glob
import os
from pathlib import Path

# NOTE(review): the original module imported xarray as ``xr`` but never used
# it; datasets are opened through ``self.data.load_dataset``.  The unused
# import has been removed.


def check_derive(self, res, var, case_name, diag_var_list, constit_dict,
                 hist_file_ds, hist0):
    """
    Check whether a variable missing from the history files can be derived.

    For the incoming variable, look for a list of constituents, if available,
    in the variable defaults yaml file.

    If the variable has neither a `derivable_from` nor a
    `derivable_from_cam_chem` entry, it is assumed to be non-derivable and
    simply missing from the history file.

    If it does have one of those entries, CAM-CHEM constituents are checked
    first (against the supplied history dataset), then regular CAM
    constituents.

    Arguments
    ---------
    self: AdfDiag
        - ADF object (used here only for `debug_log`)
    res: dict
        - variable defaults dictionary from yaml file
    var: str
        - derived variable name
    case_name: str
        - model case (used in log messages)
    diag_var_list: list
        - list of variables for diagnostics
          NOTE: this is user supplied, but gets modified here (in place) to
          append constituents and, for aerosols, PMID/T
    constit_dict: dict
        - dictionary of derived variables as keys and list of constituents
          as values; `var` is added only when constituents were found
    hist_file_ds: xarray.Dataset
        - history file dataset for checking if constituents are available
          (only accessed for the CAM-CHEM check)
    hist0: str
        - history number for case (used in warning messages only)

    Returns
    -------
    diag_var_list: list
        - updated list (if applicable) of ADF variables for time series creation
    constit_dict: dict
        - updated dictionary of derived variables and their constituents
    """

    # Aerosol Calcs
    #--------------

    # Always make sure PMID (and T) are made if aerosols are desired in the
    # config file; both are needed later for the dry-air density calculation.
    # Since there's no requirement for `aerosol_zonal_list`, allow it to be absent:
    azl = res.get("aerosol_zonal_list", [])
    if azl:
        if "PMID" not in diag_var_list:
            if any(item in azl for item in diag_var_list):
                diag_var_list += ["PMID"]
        if "T" not in diag_var_list:
            if any(item in azl for item in diag_var_list):
                diag_var_list += ["T"]
    # End aerosol calcs

    # Set error messages for printing/debugging
    # Derived variable, but missing constituent list
    constit_errmsg = f"create time series for {case_name}:"
    constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable"
    constit_errmsg += " is flagged for derivation, but is missing list of constituents."
    constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' "
    constit_errmsg += f"for {var} in variable defaults yaml file."

    # No time series creation
    exit_msg = f"WARNING: {var} is not in the file {hist0} and can't be derived."
    exit_msg += "\n\t ** No time series will be generated. **\n"

    # Initialize list for constituents
    # NOTE: this stays an empty list if the variable is NOT derivable; the
    # emptiness is used as the final check below.
    constit_list = []

    try_cam_constits = True
    # Try finding info from variable defaults yaml file
    try:
        vres = res[var]
    except KeyError:
        # Variable is not in the defaults file at all: cannot be derived.
        print(exit_msg)
        self.debug_log(exit_msg)
        return diag_var_list, constit_dict

    # Check first if variable is potentially part of a CAM-CHEM run
    if "derivable_from_cam_chem" in vres:
        constit_list = vres["derivable_from_cam_chem"]

        if constit_list:
            if all(item in hist_file_ds.data_vars for item in constit_list):
                # All CAM-CHEM constituents are present; don't fall back to
                # the regular CAM constituents below.
                try_cam_constits = False
                msg = f"derive time series for {case_name}:"
                msg += "\n\tLooks like this a CAM-CHEM run, "
                msg += f"checking constituents for '{var}'"
                self.debug_log(msg)
            else:
                self.debug_log(constit_errmsg)
            # End if
        # End if
    # End if

    # If not CAM-CHEM, check regular CAM runs
    if try_cam_constits:
        if "derivable_from" in vres:
            constit_list = vres["derivable_from"]
        else:
            # Missing variable or missing derivable_from argument
            der_from_msg = f"derive time series for {case_name}:"
            der_from_msg += f"\n Can't create time series for {var}.\n\tEither "
            der_from_msg += "the variable is missing from CAM output or it is a "
            der_from_msg += "derived quantity and is missing the 'derivable_from' "
            der_from_msg += "config argument.\n\tPlease add variable to CAM run "
            der_from_msg += "or set appropriate argument in variable "
            der_from_msg += "defaults yaml file."
            self.debug_log(der_from_msg)
        # End if
    # End if

    # Log if this variable can be derived but is missing list of constituents
    if isinstance(constit_list, list) and not constit_list:
        self.debug_log(constit_errmsg)

    # Check if any constituents were found
    if constit_list:
        # Add variable and constituent list to dictionary
        constit_dict[var] = constit_list

        # Add constituents to ADF diag variable list for time series generation
        for constit in constit_list:
            if constit not in diag_var_list:
                diag_var_list.append(constit)
    else:
        print(exit_msg)
        self.debug_log(exit_msg)
    # End if

    return diag_var_list, constit_dict

########

def derive_variable(self, case_name, var, res=None, ts_dir=None,
                    constit_list=None, overwrite=None):
    """
    Derive variables according to steps given here. Since derivations will
    depend on the variable, each variable to derive will need its own set of
    steps below.

    Caution: this function assumes that there will be one time series file
    per variable.

    If the file for the derived variable exists, the kwarg `overwrite`
    determines whether to overwrite the file (true) or return after a
    warning message, keeping the existing file.

    Arguments
    ---------
    self: AdfDiag
        - ADF object (used for `debug_log` and `self.data.load_dataset`)
    case_name: str
        - model case (used in log messages)
    var: str
        - derived variable name
    res: dict
        - variable defaults dictionary from yaml file (for aerosol list and Rgas)
    ts_dir: str
        - directory containing the constituent time series files; the derived
          file is written there as well
    constit_list: list
        - constituent variable names required to derive `var`
    overwrite: bool
        - whether to clobber an existing derived-variable file

    Returns
    -------
    None; writes the derived time series file to `ts_dir` as a side effect.
    """

    print(f"\t - deriving time series for {var}")

    # Grab all required time series files for derived variable
    constit_files = []
    for constit in constit_list:
        # FIX: use one and the same glob pattern for the existence check and
        # the file grab (the original checked "*.{constit}.*.nc" but grabbed
        # "*.{constit}.*", which could pick up a different file).  `sorted`
        # makes the choice deterministic.
        found = sorted(glob.glob(os.path.join(ts_dir, f"*.{constit}.*.nc")))
        if found:
            constit_files.append(found[0])
    # End for

    # Check if all the necessary constituent files were found
    if len(constit_files) != len(constit_list):
        ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated. **\n"
        ermsg += f"\t Please remove {var} from 'diag_var_list' or find the "
        ermsg += "relevant CAM files.\n"
        print(ermsg)
        if constit_files:
            # Add what's missing to debug log
            dmsg = f"derived time series for {case_name}:"
            dmsg += "\n\tneeded constituents for derivation of "
            dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in "
            dmsg += f"{Path(constit_files[0]).parent}:\n\t\t"
            dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}"
            self.debug_log(dmsg)
        else:
            dmsg = f"derived time series for {case_name}:"
            dmsg += "\n\tneeded constituents for derivation of "
            dmsg += f"{var}:\n\t\t- {constit_list}\n"
            dmsg += "\tNo constituent(s) found in history files"
            self.debug_log(dmsg)
        # End if
        return
    # End if (all the necessary constituent files exist)

    # Open a new dataset with all the constituent files/variables
    ds = self.data.load_dataset(constit_files)
    if not ds:
        dmsg = f"derived time series for {case_name}:"
        dmsg += "\n\tNo files to open."
        self.debug_log(dmsg)
        return

    # Grab attributes from first constituent file to be used in derived variable
    attrs = ds[constit_list[0]].attrs

    # Create new file name for derived variable
    derived_file = constit_files[0].replace(constit_list[0], var)

    # Check if clobber is true for file
    if Path(derived_file).is_file():
        if overwrite:
            Path(derived_file).unlink()
        else:
            msg = f"[{__name__}] Warning: '{var}' file was found "
            msg += "and overwrite is False. Will use existing file."
            print(msg)
            # FIX: bail out here; the original fell through and re-derived
            # and rewrote the file despite promising to reuse the existing one.
            return

    # NOTE: this will need to be changed when derived equations are more complex! - JR
    if var == "RESTOM":
        # Residual top-of-model energy: net SW minus net LW flux
        der_val = ds["FSNT"] - ds["FLNT"]
    else:
        # Default derivation: sum over all constituents
        der_val = 0
        for constit in constit_list:
            der_val += ds[constit]

    # Set derived variable name and add to dataset
    der_val.name = var
    ds[var] = der_val

    # Aerosol Calculations
    #----------------------------------------------------------------------------------
    # These will be multiplied by rho (density of dry air)

    # User-defined defaults might not include aerosol zonal list
    azl = res.get("aerosol_zonal_list", [])
    if var in azl:
        # Check if PMID is available (needed for dry-air density):
        # FIX: guard the glob result; the original indexed [0] directly and
        # raised IndexError when no PMID file existed, and it proceeded to use
        # ds_pmid even after reporting it missing.
        pmid_files = glob.glob(os.path.join(ts_dir, "*.PMID.*"))
        ds_pmid = self.data.load_dataset(pmid_files[0]) if pmid_files else None
        if not ds_pmid:
            errmsg = "Missing necessary files for dry air density (rho) "
            errmsg += "calculation.\nPlease make sure 'PMID' is in the CAM "
            errmsg += "run for aerosol calculations"
            print(errmsg)
            dmsg = "derived time series:"
            dmsg += f"\n\t missing 'PMID' in {ts_dir}, can't make time series for {var} "
            self.debug_log(dmsg)
            return

        # Check if T is available (same guard as for PMID):
        t_files = glob.glob(os.path.join(ts_dir, "*.T.*"))
        ds_t = self.data.load_dataset(t_files[0]) if t_files else None
        if not ds_t:
            errmsg = "Missing necessary files for dry air density (rho) "
            errmsg += "calculation.\nPlease make sure 'T' is in the CAM "
            errmsg += "run for aerosol calculations"
            print(errmsg)
            dmsg = "derived time series:"
            dmsg += f"\n\t missing 'T' in {ts_dir}, can't make time series for {var} "
            self.debug_log(dmsg)
            return

        # Multiply aerosol by dry air density (rho): (P/Rd*T)
        ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"]))

        # Sulfate conversion factor
        if var == "SO4":
            ds[var] = ds[var]*(96./115.)
    #----------------------------------------------------------------------------------

    # Drop all constituents from final saved dataset
    # These are not necessary because they have their own time series files
    ds_final = ds.drop_vars(constit_list)
    # Copy attributes from constituent file to derived variable
    ds_final[var].attrs = attrs
    ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w')
########
""" return subprocess.run(cmd, shell=False) - # End def - # Check if baseline time-series files are being created: if baseline: - # Use baseline settings, while converting them all - # to lists: + # Use baseline settings, while converting them all to lists: case_names = [self.get_baseline_info("cam_case_name", required=True)] cam_ts_done = [self.get_baseline_info("cam_ts_done")] cam_hist_locs = [self.get_baseline_info("cam_hist_loc")] @@ -365,10 +363,6 @@ def call_ncrcat(cmd): end_years = self.climo_yrs["eyears"] case_type_string="case" hist_str_list = self.hist_string["test_hist_str"] - - # Notify user that script has started: - print(f"\n Writing time series files to {ts_dir}") - # End if # Read hist_str (component.hist_num) from the yaml file, or set to default @@ -380,10 +374,14 @@ def call_ncrcat(cmd): # Loop over cases: for case_idx, case_name in enumerate(case_names): + # Notify user that script has started: + print(f"\n Generating CAM time series files for '{case_name}'...") + print(f"\n Writing time series files to {ts_dir[case_idx]}") + # Check if particular case should be processed: if cam_ts_done[case_idx]: - emsg = " Configuration file indicates time series files have been pre-computed" - emsg += f" for case '{case_name}'. Will rely on those files directly." + emsg = "\tConfiguration file indicates time series files have been pre-computed." + emsg += f" Will rely on those files directly." print(emsg) continue # End if @@ -402,7 +400,7 @@ def call_ncrcat(cmd): self.end_diag_fail(emsg) # End if - # Check if history files actually exqist. If not then kill script: + # Check if history files actually exist. 
If not then kill script: hist_str_case = hist_str_list[case_idx] for hist_str in hist_str_case: @@ -512,114 +510,35 @@ def call_ncrcat(cmd): time_string_finish = last_file_split[-1].replace("-", "") time_string = "-".join([time_string_start, time_string_finish]) - # Loop over CAM history variables: + # Intitialize list for NCO commands list_of_commands = [] - vars_to_derive = [] - # create copy of var list that can be modified for derivable variables + + # Create copy of var list that can be modified for derivable variables diag_var_list = self.diag_var_list - # Aerosol Calcs - # -------------- - # Always make sure PMID is made if aerosols are desired in config file - # Since there's no requirement for `aerosol_zonal_list` to be included, allow it to be absent: - - azl = res.get("aerosol_zonal_list", []) - if "PMID" not in diag_var_list: - if any(item in azl for item in diag_var_list): - diag_var_list += ["PMID"] - if "T" not in diag_var_list: - if any(item in azl for item in diag_var_list): - diag_var_list += ["T"] - # End aerosol calcs - - # Initialize dictionary for derived variable with needed list of constituents + # Intitialize dictionary for derived variables, if appplicable constit_dict = {} + # Loop over CAM history variables: for var in diag_var_list: # Notify user of new time series file: print(f"\t - time series for {var}") - # Set error messages for printing/debugging - # Derived variable, but missing constituent list - constit_errmsg = f"create time series for {case_name}:" - constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable" - constit_errmsg += " is flagged for derivation, but is missing list of constiuents." - constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' " - constit_errmsg += f"for {var} in variable defaults yaml file." 
+ # Initialize list for constituents if variable is derivable + constit_list = [] - # Check if current variable is a derived quantity + # Check if current variable is not in history file(s) if var not in hist_file_var_list: - vres = res.get(var, {}) - - # Initialiaze list for constituents - # NOTE: This is if the variable is NOT derivable but needs - # an empty list as a check later - constit_list = [] - - # intialize boolean to check if variable is derivable - derive = False # assume it can't be derived and update if it can - - # intialize boolean for regular CAM variable constituents - try_cam_constits = True - - # Check first if variable is potentially part of a CAM-CHEM run - if "derivable_from_cam_chem" in vres: - constit_list = vres["derivable_from_cam_chem"] - if constit_list: - if all(item in hist_file_ds.data_vars for item in constit_list): - # Set check to look for regular CAM constituents in variable defaults - try_cam_constits = False - derive = True - msg = f"create time series for {case_name}:" - msg += "\n\tLooks like this a CAM-CHEM run, " - msg += f"checking constituents for '{var}'" - self.debug_log(msg) - else: - self.debug_log(constit_errmsg) - # End if - # End if - - # If not CAM-CHEM, check regular CAM runs - if try_cam_constits: - if "derivable_from" in vres: - derive = True - constit_list = vres["derivable_from"] - else: - # Missing variable or missing derivable_from argument - der_from_msg = f"create time series for {case_name}:" - der_from_msg += f"\n Can't create time series for {var}.\n\tEither " - der_from_msg += "the variable is missing from CAM output or it is a " - der_from_msg += "derived quantity and is missing the 'derivable_from' " - der_from_msg += "config argument.\n\tPlease add variable to CAM run " - der_from_msg += "or set appropriate argument in variable " - der_from_msg += "defaults yaml file." 
- self.debug_log(der_from_msg) - # End if - - # Check if this variable can be derived - if (derive) and (constit_list): - for constit in constit_list: - if constit not in diag_var_list: - diag_var_list.append(constit) - # Add variable to list to derive - vars_to_derive.append(var) - # Add constituent list to variable key in dictionary - constit_dict[var] = constit_list - continue - # Log if this variable can be derived but is missing list of constituents - elif (derive) and (not constit_list): - self.debug_log(constit_errmsg) - continue - # Lastly, raise error if the variable is not a derived quanitity but is also not - # in the history file(s) - else: - msg = f"WARNING: {var} is not in the file {hist_files[0]} " - msg += "nor can it be derived.\n" - msg += "\t ** No time series will be generated." - print(msg) - continue - # End if - # End if (var in var_diag_list) + # Let user know variable is not in history file + print(f"\t {var} not in history file, will try to derive if possible") + + # Check if variable can be derived + diag_var_list, constit_dict = check_derive(self, res, var, case_name, + diag_var_list, constit_dict, + hist_file_ds, hist_files[0]) + # Move to the next variable + continue + # End if # Check if variable has a "lev" dimension according to first file: has_lev = bool("lev" in hist_file_ds[var].dims) @@ -654,12 +573,12 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS might be in a different history file. If so, continue without error. + # PS may be in a different history file. If so, continue without error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PS" - print("Adding PS to file") + print("\t Adding PS to file") else: wmsg = "WARNING: PS not found in history file." wmsg += " It might be needed at some point." 
@@ -675,7 +594,7 @@ def call_ncrcat(cmd): # PMID file to each one of those targets separately. -JN if "PMID" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PMID" - print("Adding PMID to file") + print("\t Adding PMID to file") else: wmsg = "WARNING: PMID not found in history file." wmsg += " It might be needed at some point." @@ -693,19 +612,18 @@ def call_ncrcat(cmd): # Add to command list for use in multi-processing pool: list_of_commands.append(cmd) - # End variable loop # Now run the "ncrcat" subprocesses in parallel: with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_commands) - - if vars_to_derive: - self.derive_variables( - res=res, hist_str=hist_str, vars_to_derive=vars_to_derive, - constit_dict=constit_dict, ts_dir=ts_dir[case_idx] - ) # End with + + # Finally, run through the derived variables if applicable + if constit_dict: + for der_var, constit_list in constit_dict.items(): + derive_variable(self, case_name, der_var, res, + ts_dir[case_idx], constit_list) # End for hist_str # End cases loop @@ -1085,137 +1003,6 @@ def setup_run_cvdp(self): ######### - def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=None, - constit_dict=None, overwrite=None): - """ - Derive variables acccording to steps given here. Since derivations will depend on the - variable, each variable to derive will need its own set of steps below. - - Caution: this method assumes that there will be one time series file per variable - - If the file for the derived variable exists, the kwarg `overwrite` determines - whether to overwrite the file (true) or exit with a warning message. 
- - """ - - # Loop through derived variables - for var in vars_to_derive: - print(f"\t - deriving time series for {var}") - - # Grab list of constituents for this variable - constit_list = constit_dict[var] - - # Grab all required time series files for derived variable - constit_files = [] - for constit in constit_list: - # Check if the constituent file is present, if so add it to list - if hist_str: - const_glob_str = f"*{hist_str}*.{constit}.*.nc" - else: - const_glob_str = f"*.{constit}.*.nc" - # end if - if glob.glob(os.path.join(ts_dir, const_glob_str)): - constit_files.append(glob.glob(os.path.join(ts_dir, const_glob_str ))[0]) - - # Check if all the necessary constituent files were found - if len(constit_files) != len(constit_list): - ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated." - ermsg += f" Please remove {var} from 'diag_var_list' or find the " - ermsg += "relevant CAM files.\n" - print(ermsg) - if constit_files: - # Add what's missing to debug log - dmsg = "create time series:" - dmsg += "\n\tneeded constituents for derivation of " - dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in " - dmsg += f"{Path(constit_files[0]).parent}:\n\t\t" - dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}" - self.debug_log(dmsg) - else: - dmsg = "create time series:" - dmsg += "\n\tneeded constituents for derivation of " - dmsg += f"{var}:\n\t\t- {constit_list}\n" - dmsg += "\tNo constituent(s) found in history files" - self.debug_log(dmsg) - - else: - # Open a new dataset with all the constituent files/variables - ds = xr.open_mfdataset(constit_files).compute() - - # Grab attributes from first constituent file to be used in derived variable - attrs = ds[constit_list[0]].attrs - - # create new file name for derived variable - derived_file = constit_files[0].replace(constit_list[0], var) - - # Check if clobber is true for file - if Path(derived_file).is_file(): - if overwrite: - 
Path(derived_file).unlink() - else: - msg = f"[{__name__}] Warning: '{var}' file was found " - msg += "and overwrite is False. Will use existing file." - print(msg) - continue - - # NOTE: this will need to be changed when derived equations are more complex! - JR - if var == "RESTOM": - der_val = ds["FSNT"]-ds["FLNT"] - else: - # Loop through all constituents and sum - der_val = 0 - for v in constit_list: - der_val += ds[v] - - # Set derived variable name and add to dataset - der_val.name = var - ds[var] = der_val - - # Aerosol Calculations - # ---------------------------------------------------------------------------------- - # These will be multiplied by rho (density of dry air) - ds_pmid_done = False - ds_t_done = False - - # User-defined defaults might not include aerosol zonal list - azl = res.get("aerosol_zonal_list", []) - if var in azl: - # Only calculate once for all aerosol vars - if not ds_pmid_done: - ds_pmid = _load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0]) - ds_pmid_done = True - if not ds_pmid: - errmsg = "Missing necessary files for dry air density" - errmsg += " (rho) calculation.\n" - errmsg += "Please make sure 'PMID' is in the CAM run" - errmsg += " for aerosol calculations" - print(errmsg) - continue - if not ds_t_done: - ds_t = _load_dataset(glob.glob(os.path.join(ts_dir, "*.T.*"))[0]) - ds_t_done = True - if not ds_t: - errmsg = "Missing necessary files for dry air density" - errmsg += " (rho) calculation.\n" - errmsg += "Please make sure 'T' is in the CAM run" - errmsg += " for aerosol calculations" - print(errmsg) - continue - - # Multiply aerosol by dry air density (rho): (P/Rd*T) - ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"])) - - # Sulfate conversion factor - if var == "SO4": - ds[var] = ds[var]*(96./115.) 
- # ---------------------------------------------------------------------------------- - - # Drop all constituents from final saved dataset - # These are not necessary because they have their own time series files - ds_final = ds.drop_vars(constit_list) - # Copy attributes from constituent file to derived variable - ds_final[var].attrs = attrs - ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w') ######### MDTF functions ######### def setup_run_mdtf(self): @@ -1329,11 +1116,8 @@ def move_tsfiles_for_mdtf(self, verbose): # Going to need a dict to translate. # Use cesm_freq_strings = freq_string_options.keys # and then freq = freq_string_option(freq_string_found) - freq_string_cesm = ["month", "day", "hour_6", "hour_3", "hour_1"] #keys - freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values - freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict + freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] - hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) var_list = self.diag_var_list @@ -1388,7 +1172,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue found_strings = [ - word for word in freq_string_cesm if word in dataset_freq + word for word in freq_string_options if word in dataset_freq ] if len(found_strings) == 1: if verbose > 2: @@ -1403,14 +1187,13 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" + f"WARNING: None of the frequency options {freq_string_options} are present in the time_period_freq attribute {dataset_freq}" ) print(f"Skipping {adf_file}") freq = "frequency_missing" continue - freq = freq_string_dict.get(found_strings[0]) - print(f"Translated {found_strings[0]} to {freq}") - + freq = found_strings[0] + # # Destination file is MDTF directory and name structure # @@ -1438,47 +1221,4 @@ 
def move_tsfiles_for_mdtf(self, verbose): shutil.copyfile(adf_file, mdtf_file) # end for hist_str # end for var - # end for case - - -######## - -# Helper Function(s) - - -def _load_dataset(fils): - """ - This method exists to get an xarray Dataset from input file information that - can be passed into the plotting methods. - - Parameters - ---------- - fils : list - strings or paths to input file(s) - - Returns - ------- - xr.Dataset - - Notes - ----- - When just one entry is provided, use `open_dataset`, otherwise `open_mfdatset` - """ - import warnings # use to warn user about missing files. - - #Format warning messages: - def my_formatwarning(msg, *args, **kwargs): - """Issue `msg` as warning.""" - return str(msg) + '\n' - warnings.formatwarning = my_formatwarning - - if len(fils) == 0: - warnings.warn("Input file list is empty.") - return None - if len(fils) > 1: - return xr.open_mfdataset(fils, combine='by_coords') - else: - return xr.open_dataset(fils[0]) - #End if -# End def -######## + # end for case \ No newline at end of file From ae1e3507c892824fa8cdae95c37f2d1b9b070454 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Fri, 30 Aug 2024 16:28:04 -0600 Subject: [PATCH 03/15] Clean up print statements --- lib/adf_derive.py | 2 +- lib/adf_diag.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/adf_derive.py b/lib/adf_derive.py index 279cceef6..39bf83230 100644 --- a/lib/adf_derive.py +++ b/lib/adf_derive.py @@ -72,7 +72,7 @@ def check_derive(self, res, var, case_name, diag_var_list, constit_dict, hist_fi # No time series creation exit_msg = f"WARNING: {var} is not in the file {hist0} and can't be derived." - exit_msg += "\n\t ** No time series will be generated. **\n" + exit_msg += "\t ** No time series will be generated. 
**" # Initialiaze list for constituents # NOTE: This is if the variable is NOT derivable but needs diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 0497f5305..0ce044622 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -529,7 +529,7 @@ def call_ncrcat(cmd): # Check if current variable is not in history file(s) if var not in hist_file_var_list: - # Let user know variable is not in history file + # Let user know variable is not print(f"\t {var} not in history file, will try to derive if possible") # Check if variable can be derived From d8eec24261568f4d5663704abc41be93e13ec618 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 8 Oct 2024 12:02:48 -0600 Subject: [PATCH 04/15] Pull newest changed from main --- config_amwg_default_plots.yaml | 8 ++------ config_cam_baseline_example.yaml | 11 ++++------- lib/adf_diag.py | 7 +++---- lib/adf_web.py | 20 +++++++++++++++++++- lib/website_templates/template_index.html | 18 ++++++++++++++++++ 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/config_amwg_default_plots.yaml b/config_amwg_default_plots.yaml index ce22d562c..39a81ea58 100644 --- a/config_amwg_default_plots.yaml +++ b/config_amwg_default_plots.yaml @@ -303,6 +303,8 @@ diag_cvdp_info: # If mdtf_run: true, the MDTF will be set up and # run in background mode, likely completing after the ADF has completed. # +# WARNING: This currently only runs on CASPER (not derecho) +# # The variables required depend on the diagnostics (PODs) selected. # AMWG-developed PODS and their required variables: # (Note that PRECT can be computed from PRECC & PRECL) @@ -332,12 +334,6 @@ diag_mdtf_info: conda_env_root : ${mdtf_codebase_path}/miniconda2/envs.MDTFv3.1.20230412/ OBS_DATA_ROOT : ${mdtf_codebase_path}/obs_data - - - # Set to default for same as the ADF plot_location. Anything else here overrides that - OUTPUT_DIR : default - WORKING_DIR : default - # SET this to a writable dir. 
The ADF will place ts files here for the MDTF to read (adds the casename) MODEL_DATA_ROOT : ${diag_cam_climo.cam_ts_loc}/mdtf/inputdata/model diff --git a/config_cam_baseline_example.yaml b/config_cam_baseline_example.yaml index c7037a92e..015503270 100644 --- a/config_cam_baseline_example.yaml +++ b/config_cam_baseline_example.yaml @@ -388,6 +388,8 @@ diag_cvdp_info: # If mdtf_run: true, the MDTF will be set up and # run in background mode, likely completing after the ADF has completed. # +# WARNING: This currently only runs on CASPER (not derecho) +# # The variables required depend on the diagnostics (PODs) selected. # AMWG-developed PODS and their required variables: # (Note that PRECT can be computed from PRECC & PRECL) @@ -417,12 +419,6 @@ diag_mdtf_info: conda_env_root : ${mdtf_codebase_path}/miniconda2/envs.MDTFv3.1.20230412/ OBS_DATA_ROOT : ${mdtf_codebase_path}/obs_data - - - # Set to default for same as the ADF plot_location. Anything else here overrides that - OUTPUT_DIR : default - WORKING_DIR : default - # SET this to a writable dir. 
The ADF will place ts files here for the MDTF to read (adds the casename) MODEL_DATA_ROOT : ${diag_cam_climo.cam_ts_loc}/mdtf/inputdata/model @@ -510,7 +506,8 @@ diag_var_list: # # MDTF recommended variables -# - OMEGA +# - FLUT +# - OMEGA500 # - PRECT # - PS # - PSL diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 0ce044622..1db601f7e 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -1053,8 +1053,7 @@ def setup_run_mdtf(self): case_idx = 0 plot_path = os.path.join(self.plot_location[case_idx], "mdtf") for var in ["WORKING_DIR", "OUTPUT_DIR"]: - if mdtf_info[var] == "default": - mdtf_info[var] = plot_path + mdtf_info[var] = plot_path # # Write the input settings json file @@ -1141,7 +1140,7 @@ def move_tsfiles_for_mdtf(self, verbose): adf_file_list = glob.glob(adf_file_str) if len(adf_file_list) == 1: - if verbose > 2: + if verbose > 1: print(f"Copying ts file: {adf_file_list} to MDTF dir") elif len(adf_file_list) > 1: if verbose > 0: @@ -1149,7 +1148,7 @@ def move_tsfiles_for_mdtf(self, verbose): f"WARNING: found multiple timeseries files {adf_file_list}. Continuing with best guess; suggest cleaning up multiple dates in ts dir" ) else: - if verbose > 0: + if verbose > 1: print( f"WARNING: No files matching {case_name}.{hist_str}.{var} found in {adf_file_str}. 
Skipping" ) diff --git a/lib/adf_web.py b/lib/adf_web.py index f7b3e0d3d..43f88c871 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -146,6 +146,18 @@ def __init__(self, config_file, debug=False): #Specify where CSS files will be stored: css_files_dir = website_dir / "templates" + #Add links to external packages (if applicable) + self.external_package_links = {} + + #MDTF puts directory under case[0] + if self.get_mdtf_info('mdtf_run'): + syear = self.climo_yrs["syears"] + eyear = self.climo_yrs["eyears"] + mdtf_path = f"../mdtf/MDTF_{case_name}" + mdtf_path += f"_{syear[0]}_{eyear[0]}" + self.external_package_links['MDTF'] = mdtf_path + #End if + #Add all relevant paths to dictionary for specific case: self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -691,6 +703,10 @@ def jinja_list(seas_list): if ptype not in avail_plot_types: avail_plot_types.append(plot_types) + + # External packages that can be run through ADF + avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} + #Construct index.html index_title = "AMP Diagnostics Prototype" index_tmpl = jinenv.get_template('template_index.html') @@ -700,7 +716,9 @@ def jinja_list(seas_list): case_yrs=case_yrs, baseline_yrs=baseline_yrs, plot_types=plot_types, - avail_plot_types=avail_plot_types) + avail_plot_types=avail_plot_types, + avail_external_packages=avail_external_packages, + external_package_links=self.external_package_links) #Write Mean diagnostics index HTML file: with open(index_html_file, 'w', encoding='utf-8') as ofil: diff --git a/lib/website_templates/template_index.html b/lib/website_templates/template_index.html index 4b2659cf8..574e02a01 100644 --- a/lib/website_templates/template_index.html +++ b/lib/website_templates/template_index.html @@ -48,5 +48,23 @@

Plot Types

{% endfor %} +
+

External Diagnostic Packages

+
+ +
+ {% for avail_type in avail_external_packages %} + {% if avail_type in external_package_links.keys() %} + + {% else %} +
+ {{ avail_type }} +
+ {% endif %} + {% endfor %} +
+ \ No newline at end of file From e7e06424ff09108f5ac94b56b80e94ca1a42f7e9 Mon Sep 17 00:00:00 2001 From: Justin Richling Date: Thu, 23 Oct 2025 15:52:40 -0600 Subject: [PATCH 05/15] Bring up current ADF code --- lib/adf_diag.py | 70 +++++++++++++--- lib/adf_web.py | 218 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 238 insertions(+), 50 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 1db601f7e..22c74d141 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -357,7 +357,7 @@ def call_ncrcat(cmd): case_names = self.get_cam_info("cam_case_name", required=True) cam_ts_done = self.get_cam_info("cam_ts_done") cam_hist_locs = self.get_cam_info("cam_hist_loc") - ts_dir = self.get_cam_info("cam_ts_loc", required=True) + ts_dirs = self.get_cam_info("cam_ts_loc", required=True) overwrite_ts = self.get_cam_info("cam_overwrite_ts") start_years = self.climo_yrs["syears"] end_years = self.climo_yrs["eyears"] @@ -380,8 +380,8 @@ def call_ncrcat(cmd): # Check if particular case should be processed: if cam_ts_done[case_idx]: - emsg = "\tConfiguration file indicates time series files have been pre-computed." - emsg += f" Will rely on those files directly." + emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed" + emsg += f" for case '{case_name}'. Will rely on those files directly." print(emsg) continue # End if @@ -400,6 +400,9 @@ def call_ncrcat(cmd): self.end_diag_fail(emsg) # End if + # Extract time series file location + ts_dir = ts_dirs[case_idx] + # Check if history files actually exist. 
If not then kill script: hist_str_case = hist_str_list[case_idx] for hist_str in hist_str_case: @@ -413,6 +416,9 @@ def call_ncrcat(cmd): self.end_diag_fail(emsg) # End if + # Notify user that script has started: + print(f"\n\t Writing time series files to:\n\t{ts_dir}") + # Create empty list: files_list = [] @@ -495,7 +501,7 @@ def call_ncrcat(cmd): # Check if time series directory exists, and if not, then create it: # Use pathlib to create parent directories, if necessary. - Path(ts_dir[case_idx]).mkdir(parents=True, exist_ok=True) + Path(ts_dir).mkdir(parents=True, exist_ok=True) # INPUT NAME TEMPLATE: $CASE.$scomp.[$type.][$string.]$date[$ending] first_file_split = str(hist_files[0]).split(".") @@ -512,6 +518,8 @@ def call_ncrcat(cmd): # Intitialize list for NCO commands list_of_commands = [] + list_of_ncattend_commands = [] + list_of_hist_commands = [] # Create copy of var list that can be modified for derivable variables diag_var_list = self.diag_var_list @@ -573,12 +581,12 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS may be in a different history file. If so, continue without error. + # PS might be in a different history file. If so, continue w/o error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PS" - print("\t Adding PS to file") + print(f"\t INFO: Adding PS to file for '{var}'") else: wmsg = "WARNING: PS not found in history file." wmsg += " It might be needed at some point." 
@@ -610,8 +618,38 @@ def call_ncrcat(cmd): + ["-o", ts_outfil_str] ) + # Example ncatted command (you can modify it with the specific attribute changes you need) + #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str] + # Step 1: Convert Path objects to strings and concatenate the list of historical files into a single string + hist_files_str = ', '.join(str(f.name) for f in hist_files) + hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs) + + # Step 2: Create the ncatted command to add both global attributes + cmd_ncatted = [ + "ncatted", "-O", + "-a", "adf_user,global,a,c," + f"{self.user}", + "-a", "hist_file_locs,global,a,c," + f"{hist_locs_str}", + "-a", "hist_file_list,global,a,c," + f"{hist_files_str}", + ts_outfil_str + ] + + # Step 3: Create the ncatted command to remove the history attribute + cmd_remove_history = [ + "ncatted", "-O", "-h", + "-a", "history,global,d,,", + ts_outfil_str + ] + # Add to command list for use in multi-processing pool: + # ----------------------------------------------------- + # generate time series files list_of_commands.append(cmd) + # Add global attributes: user, original hist file loc(s) and all filenames + list_of_ncattend_commands.append(cmd_ncatted) + # Remove the `history` attr that gets tacked on (for clean up) + # NOTE: this may not be best practice, but it the history attr repeats + # the files attrs so the global attrs become obtrusive... 
+ list_of_hist_commands.append(cmd_remove_history) # End variable loop # Now run the "ncrcat" subprocesses in parallel: @@ -865,9 +903,11 @@ def setup_run_cvdp(self): else: cvdp_dir = self.get_cvdp_info("cvdp_loc", required=True) + case_names[0] # end if + + cvdp_dir = os.path.abspath(cvdp_dir) if not os.path.isdir(cvdp_dir): shutil.copytree( - self.get_cvdp_info("cvdp_codebase_loc", required=True), cvdp_dir + self.get_cvdp_info("cvdp_codebase_loc"), cvdp_dir ) # End if @@ -1009,6 +1049,7 @@ def setup_run_mdtf(self): """ Create MDTF directory tree, generate input settings jsonc file Submit MDTF diagnostics. + Returns mdtf_proc for sub-process control (waits for it to finish in run_adf_diag) """ @@ -1085,19 +1126,21 @@ def setup_run_mdtf(self): if copy_files_only: print("\t ...Copy files only. NOT Running MDTF") print(f"\t Command: {mdtf_exe} Log: {mdtf_log}") + return 0 else: print( f"\t ...Running MDTF in background. Command: {mdtf_exe} Log: {mdtf_log}" ) print(f"Running MDTF in background. Command: {mdtf_exe} Log: {mdtf_log}") with open(mdtf_log, "w", encoding="utf-8") as subout: - _ = subprocess.Popen( + mdtf_proc_var = subprocess.Popen( [mdtf_exe], shell=True, stdout=subout, stderr=subout, close_fds=True, ) + return mdtf_proc_var def move_tsfiles_for_mdtf(self, verbose): """ @@ -1115,7 +1158,9 @@ def move_tsfiles_for_mdtf(self, verbose): # Going to need a dict to translate. 
# Use cesm_freq_strings = freq_string_options.keys # and then freq = freq_string_option(freq_string_found) - freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] + freq_string_cesm = ["month", "day", "hour_6", "hour_3", "hour_1"] #keys + freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values + freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) @@ -1171,7 +1216,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue found_strings = [ - word for word in freq_string_options if word in dataset_freq + word for word in freq_string_cesm if word in dataset_freq ] if len(found_strings) == 1: if verbose > 2: @@ -1186,12 +1231,13 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_options} are present in the time_period_freq attribute {dataset_freq}" + f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" ) print(f"Skipping {adf_file}") freq = "frequency_missing" continue - freq = found_strings[0] + freq = freq_string_dict.get(found_strings[0]) + print(f"Translated {found_strings[0]} to {freq}") # # Destination file is MDTF directory and name structure diff --git a/lib/adf_web.py b/lib/adf_web.py index 43f88c871..75f35d133 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -22,13 +22,14 @@ import os import os.path - from pathlib import Path #+++++++++++++++++++++++++++++++++++++++++++++++++ #import non-standard python modules, including ADF #+++++++++++++++++++++++++++++++++++++++++++++++++ +import markdown + #ADF modules: from adf_obs import AdfObs @@ -117,7 +118,6 @@ def __init__(self, config_file, debug=False): #Extract needed variables from yaml file: case_names = self.get_cam_info('cam_case_name', required=True) - #Also extract baseline case (if applicable), and append to 
case_names list: if not self.compare_obs: baseline_name = self.get_baseline_info('cam_case_name', required=True) @@ -157,7 +157,7 @@ def __init__(self, config_file, debug=False): mdtf_path += f"_{syear[0]}_{eyear[0]}" self.external_package_links['MDTF'] = mdtf_path #End if - + #Add all relevant paths to dictionary for specific case: self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -182,6 +182,27 @@ def __init__(self, config_file, debug=False): 'table_pages_dir': table_pages_dir, 'css_files_dir': css_files_dir} #End if + + # Gather ADF run env info + active_env = self.get_active_conda_environment() + if not active_env: + active_env = "--" + + run_info = '' + if self.debug_log: + log_name = self.debug_fname + run_info = f"{log_name}".replace("debug","run_info").replace(".log",".md") + self.run_info = run_info + self._write_run_info_to_log(config_file, active_env) + #Do nothing if user is not requesting a website to be generated: + if self.create_html and self.debug_log: + plot_path = Path(self.plot_location[0]) + + #Create directory path where the website will be built: + website_dir = plot_path / "website" + Path(website_dir).mkdir(parents=True, exist_ok=True) + run_info = f"{website_dir}/{run_info}" + self._write_run_info_to_web(run_info, config_file, active_env) ######### @@ -192,6 +213,91 @@ def create_html(self): return self.get_basic_info('create_html') ######### + def _write_run_info_to_web(self, run_info, config_file, active_env): + """ + If user requests webpage, then add run info to webpages table of contents + """ + four_space = "    " + two_space = "  " + font_22 = "style='font-size:22px;'" + font_18 = "style='font-size:18px;'" + font_16 = "style='font-size:16px;'" + + with open(run_info, "w") as f: + + # Gather config yaml file info + f.write("

") + f.write(f"Config file used
") + f.write(f"{two_space}{config_file}

") + + f.write(f" Config file options
") + for key,val in self.config_dict().items(): + if isinstance(val,dict): + f.write(f"{two_space}{key}:
") + for key2,val2 in val.items(): + f.write(f"{four_space}{key2}: {val2}
") + elif isinstance(val,list): + f.write(f"{two_space}{key}:
") + for val2 in val: + f.write(f"{four_space}{val2}
") + else: + f.write(f"{two_space}{key}: {val}
") + + # Gather Conda environment + f.write("\n") + f.write(f"
Conda env used
") + f.write(f"{two_space}{active_env}") + + # Gather Git info + git_info = self.get_git_info() + f.write("\n") + f.write(f"

Git Info
") + for key,val in git_info.items(): + f.write(f"{two_space}{key}: {val}
") + f.write("

") + + def _write_run_info_to_log(self, config_file, active_env): + + log_msg = "adf_info: ADF run info:" + + # Gather config yaml file info + config_file_msg = "\nConfig file used:" + msg = f"{config_file_msg}\n{'-' * (len(config_file_msg))}\n {config_file}\n" + log_msg += msg + + + config_msg = "\n Config file options:" + msg = f"{config_msg}\n {'- ' * (int(len(config_msg)/2)-1)}" + log_msg += msg + + for key,val in self.config_dict().items(): + if isinstance(val,dict): + log_msg += f"\n {key}:" + for key2,val2 in val.items(): + log_msg += f"\n {key2}: {val2}" + elif isinstance(val,list): + log_msg += f"\n {key}:" + for val2 in val: + log_msg += f"\n {val2}" + else: + log_msg += f"\n {key}: {val}" + + # Gather Conda environment + conda_msg = "\nConda env used:" + msg = f"{conda_msg}\n{'-' * (len(conda_msg)-1)}\n" + log_msg += f"\n {msg}" + log_msg += f" {active_env}" + + # Gather Git info + git_info = self.get_git_info() + git_msg = "\nGit Info:" + msg = f"{git_msg}\n{'-' * (len(git_msg)-1)}\n" + log_msg += f"\n {msg}" + + for key,val in git_info.items(): + log_msg += f" {key}: {val}\n" + + self.debug_log(log_msg) def add_website_data(self, web_data, web_name, case_name, category = None, @@ -347,20 +453,24 @@ def create_website(self): self.end_diag_fail(emsg) #End except - #Make a jinja function that mimics python list object. This will allow for - # the use of 'list' in the html rendering. + #Make jinja functions that mimics python functions. + # - This will allow for the use of 'list' in the html rendering. def jinja_list(seas_list): return list(seas_list) + # - This will allow for the use of 'enumerate' in the html rendering. 
+ def jinja_enumerate(arg): + return enumerate(arg) #Notify user that script has started: print("\n Generating Diagnostics webpages...") + case_sites = OrderedDict() + #If there is more than one non-baseline case, then create new website directory: if self.num_cases > 1: main_site_path = Path(self.get_basic_info('cam_diag_plot_loc', required=True)) main_site_path = main_site_path / "main_website" main_site_path.mkdir(exist_ok=True) - case_sites = OrderedDict() else: main_site_path = "" #Set main_site_path to blank value #End if @@ -604,6 +714,15 @@ def jinja_list(seas_list): "table_html": table_html, "multi_head": False} rend_kwarg_dict["plot_types"] = multi_plot_type_html + + if web_data.name == case1: + rend_kwarg_dict["disp_table_name"] = case1 + rend_kwarg_dict["disp_table_html"] = table_html + + if web_data.name == "Case Comparison": + rend_kwarg_dict["disp_table_name"] = "Case Comparison" + rend_kwarg_dict["disp_table_html"] = table_html + table_tmpl = jinenv.get_template('template_table.html') table_rndr = table_tmpl.render(rend_kwarg_dict) @@ -614,18 +733,16 @@ def jinja_list(seas_list): #Check if the mean plot type page exists for this case (or for multi-case): mean_table_file = table_pages_dir / "mean_tables.html" - if not mean_table_file.exists(): - #Construct mean_table.html - mean_table_tmpl = jinenv.get_template('template_mean_tables.html') - #Reuse the rend_kwarg_dict, but ignore certain keys - #since all others are the same - new_dict = {k: rend_kwarg_dict[k] for k in rend_kwarg_dict.keys() - {'table_name', 'table_html'}} - mean_table_rndr = mean_table_tmpl.render(new_dict) - #Write mean diagnostic tables HTML file: - with open(mean_table_file, 'w', encoding='utf-8') as ofil: - ofil.write(mean_table_rndr) - #End with - #End if + + #Construct mean_table.html + mean_table_tmpl = jinenv.get_template('template_mean_tables.html') + #Reuse the rend_kwarg_dict + mean_table_rndr = mean_table_tmpl.render(rend_kwarg_dict) + #Write mean diagnostic tables HTML 
file: + with open(mean_table_file, 'w', encoding='utf-8') as ofil: + ofil.write(mean_table_rndr) + #End with + #End if (tables) else: #Plot image @@ -645,18 +762,19 @@ def jinja_list(seas_list): #End if rend_kwarg_dict = {"title": main_title, - "var_title": web_data.name, - "season_title": web_data.season, - "case_name": web_data.case, - "case_yrs": case_yrs, - "base_name": data_name, - "baseline_yrs": baseline_yrs, - "plottype_title": web_data.plot_type, - "imgs": img_data, - "mydata": mean_html_info[web_data.plot_type], - "plot_types": plot_types, - "seasons": seasons, - "non_seasons": non_seasons[web_data.plot_type]} + "var_title": web_data.name, + "season_title": web_data.season, + "case_name": web_data.case, + "case_yrs": case_yrs, + "base_name": data_name, + "baseline_yrs": baseline_yrs, + "plottype_title": web_data.plot_type, + "imgs": img_data, + "mydata": mean_html_info[web_data.plot_type], + "plot_types": plot_types, + "seasons": seasons, + "non_seasons": non_seasons[web_data.plot_type]} + tmpl = jinenv.get_template('template.html') #Set template rndr = tmpl.render(rend_kwarg_dict) #The template rendered @@ -671,10 +789,9 @@ def jinja_list(seas_list): #Construct individual plot type mean_diag html files mean_tmpl = jinenv.get_template('template_mean_diag.html') - #Remove keys from main dictionary for this html page - templ_rend_kwarg_dict = {k: rend_kwarg_dict[k] for k in rend_kwarg_dict.keys() - {'imgs', 'var_title', 'season_title'}} - templ_rend_kwarg_dict["list"] = jinja_list - mean_rndr = mean_tmpl.render(templ_rend_kwarg_dict) + rend_kwarg_dict["enumerate"] = jinja_enumerate + rend_kwarg_dict["list"] = jinja_list + mean_rndr = mean_tmpl.render(rend_kwarg_dict) #Write mean diagnostic plots HTML file: with open(mean_ptype_file,'w', encoding='utf-8') as ofil: @@ -686,6 +803,30 @@ def jinja_list(seas_list): index_html_file = \ self.__case_web_paths[web_data.case]['website_dir'] / "index.html" + # Create run info web page + run_info_md_file = \ + 
self.__case_web_paths[web_data.case]['website_dir'] / self.run_info + + # Read the markdown file + with open(run_info_md_file, "r", encoding="utf-8") as mdfile: + md_text = mdfile.read() + + # Convert markdown to HTML + run_info_html = markdown.markdown(md_text) + index_title = "CAM Diagnostics" + run_info_html_file = self.__case_web_paths[web_data.case]['website_dir'] / "run_info.html" + run_info_tmpl = jinenv.get_template('template_run_info.html') + run_info_rndr = run_info_tmpl.render(title=index_title, + case_name=web_data.case, + base_name=data_name, + case_yrs=case_yrs, + baseline_yrs=baseline_yrs, + plot_types=plot_types, + run_info=run_info_html) + + with open(run_info_html_file, "w", encoding="utf-8") as htmlfile: + htmlfile.write(run_info_rndr) + #Re-et plot types list: if web_data.case == 'multi-case': plot_types = multi_plot_type_html @@ -696,7 +837,7 @@ def jinja_list(seas_list): #List of ADF default plot types avail_plot_types = res["default_ptypes"] - + #Check if current plot type is in ADF default. 
#If not, add it so the index.html file can include it for ptype in plot_types.keys(): @@ -706,9 +847,9 @@ def jinja_list(seas_list): # External packages that can be run through ADF avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} - + #Construct index.html - index_title = "AMP Diagnostics Prototype" + index_title = "CAM Diagnostics" index_tmpl = jinenv.get_template('template_index.html') index_rndr = index_tmpl.render(title=index_title, case_name=web_data.case, @@ -718,7 +859,8 @@ def jinja_list(seas_list): plot_types=plot_types, avail_plot_types=avail_plot_types, avail_external_packages=avail_external_packages, - external_package_links=self.external_package_links) + external_package_links=self.external_package_links, + run_info=run_info_html) #Write Mean diagnostics index HTML file: with open(index_html_file, 'w', encoding='utf-8') as ofil: From 5c36377928fee993d8ad537fab21e2c82b5b8f8b Mon Sep 17 00:00:00 2001 From: justin-richling Date: Fri, 30 Aug 2024 16:21:18 -0600 Subject: [PATCH 06/15] Pull derivation code to external script Pull these checks and calculations out of `adf-diag.py` to clean that file up. 
--- lib/adf_derive.py | 274 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 lib/adf_derive.py diff --git a/lib/adf_derive.py b/lib/adf_derive.py new file mode 100644 index 000000000..279cceef6 --- /dev/null +++ b/lib/adf_derive.py @@ -0,0 +1,274 @@ +import glob +import os +from pathlib import Path +import xarray as xr + + +def check_derive(self, res, var, case_name, diag_var_list, constit_dict, hist_file_ds, hist0): + """ + For incoming variable, look for list of constituents if available + - as a list in variable defaults file + + If the variable does not have the argument `derivable_from` or `derivable_from_cam_chem`, + then it will be assumed not to be a derivable variable, just missing from history file + + If the variable does have the argument `derivable_from` or `derivable_from_cam_chem`, + first check cam-chem, then regular cam. + + Arguments + --------- + self: AdfDiag + - ADF object + res: dict + - variable defaults dictionary from yaml file + var: str + - derived variable name + case_name: str + - model case + diag_var_list: list + - list of variables for diagnostics + NOTE: this is user supplied, but gets modified here for constituents + constit_dict: dict + - dictionary of derived variables as keys and list of constituents as values + hist_file_ds: xarray.DataSet + - history file dataset for checking if constituents are available + hist0: str + - history number for case + + Returns + ------- + constit_list: list + - list of declared consituents from the variable defaults yaml file + - empty list: + * if missing `derived_from` argument(s) + * if `derived_from` argument(s) exist but not declared + + diag_var_list: list + - updated list (if applicable) of ADF variables for time series creation + """ + + # Aerosol Calcs + #-------------- + + # Always make sure PMID is made if aerosols are desired in config file + # Since there's no requirement for `aerosol_zonal_list`, allow it to be absent: + azl = 
res.get("aerosol_zonal_list", []) + if azl: + if "PMID" not in diag_var_list: + if any(item in azl for item in diag_var_list): + diag_var_list += ["PMID"] + if "T" not in diag_var_list: + if any(item in azl for item in diag_var_list): + diag_var_list += ["T"] + # End aerosol calcs + + # Set error messages for printing/debugging + # Derived variable, but missing constituent list + constit_errmsg = f"create time series for {case_name}:" + constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable" + constit_errmsg += " is flagged for derivation, but is missing list of constiuents." + constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' " + constit_errmsg += f"for {var} in variable defaults yaml file." + + # No time series creation + exit_msg = f"WARNING: {var} is not in the file {hist0} and can't be derived." + exit_msg += "\n\t ** No time series will be generated. **\n" + + # Initialiaze list for constituents + # NOTE: This is if the variable is NOT derivable but needs + # an empty list as a check later + constit_list = [] + + try_cam_constits = True + # Try finding info from variable defaults yaml file + try: + vres = res[var] + except KeyError: + print(exit_msg) + self.debug_log(exit_msg) + return diag_var_list, constit_dict + + # Check first if variable is potentially part of a CAM-CHEM run + if "derivable_from_cam_chem" in vres: + constit_list = vres["derivable_from_cam_chem"] + + if constit_list: + if all(item in hist_file_ds.data_vars for item in constit_list): + # Set check to look for regular CAM constituents in variable defaults + try_cam_constits = False + msg = f"derive time series for {case_name}:" + msg += "\n\tLooks like this a CAM-CHEM run, " + msg += f"checking constituents for '{var}'" + self.debug_log(msg) + else: + self.debug_log(constit_errmsg) + # End if + # End if + + # If not CAM-CHEM, check regular CAM runs + if try_cam_constits: + if "derivable_from" in vres: + constit_list = vres["derivable_from"] + 
else: + # Missing variable or missing derivable_from argument + der_from_msg = f"derive time series for {case_name}:" + der_from_msg += f"\n Can't create time series for {var}.\n\tEither " + der_from_msg += "the variable is missing from CAM output or it is a " + der_from_msg += "derived quantity and is missing the 'derivable_from' " + der_from_msg += "config argument.\n\tPlease add variable to CAM run " + der_from_msg += "or set appropriate argument in variable " + der_from_msg += "defaults yaml file." + self.debug_log(der_from_msg) + # End if + # End if + + # Log if this variable can be derived but is missing list of constituents + if isinstance(constit_list, list) and not constit_list: + self.debug_log(constit_errmsg) + + # Check if any constituents were found + if constit_list: + # Add variable and constituent list to dictionary + constit_dict[var] = constit_list + + # Add constituents to ADF diag variable list for time series generation + for constit in constit_list: + if constit not in diag_var_list: + diag_var_list.append(constit) + else: + print(exit_msg) + self.debug_log(exit_msg) + # End if + + return diag_var_list, constit_dict + +######## + +def derive_variable(self, case_name, var, res=None, ts_dir=None, + constit_list=None, overwrite=None): + """ + Derive variables acccording to steps given here. Since derivations will depend on the + variable, each variable to derive will need its own set of steps below. + + Caution: this method assumes that there will be one time series file per variable + + If the file for the derived variable exists, the kwarg `overwrite` determines + whether to overwrite the file (true) or exit with a warning message. 
+ + """ + + # Loop through derived variables + print(f"\t - deriving time series for {var}") + + # Grab all required time series files for derived variable + constit_files = [] + for constit in constit_list: + # Check if the constituent file is present, if so add it to list + if glob.glob(os.path.join(ts_dir, f"*.{constit}.*.nc")): + constit_files.append(glob.glob(os.path.join(ts_dir, f"*.{constit}.*"))[0]) + # End for + + # Check if all the necessary constituent files were found + if len(constit_files) != len(constit_list): + ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated. **\n" + ermsg += f"\t Please remove {var} from 'diag_var_list' or find the " + ermsg += "relevant CAM files.\n" + print(ermsg) + if constit_files: + # Add what's missing to debug log + dmsg = f"derived time series for {case_name}:" + dmsg += f"\n\tneeded constituents for derivation of " + dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in " + dmsg += f"{Path(constit_files[0]).parent}:\n\t\t" + dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}" + self.debug_log(dmsg) + else: + dmsg = f"derived time series for {case_name}:" + dmsg += f"\n\tneeded constituents for derivation of " + dmsg += f"{var}:\n\t\t- {constit_list}\n" + dmsg += f"\tNo constituent(s) found in history files" + self.debug_log(dmsg) + # End if + else: + # Open a new dataset with all the constituent files/variables + ds = self.data.load_dataset(constit_files) + if not ds: + dmsg = f"derived time series for {case_name}:" + dmsg += f"\n\tNo files to open." 
+ self.debug_log(dmsg) + return + + # Grab attributes from first constituent file to be used in derived variable + attrs = ds[constit_list[0]].attrs + + # create new file name for derived variable + derived_file = constit_files[0].replace(constit_list[0], var) + + # Check if clobber is true for file + if Path(derived_file).is_file(): + if overwrite: + Path(derived_file).unlink() + else: + msg = f"[{__name__}] Warning: '{var}' file was found " + msg += "and overwrite is False. Will use existing file." + print(msg) + + #NOTE: this will need to be changed when derived equations are more complex! - JR + if var == "RESTOM": + der_val = ds["FSNT"]-ds["FLNT"] + else: + # Loop through all constituents and sum + der_val = 0 + for v in constit_list: + der_val += ds[v] + + # Set derived variable name and add to dataset + der_val.name = var + ds[var] = der_val + + # Aerosol Calculations + #---------------------------------------------------------------------------------- + # These will be multiplied by rho (density of dry air) + + # User-defined defaults might not include aerosol zonal list + azl = res.get("aerosol_zonal_list", []) + if var in azl: + # Check if PMID is in file: + ds_pmid = self.data.load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0]) + if not ds_pmid: + errmsg = "Missing necessary files for dry air density (rho) " + errmsg += "calculation.\nPlease make sure 'PMID' is in the CAM " + errmsg += "run for aerosol calculations" + print(errmsg) + dmsg = "derived time series:" + dmsg += f"\n\t missing 'PMID' in {ts_dir}, can't make time series for {var} " + self.debug_log(dmsg) + + # Check if T is in file: + ds_t = self.data.load_dataset(glob.glob(os.path.join(ts_dir, "*.T.*"))[0]) + if not ds_t: + errmsg = "Missing necessary files for dry air density (rho) " + errmsg += "calculation.\nPlease make sure 'T' is in the CAM " + errmsg += "run for aerosol calculations" + print(errmsg) + + dmsg = "derived time series:" + dmsg += f"\n\t missing 'T' in {ts_dir}, 
can't make time series for {var} " + self.debug_log(dmsg) + + # Multiply aerosol by dry air density (rho): (P/Rd*T) + ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"])) + + # Sulfate conversion factor + if var == "SO4": + ds[var] = ds[var]*(96./115.) + #---------------------------------------------------------------------------------- + + # Drop all constituents from final saved dataset + # These are not necessary because they have their own time series files + ds_final = ds.drop_vars(constit_list) + # Copy attributes from constituent file to derived variable + ds_final[var].attrs = attrs + ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w') + # End if (all the necessary constituent files exist) +######## \ No newline at end of file From 96d0cc42349e6f04189d3fc506d31cb585f91769 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Fri, 30 Aug 2024 16:22:06 -0600 Subject: [PATCH 07/15] Update adf_diag.py Now call the `adf_derive.py` script for derived variables --- lib/adf_diag.py | 349 +++++------------------------------------------- 1 file changed, 37 insertions(+), 312 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index ddf452ecc..82373ecec 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -97,6 +97,7 @@ # Finally, import needed ADF modules: from adf_web import AdfWeb from adf_dataset import AdfData +from adf_derive import check_derive, derive_variable ################# # Helper functions @@ -340,14 +341,11 @@ def call_ncrcat(cmd): It is declared as global to avoid AttributeError. 
""" return subprocess.run(cmd, shell=False) - # End def - # Check if baseline time-series files are being created: if baseline: - # Use baseline settings, while converting them all - # to lists: + # Use baseline settings, while converting them all to lists: case_names = [self.get_baseline_info("cam_case_name", required=True)] cam_ts_done = [self.get_baseline_info("cam_ts_done")] cam_hist_locs = [self.get_baseline_info("cam_hist_loc")] @@ -380,6 +378,10 @@ def call_ncrcat(cmd): # Loop over cases: for case_idx, case_name in enumerate(case_names): + # Notify user that script has started: + print(f"\n Generating CAM time series files for '{case_name}'...") + print(f"\n Writing time series files to {ts_dir[case_idx]}") + # Check if particular case should be processed: if cam_ts_done[case_idx]: emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed" @@ -405,7 +407,7 @@ def call_ncrcat(cmd): # Extract time series file location ts_dir = ts_dirs[case_idx] - # Check if history files actually exqist. If not then kill script: + # Check if history files actually exist. 
If not then kill script: hist_str_case = hist_str_list[case_idx] for hist_str in hist_str_case: @@ -518,138 +520,35 @@ def call_ncrcat(cmd): time_string_finish = last_file_split[-1].replace("-", "") time_string = "-".join([time_string_start, time_string_finish]) - # Loop over CAM history variables: + # Intitialize list for NCO commands list_of_commands = [] - list_of_ncattend_commands = [] - list_of_hist_commands = [] - vars_to_derive = [] - # create copy of var list that can be modified for derivable variables + + # Create copy of var list that can be modified for derivable variables diag_var_list = self.diag_var_list - # Aerosol Calcs - # -------------- - # Always make sure PMID is made if aerosols are desired in config file - # Since there's no requirement for `aerosol_zonal_list` to be included, - # allow it to be absent: - - azl = res.get("aerosol_zonal_list", []) - if "PMID" not in diag_var_list: - if any(item in azl for item in diag_var_list): - diag_var_list += ["PMID"] - if "T" not in diag_var_list: - if any(item in azl for item in diag_var_list): - diag_var_list += ["T"] - # End aerosol calcs - - # Initialize dictionary for derived variable with needed list of constituents + # Intitialize dictionary for derived variables, if appplicable constit_dict = {} + # Loop over CAM history variables: for var in diag_var_list: # Notify user of new time series file: print(f"\t - time series for {var}") - # Create full path name, file name template: - # $cam_case_name.$hist_str.$variable.YYYYMM-YYYYMM.nc - ts_outfil_str = ( - ts_dir - + os.sep - + ".".join([case_name, hist_str, var, time_string, "nc"]) - ) - - # Check if clobber is true for file - if Path(ts_outfil_str).is_file(): - if overwrite_ts[case_idx]: - Path(ts_outfil_str).unlink() - else: - #msg = f"[{__name__}] Warning: '{var}' file was found " - msg = f"\t INFO: '{var}' file was found " - msg += "and overwrite is False. Will use existing file." 
- print(msg) - continue - - # Set error messages for printing/debugging - # Derived variable, but missing constituent list - constit_errmsg = f"create time series for {case_name}:" - constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable" - constit_errmsg += " is flagged for derivation, but is missing list of constiuents." - constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' " - constit_errmsg += f"for {var} in variable defaults yaml file." + # Initialize list for constituents if variable is derivable + constit_list = [] - # Check if current variable is a derived quantity + # Check if current variable is not in history file(s) if var not in hist_file_var_list: - vres = res.get(var, {}) - - # Initialiaze list for constituents - # NOTE: This is if the variable is NOT derivable but needs - # an empty list as a check later - constit_list = [] - - # intialize boolean to check if variable is derivable - derive = False # assume it can't be derived and update if it can - - # intialize boolean for regular CAM variable constituents - try_cam_constits = True - - # Check first if variable is potentially part of a CAM-CHEM run - if "derivable_from_cam_chem" in vres: - constit_list = vres["derivable_from_cam_chem"] - if constit_list: - if all(item in hist_file_ds.data_vars for item in constit_list): - # Set check to look for regular CAM constituents - try_cam_constits = False - derive = True - msg = f"create time series for {case_name}:" - msg += "\n\tLooks like this a CAM-CHEM run, " - msg += f"checking constituents for '{var}'" - self.debug_log(msg) - else: - self.debug_log(constit_errmsg) - # End if - # End if - - # If not CAM-CHEM, check regular CAM runs - if try_cam_constits: - if "derivable_from" in vres: - derive = True - constit_list = vres["derivable_from"] - else: - # Missing variable or missing derivable_from argument - der_from_msg = f"create time series for {case_name}:" - der_from_msg += f"\n Can't create time series 
for {var}.\n\tEither " - der_from_msg += "the variable is missing from CAM output or it is a " - der_from_msg += "derived quantity and is missing the 'derivable_from' " - der_from_msg += "config argument.\n\tPlease add variable to CAM run " - der_from_msg += "or set appropriate argument in variable " - der_from_msg += "defaults yaml file." - self.debug_log(der_from_msg) - # End if - - # Check if this variable can be derived - if (derive) and (constit_list): - for constit in constit_list: - if constit not in diag_var_list: - diag_var_list.append(constit) - # Add variable to list to derive - vars_to_derive.append(var) - # Add constituent list to variable key in dictionary - constit_dict[var] = constit_list - continue - # Log if variable can be derived but is missing list of constituents - elif (derive) and (not constit_list): - self.debug_log(constit_errmsg) - continue - # Lastly, raise error if the variable is not a derived quanitity - # but is also not in the history file(s) - else: - msg = f"\t WARNING: {var} is not in the history file for case '{case_name}' " - msg += "nor can it be derived. Script will continue to next variable." 
- print(msg) - logmsg = f"create time series for {case_name}:" - logmsg += f"\n {var} is not in the file {hist_files[0]} " - self.debug_log(logmsg) - continue - # End if - # End if (var in var_diag_list) + # Let user know variable is not in history file + print(f"\t {var} not in history file, will try to derive if possible") + + # Check if variable can be derived + diag_var_list, constit_dict = check_derive(self, res, var, case_name, + diag_var_list, constit_dict, + hist_file_ds, hist_files[0]) + # Move to the next variable + continue + # End if # Check if variable has a "lev" dimension according to first file: has_lev = bool("lev" in hist_file_ds[var].dims or "ilev" in hist_file_ds[var].dims) @@ -675,12 +574,12 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS might be in a different history file. If so, continue w/o error. + # PS may be in a different history file. If so, continue without error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PS" - print(f"\t INFO: Adding PS to file for '{var}'") + print("\t Adding PS to file") else: wmsg = "WARNING: PS not found in history file." wmsg += " It might be needed at some point." @@ -696,7 +595,7 @@ def call_ncrcat(cmd): # PMID file to each one of those targets separately. -JN if "PMID" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PMID" - print("Adding PMID to file") + print("\t Adding PMID to file") else: wmsg = "WARNING: PMID not found in history file." wmsg += " It might be needed at some point." 
@@ -738,13 +637,6 @@ def call_ncrcat(cmd): # ----------------------------------------------------- # generate time series files list_of_commands.append(cmd) - # Add global attributes: user, original hist file loc(s) and all filenames - list_of_ncattend_commands.append(cmd_ncatted) - # Remove the `history` attr that gets tacked on (for clean up) - # NOTE: this may not be best practice, but it the history attr repeats - # the files attrs so the global attrs become obtrusive... - list_of_hist_commands.append(cmd_remove_history) - # End variable loop # Now run the "ncrcat" subprocesses in parallel: @@ -766,6 +658,12 @@ def call_ncrcat(cmd): constit_dict=constit_dict, ts_dir=ts_dir ) # End with + + # Finally, run through the derived variables if applicable + if constit_dict: + for der_var, constit_list in constit_dict.items(): + derive_variable(self, case_name, der_var, res, + ts_dir[case_idx], constit_list) # End for hist_str # End cases loop @@ -1147,137 +1045,6 @@ def setup_run_cvdp(self): ######### - def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=None, - constit_dict=None, overwrite=None): - """ - Derive variables acccording to steps given here. Since derivations will depend on the - variable, each variable to derive will need its own set of steps below. - - Caution: this method assumes that there will be one time series file per variable - - If the file for the derived variable exists, the kwarg `overwrite` determines - whether to overwrite the file (true) or exit with a warning message. 
- - """ - - # Loop through derived variables - for var in vars_to_derive: - print(f"\t - deriving time series for {var}") - - # Grab list of constituents for this variable - constit_list = constit_dict[var] - - # Grab all required time series files for derived variable - constit_files = [] - for constit in constit_list: - # Check if the constituent file is present, if so add it to list - if hist_str: - const_glob_str = f"*{hist_str}*.{constit}.*.nc" - else: - const_glob_str = f"*.{constit}.*.nc" - # end if - if glob.glob(os.path.join(ts_dir, const_glob_str)): - constit_files.append(glob.glob(os.path.join(ts_dir, const_glob_str ))[0]) - - # Check if all the necessary constituent files were found - if len(constit_files) != len(constit_list): - ermsg = f"\t WARNING: Not all constituent files present; {var} cannot be calculated." - ermsg += f" Please remove {var} from 'diag_var_list' or find the " - ermsg += "relevant CAM files.\n" - print(ermsg) - if constit_files: - # Add what's missing to debug log - dmsg = "create time series:" - dmsg += "\n\tneeded constituents for derivation of " - dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in " - dmsg += f"{Path(constit_files[0]).parent}:\n\t\t" - dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}" - self.debug_log(dmsg) - else: - dmsg = "create time series:" - dmsg += "\n\tneeded constituents for derivation of " - dmsg += f"{var}:\n\t\t- {constit_list}\n" - dmsg += "\tNo constituent(s) found in history files" - self.debug_log(dmsg) - - else: - # Open a new dataset with all the constituent files/variables - ds = xr.open_mfdataset(constit_files).compute() - - # Grab attributes from first constituent file to be used in derived variable - attrs = ds[constit_list[0]].attrs - - # create new file name for derived variable - derived_file = constit_files[0].replace(constit_list[0], var) - - # Check if clobber is true for file - if Path(derived_file).is_file(): - if overwrite: - 
Path(derived_file).unlink() - else: - msg = f"\t INFO: '{var}' file was found " - msg += "and overwrite is False. Will use existing file." - print(msg) - continue - - # NOTE: this will need to be changed when derived equations are more complex! - JR - if var == "RESTOM": - der_val = ds["FSNT"]-ds["FLNT"] - else: - # Loop through all constituents and sum - der_val = 0 - for v in constit_list: - der_val += ds[v] - - # Set derived variable name and add to dataset - der_val.name = var - ds[var] = der_val - - # Aerosol Calculations - # ---------------------------------------------------------------------------------- - # These will be multiplied by rho (density of dry air) - ds_pmid_done = False - ds_t_done = False - - # User-defined defaults might not include aerosol zonal list - azl = res.get("aerosol_zonal_list", []) - if var in azl: - # Only calculate once for all aerosol vars - if not ds_pmid_done: - ds_pmid = _load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0]) - ds_pmid_done = True - if not ds_pmid: - errmsg = "\t WARNING: Missing necessary files for dry air density" - errmsg += " (rho) calculation.\n" - errmsg += "\t Please make sure 'PMID' is in the CAM run" - errmsg += " for aerosol calculations" - print(errmsg) - continue - if not ds_t_done: - ds_t = _load_dataset(glob.glob(os.path.join(ts_dir, "*.T.*"))[0]) - ds_t_done = True - if not ds_t: - errmsg = "\t WARNING: Missing necessary files for dry air density" - errmsg += " (rho) calculation.\n" - errmsg += "\t Please make sure 'T' is in the CAM run" - errmsg += " for aerosol calculations" - print(errmsg) - continue - - # Multiply aerosol by dry air density (rho): (P/Rd*T) - ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"])) - - # Sulfate conversion factor - if var == "SO4": - ds[var] = ds[var]*(96./115.) 
- # ---------------------------------------------------------------------------------- - - # Drop all constituents from final saved dataset - # These are not necessary because they have their own time series files - ds_final = ds.drop_vars(constit_list) - # Copy attributes from constituent file to derived variable - ds_final[var].attrs = attrs - ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w') ######### MDTF functions ######### def setup_run_mdtf(self): @@ -1393,9 +1160,7 @@ def move_tsfiles_for_mdtf(self, verbose): # Going to need a dict to translate. # Use cesm_freq_strings = freq_string_options.keys # and then freq = freq_string_option(freq_string_found) - freq_string_cesm = ["month", "day", "hour_6", "hour_3", "hour_1"] #keys - freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values - freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict + freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) @@ -1451,7 +1216,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue found_strings = [ - word for word in freq_string_cesm if word in dataset_freq + word for word in freq_string_options if word in dataset_freq ] if len(found_strings) == 1: if verbose > 2: @@ -1466,7 +1231,7 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" + f"WARNING: None of the frequency options {freq_string_options} are present in the time_period_freq attribute {dataset_freq}" ) print(f"Skipping {adf_file}") freq = "frequency_missing" @@ -1502,43 +1267,3 @@ def move_tsfiles_for_mdtf(self, verbose): # end for hist_str # end for var # end for case - - -######## - -# Helper Function(s) - - -def _load_dataset(fils): - """ - This method exists to get an xarray Dataset from input file 
information that - can be passed into the plotting methods. - - Parameters - ---------- - fils : list - strings or paths to input file(s) - - Returns - ------- - xr.Dataset - - Notes - ----- - When just one entry is provided, use `open_dataset`, otherwise `open_mfdatset` - """ - - import adf_utils as utils - import warnings # use to warn user about missing files - warnings.formatwarning = utils.my_formatwarning - - if len(fils) == 0: - warnings.warn("\t WARNING: Input file list is empty.") - return None - if len(fils) > 1: - return xr.open_mfdataset(fils, combine='by_coords') - else: - return xr.open_dataset(fils[0]) - #End if -# End def -######## From 49871a26c87dfb1bda263999ffd2e484a45d7491 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Fri, 30 Aug 2024 16:28:04 -0600 Subject: [PATCH 08/15] Clean up print statements --- lib/adf_derive.py | 2 +- lib/adf_diag.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/adf_derive.py b/lib/adf_derive.py index 279cceef6..39bf83230 100644 --- a/lib/adf_derive.py +++ b/lib/adf_derive.py @@ -72,7 +72,7 @@ def check_derive(self, res, var, case_name, diag_var_list, constit_dict, hist_fi # No time series creation exit_msg = f"WARNING: {var} is not in the file {hist0} and can't be derived." - exit_msg += "\n\t ** No time series will be generated. **\n" + exit_msg += "\t ** No time series will be generated. 
**" # Initialiaze list for constituents # NOTE: This is if the variable is NOT derivable but needs diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 82373ecec..232ef599f 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -539,7 +539,7 @@ def call_ncrcat(cmd): # Check if current variable is not in history file(s) if var not in hist_file_var_list: - # Let user know variable is not in history file + # Let user know variable is not print(f"\t {var} not in history file, will try to derive if possible") # Check if variable can be derived From bc15d4c9d59341588bd6e71a68bb9a896e381212 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 8 Oct 2024 12:02:48 -0600 Subject: [PATCH 09/15] Pull newest changed from main --- lib/adf_web.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/adf_web.py b/lib/adf_web.py index b5d8e77ed..f0bf6ba43 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -157,7 +157,11 @@ def __init__(self, config_file, debug=False): mdtf_path += f"_{syear[0]}_{eyear[0]}" self.external_package_links['MDTF'] = mdtf_path #End if +<<<<<<< HEAD +======= + +>>>>>>> d8eec242 (Pull newest changed from main) #Add all relevant paths to dictionary for specific case: self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -847,7 +851,11 @@ def jinja_enumerate(arg): # External packages that can be run through ADF avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} +<<<<<<< HEAD +======= + +>>>>>>> d8eec242 (Pull newest changed from main) #Construct index.html index_title = "CAM Diagnostics" index_tmpl = jinenv.get_template('template_index.html') @@ -859,8 +867,12 @@ def jinja_enumerate(arg): plot_types=plot_types, avail_plot_types=avail_plot_types, avail_external_packages=avail_external_packages, +<<<<<<< HEAD external_package_links=self.external_package_links, run_info=run_info_html) +======= + external_package_links=self.external_package_links) +>>>>>>> d8eec242 (Pull 
newest changed from main) #Write Mean diagnostics index HTML file: with open(index_html_file, 'w', encoding='utf-8') as ofil: From fafd70d27f1f733f67e466083b044d54c8d478a8 Mon Sep 17 00:00:00 2001 From: Justin Richling Date: Thu, 23 Oct 2025 15:52:40 -0600 Subject: [PATCH 10/15] Bring up current ADF code --- lib/adf_diag.py | 20 +++++++++++++++----- lib/adf_web.py | 12 ------------ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 232ef599f..7731e20e9 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -522,6 +522,8 @@ def call_ncrcat(cmd): # Intitialize list for NCO commands list_of_commands = [] + list_of_ncattend_commands = [] + list_of_hist_commands = [] # Create copy of var list that can be modified for derivable variables diag_var_list = self.diag_var_list @@ -574,12 +576,12 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS may be in a different history file. If so, continue without error. + # PS might be in a different history file. If so, continue w/o error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PS" - print("\t Adding PS to file") + print(f"\t INFO: Adding PS to file for '{var}'") else: wmsg = "WARNING: PS not found in history file." wmsg += " It might be needed at some point." @@ -637,6 +639,12 @@ def call_ncrcat(cmd): # ----------------------------------------------------- # generate time series files list_of_commands.append(cmd) + # Add global attributes: user, original hist file loc(s) and all filenames + list_of_ncattend_commands.append(cmd_ncatted) + # Remove the `history` attr that gets tacked on (for clean up) + # NOTE: this may not be best practice, but it the history attr repeats + # the files attrs so the global attrs become obtrusive... 
+ list_of_hist_commands.append(cmd_remove_history) # End variable loop # Now run the "ncrcat" subprocesses in parallel: @@ -1160,7 +1168,9 @@ def move_tsfiles_for_mdtf(self, verbose): # Going to need a dict to translate. # Use cesm_freq_strings = freq_string_options.keys # and then freq = freq_string_option(freq_string_found) - freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] + freq_string_cesm = ["month", "day", "hour_6", "hour_3", "hour_1"] #keys + freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values + freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) @@ -1216,7 +1226,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue found_strings = [ - word for word in freq_string_options if word in dataset_freq + word for word in freq_string_cesm if word in dataset_freq ] if len(found_strings) == 1: if verbose > 2: @@ -1231,7 +1241,7 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_options} are present in the time_period_freq attribute {dataset_freq}" + f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" ) print(f"Skipping {adf_file}") freq = "frequency_missing" diff --git a/lib/adf_web.py b/lib/adf_web.py index f0bf6ba43..5ce9b5523 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -157,11 +157,7 @@ def __init__(self, config_file, debug=False): mdtf_path += f"_{syear[0]}_{eyear[0]}" self.external_package_links['MDTF'] = mdtf_path #End if -<<<<<<< HEAD - -======= ->>>>>>> d8eec242 (Pull newest changed from main) #Add all relevant paths to dictionary for specific case: self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -851,11 +847,7 @@ def jinja_enumerate(arg): # External packages that can be run through ADF 
avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} -<<<<<<< HEAD - -======= ->>>>>>> d8eec242 (Pull newest changed from main) #Construct index.html index_title = "CAM Diagnostics" index_tmpl = jinenv.get_template('template_index.html') @@ -867,12 +859,8 @@ def jinja_enumerate(arg): plot_types=plot_types, avail_plot_types=avail_plot_types, avail_external_packages=avail_external_packages, -<<<<<<< HEAD external_package_links=self.external_package_links, run_info=run_info_html) -======= - external_package_links=self.external_package_links) ->>>>>>> d8eec242 (Pull newest changed from main) #Write Mean diagnostics index HTML file: with open(index_html_file, 'w', encoding='utf-8') as ofil: From 553c70b5bbcfe64cbc6d830455855e3abf971a6d Mon Sep 17 00:00:00 2001 From: Justin Richling Date: Fri, 24 Oct 2025 10:39:45 -0600 Subject: [PATCH 11/15] Clean up linting errors --- lib/adf_diag.py | 58 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 7731e20e9..5a48dfa03 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -384,8 +384,8 @@ def call_ncrcat(cmd): # Check if particular case should be processed: if cam_ts_done[case_idx]: - emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed" - emsg += f" for case '{case_name}'. Will rely on those files directly." + emsg = "\tNOTE: Configuration file indicates time series files have been " + emsg += f"pre-computed for case '{case_name}'. Will rely on those files directly." 
print(emsg) continue # End if @@ -536,6 +536,25 @@ def call_ncrcat(cmd): # Notify user of new time series file: print(f"\t - time series for {var}") + # Create full path name, file name template: + # $cam_case_name.$hist_str.$variable.YYYYMM-YYYYMM.nc + ts_outfil_str = ( + ts_dir + + os.sep + + ".".join([case_name, hist_str, var, time_string, "nc"]) + ) + + # Check if clobber is true for file + if Path(ts_outfil_str).is_file(): + if overwrite_ts[case_idx]: + Path(ts_outfil_str).unlink() + else: + #msg = f"[{__name__}] Warning: '{var}' file was found " + msg = f"\t INFO: '{var}' file was found " + msg += "and overwrite is False. Will use existing file." + print(msg) + continue + # Initialize list for constituents if variable is derivable constit_list = [] @@ -615,7 +634,8 @@ def call_ncrcat(cmd): # Example ncatted command (you can modify it with the specific attribute changes you need) #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str] - # Step 1: Convert Path objects to strings and concatenate the list of historical files into a single string + # Step 1: Convert Path objects to strings and concatenate the list of + # historical files into a single string hist_files_str = ', '.join(str(f.name) for f in hist_files) hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs) @@ -656,7 +676,8 @@ def call_ncrcat(cmd): with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_ncattend_commands) - # Run ncatted command to remove history attribute after the global attributes are set + # Run ncatted command to remove history attribute + # after the global attributes are set with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_hist_commands) @@ -666,7 +687,7 @@ def call_ncrcat(cmd): constit_dict=constit_dict, ts_dir=ts_dir ) # End with - + # Finally, run through the derived variables if applicable if constit_dict: for der_var, constit_list in constit_dict.items(): @@ -1074,8 
+1095,8 @@ def setup_run_mdtf(self): # # Create a dict with all the case info needed for MDTF case_list - # Note that model and convention are hard-coded to CESM because that's all we expect here - # This could be changed by inputing them into ADF with other MDTF-specific variables + # Note that model and convention are hard-coded to CESM because that's all we expect here + # - This could be changed by inputing them into ADF with other MDTF-specific variables # case_list_keys = ["CASENAME", "FIRSTYR", "LASTYR", "model", "convention"] @@ -1131,7 +1152,9 @@ def setup_run_mdtf(self): # # Submit the MDTF script in background mode, send output to mdtf.out file # - mdtf_log = "mdtf.out" # maybe set this to cam_diag_plot_loc: /glade/scratch/${user}/ADF/plots + mdtf_log = "mdtf.out" + # maybe set this to cam_diag_plot_loc: /glade/scratch/${user}/ADF/plots + mdtf_exe = mdtf_codebase + os.sep + "mdtf -f " + mdtf_input_settings_filename if copy_files_only: print("\t ...Copy files only. NOT Running MDTF") @@ -1200,17 +1223,21 @@ def move_tsfiles_for_mdtf(self, verbose): elif len(adf_file_list) > 1: if verbose > 0: print( - f"WARNING: found multiple timeseries files {adf_file_list}. Continuing with best guess; suggest cleaning up multiple dates in ts dir" + f"""WARNING: found multiple timeseries files {adf_file_list}. + Continuing with best guess; suggest cleaning up multiple + dates in ts dir""" ) else: if verbose > 1: print( - f"WARNING: No files matching {case_name}.{hist_str}.{var} found in {adf_file_str}. Skipping" + f"""WARNING: No files matching {case_name}.{hist_str}.{var} + found in {adf_file_str}. Skipping""" ) continue # skip this case/hist_str/var file adf_file = adf_file_list[0] - # If freq is not set, it means we just started this hist_str. So check the first ADF file to find it + # If freq is not set, it means we just started this hist_str. 
+ # So check the first ADF file to find it hist_file_ds = xr.open_dataset( adf_file, decode_cf=False, decode_times=False ) @@ -1221,7 +1248,8 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: Necessary 'time_period_freq' attribute missing from {adf_file}. Skipping file." + f"""WARNING: Necessary 'time_period_freq' attribute missing + from {adf_file}. Skipping file.""" ) continue @@ -1236,12 +1264,14 @@ def move_tsfiles_for_mdtf(self, verbose): elif len(found_strings) > 1: if verbose > 0: print( - f"WARNING: Found dataset_freq {dataset_freq} matches multiple string possibilities:{', '.join(found_strings)}" + f"""WARNING: Found dataset_freq {dataset_freq} matches multiple + string possibilities:{', '.join(found_strings)}""" ) else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" + f"""WARNING: None of the frequency options {freq_string_cesm} are + present in the time_period_freq attribute {dataset_freq}""" ) print(f"Skipping {adf_file}") freq = "frequency_missing" From 31b814d8f9e4433832f76a3e2e083227d3166c78 Mon Sep 17 00:00:00 2001 From: Justin Richling Date: Fri, 24 Oct 2025 10:53:01 -0600 Subject: [PATCH 12/15] Remove unused code and clean up comments --- lib/adf_diag.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 5a48dfa03..d14b6fdce 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -632,14 +632,12 @@ def call_ncrcat(cmd): + ["-o", ts_outfil_str] ) - # Example ncatted command (you can modify it with the specific attribute changes you need) - #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str] - # Step 1: Convert Path objects to strings and concatenate the list of + # Convert Path objects to strings and concatenate the list of # historical files into a single string hist_files_str = ', '.join(str(f.name) for f 
in hist_files) hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs) - # Step 2: Create the ncatted command to add both global attributes + # Create the ncatted command to add both global attributes cmd_ncatted = [ "ncatted", "-O", "-a", "adf_user,global,a,c," + f"{self.user}", @@ -648,7 +646,7 @@ def call_ncrcat(cmd): ts_outfil_str ] - # Step 3: Create the ncatted command to remove the history attribute + # Create the ncatted command to remove the history attribute cmd_remove_history = [ "ncatted", "-O", "-h", "-a", "history,global,d,,", @@ -670,7 +668,6 @@ def call_ncrcat(cmd): # Now run the "ncrcat" subprocesses in parallel: with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_commands) - # End with # Run ncatted commands after ncrcat is done with mp.Pool(processes=self.num_procs) as mpool: @@ -681,13 +678,6 @@ def call_ncrcat(cmd): with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_hist_commands) - if vars_to_derive: - self.derive_variables( - res=res, hist_str=hist_str, vars_to_derive=vars_to_derive, - constit_dict=constit_dict, ts_dir=ts_dir - ) - # End with - # Finally, run through the derived variables if applicable if constit_dict: for der_var, constit_list in constit_dict.items(): From 7da7b29a38b33bf0b26c207fa1ced36757ab4bb4 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 4 Nov 2025 13:54:08 -0700 Subject: [PATCH 13/15] Bring in recent changes to `adf_diag.py` --- lib/adf_diag.py | 71 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 64b3fa9d9..d14b6fdce 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -384,8 +384,8 @@ def call_ncrcat(cmd): # Check if particular case should be processed: if cam_ts_done[case_idx]: - emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed" - emsg += f" for case '{case_name}'. 
Will rely on those files directly." + emsg = "\tNOTE: Configuration file indicates time series files have been " + emsg += f"pre-computed for case '{case_name}'. Will rely on those files directly." print(emsg) continue # End if @@ -536,6 +536,25 @@ def call_ncrcat(cmd): # Notify user of new time series file: print(f"\t - time series for {var}") + # Create full path name, file name template: + # $cam_case_name.$hist_str.$variable.YYYYMM-YYYYMM.nc + ts_outfil_str = ( + ts_dir + + os.sep + + ".".join([case_name, hist_str, var, time_string, "nc"]) + ) + + # Check if clobber is true for file + if Path(ts_outfil_str).is_file(): + if overwrite_ts[case_idx]: + Path(ts_outfil_str).unlink() + else: + #msg = f"[{__name__}] Warning: '{var}' file was found " + msg = f"\t INFO: '{var}' file was found " + msg += "and overwrite is False. Will use existing file." + print(msg) + continue + # Initialize list for constituents if variable is derivable constit_list = [] @@ -613,13 +632,12 @@ def call_ncrcat(cmd): + ["-o", ts_outfil_str] ) - # Example ncatted command (you can modify it with the specific attribute changes you need) - #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str] - # Step 1: Convert Path objects to strings and concatenate the list of historical files into a single string + # Convert Path objects to strings and concatenate the list of + # historical files into a single string hist_files_str = ', '.join(str(f.name) for f in hist_files) hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs) - # Step 2: Create the ncatted command to add both global attributes + # Create the ncatted command to add both global attributes cmd_ncatted = [ "ncatted", "-O", "-a", "adf_user,global,a,c," + f"{self.user}", @@ -628,7 +646,7 @@ def call_ncrcat(cmd): ts_outfil_str ] - # Step 3: Create the ncatted command to remove the history attribute + # Create the ncatted command to remove the history attribute cmd_remove_history = [ "ncatted", "-O", 
"-h", "-a", "history,global,d,,", @@ -650,8 +668,16 @@ def call_ncrcat(cmd): # Now run the "ncrcat" subprocesses in parallel: with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_commands) - # End with - + + # Run ncatted commands after ncrcat is done + with mp.Pool(processes=self.num_procs) as mpool: + _ = mpool.map(call_ncrcat, list_of_ncattend_commands) + + # Run ncatted command to remove history attribute + # after the global attributes are set + with mp.Pool(processes=self.num_procs) as mpool: + _ = mpool.map(call_ncrcat, list_of_hist_commands) + # Finally, run through the derived variables if applicable if constit_dict: for der_var, constit_list in constit_dict.items(): @@ -1059,8 +1085,8 @@ def setup_run_mdtf(self): # # Create a dict with all the case info needed for MDTF case_list - # Note that model and convention are hard-coded to CESM because that's all we expect here - # This could be changed by inputing them into ADF with other MDTF-specific variables + # Note that model and convention are hard-coded to CESM because that's all we expect here + # - This could be changed by inputing them into ADF with other MDTF-specific variables # case_list_keys = ["CASENAME", "FIRSTYR", "LASTYR", "model", "convention"] @@ -1116,7 +1142,9 @@ def setup_run_mdtf(self): # # Submit the MDTF script in background mode, send output to mdtf.out file # - mdtf_log = "mdtf.out" # maybe set this to cam_diag_plot_loc: /glade/scratch/${user}/ADF/plots + mdtf_log = "mdtf.out" + # maybe set this to cam_diag_plot_loc: /glade/scratch/${user}/ADF/plots + mdtf_exe = mdtf_codebase + os.sep + "mdtf -f " + mdtf_input_settings_filename if copy_files_only: print("\t ...Copy files only. NOT Running MDTF") @@ -1185,17 +1213,21 @@ def move_tsfiles_for_mdtf(self, verbose): elif len(adf_file_list) > 1: if verbose > 0: print( - f"WARNING: found multiple timeseries files {adf_file_list}. 
Continuing with best guess; suggest cleaning up multiple dates in ts dir" + f"""WARNING: found multiple timeseries files {adf_file_list}. + Continuing with best guess; suggest cleaning up multiple + dates in ts dir""" ) else: if verbose > 1: print( - f"WARNING: No files matching {case_name}.{hist_str}.{var} found in {adf_file_str}. Skipping" + f"""WARNING: No files matching {case_name}.{hist_str}.{var} + found in {adf_file_str}. Skipping""" ) continue # skip this case/hist_str/var file adf_file = adf_file_list[0] - # If freq is not set, it means we just started this hist_str. So check the first ADF file to find it + # If freq is not set, it means we just started this hist_str. + # So check the first ADF file to find it hist_file_ds = xr.open_dataset( adf_file, decode_cf=False, decode_times=False ) @@ -1206,7 +1238,8 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: Necessary 'time_period_freq' attribute missing from {adf_file}. Skipping file." + f"""WARNING: Necessary 'time_period_freq' attribute missing + from {adf_file}. 
Skipping file.""" ) continue @@ -1221,12 +1254,14 @@ def move_tsfiles_for_mdtf(self, verbose): elif len(found_strings) > 1: if verbose > 0: print( - f"WARNING: Found dataset_freq {dataset_freq} matches multiple string possibilities:{', '.join(found_strings)}" + f"""WARNING: Found dataset_freq {dataset_freq} matches multiple + string possibilities:{', '.join(found_strings)}""" ) else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" + f"""WARNING: None of the frequency options {freq_string_cesm} are + present in the time_period_freq attribute {dataset_freq}""" ) print(f"Skipping {adf_file}") freq = "frequency_missing" From f15d3369df267fe5fcef722fae74857e673085c1 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 4 Nov 2025 13:56:33 -0700 Subject: [PATCH 14/15] Remove constituent list This is now being generated in `adf_derive.py` --- lib/adf_diag.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index d14b6fdce..988296c44 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -555,9 +555,6 @@ def call_ncrcat(cmd): print(msg) continue - # Initialize list for constituents if variable is derivable - constit_list = [] - # Check if current variable is not in history file(s) if var not in hist_file_var_list: # Let user know variable is not From 26dd8dd4b02b8f849e9f5526c3aed824ca709bf7 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 4 Nov 2025 13:58:49 -0700 Subject: [PATCH 15/15] Remove whitespaces --- lib/adf_web.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/adf_web.py b/lib/adf_web.py index 5ce9b5523..6704b3517 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -157,7 +157,7 @@ def __init__(self, config_file, debug=False): mdtf_path += f"_{syear[0]}_{eyear[0]}" self.external_package_links['MDTF'] = mdtf_path #End if - + #Add all relevant paths to dictionary for specific case: 
self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -847,7 +847,7 @@ def jinja_enumerate(arg): # External packages that can be run through ADF avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} - + #Construct index.html index_title = "CAM Diagnostics" index_tmpl = jinenv.get_template('template_index.html') @@ -866,7 +866,6 @@ def jinja_enumerate(arg): with open(index_html_file, 'w', encoding='utf-8') as ofil: ofil.write(index_rndr) #End with - #End for (web data loop) #If this is a multi-case instance, then copy website to "main" directory: