"""
Helper functions for deriving variables that are absent from CAM history
files but can be computed from other ("constituent") variables.

This module is called from ``adf_diag.py`` during time series generation:

* ``check_derive``    - decide whether a missing variable can be derived and
                        collect its list of constituents.
* ``derive_variable`` - build the derived variable's time series file from
                        the constituents' time series files.
"""

import glob
import os
from pathlib import Path

# NOTE(review): the original module imported xarray as ``xr`` but never used
# it; datasets are opened through ``self.data.load_dataset``.  The unused
# import has been removed.


def check_derive(self, res, var, case_name, diag_var_list, constit_dict,
                 hist_file_ds, hist0):
    """
    Check whether a variable missing from the history files can be derived.

    For the incoming variable, look for a list of constituents, if available,
    in the variable defaults yaml file.

    If the variable has neither a `derivable_from` nor a
    `derivable_from_cam_chem` entry, it is assumed to be non-derivable and
    simply missing from the history file.

    If it does have one of those entries, CAM-CHEM constituents are checked
    first (against the supplied history dataset), then regular CAM
    constituents.

    Arguments
    ---------
    self: AdfDiag
        - ADF object (used here only for `debug_log`)
    res: dict
        - variable defaults dictionary from yaml file
    var: str
        - derived variable name
    case_name: str
        - model case (used in log messages)
    diag_var_list: list
        - list of variables for diagnostics
          NOTE: this is user supplied, but gets modified here (in place) to
          append constituents and, for aerosols, PMID/T
    constit_dict: dict
        - dictionary of derived variables as keys and list of constituents
          as values; `var` is added only when constituents were found
    hist_file_ds: xarray.Dataset
        - history file dataset for checking if constituents are available
          (only accessed for the CAM-CHEM check)
    hist0: str
        - history number for case (used in warning messages only)

    Returns
    -------
    diag_var_list: list
        - updated list (if applicable) of ADF variables for time series creation
    constit_dict: dict
        - updated dictionary of derived variables and their constituents
    """

    # Aerosol Calcs
    #--------------

    # Always make sure PMID (and T) are made if aerosols are desired in the
    # config file; both are needed later for the dry-air density calculation.
    # Since there's no requirement for `aerosol_zonal_list`, allow it to be absent:
    azl = res.get("aerosol_zonal_list", [])
    if azl:
        if "PMID" not in diag_var_list:
            if any(item in azl for item in diag_var_list):
                diag_var_list += ["PMID"]
        if "T" not in diag_var_list:
            if any(item in azl for item in diag_var_list):
                diag_var_list += ["T"]
    # End aerosol calcs

    # Set error messages for printing/debugging
    # Derived variable, but missing constituent list
    constit_errmsg = f"create time series for {case_name}:"
    constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable"
    constit_errmsg += " is flagged for derivation, but is missing list of constituents."
    constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' "
    constit_errmsg += f"for {var} in variable defaults yaml file."

    # No time series creation
    exit_msg = f"WARNING: {var} is not in the file {hist0} and can't be derived."
    exit_msg += "\n\t ** No time series will be generated. **\n"

    # Initialize list for constituents
    # NOTE: this stays an empty list if the variable is NOT derivable; the
    # emptiness is used as the final check below.
    constit_list = []

    try_cam_constits = True
    # Try finding info from variable defaults yaml file
    try:
        vres = res[var]
    except KeyError:
        # Variable is not in the defaults file at all: cannot be derived.
        print(exit_msg)
        self.debug_log(exit_msg)
        return diag_var_list, constit_dict

    # Check first if variable is potentially part of a CAM-CHEM run
    if "derivable_from_cam_chem" in vres:
        constit_list = vres["derivable_from_cam_chem"]

        if constit_list:
            if all(item in hist_file_ds.data_vars for item in constit_list):
                # All CAM-CHEM constituents are present; don't fall back to
                # the regular CAM constituents below.
                try_cam_constits = False
                msg = f"derive time series for {case_name}:"
                msg += "\n\tLooks like this a CAM-CHEM run, "
                msg += f"checking constituents for '{var}'"
                self.debug_log(msg)
            else:
                self.debug_log(constit_errmsg)
            # End if
        # End if
    # End if

    # If not CAM-CHEM, check regular CAM runs
    if try_cam_constits:
        if "derivable_from" in vres:
            constit_list = vres["derivable_from"]
        else:
            # Missing variable or missing derivable_from argument
            der_from_msg = f"derive time series for {case_name}:"
            der_from_msg += f"\n Can't create time series for {var}.\n\tEither "
            der_from_msg += "the variable is missing from CAM output or it is a "
            der_from_msg += "derived quantity and is missing the 'derivable_from' "
            der_from_msg += "config argument.\n\tPlease add variable to CAM run "
            der_from_msg += "or set appropriate argument in variable "
            der_from_msg += "defaults yaml file."
            self.debug_log(der_from_msg)
        # End if
    # End if

    # Log if this variable can be derived but is missing list of constituents
    if isinstance(constit_list, list) and not constit_list:
        self.debug_log(constit_errmsg)

    # Check if any constituents were found
    if constit_list:
        # Add variable and constituent list to dictionary
        constit_dict[var] = constit_list

        # Add constituents to ADF diag variable list for time series generation
        for constit in constit_list:
            if constit not in diag_var_list:
                diag_var_list.append(constit)
    else:
        print(exit_msg)
        self.debug_log(exit_msg)
    # End if

    return diag_var_list, constit_dict

########

def derive_variable(self, case_name, var, res=None, ts_dir=None,
                    constit_list=None, overwrite=None):
    """
    Derive variables according to steps given here. Since derivations will
    depend on the variable, each variable to derive will need its own set of
    steps below.

    Caution: this function assumes that there will be one time series file
    per variable.

    If the file for the derived variable exists, the kwarg `overwrite`
    determines whether to overwrite the file (true) or return after a
    warning message, keeping the existing file.

    Arguments
    ---------
    self: AdfDiag
        - ADF object (used for `debug_log` and `self.data.load_dataset`)
    case_name: str
        - model case (used in log messages)
    var: str
        - derived variable name
    res: dict
        - variable defaults dictionary from yaml file (for aerosol list and Rgas)
    ts_dir: str
        - directory containing the constituent time series files; the derived
          file is written there as well
    constit_list: list
        - constituent variable names required to derive `var`
    overwrite: bool
        - whether to clobber an existing derived-variable file

    Returns
    -------
    None; writes the derived time series file to `ts_dir` as a side effect.
    """

    print(f"\t - deriving time series for {var}")

    # Grab all required time series files for derived variable
    constit_files = []
    for constit in constit_list:
        # FIX: use one and the same glob pattern for the existence check and
        # the file grab (the original checked "*.{constit}.*.nc" but grabbed
        # "*.{constit}.*", which could pick up a different file).  `sorted`
        # makes the choice deterministic.
        found = sorted(glob.glob(os.path.join(ts_dir, f"*.{constit}.*.nc")))
        if found:
            constit_files.append(found[0])
    # End for

    # Check if all the necessary constituent files were found
    if len(constit_files) != len(constit_list):
        ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated. **\n"
        ermsg += f"\t Please remove {var} from 'diag_var_list' or find the "
        ermsg += "relevant CAM files.\n"
        print(ermsg)
        if constit_files:
            # Add what's missing to debug log
            dmsg = f"derived time series for {case_name}:"
            dmsg += "\n\tneeded constituents for derivation of "
            dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in "
            dmsg += f"{Path(constit_files[0]).parent}:\n\t\t"
            dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}"
            self.debug_log(dmsg)
        else:
            dmsg = f"derived time series for {case_name}:"
            dmsg += "\n\tneeded constituents for derivation of "
            dmsg += f"{var}:\n\t\t- {constit_list}\n"
            dmsg += "\tNo constituent(s) found in history files"
            self.debug_log(dmsg)
        # End if
        return
    # End if (all the necessary constituent files exist)

    # Open a new dataset with all the constituent files/variables
    ds = self.data.load_dataset(constit_files)
    if not ds:
        dmsg = f"derived time series for {case_name}:"
        dmsg += "\n\tNo files to open."
        self.debug_log(dmsg)
        return

    # Grab attributes from first constituent file to be used in derived variable
    attrs = ds[constit_list[0]].attrs

    # Create new file name for derived variable
    derived_file = constit_files[0].replace(constit_list[0], var)

    # Check if clobber is true for file
    if Path(derived_file).is_file():
        if overwrite:
            Path(derived_file).unlink()
        else:
            msg = f"[{__name__}] Warning: '{var}' file was found "
            msg += "and overwrite is False. Will use existing file."
            print(msg)
            # FIX: bail out here; the original fell through and re-derived
            # and rewrote the file despite promising to reuse the existing one.
            return

    # NOTE: this will need to be changed when derived equations are more complex! - JR
    if var == "RESTOM":
        # Residual top-of-model energy: net SW minus net LW flux
        der_val = ds["FSNT"] - ds["FLNT"]
    else:
        # Default derivation: sum over all constituents
        der_val = 0
        for constit in constit_list:
            der_val += ds[constit]

    # Set derived variable name and add to dataset
    der_val.name = var
    ds[var] = der_val

    # Aerosol Calculations
    #----------------------------------------------------------------------------------
    # These will be multiplied by rho (density of dry air)

    # User-defined defaults might not include aerosol zonal list
    azl = res.get("aerosol_zonal_list", [])
    if var in azl:
        # Check if PMID is available (needed for dry-air density):
        # FIX: guard the glob result; the original indexed [0] directly and
        # raised IndexError when no PMID file existed, and it proceeded to use
        # ds_pmid even after reporting it missing.
        pmid_files = glob.glob(os.path.join(ts_dir, "*.PMID.*"))
        ds_pmid = self.data.load_dataset(pmid_files[0]) if pmid_files else None
        if not ds_pmid:
            errmsg = "Missing necessary files for dry air density (rho) "
            errmsg += "calculation.\nPlease make sure 'PMID' is in the CAM "
            errmsg += "run for aerosol calculations"
            print(errmsg)
            dmsg = "derived time series:"
            dmsg += f"\n\t missing 'PMID' in {ts_dir}, can't make time series for {var} "
            self.debug_log(dmsg)
            return

        # Check if T is available (same guard as for PMID):
        t_files = glob.glob(os.path.join(ts_dir, "*.T.*"))
        ds_t = self.data.load_dataset(t_files[0]) if t_files else None
        if not ds_t:
            errmsg = "Missing necessary files for dry air density (rho) "
            errmsg += "calculation.\nPlease make sure 'T' is in the CAM "
            errmsg += "run for aerosol calculations"
            print(errmsg)
            dmsg = "derived time series:"
            dmsg += f"\n\t missing 'T' in {ts_dir}, can't make time series for {var} "
            self.debug_log(dmsg)
            return

        # Multiply aerosol by dry air density (rho): (P/Rd*T)
        ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"]))

        # Sulfate conversion factor
        if var == "SO4":
            ds[var] = ds[var]*(96./115.)
    #----------------------------------------------------------------------------------

    # Drop all constituents from final saved dataset
    # These are not necessary because they have their own time series files
    ds_final = ds.drop_vars(constit_list)
    # Copy attributes from constituent file to derived variable
    ds_final[var].attrs = attrs
    ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w')
########
""" return subprocess.run(cmd, shell=False) - # End def - # Check if baseline time-series files are being created: if baseline: - # Use baseline settings, while converting them all - # to lists: + # Use baseline settings, while converting them all to lists: case_names = [self.get_baseline_info("cam_case_name", required=True)] cam_ts_done = [self.get_baseline_info("cam_ts_done")] cam_hist_locs = [self.get_baseline_info("cam_hist_loc")] @@ -365,10 +363,6 @@ def call_ncrcat(cmd): end_years = self.climo_yrs["eyears"] case_type_string="case" hist_str_list = self.hist_string["test_hist_str"] - - # Notify user that script has started: - print(f"\n Writing time series files to {ts_dir}") - # End if # Read hist_str (component.hist_num) from the yaml file, or set to default @@ -380,10 +374,14 @@ def call_ncrcat(cmd): # Loop over cases: for case_idx, case_name in enumerate(case_names): + # Notify user that script has started: + print(f"\n Generating CAM time series files for '{case_name}'...") + print(f"\n Writing time series files to {ts_dir[case_idx]}") + # Check if particular case should be processed: if cam_ts_done[case_idx]: - emsg = " Configuration file indicates time series files have been pre-computed" - emsg += f" for case '{case_name}'. Will rely on those files directly." + emsg = "\tConfiguration file indicates time series files have been pre-computed." + emsg += f" Will rely on those files directly." print(emsg) continue # End if @@ -402,7 +400,7 @@ def call_ncrcat(cmd): self.end_diag_fail(emsg) # End if - # Check if history files actually exqist. If not then kill script: + # Check if history files actually exist. 
If not then kill script: hist_str_case = hist_str_list[case_idx] for hist_str in hist_str_case: @@ -512,114 +510,35 @@ def call_ncrcat(cmd): time_string_finish = last_file_split[-1].replace("-", "") time_string = "-".join([time_string_start, time_string_finish]) - # Loop over CAM history variables: + # Intitialize list for NCO commands list_of_commands = [] - vars_to_derive = [] - # create copy of var list that can be modified for derivable variables + + # Create copy of var list that can be modified for derivable variables diag_var_list = self.diag_var_list - # Aerosol Calcs - # -------------- - # Always make sure PMID is made if aerosols are desired in config file - # Since there's no requirement for `aerosol_zonal_list` to be included, allow it to be absent: - - azl = res.get("aerosol_zonal_list", []) - if "PMID" not in diag_var_list: - if any(item in azl for item in diag_var_list): - diag_var_list += ["PMID"] - if "T" not in diag_var_list: - if any(item in azl for item in diag_var_list): - diag_var_list += ["T"] - # End aerosol calcs - - # Initialize dictionary for derived variable with needed list of constituents + # Intitialize dictionary for derived variables, if appplicable constit_dict = {} + # Loop over CAM history variables: for var in diag_var_list: # Notify user of new time series file: print(f"\t - time series for {var}") - # Set error messages for printing/debugging - # Derived variable, but missing constituent list - constit_errmsg = f"create time series for {case_name}:" - constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable" - constit_errmsg += " is flagged for derivation, but is missing list of constiuents." - constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' " - constit_errmsg += f"for {var} in variable defaults yaml file." 
+ # Initialize list for constituents if variable is derivable + constit_list = [] - # Check if current variable is a derived quantity + # Check if current variable is not in history file(s) if var not in hist_file_var_list: - vres = res.get(var, {}) - - # Initialiaze list for constituents - # NOTE: This is if the variable is NOT derivable but needs - # an empty list as a check later - constit_list = [] - - # intialize boolean to check if variable is derivable - derive = False # assume it can't be derived and update if it can - - # intialize boolean for regular CAM variable constituents - try_cam_constits = True - - # Check first if variable is potentially part of a CAM-CHEM run - if "derivable_from_cam_chem" in vres: - constit_list = vres["derivable_from_cam_chem"] - if constit_list: - if all(item in hist_file_ds.data_vars for item in constit_list): - # Set check to look for regular CAM constituents in variable defaults - try_cam_constits = False - derive = True - msg = f"create time series for {case_name}:" - msg += "\n\tLooks like this a CAM-CHEM run, " - msg += f"checking constituents for '{var}'" - self.debug_log(msg) - else: - self.debug_log(constit_errmsg) - # End if - # End if - - # If not CAM-CHEM, check regular CAM runs - if try_cam_constits: - if "derivable_from" in vres: - derive = True - constit_list = vres["derivable_from"] - else: - # Missing variable or missing derivable_from argument - der_from_msg = f"create time series for {case_name}:" - der_from_msg += f"\n Can't create time series for {var}.\n\tEither " - der_from_msg += "the variable is missing from CAM output or it is a " - der_from_msg += "derived quantity and is missing the 'derivable_from' " - der_from_msg += "config argument.\n\tPlease add variable to CAM run " - der_from_msg += "or set appropriate argument in variable " - der_from_msg += "defaults yaml file." 
- self.debug_log(der_from_msg) - # End if - - # Check if this variable can be derived - if (derive) and (constit_list): - for constit in constit_list: - if constit not in diag_var_list: - diag_var_list.append(constit) - # Add variable to list to derive - vars_to_derive.append(var) - # Add constituent list to variable key in dictionary - constit_dict[var] = constit_list - continue - # Log if this variable can be derived but is missing list of constituents - elif (derive) and (not constit_list): - self.debug_log(constit_errmsg) - continue - # Lastly, raise error if the variable is not a derived quanitity but is also not - # in the history file(s) - else: - msg = f"WARNING: {var} is not in the file {hist_files[0]} " - msg += "nor can it be derived.\n" - msg += "\t ** No time series will be generated." - print(msg) - continue - # End if - # End if (var in var_diag_list) + # Let user know variable is not in history file + print(f"\t {var} not in history file, will try to derive if possible") + + # Check if variable can be derived + diag_var_list, constit_dict = check_derive(self, res, var, case_name, + diag_var_list, constit_dict, + hist_file_ds, hist_files[0]) + # Move to the next variable + continue + # End if # Check if variable has a "lev" dimension according to first file: has_lev = bool("lev" in hist_file_ds[var].dims) @@ -654,12 +573,12 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS might be in a different history file. If so, continue without error. + # PS may be in a different history file. If so, continue without error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PS" - print("Adding PS to file") + print("\t Adding PS to file") else: wmsg = "WARNING: PS not found in history file." wmsg += " It might be needed at some point." 
@@ -675,7 +594,7 @@ def call_ncrcat(cmd): # PMID file to each one of those targets separately. -JN if "PMID" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PMID" - print("Adding PMID to file") + print("\t Adding PMID to file") else: wmsg = "WARNING: PMID not found in history file." wmsg += " It might be needed at some point." @@ -693,19 +612,18 @@ def call_ncrcat(cmd): # Add to command list for use in multi-processing pool: list_of_commands.append(cmd) - # End variable loop # Now run the "ncrcat" subprocesses in parallel: with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_commands) - - if vars_to_derive: - self.derive_variables( - res=res, hist_str=hist_str, vars_to_derive=vars_to_derive, - constit_dict=constit_dict, ts_dir=ts_dir[case_idx] - ) # End with + + # Finally, run through the derived variables if applicable + if constit_dict: + for der_var, constit_list in constit_dict.items(): + derive_variable(self, case_name, der_var, res, + ts_dir[case_idx], constit_list) # End for hist_str # End cases loop @@ -1085,137 +1003,6 @@ def setup_run_cvdp(self): ######### - def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=None, - constit_dict=None, overwrite=None): - """ - Derive variables acccording to steps given here. Since derivations will depend on the - variable, each variable to derive will need its own set of steps below. - - Caution: this method assumes that there will be one time series file per variable - - If the file for the derived variable exists, the kwarg `overwrite` determines - whether to overwrite the file (true) or exit with a warning message. 
- - """ - - # Loop through derived variables - for var in vars_to_derive: - print(f"\t - deriving time series for {var}") - - # Grab list of constituents for this variable - constit_list = constit_dict[var] - - # Grab all required time series files for derived variable - constit_files = [] - for constit in constit_list: - # Check if the constituent file is present, if so add it to list - if hist_str: - const_glob_str = f"*{hist_str}*.{constit}.*.nc" - else: - const_glob_str = f"*.{constit}.*.nc" - # end if - if glob.glob(os.path.join(ts_dir, const_glob_str)): - constit_files.append(glob.glob(os.path.join(ts_dir, const_glob_str ))[0]) - - # Check if all the necessary constituent files were found - if len(constit_files) != len(constit_list): - ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated." - ermsg += f" Please remove {var} from 'diag_var_list' or find the " - ermsg += "relevant CAM files.\n" - print(ermsg) - if constit_files: - # Add what's missing to debug log - dmsg = "create time series:" - dmsg += "\n\tneeded constituents for derivation of " - dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in " - dmsg += f"{Path(constit_files[0]).parent}:\n\t\t" - dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}" - self.debug_log(dmsg) - else: - dmsg = "create time series:" - dmsg += "\n\tneeded constituents for derivation of " - dmsg += f"{var}:\n\t\t- {constit_list}\n" - dmsg += "\tNo constituent(s) found in history files" - self.debug_log(dmsg) - - else: - # Open a new dataset with all the constituent files/variables - ds = xr.open_mfdataset(constit_files).compute() - - # Grab attributes from first constituent file to be used in derived variable - attrs = ds[constit_list[0]].attrs - - # create new file name for derived variable - derived_file = constit_files[0].replace(constit_list[0], var) - - # Check if clobber is true for file - if Path(derived_file).is_file(): - if overwrite: - 
Path(derived_file).unlink() - else: - msg = f"[{__name__}] Warning: '{var}' file was found " - msg += "and overwrite is False. Will use existing file." - print(msg) - continue - - # NOTE: this will need to be changed when derived equations are more complex! - JR - if var == "RESTOM": - der_val = ds["FSNT"]-ds["FLNT"] - else: - # Loop through all constituents and sum - der_val = 0 - for v in constit_list: - der_val += ds[v] - - # Set derived variable name and add to dataset - der_val.name = var - ds[var] = der_val - - # Aerosol Calculations - # ---------------------------------------------------------------------------------- - # These will be multiplied by rho (density of dry air) - ds_pmid_done = False - ds_t_done = False - - # User-defined defaults might not include aerosol zonal list - azl = res.get("aerosol_zonal_list", []) - if var in azl: - # Only calculate once for all aerosol vars - if not ds_pmid_done: - ds_pmid = _load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0]) - ds_pmid_done = True - if not ds_pmid: - errmsg = "Missing necessary files for dry air density" - errmsg += " (rho) calculation.\n" - errmsg += "Please make sure 'PMID' is in the CAM run" - errmsg += " for aerosol calculations" - print(errmsg) - continue - if not ds_t_done: - ds_t = _load_dataset(glob.glob(os.path.join(ts_dir, "*.T.*"))[0]) - ds_t_done = True - if not ds_t: - errmsg = "Missing necessary files for dry air density" - errmsg += " (rho) calculation.\n" - errmsg += "Please make sure 'T' is in the CAM run" - errmsg += " for aerosol calculations" - print(errmsg) - continue - - # Multiply aerosol by dry air density (rho): (P/Rd*T) - ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"])) - - # Sulfate conversion factor - if var == "SO4": - ds[var] = ds[var]*(96./115.) 
- # ---------------------------------------------------------------------------------- - - # Drop all constituents from final saved dataset - # These are not necessary because they have their own time series files - ds_final = ds.drop_vars(constit_list) - # Copy attributes from constituent file to derived variable - ds_final[var].attrs = attrs - ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w') ######### MDTF functions ######### def setup_run_mdtf(self): @@ -1329,11 +1116,8 @@ def move_tsfiles_for_mdtf(self, verbose): # Going to need a dict to translate. # Use cesm_freq_strings = freq_string_options.keys # and then freq = freq_string_option(freq_string_found) - freq_string_cesm = ["month", "day", "hour_6", "hour_3", "hour_1"] #keys - freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values - freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict + freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] - hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) var_list = self.diag_var_list @@ -1388,7 +1172,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue found_strings = [ - word for word in freq_string_cesm if word in dataset_freq + word for word in freq_string_options if word in dataset_freq ] if len(found_strings) == 1: if verbose > 2: @@ -1403,14 +1187,13 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" + f"WARNING: None of the frequency options {freq_string_options} are present in the time_period_freq attribute {dataset_freq}" ) print(f"Skipping {adf_file}") freq = "frequency_missing" continue - freq = freq_string_dict.get(found_strings[0]) - print(f"Translated {found_strings[0]} to {freq}") - + freq = found_strings[0] + # # Destination file is MDTF directory and name structure # @@ -1438,47 +1221,4 @@ 
def move_tsfiles_for_mdtf(self, verbose): shutil.copyfile(adf_file, mdtf_file) # end for hist_str # end for var - # end for case - - -######## - -# Helper Function(s) - - -def _load_dataset(fils): - """ - This method exists to get an xarray Dataset from input file information that - can be passed into the plotting methods. - - Parameters - ---------- - fils : list - strings or paths to input file(s) - - Returns - ------- - xr.Dataset - - Notes - ----- - When just one entry is provided, use `open_dataset`, otherwise `open_mfdatset` - """ - import warnings # use to warn user about missing files. - - #Format warning messages: - def my_formatwarning(msg, *args, **kwargs): - """Issue `msg` as warning.""" - return str(msg) + '\n' - warnings.formatwarning = my_formatwarning - - if len(fils) == 0: - warnings.warn("Input file list is empty.") - return None - if len(fils) > 1: - return xr.open_mfdataset(fils, combine='by_coords') - else: - return xr.open_dataset(fils[0]) - #End if -# End def -######## + # end for case \ No newline at end of file From ae1e3507c892824fa8cdae95c37f2d1b9b070454 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Fri, 30 Aug 2024 16:28:04 -0600 Subject: [PATCH 03/15] Clean up print statements --- lib/adf_derive.py | 2 +- lib/adf_diag.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/adf_derive.py b/lib/adf_derive.py index 279cceef6..39bf83230 100644 --- a/lib/adf_derive.py +++ b/lib/adf_derive.py @@ -72,7 +72,7 @@ def check_derive(self, res, var, case_name, diag_var_list, constit_dict, hist_fi # No time series creation exit_msg = f"WARNING: {var} is not in the file {hist0} and can't be derived." - exit_msg += "\n\t ** No time series will be generated. **\n" + exit_msg += "\t ** No time series will be generated. 
**" # Initialiaze list for constituents # NOTE: This is if the variable is NOT derivable but needs diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 0497f5305..0ce044622 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -529,7 +529,7 @@ def call_ncrcat(cmd): # Check if current variable is not in history file(s) if var not in hist_file_var_list: - # Let user know variable is not in history file + # Let user know variable is not print(f"\t {var} not in history file, will try to derive if possible") # Check if variable can be derived From d8eec24261568f4d5663704abc41be93e13ec618 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 8 Oct 2024 12:02:48 -0600 Subject: [PATCH 04/15] Pull newest changed from main --- config_amwg_default_plots.yaml | 8 ++------ config_cam_baseline_example.yaml | 11 ++++------- lib/adf_diag.py | 7 +++---- lib/adf_web.py | 20 +++++++++++++++++++- lib/website_templates/template_index.html | 18 ++++++++++++++++++ 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/config_amwg_default_plots.yaml b/config_amwg_default_plots.yaml index ce22d562c..39a81ea58 100644 --- a/config_amwg_default_plots.yaml +++ b/config_amwg_default_plots.yaml @@ -303,6 +303,8 @@ diag_cvdp_info: # If mdtf_run: true, the MDTF will be set up and # run in background mode, likely completing after the ADF has completed. # +# WARNING: This currently only runs on CASPER (not derecho) +# # The variables required depend on the diagnostics (PODs) selected. # AMWG-developed PODS and their required variables: # (Note that PRECT can be computed from PRECC & PRECL) @@ -332,12 +334,6 @@ diag_mdtf_info: conda_env_root : ${mdtf_codebase_path}/miniconda2/envs.MDTFv3.1.20230412/ OBS_DATA_ROOT : ${mdtf_codebase_path}/obs_data - - - # Set to default for same as the ADF plot_location. Anything else here overrides that - OUTPUT_DIR : default - WORKING_DIR : default - # SET this to a writable dir. 
The ADF will place ts files here for the MDTF to read (adds the casename) MODEL_DATA_ROOT : ${diag_cam_climo.cam_ts_loc}/mdtf/inputdata/model diff --git a/config_cam_baseline_example.yaml b/config_cam_baseline_example.yaml index c7037a92e..015503270 100644 --- a/config_cam_baseline_example.yaml +++ b/config_cam_baseline_example.yaml @@ -388,6 +388,8 @@ diag_cvdp_info: # If mdtf_run: true, the MDTF will be set up and # run in background mode, likely completing after the ADF has completed. # +# WARNING: This currently only runs on CASPER (not derecho) +# # The variables required depend on the diagnostics (PODs) selected. # AMWG-developed PODS and their required variables: # (Note that PRECT can be computed from PRECC & PRECL) @@ -417,12 +419,6 @@ diag_mdtf_info: conda_env_root : ${mdtf_codebase_path}/miniconda2/envs.MDTFv3.1.20230412/ OBS_DATA_ROOT : ${mdtf_codebase_path}/obs_data - - - # Set to default for same as the ADF plot_location. Anything else here overrides that - OUTPUT_DIR : default - WORKING_DIR : default - # SET this to a writable dir. 
The ADF will place ts files here for the MDTF to read (adds the casename) MODEL_DATA_ROOT : ${diag_cam_climo.cam_ts_loc}/mdtf/inputdata/model @@ -510,7 +506,8 @@ diag_var_list: # # MDTF recommended variables -# - OMEGA +# - FLUT +# - OMEGA500 # - PRECT # - PS # - PSL diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 0ce044622..1db601f7e 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -1053,8 +1053,7 @@ def setup_run_mdtf(self): case_idx = 0 plot_path = os.path.join(self.plot_location[case_idx], "mdtf") for var in ["WORKING_DIR", "OUTPUT_DIR"]: - if mdtf_info[var] == "default": - mdtf_info[var] = plot_path + mdtf_info[var] = plot_path # # Write the input settings json file @@ -1141,7 +1140,7 @@ def move_tsfiles_for_mdtf(self, verbose): adf_file_list = glob.glob(adf_file_str) if len(adf_file_list) == 1: - if verbose > 2: + if verbose > 1: print(f"Copying ts file: {adf_file_list} to MDTF dir") elif len(adf_file_list) > 1: if verbose > 0: @@ -1149,7 +1148,7 @@ def move_tsfiles_for_mdtf(self, verbose): f"WARNING: found multiple timeseries files {adf_file_list}. Continuing with best guess; suggest cleaning up multiple dates in ts dir" ) else: - if verbose > 0: + if verbose > 1: print( f"WARNING: No files matching {case_name}.{hist_str}.{var} found in {adf_file_str}. 
Skipping" ) diff --git a/lib/adf_web.py b/lib/adf_web.py index f7b3e0d3d..43f88c871 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -146,6 +146,18 @@ def __init__(self, config_file, debug=False): #Specify where CSS files will be stored: css_files_dir = website_dir / "templates" + #Add links to external packages (if applicable) + self.external_package_links = {} + + #MDTF puts directory under case[0] + if self.get_mdtf_info('mdtf_run'): + syear = self.climo_yrs["syears"] + eyear = self.climo_yrs["eyears"] + mdtf_path = f"../mdtf/MDTF_{case_name}" + mdtf_path += f"_{syear[0]}_{eyear[0]}" + self.external_package_links['MDTF'] = mdtf_path + #End if + #Add all relevant paths to dictionary for specific case: self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -691,6 +703,10 @@ def jinja_list(seas_list): if ptype not in avail_plot_types: avail_plot_types.append(plot_types) + + # External packages that can be run through ADF + avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} + #Construct index.html index_title = "AMP Diagnostics Prototype" index_tmpl = jinenv.get_template('template_index.html') @@ -700,7 +716,9 @@ def jinja_list(seas_list): case_yrs=case_yrs, baseline_yrs=baseline_yrs, plot_types=plot_types, - avail_plot_types=avail_plot_types) + avail_plot_types=avail_plot_types, + avail_external_packages=avail_external_packages, + external_package_links=self.external_package_links) #Write Mean diagnostics index HTML file: with open(index_html_file, 'w', encoding='utf-8') as ofil: diff --git a/lib/website_templates/template_index.html b/lib/website_templates/template_index.html index 4b2659cf8..574e02a01 100644 --- a/lib/website_templates/template_index.html +++ b/lib/website_templates/template_index.html @@ -48,5 +48,23 @@

Plot Types

{% endfor %} +
+

External Diagnostic Packages

+
+ +
+ {% for avail_type in avail_external_packages %} + {% if avail_type in external_package_links.keys() %} + + {% else %} +
+ {{ avail_type }} +
+ {% endif %} + {% endfor %} +
+ \ No newline at end of file From e7e06424ff09108f5ac94b56b80e94ca1a42f7e9 Mon Sep 17 00:00:00 2001 From: Justin Richling Date: Thu, 23 Oct 2025 15:52:40 -0600 Subject: [PATCH 05/15] Bring up current ADF code --- lib/adf_diag.py | 70 +++++++++++++--- lib/adf_web.py | 218 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 238 insertions(+), 50 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 1db601f7e..22c74d141 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -357,7 +357,7 @@ def call_ncrcat(cmd): case_names = self.get_cam_info("cam_case_name", required=True) cam_ts_done = self.get_cam_info("cam_ts_done") cam_hist_locs = self.get_cam_info("cam_hist_loc") - ts_dir = self.get_cam_info("cam_ts_loc", required=True) + ts_dirs = self.get_cam_info("cam_ts_loc", required=True) overwrite_ts = self.get_cam_info("cam_overwrite_ts") start_years = self.climo_yrs["syears"] end_years = self.climo_yrs["eyears"] @@ -380,8 +380,8 @@ def call_ncrcat(cmd): # Check if particular case should be processed: if cam_ts_done[case_idx]: - emsg = "\tConfiguration file indicates time series files have been pre-computed." - emsg += f" Will rely on those files directly." + emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed" + emsg += f" for case '{case_name}'. Will rely on those files directly." print(emsg) continue # End if @@ -400,6 +400,9 @@ def call_ncrcat(cmd): self.end_diag_fail(emsg) # End if + # Extract time series file location + ts_dir = ts_dirs[case_idx] + # Check if history files actually exist. 
If not then kill script: hist_str_case = hist_str_list[case_idx] for hist_str in hist_str_case: @@ -413,6 +416,9 @@ def call_ncrcat(cmd): self.end_diag_fail(emsg) # End if + # Notify user that script has started: + print(f"\n\t Writing time series files to:\n\t{ts_dir}") + # Create empty list: files_list = [] @@ -495,7 +501,7 @@ def call_ncrcat(cmd): # Check if time series directory exists, and if not, then create it: # Use pathlib to create parent directories, if necessary. - Path(ts_dir[case_idx]).mkdir(parents=True, exist_ok=True) + Path(ts_dir).mkdir(parents=True, exist_ok=True) # INPUT NAME TEMPLATE: $CASE.$scomp.[$type.][$string.]$date[$ending] first_file_split = str(hist_files[0]).split(".") @@ -512,6 +518,8 @@ def call_ncrcat(cmd): # Intitialize list for NCO commands list_of_commands = [] + list_of_ncattend_commands = [] + list_of_hist_commands = [] # Create copy of var list that can be modified for derivable variables diag_var_list = self.diag_var_list @@ -573,12 +581,12 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS may be in a different history file. If so, continue without error. + # PS might be in a different history file. If so, continue w/o error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PS" - print("\t Adding PS to file") + print(f"\t INFO: Adding PS to file for '{var}'") else: wmsg = "WARNING: PS not found in history file." wmsg += " It might be needed at some point." 
@@ -610,8 +618,38 @@ def call_ncrcat(cmd): + ["-o", ts_outfil_str] ) + # Example ncatted command (you can modify it with the specific attribute changes you need) + #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str] + # Step 1: Convert Path objects to strings and concatenate the list of historical files into a single string + hist_files_str = ', '.join(str(f.name) for f in hist_files) + hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs) + + # Step 2: Create the ncatted command to add both global attributes + cmd_ncatted = [ + "ncatted", "-O", + "-a", "adf_user,global,a,c," + f"{self.user}", + "-a", "hist_file_locs,global,a,c," + f"{hist_locs_str}", + "-a", "hist_file_list,global,a,c," + f"{hist_files_str}", + ts_outfil_str + ] + + # Step 3: Create the ncatted command to remove the history attribute + cmd_remove_history = [ + "ncatted", "-O", "-h", + "-a", "history,global,d,,", + ts_outfil_str + ] + # Add to command list for use in multi-processing pool: + # ----------------------------------------------------- + # generate time series files list_of_commands.append(cmd) + # Add global attributes: user, original hist file loc(s) and all filenames + list_of_ncattend_commands.append(cmd_ncatted) + # Remove the `history` attr that gets tacked on (for clean up) + # NOTE: this may not be best practice, but it the history attr repeats + # the files attrs so the global attrs become obtrusive... 
+ list_of_hist_commands.append(cmd_remove_history) # End variable loop # Now run the "ncrcat" subprocesses in parallel: @@ -865,9 +903,11 @@ def setup_run_cvdp(self): else: cvdp_dir = self.get_cvdp_info("cvdp_loc", required=True) + case_names[0] # end if + + cvdp_dir = os.path.abspath(cvdp_dir) if not os.path.isdir(cvdp_dir): shutil.copytree( - self.get_cvdp_info("cvdp_codebase_loc", required=True), cvdp_dir + self.get_cvdp_info("cvdp_codebase_loc"), cvdp_dir ) # End if @@ -1009,6 +1049,7 @@ def setup_run_mdtf(self): """ Create MDTF directory tree, generate input settings jsonc file Submit MDTF diagnostics. + Returns mdtf_proc for sub-process control (waits for it to finish in run_adf_diag) """ @@ -1085,19 +1126,21 @@ def setup_run_mdtf(self): if copy_files_only: print("\t ...Copy files only. NOT Running MDTF") print(f"\t Command: {mdtf_exe} Log: {mdtf_log}") + return 0 else: print( f"\t ...Running MDTF in background. Command: {mdtf_exe} Log: {mdtf_log}" ) print(f"Running MDTF in background. Command: {mdtf_exe} Log: {mdtf_log}") with open(mdtf_log, "w", encoding="utf-8") as subout: - _ = subprocess.Popen( + mdtf_proc_var = subprocess.Popen( [mdtf_exe], shell=True, stdout=subout, stderr=subout, close_fds=True, ) + return mdtf_proc_var def move_tsfiles_for_mdtf(self, verbose): """ @@ -1115,7 +1158,9 @@ def move_tsfiles_for_mdtf(self, verbose): # Going to need a dict to translate. 
# Use cesm_freq_strings = freq_string_options.keys # and then freq = freq_string_option(freq_string_found) - freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] + freq_string_cesm = ["month", "day", "hour_6", "hour_3", "hour_1"] #keys + freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values + freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) @@ -1171,7 +1216,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue found_strings = [ - word for word in freq_string_options if word in dataset_freq + word for word in freq_string_cesm if word in dataset_freq ] if len(found_strings) == 1: if verbose > 2: @@ -1186,12 +1231,13 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_options} are present in the time_period_freq attribute {dataset_freq}" + f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" ) print(f"Skipping {adf_file}") freq = "frequency_missing" continue - freq = found_strings[0] + freq = freq_string_dict.get(found_strings[0]) + print(f"Translated {found_strings[0]} to {freq}") # # Destination file is MDTF directory and name structure diff --git a/lib/adf_web.py b/lib/adf_web.py index 43f88c871..75f35d133 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -22,13 +22,14 @@ import os import os.path - from pathlib import Path #+++++++++++++++++++++++++++++++++++++++++++++++++ #import non-standard python modules, including ADF #+++++++++++++++++++++++++++++++++++++++++++++++++ +import markdown + #ADF modules: from adf_obs import AdfObs @@ -117,7 +118,6 @@ def __init__(self, config_file, debug=False): #Extract needed variables from yaml file: case_names = self.get_cam_info('cam_case_name', required=True) - #Also extract baseline case (if applicable), and append to 
case_names list: if not self.compare_obs: baseline_name = self.get_baseline_info('cam_case_name', required=True) @@ -157,7 +157,7 @@ def __init__(self, config_file, debug=False): mdtf_path += f"_{syear[0]}_{eyear[0]}" self.external_package_links['MDTF'] = mdtf_path #End if - + #Add all relevant paths to dictionary for specific case: self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -182,6 +182,27 @@ def __init__(self, config_file, debug=False): 'table_pages_dir': table_pages_dir, 'css_files_dir': css_files_dir} #End if + + # Gather ADF run env info + active_env = self.get_active_conda_environment() + if not active_env: + active_env = "--" + + run_info = '' + if self.debug_log: + log_name = self.debug_fname + run_info = f"{log_name}".replace("debug","run_info").replace(".log",".md") + self.run_info = run_info + self._write_run_info_to_log(config_file, active_env) + #Do nothing if user is not requesting a website to be generated: + if self.create_html and self.debug_log: + plot_path = Path(self.plot_location[0]) + + #Create directory path where the website will be built: + website_dir = plot_path / "website" + Path(website_dir).mkdir(parents=True, exist_ok=True) + run_info = f"{website_dir}/{run_info}" + self._write_run_info_to_web(run_info, config_file, active_env) ######### @@ -192,6 +213,91 @@ def create_html(self): return self.get_basic_info('create_html') ######### + def _write_run_info_to_web(self, run_info, config_file, active_env): + """ + If user requests webpage, then add run info to webpages table of contents + """ + four_space = "    " + two_space = "  " + font_22 = "style='font-size:22px;'" + font_18 = "style='font-size:18px;'" + font_16 = "style='font-size:16px;'" + + with open(run_info, "w") as f: + + # Gather config yaml file info + f.write("

") + f.write(f"Config file used
") + f.write(f"{two_space}{config_file}

") + + f.write(f" Config file options
") + for key,val in self.config_dict().items(): + if isinstance(val,dict): + f.write(f"{two_space}{key}:
") + for key2,val2 in val.items(): + f.write(f"{four_space}{key2}: {val2}
") + elif isinstance(val,list): + f.write(f"{two_space}{key}:
") + for val2 in val: + f.write(f"{four_space}{val2}
") + else: + f.write(f"{two_space}{key}: {val}
") + + # Gather Conda environment + f.write("\n") + f.write(f"
Conda env used
") + f.write(f"{two_space}{active_env}") + + # Gather Git info + git_info = self.get_git_info() + f.write("\n") + f.write(f"

Git Info
") + for key,val in git_info.items(): + f.write(f"{two_space}{key}: {val}
") + f.write("

") + + def _write_run_info_to_log(self, config_file, active_env): + + log_msg = "adf_info: ADF run info:" + + # Gather config yaml file info + config_file_msg = "\nConfig file used:" + msg = f"{config_file_msg}\n{'-' * (len(config_file_msg))}\n {config_file}\n" + log_msg += msg + + + config_msg = "\n Config file options:" + msg = f"{config_msg}\n {'- ' * (int(len(config_msg)/2)-1)}" + log_msg += msg + + for key,val in self.config_dict().items(): + if isinstance(val,dict): + log_msg += f"\n {key}:" + for key2,val2 in val.items(): + log_msg += f"\n {key2}: {val2}" + elif isinstance(val,list): + log_msg += f"\n {key}:" + for val2 in val: + log_msg += f"\n {val2}" + else: + log_msg += f"\n {key}: {val}" + + # Gather Conda environment + conda_msg = "\nConda env used:" + msg = f"{conda_msg}\n{'-' * (len(conda_msg)-1)}\n" + log_msg += f"\n {msg}" + log_msg += f" {active_env}" + + # Gather Git info + git_info = self.get_git_info() + git_msg = "\nGit Info:" + msg = f"{git_msg}\n{'-' * (len(git_msg)-1)}\n" + log_msg += f"\n {msg}" + + for key,val in git_info.items(): + log_msg += f" {key}: {val}\n" + + self.debug_log(log_msg) def add_website_data(self, web_data, web_name, case_name, category = None, @@ -347,20 +453,24 @@ def create_website(self): self.end_diag_fail(emsg) #End except - #Make a jinja function that mimics python list object. This will allow for - # the use of 'list' in the html rendering. + #Make jinja functions that mimics python functions. + # - This will allow for the use of 'list' in the html rendering. def jinja_list(seas_list): return list(seas_list) + # - This will allow for the use of 'enumerate' in the html rendering. 
+ def jinja_enumerate(arg): + return enumerate(arg) #Notify user that script has started: print("\n Generating Diagnostics webpages...") + case_sites = OrderedDict() + #If there is more than one non-baseline case, then create new website directory: if self.num_cases > 1: main_site_path = Path(self.get_basic_info('cam_diag_plot_loc', required=True)) main_site_path = main_site_path / "main_website" main_site_path.mkdir(exist_ok=True) - case_sites = OrderedDict() else: main_site_path = "" #Set main_site_path to blank value #End if @@ -604,6 +714,15 @@ def jinja_list(seas_list): "table_html": table_html, "multi_head": False} rend_kwarg_dict["plot_types"] = multi_plot_type_html + + if web_data.name == case1: + rend_kwarg_dict["disp_table_name"] = case1 + rend_kwarg_dict["disp_table_html"] = table_html + + if web_data.name == "Case Comparison": + rend_kwarg_dict["disp_table_name"] = "Case Comparison" + rend_kwarg_dict["disp_table_html"] = table_html + table_tmpl = jinenv.get_template('template_table.html') table_rndr = table_tmpl.render(rend_kwarg_dict) @@ -614,18 +733,16 @@ def jinja_list(seas_list): #Check if the mean plot type page exists for this case (or for multi-case): mean_table_file = table_pages_dir / "mean_tables.html" - if not mean_table_file.exists(): - #Construct mean_table.html - mean_table_tmpl = jinenv.get_template('template_mean_tables.html') - #Reuse the rend_kwarg_dict, but ignore certain keys - #since all others are the same - new_dict = {k: rend_kwarg_dict[k] for k in rend_kwarg_dict.keys() - {'table_name', 'table_html'}} - mean_table_rndr = mean_table_tmpl.render(new_dict) - #Write mean diagnostic tables HTML file: - with open(mean_table_file, 'w', encoding='utf-8') as ofil: - ofil.write(mean_table_rndr) - #End with - #End if + + #Construct mean_table.html + mean_table_tmpl = jinenv.get_template('template_mean_tables.html') + #Reuse the rend_kwarg_dict + mean_table_rndr = mean_table_tmpl.render(rend_kwarg_dict) + #Write mean diagnostic tables HTML 
file: + with open(mean_table_file, 'w', encoding='utf-8') as ofil: + ofil.write(mean_table_rndr) + #End with + #End if (tables) else: #Plot image @@ -645,18 +762,19 @@ def jinja_list(seas_list): #End if rend_kwarg_dict = {"title": main_title, - "var_title": web_data.name, - "season_title": web_data.season, - "case_name": web_data.case, - "case_yrs": case_yrs, - "base_name": data_name, - "baseline_yrs": baseline_yrs, - "plottype_title": web_data.plot_type, - "imgs": img_data, - "mydata": mean_html_info[web_data.plot_type], - "plot_types": plot_types, - "seasons": seasons, - "non_seasons": non_seasons[web_data.plot_type]} + "var_title": web_data.name, + "season_title": web_data.season, + "case_name": web_data.case, + "case_yrs": case_yrs, + "base_name": data_name, + "baseline_yrs": baseline_yrs, + "plottype_title": web_data.plot_type, + "imgs": img_data, + "mydata": mean_html_info[web_data.plot_type], + "plot_types": plot_types, + "seasons": seasons, + "non_seasons": non_seasons[web_data.plot_type]} + tmpl = jinenv.get_template('template.html') #Set template rndr = tmpl.render(rend_kwarg_dict) #The template rendered @@ -671,10 +789,9 @@ def jinja_list(seas_list): #Construct individual plot type mean_diag html files mean_tmpl = jinenv.get_template('template_mean_diag.html') - #Remove keys from main dictionary for this html page - templ_rend_kwarg_dict = {k: rend_kwarg_dict[k] for k in rend_kwarg_dict.keys() - {'imgs', 'var_title', 'season_title'}} - templ_rend_kwarg_dict["list"] = jinja_list - mean_rndr = mean_tmpl.render(templ_rend_kwarg_dict) + rend_kwarg_dict["enumerate"] = jinja_enumerate + rend_kwarg_dict["list"] = jinja_list + mean_rndr = mean_tmpl.render(rend_kwarg_dict) #Write mean diagnostic plots HTML file: with open(mean_ptype_file,'w', encoding='utf-8') as ofil: @@ -686,6 +803,30 @@ def jinja_list(seas_list): index_html_file = \ self.__case_web_paths[web_data.case]['website_dir'] / "index.html" + # Create run info web page + run_info_md_file = \ + 
self.__case_web_paths[web_data.case]['website_dir'] / self.run_info + + # Read the markdown file + with open(run_info_md_file, "r", encoding="utf-8") as mdfile: + md_text = mdfile.read() + + # Convert markdown to HTML + run_info_html = markdown.markdown(md_text) + index_title = "CAM Diagnostics" + run_info_html_file = self.__case_web_paths[web_data.case]['website_dir'] / "run_info.html" + run_info_tmpl = jinenv.get_template('template_run_info.html') + run_info_rndr = run_info_tmpl.render(title=index_title, + case_name=web_data.case, + base_name=data_name, + case_yrs=case_yrs, + baseline_yrs=baseline_yrs, + plot_types=plot_types, + run_info=run_info_html) + + with open(run_info_html_file, "w", encoding="utf-8") as htmlfile: + htmlfile.write(run_info_rndr) + #Re-et plot types list: if web_data.case == 'multi-case': plot_types = multi_plot_type_html @@ -696,7 +837,7 @@ def jinja_list(seas_list): #List of ADF default plot types avail_plot_types = res["default_ptypes"] - + #Check if current plot type is in ADF default. 
#If not, add it so the index.html file can include it for ptype in plot_types.keys(): @@ -706,9 +847,9 @@ def jinja_list(seas_list): # External packages that can be run through ADF avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} - + #Construct index.html - index_title = "AMP Diagnostics Prototype" + index_title = "CAM Diagnostics" index_tmpl = jinenv.get_template('template_index.html') index_rndr = index_tmpl.render(title=index_title, case_name=web_data.case, @@ -718,7 +859,8 @@ def jinja_list(seas_list): plot_types=plot_types, avail_plot_types=avail_plot_types, avail_external_packages=avail_external_packages, - external_package_links=self.external_package_links) + external_package_links=self.external_package_links, + run_info=run_info_html) #Write Mean diagnostics index HTML file: with open(index_html_file, 'w', encoding='utf-8') as ofil: From 5c36377928fee993d8ad537fab21e2c82b5b8f8b Mon Sep 17 00:00:00 2001 From: justin-richling Date: Fri, 30 Aug 2024 16:21:18 -0600 Subject: [PATCH 06/15] Pull derivation code to external script Pull these checks and calculations out of `adf-diag.py` to clean that file up. 
--- lib/adf_derive.py | 274 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 lib/adf_derive.py diff --git a/lib/adf_derive.py b/lib/adf_derive.py new file mode 100644 index 000000000..279cceef6 --- /dev/null +++ b/lib/adf_derive.py @@ -0,0 +1,274 @@ +import glob +import os +from pathlib import Path +import xarray as xr + + +def check_derive(self, res, var, case_name, diag_var_list, constit_dict, hist_file_ds, hist0): + """ + For incoming variable, look for list of constituents if available + - as a list in variable defaults file + + If the variable does not have the argument `derivable_from` or `derivable_from_cam_chem`, + then it will be assumed not to be a derivable variable, just missing from history file + + If the variable does have the argument `derivable_from` or `derivable_from_cam_chem`, + first check cam-chem, then regular cam. + + Arguments + --------- + self: AdfDiag + - ADF object + res: dict + - variable defaults dictionary from yaml file + var: str + - derived variable name + case_name: str + - model case + diag_var_list: list + - list of variables for diagnostics + NOTE: this is user supplied, but gets modified here for constituents + constit_dict: dict + - dictionary of derived variables as keys and list of constituents as values + hist_file_ds: xarray.DataSet + - history file dataset for checking if constituents are available + hist0: str + - history number for case + + Returns + ------- + constit_list: list + - list of declared consituents from the variable defaults yaml file + - empty list: + * if missing `derived_from` argument(s) + * if `derived_from` argument(s) exist but not declared + + diag_var_list: list + - updated list (if applicable) of ADF variables for time series creation + """ + + # Aerosol Calcs + #-------------- + + # Always make sure PMID is made if aerosols are desired in config file + # Since there's no requirement for `aerosol_zonal_list`, allow it to be absent: + azl = 
res.get("aerosol_zonal_list", []) + if azl: + if "PMID" not in diag_var_list: + if any(item in azl for item in diag_var_list): + diag_var_list += ["PMID"] + if "T" not in diag_var_list: + if any(item in azl for item in diag_var_list): + diag_var_list += ["T"] + # End aerosol calcs + + # Set error messages for printing/debugging + # Derived variable, but missing constituent list + constit_errmsg = f"create time series for {case_name}:" + constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable" + constit_errmsg += " is flagged for derivation, but is missing list of constiuents." + constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' " + constit_errmsg += f"for {var} in variable defaults yaml file." + + # No time series creation + exit_msg = f"WARNING: {var} is not in the file {hist0} and can't be derived." + exit_msg += "\n\t ** No time series will be generated. **\n" + + # Initialiaze list for constituents + # NOTE: This is if the variable is NOT derivable but needs + # an empty list as a check later + constit_list = [] + + try_cam_constits = True + # Try finding info from variable defaults yaml file + try: + vres = res[var] + except KeyError: + print(exit_msg) + self.debug_log(exit_msg) + return diag_var_list, constit_dict + + # Check first if variable is potentially part of a CAM-CHEM run + if "derivable_from_cam_chem" in vres: + constit_list = vres["derivable_from_cam_chem"] + + if constit_list: + if all(item in hist_file_ds.data_vars for item in constit_list): + # Set check to look for regular CAM constituents in variable defaults + try_cam_constits = False + msg = f"derive time series for {case_name}:" + msg += "\n\tLooks like this a CAM-CHEM run, " + msg += f"checking constituents for '{var}'" + self.debug_log(msg) + else: + self.debug_log(constit_errmsg) + # End if + # End if + + # If not CAM-CHEM, check regular CAM runs + if try_cam_constits: + if "derivable_from" in vres: + constit_list = vres["derivable_from"] + 
else: + # Missing variable or missing derivable_from argument + der_from_msg = f"derive time series for {case_name}:" + der_from_msg += f"\n Can't create time series for {var}.\n\tEither " + der_from_msg += "the variable is missing from CAM output or it is a " + der_from_msg += "derived quantity and is missing the 'derivable_from' " + der_from_msg += "config argument.\n\tPlease add variable to CAM run " + der_from_msg += "or set appropriate argument in variable " + der_from_msg += "defaults yaml file." + self.debug_log(der_from_msg) + # End if + # End if + + # Log if this variable can be derived but is missing list of constituents + if isinstance(constit_list, list) and not constit_list: + self.debug_log(constit_errmsg) + + # Check if any constituents were found + if constit_list: + # Add variable and constituent list to dictionary + constit_dict[var] = constit_list + + # Add constituents to ADF diag variable list for time series generation + for constit in constit_list: + if constit not in diag_var_list: + diag_var_list.append(constit) + else: + print(exit_msg) + self.debug_log(exit_msg) + # End if + + return diag_var_list, constit_dict + +######## + +def derive_variable(self, case_name, var, res=None, ts_dir=None, + constit_list=None, overwrite=None): + """ + Derive variables acccording to steps given here. Since derivations will depend on the + variable, each variable to derive will need its own set of steps below. + + Caution: this method assumes that there will be one time series file per variable + + If the file for the derived variable exists, the kwarg `overwrite` determines + whether to overwrite the file (true) or exit with a warning message. 
+ + """ + + # Loop through derived variables + print(f"\t - deriving time series for {var}") + + # Grab all required time series files for derived variable + constit_files = [] + for constit in constit_list: + # Check if the constituent file is present, if so add it to list + if glob.glob(os.path.join(ts_dir, f"*.{constit}.*.nc")): + constit_files.append(glob.glob(os.path.join(ts_dir, f"*.{constit}.*"))[0]) + # End for + + # Check if all the necessary constituent files were found + if len(constit_files) != len(constit_list): + ermsg = f"\t ** Not all constituent files present; {var} cannot be calculated. **\n" + ermsg += f"\t Please remove {var} from 'diag_var_list' or find the " + ermsg += "relevant CAM files.\n" + print(ermsg) + if constit_files: + # Add what's missing to debug log + dmsg = f"derived time series for {case_name}:" + dmsg += f"\n\tneeded constituents for derivation of " + dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in " + dmsg += f"{Path(constit_files[0]).parent}:\n\t\t" + dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}" + self.debug_log(dmsg) + else: + dmsg = f"derived time series for {case_name}:" + dmsg += f"\n\tneeded constituents for derivation of " + dmsg += f"{var}:\n\t\t- {constit_list}\n" + dmsg += f"\tNo constituent(s) found in history files" + self.debug_log(dmsg) + # End if + else: + # Open a new dataset with all the constituent files/variables + ds = self.data.load_dataset(constit_files) + if not ds: + dmsg = f"derived time series for {case_name}:" + dmsg += f"\n\tNo files to open." 
+ self.debug_log(dmsg) + return + + # Grab attributes from first constituent file to be used in derived variable + attrs = ds[constit_list[0]].attrs + + # create new file name for derived variable + derived_file = constit_files[0].replace(constit_list[0], var) + + # Check if clobber is true for file + if Path(derived_file).is_file(): + if overwrite: + Path(derived_file).unlink() + else: + msg = f"[{__name__}] Warning: '{var}' file was found " + msg += "and overwrite is False. Will use existing file." + print(msg) + + #NOTE: this will need to be changed when derived equations are more complex! - JR + if var == "RESTOM": + der_val = ds["FSNT"]-ds["FLNT"] + else: + # Loop through all constituents and sum + der_val = 0 + for v in constit_list: + der_val += ds[v] + + # Set derived variable name and add to dataset + der_val.name = var + ds[var] = der_val + + # Aerosol Calculations + #---------------------------------------------------------------------------------- + # These will be multiplied by rho (density of dry air) + + # User-defined defaults might not include aerosol zonal list + azl = res.get("aerosol_zonal_list", []) + if var in azl: + # Check if PMID is in file: + ds_pmid = self.data.load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0]) + if not ds_pmid: + errmsg = "Missing necessary files for dry air density (rho) " + errmsg += "calculation.\nPlease make sure 'PMID' is in the CAM " + errmsg += "run for aerosol calculations" + print(errmsg) + dmsg = "derived time series:" + dmsg += f"\n\t missing 'PMID' in {ts_dir}, can't make time series for {var} " + self.debug_log(dmsg) + + # Check if T is in file: + ds_t = self.data.load_dataset(glob.glob(os.path.join(ts_dir, "*.T.*"))[0]) + if not ds_t: + errmsg = "Missing necessary files for dry air density (rho) " + errmsg += "calculation.\nPlease make sure 'T' is in the CAM " + errmsg += "run for aerosol calculations" + print(errmsg) + + dmsg = "derived time series:" + dmsg += f"\n\t missing 'T' in {ts_dir}, 
can't make time series for {var} " + self.debug_log(dmsg) + + # Multiply aerosol by dry air density (rho): (P/Rd*T) + ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"])) + + # Sulfate conversion factor + if var == "SO4": + ds[var] = ds[var]*(96./115.) + #---------------------------------------------------------------------------------- + + # Drop all constituents from final saved dataset + # These are not necessary because they have their own time series files + ds_final = ds.drop_vars(constit_list) + # Copy attributes from constituent file to derived variable + ds_final[var].attrs = attrs + ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w') + # End if (all the necessary constituent files exist) +######## \ No newline at end of file From 96d0cc42349e6f04189d3fc506d31cb585f91769 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Fri, 30 Aug 2024 16:22:06 -0600 Subject: [PATCH 07/15] Update adf_diag.py Now call the `adf_derive.py` script for derived variables --- lib/adf_diag.py | 349 +++++------------------------------------------- 1 file changed, 37 insertions(+), 312 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index ddf452ecc..82373ecec 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -97,6 +97,7 @@ # Finally, import needed ADF modules: from adf_web import AdfWeb from adf_dataset import AdfData +from adf_derive import check_derive, derive_variable ################# # Helper functions @@ -340,14 +341,11 @@ def call_ncrcat(cmd): It is declared as global to avoid AttributeError. 
""" return subprocess.run(cmd, shell=False) - # End def - # Check if baseline time-series files are being created: if baseline: - # Use baseline settings, while converting them all - # to lists: + # Use baseline settings, while converting them all to lists: case_names = [self.get_baseline_info("cam_case_name", required=True)] cam_ts_done = [self.get_baseline_info("cam_ts_done")] cam_hist_locs = [self.get_baseline_info("cam_hist_loc")] @@ -380,6 +378,10 @@ def call_ncrcat(cmd): # Loop over cases: for case_idx, case_name in enumerate(case_names): + # Notify user that script has started: + print(f"\n Generating CAM time series files for '{case_name}'...") + print(f"\n Writing time series files to {ts_dir[case_idx]}") + # Check if particular case should be processed: if cam_ts_done[case_idx]: emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed" @@ -405,7 +407,7 @@ def call_ncrcat(cmd): # Extract time series file location ts_dir = ts_dirs[case_idx] - # Check if history files actually exqist. If not then kill script: + # Check if history files actually exist. 
If not then kill script: hist_str_case = hist_str_list[case_idx] for hist_str in hist_str_case: @@ -518,138 +520,35 @@ def call_ncrcat(cmd): time_string_finish = last_file_split[-1].replace("-", "") time_string = "-".join([time_string_start, time_string_finish]) - # Loop over CAM history variables: + # Intitialize list for NCO commands list_of_commands = [] - list_of_ncattend_commands = [] - list_of_hist_commands = [] - vars_to_derive = [] - # create copy of var list that can be modified for derivable variables + + # Create copy of var list that can be modified for derivable variables diag_var_list = self.diag_var_list - # Aerosol Calcs - # -------------- - # Always make sure PMID is made if aerosols are desired in config file - # Since there's no requirement for `aerosol_zonal_list` to be included, - # allow it to be absent: - - azl = res.get("aerosol_zonal_list", []) - if "PMID" not in diag_var_list: - if any(item in azl for item in diag_var_list): - diag_var_list += ["PMID"] - if "T" not in diag_var_list: - if any(item in azl for item in diag_var_list): - diag_var_list += ["T"] - # End aerosol calcs - - # Initialize dictionary for derived variable with needed list of constituents + # Intitialize dictionary for derived variables, if appplicable constit_dict = {} + # Loop over CAM history variables: for var in diag_var_list: # Notify user of new time series file: print(f"\t - time series for {var}") - # Create full path name, file name template: - # $cam_case_name.$hist_str.$variable.YYYYMM-YYYYMM.nc - ts_outfil_str = ( - ts_dir - + os.sep - + ".".join([case_name, hist_str, var, time_string, "nc"]) - ) - - # Check if clobber is true for file - if Path(ts_outfil_str).is_file(): - if overwrite_ts[case_idx]: - Path(ts_outfil_str).unlink() - else: - #msg = f"[{__name__}] Warning: '{var}' file was found " - msg = f"\t INFO: '{var}' file was found " - msg += "and overwrite is False. Will use existing file." 
- print(msg) - continue - - # Set error messages for printing/debugging - # Derived variable, but missing constituent list - constit_errmsg = f"create time series for {case_name}:" - constit_errmsg += f"\n Can't create time series for {var}. \n\tThis variable" - constit_errmsg += " is flagged for derivation, but is missing list of constiuents." - constit_errmsg += "\n\tPlease add list of constituents to 'derivable_from' " - constit_errmsg += f"for {var} in variable defaults yaml file." + # Initialize list for constituents if variable is derivable + constit_list = [] - # Check if current variable is a derived quantity + # Check if current variable is not in history file(s) if var not in hist_file_var_list: - vres = res.get(var, {}) - - # Initialiaze list for constituents - # NOTE: This is if the variable is NOT derivable but needs - # an empty list as a check later - constit_list = [] - - # intialize boolean to check if variable is derivable - derive = False # assume it can't be derived and update if it can - - # intialize boolean for regular CAM variable constituents - try_cam_constits = True - - # Check first if variable is potentially part of a CAM-CHEM run - if "derivable_from_cam_chem" in vres: - constit_list = vres["derivable_from_cam_chem"] - if constit_list: - if all(item in hist_file_ds.data_vars for item in constit_list): - # Set check to look for regular CAM constituents - try_cam_constits = False - derive = True - msg = f"create time series for {case_name}:" - msg += "\n\tLooks like this a CAM-CHEM run, " - msg += f"checking constituents for '{var}'" - self.debug_log(msg) - else: - self.debug_log(constit_errmsg) - # End if - # End if - - # If not CAM-CHEM, check regular CAM runs - if try_cam_constits: - if "derivable_from" in vres: - derive = True - constit_list = vres["derivable_from"] - else: - # Missing variable or missing derivable_from argument - der_from_msg = f"create time series for {case_name}:" - der_from_msg += f"\n Can't create time series 
for {var}.\n\tEither " - der_from_msg += "the variable is missing from CAM output or it is a " - der_from_msg += "derived quantity and is missing the 'derivable_from' " - der_from_msg += "config argument.\n\tPlease add variable to CAM run " - der_from_msg += "or set appropriate argument in variable " - der_from_msg += "defaults yaml file." - self.debug_log(der_from_msg) - # End if - - # Check if this variable can be derived - if (derive) and (constit_list): - for constit in constit_list: - if constit not in diag_var_list: - diag_var_list.append(constit) - # Add variable to list to derive - vars_to_derive.append(var) - # Add constituent list to variable key in dictionary - constit_dict[var] = constit_list - continue - # Log if variable can be derived but is missing list of constituents - elif (derive) and (not constit_list): - self.debug_log(constit_errmsg) - continue - # Lastly, raise error if the variable is not a derived quanitity - # but is also not in the history file(s) - else: - msg = f"\t WARNING: {var} is not in the history file for case '{case_name}' " - msg += "nor can it be derived. Script will continue to next variable." 
- print(msg) - logmsg = f"create time series for {case_name}:" - logmsg += f"\n {var} is not in the file {hist_files[0]} " - self.debug_log(logmsg) - continue - # End if - # End if (var in var_diag_list) + # Let user know variable is not in history file + print(f"\t {var} not in history file, will try to derive if possible") + + # Check if variable can be derived + diag_var_list, constit_dict = check_derive(self, res, var, case_name, + diag_var_list, constit_dict, + hist_file_ds, hist_files[0]) + # Move to the next variable + continue + # End if # Check if variable has a "lev" dimension according to first file: has_lev = bool("lev" in hist_file_ds[var].dims or "ilev" in hist_file_ds[var].dims) @@ -675,12 +574,12 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS might be in a different history file. If so, continue w/o error. + # PS may be in a different history file. If so, continue without error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PS" - print(f"\t INFO: Adding PS to file for '{var}'") + print("\t Adding PS to file") else: wmsg = "WARNING: PS not found in history file." wmsg += " It might be needed at some point." @@ -696,7 +595,7 @@ def call_ncrcat(cmd): # PMID file to each one of those targets separately. -JN if "PMID" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PMID" - print("Adding PMID to file") + print("\t Adding PMID to file") else: wmsg = "WARNING: PMID not found in history file." wmsg += " It might be needed at some point." 
@@ -738,13 +637,6 @@ def call_ncrcat(cmd): # ----------------------------------------------------- # generate time series files list_of_commands.append(cmd) - # Add global attributes: user, original hist file loc(s) and all filenames - list_of_ncattend_commands.append(cmd_ncatted) - # Remove the `history` attr that gets tacked on (for clean up) - # NOTE: this may not be best practice, but it the history attr repeats - # the files attrs so the global attrs become obtrusive... - list_of_hist_commands.append(cmd_remove_history) - # End variable loop # Now run the "ncrcat" subprocesses in parallel: @@ -766,6 +658,12 @@ def call_ncrcat(cmd): constit_dict=constit_dict, ts_dir=ts_dir ) # End with + + # Finally, run through the derived variables if applicable + if constit_dict: + for der_var, constit_list in constit_dict.items(): + derive_variable(self, case_name, der_var, res, + ts_dir[case_idx], constit_list) # End for hist_str # End cases loop @@ -1147,137 +1045,6 @@ def setup_run_cvdp(self): ######### - def derive_variables(self, res=None, hist_str=None, vars_to_derive=None, ts_dir=None, - constit_dict=None, overwrite=None): - """ - Derive variables acccording to steps given here. Since derivations will depend on the - variable, each variable to derive will need its own set of steps below. - - Caution: this method assumes that there will be one time series file per variable - - If the file for the derived variable exists, the kwarg `overwrite` determines - whether to overwrite the file (true) or exit with a warning message. 
- - """ - - # Loop through derived variables - for var in vars_to_derive: - print(f"\t - deriving time series for {var}") - - # Grab list of constituents for this variable - constit_list = constit_dict[var] - - # Grab all required time series files for derived variable - constit_files = [] - for constit in constit_list: - # Check if the constituent file is present, if so add it to list - if hist_str: - const_glob_str = f"*{hist_str}*.{constit}.*.nc" - else: - const_glob_str = f"*.{constit}.*.nc" - # end if - if glob.glob(os.path.join(ts_dir, const_glob_str)): - constit_files.append(glob.glob(os.path.join(ts_dir, const_glob_str ))[0]) - - # Check if all the necessary constituent files were found - if len(constit_files) != len(constit_list): - ermsg = f"\t WARNING: Not all constituent files present; {var} cannot be calculated." - ermsg += f" Please remove {var} from 'diag_var_list' or find the " - ermsg += "relevant CAM files.\n" - print(ermsg) - if constit_files: - # Add what's missing to debug log - dmsg = "create time series:" - dmsg += "\n\tneeded constituents for derivation of " - dmsg += f"{var}:\n\t\t- {constit_list}\n\tfound constituent file(s) in " - dmsg += f"{Path(constit_files[0]).parent}:\n\t\t" - dmsg += f"- {[Path(f).parts[-1] for f in constit_files if Path(f).is_file()]}" - self.debug_log(dmsg) - else: - dmsg = "create time series:" - dmsg += "\n\tneeded constituents for derivation of " - dmsg += f"{var}:\n\t\t- {constit_list}\n" - dmsg += "\tNo constituent(s) found in history files" - self.debug_log(dmsg) - - else: - # Open a new dataset with all the constituent files/variables - ds = xr.open_mfdataset(constit_files).compute() - - # Grab attributes from first constituent file to be used in derived variable - attrs = ds[constit_list[0]].attrs - - # create new file name for derived variable - derived_file = constit_files[0].replace(constit_list[0], var) - - # Check if clobber is true for file - if Path(derived_file).is_file(): - if overwrite: - 
Path(derived_file).unlink() - else: - msg = f"\t INFO: '{var}' file was found " - msg += "and overwrite is False. Will use existing file." - print(msg) - continue - - # NOTE: this will need to be changed when derived equations are more complex! - JR - if var == "RESTOM": - der_val = ds["FSNT"]-ds["FLNT"] - else: - # Loop through all constituents and sum - der_val = 0 - for v in constit_list: - der_val += ds[v] - - # Set derived variable name and add to dataset - der_val.name = var - ds[var] = der_val - - # Aerosol Calculations - # ---------------------------------------------------------------------------------- - # These will be multiplied by rho (density of dry air) - ds_pmid_done = False - ds_t_done = False - - # User-defined defaults might not include aerosol zonal list - azl = res.get("aerosol_zonal_list", []) - if var in azl: - # Only calculate once for all aerosol vars - if not ds_pmid_done: - ds_pmid = _load_dataset(glob.glob(os.path.join(ts_dir, "*.PMID.*"))[0]) - ds_pmid_done = True - if not ds_pmid: - errmsg = "\t WARNING: Missing necessary files for dry air density" - errmsg += " (rho) calculation.\n" - errmsg += "\t Please make sure 'PMID' is in the CAM run" - errmsg += " for aerosol calculations" - print(errmsg) - continue - if not ds_t_done: - ds_t = _load_dataset(glob.glob(os.path.join(ts_dir, "*.T.*"))[0]) - ds_t_done = True - if not ds_t: - errmsg = "\t WARNING: Missing necessary files for dry air density" - errmsg += " (rho) calculation.\n" - errmsg += "\t Please make sure 'T' is in the CAM run" - errmsg += " for aerosol calculations" - print(errmsg) - continue - - # Multiply aerosol by dry air density (rho): (P/Rd*T) - ds[var] = ds[var]*(ds_pmid["PMID"]/(res["Rgas"]*ds_t["T"])) - - # Sulfate conversion factor - if var == "SO4": - ds[var] = ds[var]*(96./115.) 
- # ---------------------------------------------------------------------------------- - - # Drop all constituents from final saved dataset - # These are not necessary because they have their own time series files - ds_final = ds.drop_vars(constit_list) - # Copy attributes from constituent file to derived variable - ds_final[var].attrs = attrs - ds_final.to_netcdf(derived_file, unlimited_dims='time', mode='w') ######### MDTF functions ######### def setup_run_mdtf(self): @@ -1393,9 +1160,7 @@ def move_tsfiles_for_mdtf(self, verbose): # Going to need a dict to translate. # Use cesm_freq_strings = freq_string_options.keys # and then freq = freq_string_option(freq_string_found) - freq_string_cesm = ["month", "day", "hour_6", "hour_3", "hour_1"] #keys - freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values - freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict + freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) @@ -1451,7 +1216,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue found_strings = [ - word for word in freq_string_cesm if word in dataset_freq + word for word in freq_string_options if word in dataset_freq ] if len(found_strings) == 1: if verbose > 2: @@ -1466,7 +1231,7 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" + f"WARNING: None of the frequency options {freq_string_options} are present in the time_period_freq attribute {dataset_freq}" ) print(f"Skipping {adf_file}") freq = "frequency_missing" @@ -1502,43 +1267,3 @@ def move_tsfiles_for_mdtf(self, verbose): # end for hist_str # end for var # end for case - - -######## - -# Helper Function(s) - - -def _load_dataset(fils): - """ - This method exists to get an xarray Dataset from input file 
information that - can be passed into the plotting methods. - - Parameters - ---------- - fils : list - strings or paths to input file(s) - - Returns - ------- - xr.Dataset - - Notes - ----- - When just one entry is provided, use `open_dataset`, otherwise `open_mfdatset` - """ - - import adf_utils as utils - import warnings # use to warn user about missing files - warnings.formatwarning = utils.my_formatwarning - - if len(fils) == 0: - warnings.warn("\t WARNING: Input file list is empty.") - return None - if len(fils) > 1: - return xr.open_mfdataset(fils, combine='by_coords') - else: - return xr.open_dataset(fils[0]) - #End if -# End def -######## From 49871a26c87dfb1bda263999ffd2e484a45d7491 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Fri, 30 Aug 2024 16:28:04 -0600 Subject: [PATCH 08/15] Clean up print statements --- lib/adf_derive.py | 2 +- lib/adf_diag.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/adf_derive.py b/lib/adf_derive.py index 279cceef6..39bf83230 100644 --- a/lib/adf_derive.py +++ b/lib/adf_derive.py @@ -72,7 +72,7 @@ def check_derive(self, res, var, case_name, diag_var_list, constit_dict, hist_fi # No time series creation exit_msg = f"WARNING: {var} is not in the file {hist0} and can't be derived." - exit_msg += "\n\t ** No time series will be generated. **\n" + exit_msg += "\t ** No time series will be generated. 
**" # Initialiaze list for constituents # NOTE: This is if the variable is NOT derivable but needs diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 82373ecec..232ef599f 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -539,7 +539,7 @@ def call_ncrcat(cmd): # Check if current variable is not in history file(s) if var not in hist_file_var_list: - # Let user know variable is not in history file + # Let user know variable is not print(f"\t {var} not in history file, will try to derive if possible") # Check if variable can be derived From bc15d4c9d59341588bd6e71a68bb9a896e381212 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 8 Oct 2024 12:02:48 -0600 Subject: [PATCH 09/15] Pull newest changed from main --- lib/adf_web.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/adf_web.py b/lib/adf_web.py index b5d8e77ed..f0bf6ba43 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -157,7 +157,11 @@ def __init__(self, config_file, debug=False): mdtf_path += f"_{syear[0]}_{eyear[0]}" self.external_package_links['MDTF'] = mdtf_path #End if +<<<<<<< HEAD +======= + +>>>>>>> d8eec242 (Pull newest changed from main) #Add all relevant paths to dictionary for specific case: self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -847,7 +851,11 @@ def jinja_enumerate(arg): # External packages that can be run through ADF avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} +<<<<<<< HEAD +======= + +>>>>>>> d8eec242 (Pull newest changed from main) #Construct index.html index_title = "CAM Diagnostics" index_tmpl = jinenv.get_template('template_index.html') @@ -859,8 +867,12 @@ def jinja_enumerate(arg): plot_types=plot_types, avail_plot_types=avail_plot_types, avail_external_packages=avail_external_packages, +<<<<<<< HEAD external_package_links=self.external_package_links, run_info=run_info_html) +======= + external_package_links=self.external_package_links) +>>>>>>> d8eec242 (Pull 
newest changed from main) #Write Mean diagnostics index HTML file: with open(index_html_file, 'w', encoding='utf-8') as ofil: From fafd70d27f1f733f67e466083b044d54c8d478a8 Mon Sep 17 00:00:00 2001 From: Justin Richling Date: Thu, 23 Oct 2025 15:52:40 -0600 Subject: [PATCH 10/15] Bring up current ADF code --- lib/adf_diag.py | 20 +++++++++++++++----- lib/adf_web.py | 12 ------------ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 232ef599f..7731e20e9 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -522,6 +522,8 @@ def call_ncrcat(cmd): # Intitialize list for NCO commands list_of_commands = [] + list_of_ncattend_commands = [] + list_of_hist_commands = [] # Create copy of var list that can be modified for derivable variables diag_var_list = self.diag_var_list @@ -574,12 +576,12 @@ def call_ncrcat(cmd): if has_lev and vert_coord_type: # For now, only add these variables if using CAM: if "cam" in hist_str: - # PS may be in a different history file. If so, continue without error. + # PS might be in a different history file. If so, continue w/o error. ncrcat_var_list = ncrcat_var_list + ",hyam,hybm,hyai,hybi" if "PS" in hist_file_var_list: ncrcat_var_list = ncrcat_var_list + ",PS" - print("\t Adding PS to file") + print(f"\t INFO: Adding PS to file for '{var}'") else: wmsg = "WARNING: PS not found in history file." wmsg += " It might be needed at some point." @@ -637,6 +639,12 @@ def call_ncrcat(cmd): # ----------------------------------------------------- # generate time series files list_of_commands.append(cmd) + # Add global attributes: user, original hist file loc(s) and all filenames + list_of_ncattend_commands.append(cmd_ncatted) + # Remove the `history` attr that gets tacked on (for clean up) + # NOTE: this may not be best practice, but it the history attr repeats + # the files attrs so the global attrs become obtrusive... 
+ list_of_hist_commands.append(cmd_remove_history) # End variable loop # Now run the "ncrcat" subprocesses in parallel: @@ -1160,7 +1168,9 @@ def move_tsfiles_for_mdtf(self, verbose): # Going to need a dict to translate. # Use cesm_freq_strings = freq_string_options.keys # and then freq = freq_string_option(freq_string_found) - freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] + freq_string_cesm = ["month", "day", "hour_6", "hour_3", "hour_1"] #keys + freq_string_options = ["month", "day", "6hr", "3hr", "1hr"] #values + freq_string_dict = dict(zip(freq_string_cesm,freq_string_options)) #make dict hist_str_list = self.get_cam_info("hist_str") case_names = self.get_cam_info("cam_case_name", required=True) @@ -1216,7 +1226,7 @@ def move_tsfiles_for_mdtf(self, verbose): continue found_strings = [ - word for word in freq_string_options if word in dataset_freq + word for word in freq_string_cesm if word in dataset_freq ] if len(found_strings) == 1: if verbose > 2: @@ -1231,7 +1241,7 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_options} are present in the time_period_freq attribute {dataset_freq}" + f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" ) print(f"Skipping {adf_file}") freq = "frequency_missing" diff --git a/lib/adf_web.py b/lib/adf_web.py index f0bf6ba43..5ce9b5523 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -157,11 +157,7 @@ def __init__(self, config_file, debug=False): mdtf_path += f"_{syear[0]}_{eyear[0]}" self.external_package_links['MDTF'] = mdtf_path #End if -<<<<<<< HEAD - -======= ->>>>>>> d8eec242 (Pull newest changed from main) #Add all relevant paths to dictionary for specific case: self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -851,11 +847,7 @@ def jinja_enumerate(arg): # External packages that can be run through ADF 
avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} -<<<<<<< HEAD - -======= ->>>>>>> d8eec242 (Pull newest changed from main) #Construct index.html index_title = "CAM Diagnostics" index_tmpl = jinenv.get_template('template_index.html') @@ -867,12 +859,8 @@ def jinja_enumerate(arg): plot_types=plot_types, avail_plot_types=avail_plot_types, avail_external_packages=avail_external_packages, -<<<<<<< HEAD external_package_links=self.external_package_links, run_info=run_info_html) -======= - external_package_links=self.external_package_links) ->>>>>>> d8eec242 (Pull newest changed from main) #Write Mean diagnostics index HTML file: with open(index_html_file, 'w', encoding='utf-8') as ofil: From 553c70b5bbcfe64cbc6d830455855e3abf971a6d Mon Sep 17 00:00:00 2001 From: Justin Richling Date: Fri, 24 Oct 2025 10:39:45 -0600 Subject: [PATCH 11/15] Clean up linting errors --- lib/adf_diag.py | 58 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 7731e20e9..5a48dfa03 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -384,8 +384,8 @@ def call_ncrcat(cmd): # Check if particular case should be processed: if cam_ts_done[case_idx]: - emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed" - emsg += f" for case '{case_name}'. Will rely on those files directly." + emsg = "\tNOTE: Configuration file indicates time series files have been " + emsg += f"pre-computed for case '{case_name}'. Will rely on those files directly." 
print(emsg) continue # End if @@ -536,6 +536,25 @@ def call_ncrcat(cmd): # Notify user of new time series file: print(f"\t - time series for {var}") + # Create full path name, file name template: + # $cam_case_name.$hist_str.$variable.YYYYMM-YYYYMM.nc + ts_outfil_str = ( + ts_dir + + os.sep + + ".".join([case_name, hist_str, var, time_string, "nc"]) + ) + + # Check if clobber is true for file + if Path(ts_outfil_str).is_file(): + if overwrite_ts[case_idx]: + Path(ts_outfil_str).unlink() + else: + #msg = f"[{__name__}] Warning: '{var}' file was found " + msg = f"\t INFO: '{var}' file was found " + msg += "and overwrite is False. Will use existing file." + print(msg) + continue + # Initialize list for constituents if variable is derivable constit_list = [] @@ -615,7 +634,8 @@ def call_ncrcat(cmd): # Example ncatted command (you can modify it with the specific attribute changes you need) #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str] - # Step 1: Convert Path objects to strings and concatenate the list of historical files into a single string + # Step 1: Convert Path objects to strings and concatenate the list of + # historical files into a single string hist_files_str = ', '.join(str(f.name) for f in hist_files) hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs) @@ -656,7 +676,8 @@ def call_ncrcat(cmd): with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_ncattend_commands) - # Run ncatted command to remove history attribute after the global attributes are set + # Run ncatted command to remove history attribute + # after the global attributes are set with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_hist_commands) @@ -666,7 +687,7 @@ def call_ncrcat(cmd): constit_dict=constit_dict, ts_dir=ts_dir ) # End with - + # Finally, run through the derived variables if applicable if constit_dict: for der_var, constit_list in constit_dict.items(): @@ -1074,8 
+1095,8 @@ def setup_run_mdtf(self): # # Create a dict with all the case info needed for MDTF case_list - # Note that model and convention are hard-coded to CESM because that's all we expect here - # This could be changed by inputing them into ADF with other MDTF-specific variables + # Note that model and convention are hard-coded to CESM because that's all we expect here + # - This could be changed by inputing them into ADF with other MDTF-specific variables # case_list_keys = ["CASENAME", "FIRSTYR", "LASTYR", "model", "convention"] @@ -1131,7 +1152,9 @@ def setup_run_mdtf(self): # # Submit the MDTF script in background mode, send output to mdtf.out file # - mdtf_log = "mdtf.out" # maybe set this to cam_diag_plot_loc: /glade/scratch/${user}/ADF/plots + mdtf_log = "mdtf.out" + # maybe set this to cam_diag_plot_loc: /glade/scratch/${user}/ADF/plots + mdtf_exe = mdtf_codebase + os.sep + "mdtf -f " + mdtf_input_settings_filename if copy_files_only: print("\t ...Copy files only. NOT Running MDTF") @@ -1200,17 +1223,21 @@ def move_tsfiles_for_mdtf(self, verbose): elif len(adf_file_list) > 1: if verbose > 0: print( - f"WARNING: found multiple timeseries files {adf_file_list}. Continuing with best guess; suggest cleaning up multiple dates in ts dir" + f"""WARNING: found multiple timeseries files {adf_file_list}. + Continuing with best guess; suggest cleaning up multiple + dates in ts dir""" ) else: if verbose > 1: print( - f"WARNING: No files matching {case_name}.{hist_str}.{var} found in {adf_file_str}. Skipping" + f"""WARNING: No files matching {case_name}.{hist_str}.{var} + found in {adf_file_str}. Skipping""" ) continue # skip this case/hist_str/var file adf_file = adf_file_list[0] - # If freq is not set, it means we just started this hist_str. So check the first ADF file to find it + # If freq is not set, it means we just started this hist_str. 
+ # So check the first ADF file to find it hist_file_ds = xr.open_dataset( adf_file, decode_cf=False, decode_times=False ) @@ -1221,7 +1248,8 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: Necessary 'time_period_freq' attribute missing from {adf_file}. Skipping file." + f"""WARNING: Necessary 'time_period_freq' attribute missing + from {adf_file}. Skipping file.""" ) continue @@ -1236,12 +1264,14 @@ def move_tsfiles_for_mdtf(self, verbose): elif len(found_strings) > 1: if verbose > 0: print( - f"WARNING: Found dataset_freq {dataset_freq} matches multiple string possibilities:{', '.join(found_strings)}" + f"""WARNING: Found dataset_freq {dataset_freq} matches multiple + string possibilities:{', '.join(found_strings)}""" ) else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" + f"""WARNING: None of the frequency options {freq_string_cesm} are + present in the time_period_freq attribute {dataset_freq}""" ) print(f"Skipping {adf_file}") freq = "frequency_missing" From 31b814d8f9e4433832f76a3e2e083227d3166c78 Mon Sep 17 00:00:00 2001 From: Justin Richling Date: Fri, 24 Oct 2025 10:53:01 -0600 Subject: [PATCH 12/15] Remove unused code and clean up comments --- lib/adf_diag.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 5a48dfa03..d14b6fdce 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -632,14 +632,12 @@ def call_ncrcat(cmd): + ["-o", ts_outfil_str] ) - # Example ncatted command (you can modify it with the specific attribute changes you need) - #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str] - # Step 1: Convert Path objects to strings and concatenate the list of + # Convert Path objects to strings and concatenate the list of # historical files into a single string hist_files_str = ', '.join(str(f.name) for f 
in hist_files) hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs) - # Step 2: Create the ncatted command to add both global attributes + # Create the ncatted command to add both global attributes cmd_ncatted = [ "ncatted", "-O", "-a", "adf_user,global,a,c," + f"{self.user}", @@ -648,7 +646,7 @@ def call_ncrcat(cmd): ts_outfil_str ] - # Step 3: Create the ncatted command to remove the history attribute + # Create the ncatted command to remove the history attribute cmd_remove_history = [ "ncatted", "-O", "-h", "-a", "history,global,d,,", @@ -670,7 +668,6 @@ def call_ncrcat(cmd): # Now run the "ncrcat" subprocesses in parallel: with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_commands) - # End with # Run ncatted commands after ncrcat is done with mp.Pool(processes=self.num_procs) as mpool: @@ -681,13 +678,6 @@ def call_ncrcat(cmd): with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_hist_commands) - if vars_to_derive: - self.derive_variables( - res=res, hist_str=hist_str, vars_to_derive=vars_to_derive, - constit_dict=constit_dict, ts_dir=ts_dir - ) - # End with - # Finally, run through the derived variables if applicable if constit_dict: for der_var, constit_list in constit_dict.items(): From 7da7b29a38b33bf0b26c207fa1ced36757ab4bb4 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 4 Nov 2025 13:54:08 -0700 Subject: [PATCH 13/15] Bring in recent changes to `adf_diag.py` --- lib/adf_diag.py | 71 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 64b3fa9d9..d14b6fdce 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -384,8 +384,8 @@ def call_ncrcat(cmd): # Check if particular case should be processed: if cam_ts_done[case_idx]: - emsg = "\tNOTE: Configuration file indicates time series files have been pre-computed" - emsg += f" for case '{case_name}'. 
Will rely on those files directly." + emsg = "\tNOTE: Configuration file indicates time series files have been " + emsg += f"pre-computed for case '{case_name}'. Will rely on those files directly." print(emsg) continue # End if @@ -536,6 +536,25 @@ def call_ncrcat(cmd): # Notify user of new time series file: print(f"\t - time series for {var}") + # Create full path name, file name template: + # $cam_case_name.$hist_str.$variable.YYYYMM-YYYYMM.nc + ts_outfil_str = ( + ts_dir + + os.sep + + ".".join([case_name, hist_str, var, time_string, "nc"]) + ) + + # Check if clobber is true for file + if Path(ts_outfil_str).is_file(): + if overwrite_ts[case_idx]: + Path(ts_outfil_str).unlink() + else: + #msg = f"[{__name__}] Warning: '{var}' file was found " + msg = f"\t INFO: '{var}' file was found " + msg += "and overwrite is False. Will use existing file." + print(msg) + continue + # Initialize list for constituents if variable is derivable constit_list = [] @@ -613,13 +632,12 @@ def call_ncrcat(cmd): + ["-o", ts_outfil_str] ) - # Example ncatted command (you can modify it with the specific attribute changes you need) - #cmd_ncatted = ["ncatted", "-O", "-a", f"adf_user,global,a,c,{self.user}", ts_outfil_str] - # Step 1: Convert Path objects to strings and concatenate the list of historical files into a single string + # Convert Path objects to strings and concatenate the list of + # historical files into a single string hist_files_str = ', '.join(str(f.name) for f in hist_files) hist_locs_str = ', '.join(str(loc) for loc in cam_hist_locs) - # Step 2: Create the ncatted command to add both global attributes + # Create the ncatted command to add both global attributes cmd_ncatted = [ "ncatted", "-O", "-a", "adf_user,global,a,c," + f"{self.user}", @@ -628,7 +646,7 @@ def call_ncrcat(cmd): ts_outfil_str ] - # Step 3: Create the ncatted command to remove the history attribute + # Create the ncatted command to remove the history attribute cmd_remove_history = [ "ncatted", "-O", 
"-h", "-a", "history,global,d,,", @@ -650,8 +668,16 @@ def call_ncrcat(cmd): # Now run the "ncrcat" subprocesses in parallel: with mp.Pool(processes=self.num_procs) as mpool: _ = mpool.map(call_ncrcat, list_of_commands) - # End with - + + # Run ncatted commands after ncrcat is done + with mp.Pool(processes=self.num_procs) as mpool: + _ = mpool.map(call_ncrcat, list_of_ncattend_commands) + + # Run ncatted command to remove history attribute + # after the global attributes are set + with mp.Pool(processes=self.num_procs) as mpool: + _ = mpool.map(call_ncrcat, list_of_hist_commands) + # Finally, run through the derived variables if applicable if constit_dict: for der_var, constit_list in constit_dict.items(): @@ -1059,8 +1085,8 @@ def setup_run_mdtf(self): # # Create a dict with all the case info needed for MDTF case_list - # Note that model and convention are hard-coded to CESM because that's all we expect here - # This could be changed by inputing them into ADF with other MDTF-specific variables + # Note that model and convention are hard-coded to CESM because that's all we expect here + # - This could be changed by inputing them into ADF with other MDTF-specific variables # case_list_keys = ["CASENAME", "FIRSTYR", "LASTYR", "model", "convention"] @@ -1116,7 +1142,9 @@ def setup_run_mdtf(self): # # Submit the MDTF script in background mode, send output to mdtf.out file # - mdtf_log = "mdtf.out" # maybe set this to cam_diag_plot_loc: /glade/scratch/${user}/ADF/plots + mdtf_log = "mdtf.out" + # maybe set this to cam_diag_plot_loc: /glade/scratch/${user}/ADF/plots + mdtf_exe = mdtf_codebase + os.sep + "mdtf -f " + mdtf_input_settings_filename if copy_files_only: print("\t ...Copy files only. NOT Running MDTF") @@ -1185,17 +1213,21 @@ def move_tsfiles_for_mdtf(self, verbose): elif len(adf_file_list) > 1: if verbose > 0: print( - f"WARNING: found multiple timeseries files {adf_file_list}. 
Continuing with best guess; suggest cleaning up multiple dates in ts dir" + f"""WARNING: found multiple timeseries files {adf_file_list}. + Continuing with best guess; suggest cleaning up multiple + dates in ts dir""" ) else: if verbose > 1: print( - f"WARNING: No files matching {case_name}.{hist_str}.{var} found in {adf_file_str}. Skipping" + f"""WARNING: No files matching {case_name}.{hist_str}.{var} + found in {adf_file_str}. Skipping""" ) continue # skip this case/hist_str/var file adf_file = adf_file_list[0] - # If freq is not set, it means we just started this hist_str. So check the first ADF file to find it + # If freq is not set, it means we just started this hist_str. + # So check the first ADF file to find it hist_file_ds = xr.open_dataset( adf_file, decode_cf=False, decode_times=False ) @@ -1206,7 +1238,8 @@ def move_tsfiles_for_mdtf(self, verbose): else: if verbose > 0: print( - f"WARNING: Necessary 'time_period_freq' attribute missing from {adf_file}. Skipping file." + f"""WARNING: Necessary 'time_period_freq' attribute missing + from {adf_file}. 
Skipping file.""" ) continue @@ -1221,12 +1254,14 @@ def move_tsfiles_for_mdtf(self, verbose): elif len(found_strings) > 1: if verbose > 0: print( - f"WARNING: Found dataset_freq {dataset_freq} matches multiple string possibilities:{', '.join(found_strings)}" + f"""WARNING: Found dataset_freq {dataset_freq} matches multiple + string possibilities:{', '.join(found_strings)}""" ) else: if verbose > 0: print( - f"WARNING: None of the frequency options {freq_string_cesm} are present in the time_period_freq attribute {dataset_freq}" + f"""WARNING: None of the frequency options {freq_string_cesm} are + present in the time_period_freq attribute {dataset_freq}""" ) print(f"Skipping {adf_file}") freq = "frequency_missing" From f15d3369df267fe5fcef722fae74857e673085c1 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 4 Nov 2025 13:56:33 -0700 Subject: [PATCH 14/15] Remove constituent list This is now being generated in `adf_derive.py` --- lib/adf_diag.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index d14b6fdce..988296c44 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -555,9 +555,6 @@ def call_ncrcat(cmd): print(msg) continue - # Initialize list for constituents if variable is derivable - constit_list = [] - # Check if current variable is not in history file(s) if var not in hist_file_var_list: # Let user know variable is not From 26dd8dd4b02b8f849e9f5526c3aed824ca709bf7 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 4 Nov 2025 13:58:49 -0700 Subject: [PATCH 15/15] Remove whitespaces --- lib/adf_web.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/adf_web.py b/lib/adf_web.py index 5ce9b5523..6704b3517 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -157,7 +157,7 @@ def __init__(self, config_file, debug=False): mdtf_path += f"_{syear[0]}_{eyear[0]}" self.external_package_links['MDTF'] = mdtf_path #End if - + #Add all relevant paths to dictionary for specific case: 
self.__case_web_paths[case_name] = {'website_dir': website_dir, 'img_pages_dir': img_pages_dir, @@ -847,7 +847,7 @@ def jinja_enumerate(arg): # External packages that can be run through ADF avail_external_packages = {'MDTF':'mdtf_html_path', 'CVDP':'cvdp_html_path'} - + #Construct index.html index_title = "CAM Diagnostics" index_tmpl = jinenv.get_template('template_index.html') @@ -866,7 +866,6 @@ def jinja_enumerate(arg): with open(index_html_file, 'w', encoding='utf-8') as ofil: ofil.write(index_rndr) #End with - #End for (web data loop) #If this is a multi-case instance, then copy website to "main" directory: