From 8dc22d46ec7de3304695adba95764d234fe72942 Mon Sep 17 00:00:00 2001
From: wwieder
Date: Thu, 13 Nov 2025 14:20:28 -0700
Subject: [PATCH 1/7] minor fix to get land regridding to work

---
 scripts/regridding/regrid_se_to_fv.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/regridding/regrid_se_to_fv.py b/scripts/regridding/regrid_se_to_fv.py
index 098188104..617726f62 100644
--- a/scripts/regridding/regrid_se_to_fv.py
+++ b/scripts/regridding/regrid_se_to_fv.py
@@ -60,13 +60,12 @@ def regrid_se_data_bilinear(regridder, data_to_regrid, comp_grid):
     )
     return regridded
 
-def regrid_se_data_conservative(regridder, data_to_regrid, comp_grid):
+def regrid_se_data_conservative(regridder, data_to_regrid, comp_grid="lndgrid"):
     updated = data_to_regrid.copy().transpose(..., comp_grid).expand_dims("dummy", axis=-2)
     regridded = regridder(updated.rename({"dummy": "lat", comp_grid: "lon"})
     )
     return regridded
 
-
 def regrid_atm_se_data_bilinear(regridder, data_to_regrid, comp_grid='ncol'):
     if isinstance(data_to_regrid, xr.Dataset):
         vars_with_ncol = [name for name in data_to_regrid.variables if comp_grid in data_to_regrid[name].dims]
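Patch 1's one-line change gives the conservative regridder a land-appropriate default dimension name (`lndgrid` rather than the atmosphere's `ncol`). The reshape it wraps is worth spelling out: the 1-D unstructured dimension is hidden behind a size-1 dummy latitude so that a regridder expecting 2-D (lat, lon) input will accept it. A minimal sketch with synthetic data; no regridder is invoked here and the grid size is arbitrary:

import numpy as np
import xarray as xr

data = xr.DataArray(np.random.rand(12, 400), dims=("time", "lndgrid"))
comp_grid = "lndgrid"

# Insert a size-1 "dummy" dimension just before the unstructured dimension,
# then rename so the array looks like (..., lat=1, lon=n) to the regridder.
updated = data.copy().transpose(..., comp_grid).expand_dims("dummy", axis=-2)
reshaped = updated.rename({"dummy": "lat", comp_grid: "lon"})
print(reshaped.sizes)  # Frozen({'time': 12, 'lat': 1, 'lon': 400})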
From f0cc2e6c77f63370ebd78ec3bbb7af02ceca9ae0 Mon Sep 17 00:00:00 2001
From: wwieder
Date: Thu, 13 Nov 2025 14:21:01 -0700
Subject: [PATCH 2/7] start regional TS capability

---
 scripts/plotting/regional_timeseries.py | 494 ++++++++++++++++++++++++
 1 file changed, 494 insertions(+)
 create mode 100644 scripts/plotting/regional_timeseries.py

diff --git a/scripts/plotting/regional_timeseries.py b/scripts/plotting/regional_timeseries.py
new file mode 100644
index 000000000..e6ee3f9c4
--- /dev/null
+++ b/scripts/plotting/regional_timeseries.py
@@ -0,0 +1,494 @@
+from pathlib import Path
+import numpy as np
+import yaml
+import xarray as xr
+import uxarray as ux
+import matplotlib.pyplot as plt
+import cartopy.crs as ccrs
+import warnings # use to warn user about missing files.
+
+def my_formatwarning(msg, *args, **kwargs):
+    """custom warning"""
+    # ignore everything except the message
+    return str(msg) + "\n"
+
+
+warnings.formatwarning = my_formatwarning
+
+
+def regional_climatology(adfobj):
+
+    """
+    load climo file, subset for each region and each var
+    Make a combined plot, save it, add it to website.
+
+    NOTES (from Meg): There are still a lot of to-do's with this script!
+    - convert region definition netCDF file to a yml, read that in instead
+    - increase number of variables that have a climo plotted; I've just
+    added two, but left room for more in the subplots
+    - check that all variables have climo files; likely to break otherwise
+    - add option so that this works with a structured grid too # ...existing code...
+    if found:
+        #Check if observations dataset name is specified:
+        if "obs_name" in default_var_dict:
+            obs_name = default_var_dict["obs_name"]
+        else:
+            obs_name = obs_file_path.name
+
+        if "obs_var_name" in default_var_dict:
+            obs_var_name = default_var_dict["obs_var_name"]
+        else:
+            obs_var_name = field
+
+        # Use the resolved obs_file_path, not the original string
+        obs_data[field] = xr.open_mfdataset([str(obs_file_path)], combine="by_coords")
+        plot_obs[field] = True
+    # ...existing code...
+    - make sure that climo's are being plotted with the preferred units
+    - add in observations (need to regrid/area weight)
+    - need to figure out how to display the figures on the website
+
+    """
+
+    #Notify user that script has started:
+    print("\n --- Generating regional climatology plots... ---")
+
+    # Gather ADF configurations
+    # plot_loc = adfobj.get_basic_info('cam_diag_plot_loc')
+    # plot_type = adfobj.read_config_var("diag_basic_info").get("plot_type", "png")
+    plot_locations = adfobj.plot_location
+    plot_type = adfobj.get_basic_info('plot_type')
+    if not plot_type:
+        plot_type = 'png'
+    #res = adfobj.variable_defaults # will be dict of variable-specific plot preferences
+    # or an empty dictionary if use_defaults was not specified in YAML.
+
+    # check if existing plots need to be redone
+    redo_plot = adfobj.get_basic_info('redo_plot')
+    print(f"\t NOTE: redo_plot is set to {redo_plot}")
+
+    unstruct_plotting = adfobj.unstructured_plotting
+    print(f"\t unstruct_plotting: {unstruct_plotting}")
+
+    case_nickname = adfobj.get_cam_info('case_nickname')
+    base_nickname = adfobj.get_baseline_info('case_nickname')
+
+    region_list = adfobj.region_list
+    #TODO, make it easier for users to decide on these?
+    regional_climo_var_list = ['TSA','PREC','ELAI',
+                               'FSDS','FLDS','SNOWDP','ASA',
+                               'FSH','QRUNOFF_TO_COUPLER','ET','FCTR',
+                               'GPP','TWS','FCEV','FAREA_BURNED',
+                               ]
+
+    # Extract variables:
+    baseline_name = adfobj.get_baseline_info("cam_case_name", required=True)
+    input_climo_baseline = Path(adfobj.get_baseline_info("cam_climo_loc", required=True))
+    # TODO hard wired for single case name:
+    case_name = adfobj.get_cam_info("cam_case_name", required=True)[0]
+    input_climo_case = Path(adfobj.get_cam_info("cam_climo_loc", required=True)[0])
+
+    # Get grid file
+    mesh_file = adfobj.mesh_files["baseline_mesh_file"]
+    uxgrid = ux.open_grid(mesh_file)
+
+    # Set keywords
+    kwargs = {}
+    kwargs["mesh_file"] = mesh_file
+    kwargs["unstructured_plotting"] = unstruct_plotting
+
+    #Determine local directory:
+    _adf_lib_dir = adfobj.get_basic_info("obs_data_loc")
+
+    #Determine whether to use adf defaults or custom:
+    _defaults_file = adfobj.get_basic_info('defaults_file')
+    # Note this won't work if no defaults_file is given
+    if _defaults_file is None:
+        _defaults_file = _adf_lib_dir/'adf_variable_defaults.yaml'
+    else:
+        print(f"\n\t Not using ADF default variables yaml file, instead using {_defaults_file}\n")
+    #End if
+
+    #Open YAML file:
+    with open(_defaults_file, encoding='UTF-8') as dfil:
+        adfobj.__variable_defaults = yaml.load(dfil, Loader=yaml.SafeLoader)
+
+    _variable_defaults = adfobj.__variable_defaults
+
+    ## Read regions from yml file:
+    ymlFilename = adfobj.get_basic_info("regions_file")
+    with open(ymlFilename, 'r') as file:
+        regions = yaml.safe_load(file)
+
+    #-----------------------------------------
+    #Extract the "obs_data_loc" default observational data location:
+    obs_data_loc = adfobj.get_basic_info("obs_data_loc")
+
+    base_data = {}
+    case_data = {}
+    obs_data = {}
+    obs_name = {}
+    obs_var_name = {}
+    plot_obs = {}
+
+    var_obs_dict = adfobj.var_obs_dict
+
+    # First, load all variable data once (instead of inside nested loops)
+    for field in regional_climo_var_list:
+        # Load the global climatology for this variable
+        # TODO unit conversions are not handled consistently here
+        base_data[field] = adfobj.data.load_reference_climo_da(baseline_name, field, **kwargs)
+        case_data[field] = adfobj.data.load_climo_da(case_name, field, **kwargs)
+
+        if type(base_data[field]) is type(None):
+            print('Missing file for ', field)
+            continue
+        else:
+            # get area and landfrac for base and case climo datasets
+            mdataset = adfobj.data.load_climo_dataset(case_name, field, **kwargs)
+            area_c = mdataset.area.isel(time=0) # drop time dimension to avoid confusion
+            landfrac_c = mdataset.landfrac.isel(time=0)
+            # Redundant, but we'll do this for consistency:
+            # TODO, won't handle loading the basecase this way
+            #area_b = adfobj.data.load_reference_climo_da(baseline_name, 'area', **kwargs)
+            #landfrac_b = adfobj.data.load_reference_climo_da(baseline_name, 'landfrac', **kwargs)
+
+            mdataset_base = adfobj.data.load_reference_climo_dataset(baseline_name, field, **kwargs)
+            area_b = mdataset_base.area.isel(time=0)
+            landfrac_b = mdataset_base.landfrac.isel(time=0)
+
+            # calculate weights
+            # WW: 1) should actual weight calculation be done after subsetting to region?
+            # 2) Does this work as intended for different resolutions?
+            # wgt = area * landfrac # / (area * landfrac).sum()
+
+        #-----------------------------------------
+        # Now, check if observations are to be plotted for this variable
+        plot_obs[field] = False
+        if field in _variable_defaults:
+            # Extract variable-obs dictionary
+            default_var_dict = _variable_defaults[field]
+
+            #Check if an observations file is specified:
+            if "obs_file" in default_var_dict:
+                #Set found variable:
+                found = False
+
+                #Extract path/filename:
+                obs_file_path = Path(default_var_dict["obs_file"])
+
+                #Check if file exists:
+                if not obs_file_path.is_file():
+                    #If not, then check if it is in "obs_data_loc"
+                    if obs_data_loc:
+                        obs_file_path = Path(obs_data_loc)/obs_file_path
+
+                        if obs_file_path.is_file():
+                            found = True
+
+                else:
+                    #File was found:
+                    found = True
+                #End if
+
+                #If found, then set observations dataset and variable names:
+                if found:
+                    #Check if observations dataset name is specified:
+                    if "obs_name" in default_var_dict:
+                        obs_name[field] = default_var_dict["obs_name"]
+                    else:
+                        #If not, then just use obs file name:
+                        obs_name[field] = obs_file_path.name
+
+                    #Check if observations variable name is specified:
+                    if "obs_var_name" in default_var_dict:
+                        #If so, then set obs_var_name variable:
+                        obs_var_name[field] = default_var_dict["obs_var_name"]
+                    else:
+                        #Assume observation variable name is the same as model variable:
+                        obs_var_name[field] = field
+                    #End if
+                    #Finally read in the obs!
+                    obs_data[field] = xr.open_mfdataset([str(obs_file_path)], combine="by_coords")
+                    plot_obs[field] = True
+                    # Special handling for some variables. NOT A GOOD HACK!
+                    # TODO: improve this!
+                    if (field == 'ASA') and ('BRDALB' in obs_data[field].variables):
+                        obs_data[field]['BRDALB'] = obs_data[field]['BRDALB'].swap_dims({'lsmlat':'lat','lsmlon':'lon'})
+
+                else:
+                    #If not found, then print to log and skip variable:
+                    msg = f'''Unable to find obs file '{default_var_dict["obs_file"]}' '''
+                    msg += f"for variable '{field}'."
+                    adfobj.debug_log(msg)
+                    continue
+                # End if
+
+            else:
+                #No observation file was specified, so print to log and skip variable:
+                adfobj.debug_log(f"No observations file was listed for variable '{field}'.")
+                continue
+        else:
+            #Variable not in defaults file, so print to log and skip variable:
+            msg = f"Variable '{field}' not found in variable defaults file: `{_defaults_file}`"
+            adfobj.debug_log(msg)
+        # End if
+    # End of observation loading
+    #-----------------------------------------
+
+    #-----------------------------------------
+    # Loop over regions for selected variable
+    for iReg in range(len(region_list)):
+        print(f"\n\t - Plotting regional climatology for: {region_list[iReg]}")
+        # regionDS_thisRg = regionDS.isel(region=region_indexList[iReg])
+        box_west, box_east, box_south, box_north, region_category = get_region_boundaries(regions, region_list[iReg])
+        ## Set up figure
+        ## TODO: Make the plot size/number of subplots responsive to number of fields specified
+        fig,axs = plt.subplots(4,4, figsize=(18,12))
+        axs = axs.ravel()
+
+        plt_counter = 1
+        for field in regional_climo_var_list:
+            mdataset = adfobj.data.load_climo_dataset(case_name, field, **kwargs)
+
+            if type(base_data[field]) is type(None):
+                continue
+            else:
+                if unstruct_plotting == True:
+                    # uxarray output is time*nface, sum over nface
+                    base_var,wgt_sub = getRegion_uxarray(uxgrid, base_data, field, area_b, landfrac_b,
+                                                         box_west, box_east,
+                                                         box_south, box_north)
+                    base_var_wgtd = np.sum(base_var * wgt_sub, axis=-1) # WW not needed?/ np.sum(wgt_sub)
+
+                    case_var,wgt_sub = getRegion_uxarray(uxgrid, case_data, field, area_c, landfrac_c,
+                                                         box_west, box_east,
+                                                         box_south, box_north)
+                    case_var_wgtd = np.sum(case_var * wgt_sub, axis=-1) #/ np.sum(wgt_sub)
+
+                else: # regular lat/lon grid
+                    # xarray output is time*lat*lon, sum over lat/lon
+                    base_var, wgt_sub = getRegion_xarray(base_data[field], field,
+                                                         box_west, box_east,
+                                                         box_south, box_north,
+                                                         area_b, landfrac_b)
+                    base_var_wgtd = np.sum(base_var * wgt_sub, axis=(1,2))
+
+                    case_var, wgt_sub = getRegion_xarray(case_data[field], field,
+                                                         box_west, box_east,
+                                                         box_south, box_north,
+                                                         area_c, landfrac_c)
+                    case_var_wgtd = np.sum(case_var * wgt_sub, axis=(1,2))
+
+                # Read in observations, if available
+                if plot_obs[field] == True:
+                    # obs output is time*lat*lon, sum over lat/lon
+                    obs_var, wgt_sub = getRegion_xarray(obs_data[field], field,
+                                                        box_west, box_east,
+                                                        box_south, box_north,
+                                                        obs_var_name=obs_var_name[field])
+                    obs_var_wgtd = np.sum(obs_var * wgt_sub, axis=(1,2)) #/ np.sum(wgt_sub)
+
+                ## Plot the map:
+                if plt_counter==1:
+                    ## Define region in first subplot
+                    fig.delaxes(axs[0])
+
+                    transform = ccrs.PlateCarree()
+                    projection = ccrs.PlateCarree()
+                    base_var_mask = base_var.isel(time=0)
+
+                    if unstruct_plotting == True:
+                        base_var_mask[np.isfinite(base_var_mask)]=1
+                        collection = base_var_mask.to_polycollection()
+
+                        collection.set_transform(transform)
+                        collection.set_cmap('rainbow_r')
+                        collection.set_antialiased(False)
+                        map_ax = fig.add_subplot(4, 4, 1, projection=ccrs.PlateCarree())
+
+                        map_ax.coastlines()
+                        map_ax.add_collection(collection)
+                    elif unstruct_plotting == False:
+                        base_var_mask = base_var_mask.copy()
+                        base_var_mask.values[np.isfinite(base_var_mask.values)] = 1
+
+                        map_ax = fig.add_subplot(4, 4, 1, projection=ccrs.PlateCarree())
+                        map_ax.coastlines()
+
+                        # Plot using pcolormesh for structured grids
+                        im = map_ax.pcolormesh(base_var_mask.lon, base_var_mask.lat,
+                                               base_var_mask.values,
+                                               transform=transform,
+                                               cmap='rainbow_r',
+                                               shading='auto')
+
+                    # Add map extent selection
+                    if region_list[iReg]=='N Hemisphere Land':
+                        map_ax.set_extent([-180, 179, -3, 90],crs=ccrs.PlateCarree())
+                    elif region_list[iReg]=='Global':
+                        map_ax.set_extent([-180, 179, -89, 90],crs=ccrs.PlateCarree())
+                    elif region_list[iReg]=='S Hemisphere Land':
+                        map_ax.set_extent([-180, 179, -89, 3],crs=ccrs.PlateCarree())
+                    elif region_list[iReg]=='Polar':
+                        map_ax.set_extent([-180, 179, 45, 90],crs=ccrs.PlateCarree())
+                    else:
+                        if ((box_south >= 30) & (box_east<=-5) ):
+                            map_ax.set_extent([-180, 0, 30, 90],crs=ccrs.PlateCarree())
+                        elif ((box_south >= 30) & (box_east>=-5) ):
+                            map_ax.set_extent([-10, 179, 30, 90],crs=ccrs.PlateCarree())
+                        elif ((box_south <= 30) & (box_south >= -30) &
+                              (box_east<=-5) ):
+                            map_ax.set_extent([-180, 0, -30, 30],crs=ccrs.PlateCarree())
+                        elif ((box_south <= 30) & (box_south >= -30) &
+                              (box_east>=-5) ):
+                            map_ax.set_extent([-10, 179, -30, 30],crs=ccrs.PlateCarree())
+                        elif ((box_south <= -30) & (box_south >= -60) &
+                              (box_east>=-5) ):
+                            map_ax.set_extent([-10, 179, -89, -30],crs=ccrs.PlateCarree())
+                        elif ((box_south <= -30) & (box_south >= -60) &
+                              (box_east<=-5) ):
+                            map_ax.set_extent([-180, 0, -89, -30],crs=ccrs.PlateCarree())
+                        elif ((box_south <= -60)):
+                            map_ax.set_extent([-180, 179, -89, -60],crs=ccrs.PlateCarree())
+                    # End if for plotting map extent
+
+                ## Plot the climatology:
+                if type(base_data[field]) is type(None):
+                    print('Missing file for ', field)
+                    continue
+                else:
+                    axs[plt_counter].plot(np.arange(12)+1, case_var_wgtd,
+                                          label=case_nickname, linewidth=2)
+                    axs[plt_counter].plot(np.arange(12)+1, base_var_wgtd,
+                                          label=base_nickname, linewidth=2)
+                    if plot_obs[field] == True:
+                        axs[plt_counter].plot(np.arange(12)+1, obs_var_wgtd,
+                                              label=obs_name[field], color='black', linewidth=2)
+                    axs[plt_counter].set_title(field)
+                    axs[plt_counter].set_ylabel(base_data[field].units)
+                    axs[plt_counter].set_xticks(np.arange(1, 13, 2))
+                    axs[plt_counter].legend()
+
+
+                plt_counter = plt_counter+1
+
+        fig.subplots_adjust(hspace=0.3, wspace=0.3)
+
+        # Save out figure
+        plot_loc = Path(plot_locations[0]) / f'{region_list[iReg]}_plot_RegionalClimo_Mean.{plot_type}'
+
+        # Check redo_plot. If set to True: remove old plots, if they already exist:
+        if (not redo_plot) and plot_loc.is_file():
+            #Add already-existing plot to website (if enabled):
+            adfobj.debug_log(f"'{plot_loc}' exists and clobber is false.")
+            adfobj.add_website_data(plot_loc, region_list[iReg], None, season=None, multi_case=True,
+                                    category=region_category, non_season=True, plot_type = "RegionalClimo")
+
+            #Continue to next iteration:
+            continue
+        elif (redo_plot):
+            if plot_loc.is_file():
+                plot_loc.unlink()
+
+        fig.savefig(plot_loc, bbox_inches='tight', facecolor='white')
+        plt.close()
+
+        #Add plot to website (if enabled):
+        adfobj.add_website_data(plot_loc, region_list[iReg], None, season=None, multi_case=True,
+                                non_season=True, category=region_category, plot_type = "RegionalClimo")
+
+    return
+
+print("\n --- Regional climatology plots generated successfully! ---")
+
+def getRegion_uxarray(gridDS, varDS, varName, area, landfrac, BOX_W, BOX_E, BOX_S, BOX_N):
+    # Method 2: Filter mesh nodes based on coordinates
+    node_lons = gridDS.face_lon
+    node_lats = gridDS.face_lat
+
+    # Create a boolean mask for nodes within your domain
+    in_domain = ((node_lons >= BOX_W) & (node_lons <= BOX_E) &
+                 (node_lats >= BOX_S) & (node_lats <= BOX_N))
+
+    # Get the indices of nodes within your domain
+    node_indices = np.where(in_domain)[0]
+
+    # Subset the dataset using these node indices
+    domain_subset = varDS[varName].isel(n_face=node_indices)
+    area_subset = area.isel(n_face=node_indices)
+    landfrac_subset = landfrac.isel(n_face=node_indices)
+    wgt_subset = area_subset * landfrac_subset / (area_subset* landfrac_subset).sum()
+
+    return domain_subset,wgt_subset
+
+def getRegion_xarray(varDS, varName,
+                     BOX_W, BOX_E, BOX_S, BOX_N,
+                     area=None, landfrac=None,
+                     obs_var_name=None):
+    # Assumes regular lat/lon grid in xarray Dataset
+    # Assumes varDS has 'lon' and 'lat' coordinates w/ lon in [0,360]
+    # Convert BOX_W and BOX_E to [0,360] if necessary
+    # Also assumes global weights have already been calculated & masked appropriately
+    if (BOX_W == -180) & (BOX_E == 180):
+        BOX_W, BOX_E = 0, 360 # Special case for global domain
+    if BOX_W < 0: BOX_W = BOX_W + 360
+    if BOX_E < 0: BOX_E = BOX_E + 360
+
+    if varName not in varDS:
+        varName = obs_var_name
+
+    # TODO is there a less brittle way to do this?
+    if (area is not None) and (landfrac is not None):
+        weight = area * landfrac
+    elif ('weight' in varDS) and ('datamask' in varDS):
+        weight = varDS['weight'] * varDS['datamask']
+    elif ('weight' in varDS) and ('LANDFRAC' in varDS):
+        #used for MODIS albedo product
+        weight = varDS['weight'] * varDS['LANDFRAC']
+    elif 'area' in varDS and 'landfrac' in varDS:
+        weight = varDS['area'] * varDS['landfrac']
+    elif 'area' in varDS and 'landmask' in varDS:
+        weight = varDS['area'] * varDS['landmask']
+        # Fluxnet data also has a datamask
+        if 'datamask' in varDS:
+            weight = weight * varDS['datamask']
+    else:
+        raise ValueError(f"No valid weight, area, or landmask found in {varName} dataset.")
+
+    # check we have a data array for the variable
+    if isinstance(varDS, xr.Dataset):
+        varDS = varDS[varName]
+
+    # Subset the dataarray using the specified box
+    if BOX_W < BOX_E:
+        domain_subset = varDS.sel(lat=slice(BOX_S, BOX_N),
+                                  lon=slice(BOX_W, BOX_E))
+        weight_subset = weight.sel(lat=slice(BOX_S, BOX_N),
+                                   lon=slice(BOX_W, BOX_E))
+
+    else:
+        # Use boolean indexing to select the region
+        # The parentheses are important due to operator precedence
+        west_of_0 = varDS.lon >= BOX_W
+        east_of_0 = varDS.lon <= BOX_E
+        domain_subset = varDS.sel(lat=slice(BOX_S, BOX_N),
+                                  lon=(west_of_0 | east_of_0))
+        weight_subset = weight.sel(lat=slice(BOX_S, BOX_N),
+                                   lon=(west_of_0 | east_of_0))
+
+    wgt_subset = weight_subset / weight_subset.sum()
+    return domain_subset,wgt_subset
+
+def get_region_boundaries(regions, region_name):
+    """Get the boundaries of a specific region."""
+    if region_name not in regions:
+        raise ValueError(f"Region '{region_name}' not found in regions dictionary")
+
+    region = regions[region_name]
+    south, north = region['lat_bounds']
+    west, east = region['lon_bounds']
+    region_category = region['region_category'] if 'region_category' in region else None
+
+    return west, east, south, north, region_category
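Before patch 3, a note on the weighting this new script leans on everywhere: a regional mean is the sum of the variable times `area * landfrac` weights normalized over the regional subset. A self-contained sketch on a synthetic lat/lon grid (the real script pulls `area` and `landfrac` from the model files; the box bounds below are arbitrary):

import numpy as np
import xarray as xr

lat = np.arange(-85.0, 90.0, 10.0)
lon = np.arange(5.0, 360.0, 10.0)
var = xr.DataArray(np.random.rand(12, lat.size, lon.size),
                   coords={"time": np.arange(12), "lat": lat, "lon": lon},
                   dims=("time", "lat", "lon"))
area = np.cos(np.deg2rad(var.lat)) * xr.ones_like(var.lon)  # stand-in for cell area
landfrac = xr.ones_like(area)                               # stand-in for land fraction

# Subset both data and weights to the box, then normalize weights over the subset.
wgt = (area * landfrac).sel(lat=slice(20, 50), lon=slice(60, 120))
wgt = wgt / wgt.sum()
region = var.sel(lat=slice(20, 50), lon=slice(60, 120))
regional_mean = (region * wgt).sum(dim=("lat", "lon"))  # 12 monthly values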
From 57022a309e49a9df035ebd3a3601141e1ce5606f Mon Sep 17 00:00:00 2001
From: wwieder
Date: Thu, 13 Nov 2025 15:52:48 -0700
Subject: [PATCH 3/7] my last adf_dataset commit

---
 lib/adf_dataset.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/adf_dataset.py b/lib/adf_dataset.py
index ba1bacf50..bba19ee21 100644
--- a/lib/adf_dataset.py
+++ b/lib/adf_dataset.py
@@ -118,7 +118,7 @@ def get_ref_timeseries_file(self, field):
         return ts_files
 
-    def load_timeseries_dataset(self, fils):
+    def load_timeseries_dataset(self, fils, **kwargs):
         """Return DataSet from time series file(s) and assign time to midpoint of interval"""
         if (len(fils) == 0):
             warnings.warn("\t WARNING: Input file list is empty.")
@@ -146,7 +146,7 @@ def load_timeseries_dataset(self, fils):
             warnings.warn("\t INFO: Timeseries file does not have time bounds info.")
         return xr.decode_cf(ds)
 
-    def load_timeseries_da(self, case, variablename):
+    def load_timeseries_da(self, case, variablename, **kwargs):
         """Return DataArray from time series file(s).
            Uses defaults file to convert units.
         """
@@ -155,9 +155,9 @@ def load_timeseries_da(self, case, variablename):
         if not fils:
             warnings.warn(f"\t WARNING: Did not find case time series file(s), variable: {variablename}")
             return None
-        return self.load_da(fils, variablename, add_offset=add_offset, scale_factor=scale_factor)
+        return self.load_da(fils, variablename, add_offset=add_offset, scale_factor=scale_factor,**kwargs)
 
-    def load_reference_timeseries_da(self, field):
+    def load_reference_timeseries_da(self, field, **kwargs):
         """Return a DataArray time series to be used as reference (aka baseline)
            for variable field.
         """
@@ -165,7 +165,7 @@ def load_reference_timeseries_da(self, field):
         if not fils:
             warnings.warn(f"\t WARNING: Did not find reference time series file(s), variable: {field}")
             return None
-        #Change the variable name from CAM standard to what is
+        # Change the variable name from CAM standard to what is
         # listed in variable defaults for this observation field
         if self.adf.compare_obs:
             field = self.ref_var_nam[field]
@@ -174,7 +174,7 @@ def load_reference_timeseries_da(self, field):
         else:
             add_offset, scale_factor = self.get_value_converters(self.ref_case_label, field)
 
-        return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor)
+        return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor, **kwargs)
 
 #------------------
 
@@ -305,7 +305,7 @@ def load_reference_regrid_da(self, case, field, **kwargs):
 
 #------------------
 
-    
+
 # DataSet and DataArray load
 #---------------------------
 # TODO, make uxarray options fo all of these fuctions.
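The change in patch 3 is mechanical but worth spelling out: `**kwargs` is accepted at each public loader and forwarded untouched to `load_da`, so grid-related options such as `mesh_file` or `unstructured_plotting` can reach the bottom layer without new parameters at every level. A toy version of the pattern (function names and file lookup here are illustrative, not the real AdfData API):

def load_da(fils, varname, scale_factor=1, add_offset=0, **kwargs):
    # Options ride along in kwargs and are consumed only where needed.
    mesh_file = kwargs.get("mesh_file")  # hypothetical option
    print(f"loading {varname} from {len(fils)} file(s), mesh={mesh_file}")

def load_timeseries_da(case, varname, **kwargs):
    fils = [f"{case}.{varname}.nc"]      # placeholder file lookup
    return load_da(fils, varname, **kwargs)

load_timeseries_da("caseA", "GPP", mesh_file="ne30.grid.nc")  # hypothetical mesh file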
From 71d1b7cd6cc8fe80cd020f173dfcd4e3856e03a5 Mon Sep 17 00:00:00 2001
From: wwieder
Date: Fri, 14 Nov 2025 08:23:03 -0700
Subject: [PATCH 4/7] working regional ts plots

---
 scripts/plotting/regional_timeseries.py | 123 ++++++++++++------------
 1 file changed, 62 insertions(+), 61 deletions(-)

diff --git a/scripts/plotting/regional_timeseries.py b/scripts/plotting/regional_timeseries.py
index e6ee3f9c4..d57b27341 100644
--- a/scripts/plotting/regional_timeseries.py
+++ b/scripts/plotting/regional_timeseries.py
@@ -1,10 +1,16 @@
+"""
+Use time series files to produce regional mean time series plots for LDF web site.
+
+"""
 from pathlib import Path
+from types import NoneType
 import numpy as np
 import yaml
 import xarray as xr
 import uxarray as ux
 import matplotlib.pyplot as plt
 import cartopy.crs as ccrs
+import plotting_functions as pf
 import warnings # use to warn user about missing files.
 
 def my_formatwarning(msg, *args, **kwargs):
@@ -16,18 +22,15 @@ def my_formatwarning(msg, *args, **kwargs):
 
 warnings.formatwarning = my_formatwarning
 
-def regional_climatology(adfobj):
+def regional_timeseries(adfobj):
 
     """
-    load climo file, subset for each region and each var
+    load timeseries file, subset for each region and each var
+    calculate regional annual mean time series
     Make a combined plot, save it, add it to website.
 
-    NOTES (from Meg): There are still a lot of to-do's with this script!
-    - convert region definition netCDF file to a yml, read that in instead
-    - increase number of variables that have a climo plotted; I've just
-    added two, but left room for more in the subplots
-    - check that all variables have climo files; likely to break otherwise
-    - add option so that this works with a structured grid too # ...existing code...
+    TODO
+    - check that all variables have TS files; likely to break otherwise
     if found:
         #Check if observations dataset name is specified:
         if "obs_name" in default_var_dict:
            obs_name = default_var_dict["obs_name"]
         else:
            obs_name = obs_file_path.name
@@ -44,14 +47,12 @@ def regional_climatology(adfobj):
         obs_data[field] = xr.open_mfdataset([str(obs_file_path)], combine="by_coords")
         plot_obs[field] = True
     # ...existing code...
-    - make sure that climo's are being plotted with the preferred units
-    - add in observations (need to regrid/area weight)
-    - need to figure out how to display the figures on the website
+    - make sure that TS's are being plotted with the preferred units
 
     """
 
     #Notify user that script has started:
-    print("\n --- Generating regional climatology plots... ---")
+    print("\n --- Generating regional time series plots... ---")
 
     # Gather ADF configurations
     # plot_loc = adfobj.get_basic_info('cam_diag_plot_loc')
@@ -74,19 +75,19 @@ def regional_climatology(adfobj):
     base_nickname = adfobj.get_baseline_info('case_nickname')
 
     region_list = adfobj.region_list
-    #TODO, make it easier for users to decide on these?
-    regional_climo_var_list = ['TSA','PREC','ELAI',
-                               'FSDS','FLDS','SNOWDP','ASA',
-                               'FSH','QRUNOFF_TO_COUPLER','ET','FCTR',
-                               'GPP','TWS','FCEV','FAREA_BURNED',
-                               ]
+    #TODO, make it easier for users to decide on these by adding to the yaml file
+    regional_ts_var_list = ['TSA','PREC','ELAI',
+                            'FSDS','FLDS','SNOWDP','ASA',
+                            'FSH','QRUNOFF_TO_COUPLER','ET','FCTR',
+                            'GPP','TWS','FCEV','FAREA_BURNED',
+                            ]
 
     # Extract variables:
     baseline_name = adfobj.get_baseline_info("cam_case_name", required=True)
-    input_climo_baseline = Path(adfobj.get_baseline_info("cam_climo_loc", required=True))
+    input_ts_baseline = Path(adfobj.get_baseline_info("cam_ts_loc", required=True))
     # TODO hard wired for single case name:
     case_name = adfobj.get_cam_info("cam_case_name", required=True)[0]
-    input_climo_case = Path(adfobj.get_cam_info("cam_climo_loc", required=True)[0])
+    input_ts_case = Path(adfobj.get_cam_info("cam_ts_loc", required=True)[0])
 
     # Get grid file
     mesh_file = adfobj.mesh_files["baseline_mesh_file"]
@@ -134,34 +135,26 @@ def regional_climatology(adfobj):
     var_obs_dict = adfobj.var_obs_dict
 
     # First, load all variable data once (instead of inside nested loops)
-    for field in regional_climo_var_list:
-        # Load the global climatology for this variable
+    for field in regional_ts_var_list:
+        # Load the global time series for this variable
         # TODO unit conversions are not handled consistently here
-        base_data[field] = adfobj.data.load_reference_climo_da(baseline_name, field, **kwargs)
-        case_data[field] = adfobj.data.load_climo_da(case_name, field, **kwargs)
+        base_data[field] = adfobj.data.load_reference_timeseries_da(field, **kwargs)
+        case_data[field] = adfobj.data.load_timeseries_da(case_name, field, **kwargs)
 
         if type(base_data[field]) is type(None):
             print('Missing file for ', field)
             continue
         else:
-            # get area and landfrac for base and case climo datasets
-            mdataset = adfobj.data.load_climo_dataset(case_name, field, **kwargs)
-            area_c = mdataset.area.isel(time=0) # drop time dimension to avoid confusion
-            landfrac_c = mdataset.landfrac.isel(time=0)
-            # Redundant, but we'll do this for consistency:
-            # TODO, won't handle loading the basecase this way
-            #area_b = adfobj.data.load_reference_climo_da(baseline_name, 'area', **kwargs)
-            #landfrac_b = adfobj.data.load_reference_climo_da(baseline_name, 'landfrac', **kwargs)
-
-            mdataset_base = adfobj.data.load_reference_climo_dataset(baseline_name, field, **kwargs)
-            area_b = mdataset_base.area.isel(time=0)
-            landfrac_b = mdataset_base.landfrac.isel(time=0)
+            # get area and landfrac for base and case ts datasets
+            mdataset = adfobj.data.load_timeseries_dataset(case_name, field, **kwargs)
+            # TODO: check this for structured grids
+            area_c = mdataset.area #.isel(time=0) # drop time dimension to avoid confusion
+            landfrac_c = mdataset.landfrac #.isel(time=0)
+
+            mdataset_base = adfobj.data.load_reference_timeseries_dataset(field, **kwargs)
+            area_b = mdataset_base.area #.isel(time=0)
+            landfrac_b = mdataset_base.landfrac #.isel(time=0)
 
-            # calculate weights
-            # WW: 1) should actual weight calculation be done after subsetting to region?
-            # 2) Does this work as intended for different resolutions?
-            # wgt = area * landfrac # / (area * landfrac).sum()
 
         #-----------------------------------------
         # Now, check if observations are to be plotted for this variable
         plot_obs[field] = False
@@ -239,7 +232,7 @@ def regional_climatology(adfobj):
     #-----------------------------------------
     # Loop over regions for selected variable
     for iReg in range(len(region_list)):
-        print(f"\n\t - Plotting regional climatology for: {region_list[iReg]}")
+        print(f"\n\t - Plotting regional timeseries for: {region_list[iReg]}")
         # regionDS_thisRg = regionDS.isel(region=region_indexList[iReg])
         box_west, box_east, box_south, box_north, region_category = get_region_boundaries(regions, region_list[iReg])
         ## Set up figure
@@ -248,8 +241,8 @@ def regional_climatology(adfobj):
         axs = axs.ravel()
 
         plt_counter = 1
-        for field in regional_climo_var_list:
-            mdataset = adfobj.data.load_climo_dataset(case_name, field, **kwargs)
+        for field in regional_ts_var_list:
+            mdataset = adfobj.data.load_timeseries_dataset(case_name, field, **kwargs)
 
             if type(base_data[field]) is type(None):
                 continue
             else:
@@ -259,12 +252,16 @@ def regional_climatology(adfobj):
                     base_var,wgt_sub = getRegion_uxarray(uxgrid, base_data, field, area_b, landfrac_b,
                                                          box_west, box_east,
                                                          box_south, box_north)
-                    base_var_wgtd = np.sum(base_var * wgt_sub, axis=-1) # WW not needed?/ np.sum(wgt_sub)
-
+                    base_var_wgtd = np.sum(base_var * wgt_sub, axis=-1)
+
                     case_var,wgt_sub = getRegion_uxarray(uxgrid, case_data, field, area_c, landfrac_c,
                                                          box_west, box_east,
                                                          box_south, box_north)
-                    case_var_wgtd = np.sum(case_var * wgt_sub, axis=-1) #/ np.sum(wgt_sub)
+                    case_var_wgtd = np.sum(case_var * wgt_sub, axis=-1)
+
+                    # annually averaged
+                    base_var_ann = pf.annual_mean(base_var_wgtd, whole_years=True, time_name="time")
+                    case_var_ann = pf.annual_mean(case_var_wgtd, whole_years=True, time_name="time")
 
                 else: # regular lat/lon grid
                     # xarray output is time*lat*lon, sum over lat/lon
@@ -280,6 +277,10 @@ def regional_climatology(adfobj):
                                                          area_c, landfrac_c)
                     case_var_wgtd = np.sum(case_var * wgt_sub, axis=(1,2))
 
+                    # annually averaged
+                    base_var_ann = pf.annual_mean(base_var_wgtd, whole_years=True, time_name="time")
+                    case_var_ann = pf.annual_mean(case_var_wgtd, whole_years=True, time_name="time")
+
                 # Read in observations, if available
                 if plot_obs[field] == True:
                     # obs output is time*lat*lon, sum over lat/lon
@@ -353,37 +354,37 @@ def regional_climatology(adfobj):
                             map_ax.set_extent([-180, 179, -89, -60],crs=ccrs.PlateCarree())
                     # End if for plotting map extent
 
-                ## Plot the climatology:
+                ## Plot the timeseries:
                 if type(base_data[field]) is type(None):
                     print('Missing file for ', field)
                     continue
                 else:
-                    axs[plt_counter].plot(np.arange(12)+1, case_var_wgtd,
-                                          label=case_nickname, linewidth=2)
-                    axs[plt_counter].plot(np.arange(12)+1, base_var_wgtd,
+                    # WW set time axis to years
+                    axs[plt_counter].plot(base_var_ann.year, base_var_ann,
                                           label=base_nickname, linewidth=2)
+                    axs[plt_counter].plot(case_var_ann.year, case_var_ann,
+                                          label=case_nickname, linewidth=2)
-                    if plot_obs[field] == True:
-                        axs[plt_counter].plot(np.arange(12)+1, obs_var_wgtd,
-                                              label=obs_name[field], color='black', linewidth=2)
+                    # TODO, reinstate obs plotting once obs TS files are available
+                    # if plot_obs[field] == True:
+                    #     axs[plt_counter].plot(np.arange(12)+1, obs_var_wgtd,
+                    #                           label=obs_name[field], color='black', linewidth=2)
                     axs[plt_counter].set_title(field)
                     axs[plt_counter].set_ylabel(base_data[field].units)
-                    axs[plt_counter].set_xticks(np.arange(1, 13, 2))
-                    axs[plt_counter].legend()
-
+                    axs[plt_counter].legend()
 
                 plt_counter = plt_counter+1
 
         fig.subplots_adjust(hspace=0.3, wspace=0.3)
 
         # Save out figure
-        plot_loc = Path(plot_locations[0]) / f'{region_list[iReg]}_plot_RegionalClimo_Mean.{plot_type}'
+        plot_loc = Path(plot_locations[0]) / f'{region_list[iReg]}_plot_RegionalTimeSeries_Mean.{plot_type}'
 
         # Check redo_plot. If set to True: remove old plots, if they already exist:
         if (not redo_plot) and plot_loc.is_file():
             #Add already-existing plot to website (if enabled):
             adfobj.debug_log(f"'{plot_loc}' exists and clobber is false.")
             adfobj.add_website_data(plot_loc, region_list[iReg], None, season=None, multi_case=True,
-                                    category=region_category, non_season=True, plot_type = "RegionalClimo")
+                                    category=region_category, non_season=True, plot_type = "RegionalTimeSeries")
 
             #Continue to next iteration:
             continue
         elif (redo_plot):
             if plot_loc.is_file():
                 plot_loc.unlink()
 
         fig.savefig(plot_loc, bbox_inches='tight', facecolor='white')
         plt.close()
 
         #Add plot to website (if enabled):
         adfobj.add_website_data(plot_loc, region_list[iReg], None, season=None, multi_case=True,
-                                non_season=True, category=region_category, plot_type = "RegionalClimo")
+                                non_season=True, category=region_category, plot_type = "RegionalTimeSeries")
 
     return
 
-print("\n --- Regional climatology plots generated successfully! ---")
+print("\n --- Regional time series plots generated successfully! ---")
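The main numerical addition in patch 4 is reducing each weighted monthly series to annual means via `pf.annual_mean(..., whole_years=True, time_name="time")`. A common hand-rolled equivalent weights each month by its length before averaging; this is a sketch only, not the ADF implementation:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", periods=36, freq="MS")
ts = xr.DataArray(np.random.rand(36), coords={"time": time}, dims="time")

# Weight each month by its number of days, then average within each year.
days = ts.time.dt.days_in_month
annual = (ts * days).groupby("time.year").sum() / days.groupby("time.year").sum()
print(annual.year.values)  # [2000 2001 2002]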
From a7f78928503ba3c800637ce671aaf78be5ac6e9e Mon Sep 17 00:00:00 2001
From: wwieder
Date: Fri, 14 Nov 2025 08:23:58 -0700
Subject: [PATCH 5/7] improved load functions from ADF

---
 lib/adf_dataset.py | 238 +++++++++++++++++++++++----------------------
 1 file changed, 123 insertions(+), 115 deletions(-)

diff --git a/lib/adf_dataset.py b/lib/adf_dataset.py
index bba19ee21..18544b7a4 100644
--- a/lib/adf_dataset.py
+++ b/lib/adf_dataset.py
@@ -1,13 +1,9 @@
 from pathlib import Path
 import xarray as xr
 import uxarray as ux
-
+#import adf_utils as utils
 import warnings # use to warn user about missing files
-
-def my_formatwarning(msg, *args, **kwargs):
-    # ignore everything except the message
-    return str(msg) + '\n'
-warnings.formatwarning = my_formatwarning
+#warnings.formatwarning = utils.my_formatwarning
 
 # "reference data"
 # It is often just a "baseline case",
@@ -48,8 +44,7 @@ class AdfData:
     def __init__(self, adfobj):
         self.adf = adfobj # provides quick access to the AdfDiag object
         # paths
-        #self.model_rgrid_loc = adfobj.get_basic_info("cam_climo_regrid_loc", required=True)
-        #self.model_rgrid_loc = adfobj.get_cam_info("cam_climo_regrid_loc")
+        self.model_rgrid_loc = adfobj.get_basic_info("cam_regrid_loc", required=True)
 
         # variables (and info for unit transform)
         # use self.adf.diag_var_list and self.adf.self.adf.variable_defaults
@@ -66,11 +61,13 @@ def __init__(self, adfobj):
     def set_reference(self):
         """Set attributes for reference (aka baseline) data location, names, and variables."""
         if self.adf.compare_obs:
-            self.ref_var_loc = {v: self.adf.var_obs_dict[v]['obs_file'] for v in self.adf.var_obs_dict}
-            self.ref_labels = {v: self.adf.var_obs_dict[v]['obs_name'] for v in self.adf.var_obs_dict}
-            self.ref_var_nam = {v: self.adf.var_obs_dict[v]['obs_var'] for v in self.adf.var_obs_dict}
-            self.ref_case_label = "Obs"
-            if not self.adf.var_obs_dict:
+            if "var_obs_dict" in dir(self.adf):
+                self.ref_var_loc = {v: self.adf.var_obs_dict[v]['obs_file'] for v in self.adf.var_obs_dict}
+                self.ref_labels = {v: self.adf.var_obs_dict[v]['obs_name'] for v in self.adf.var_obs_dict}
+                self.ref_var_nam = {v: self.adf.var_obs_dict[v]['obs_var'] for v in self.adf.var_obs_dict}
+                self.ref_case_label = "Obs"
+            else:
+                #if not self.adf.var_obs_dict:
                 warnings.warn("\t WARNING: reference is observations, but no observations found to plot against.")
         else:
             self.ref_var_loc = {}
@@ -97,13 +94,30 @@ def set_ref_var_loc(self):
     # Test case(s)
     def get_timeseries_file(self, case, field):
         """Return list of test time series files"""
+        ts_locs = self.adf.get_cam_info("cam_ts_loc", required=True) # list of paths (could be multiple cases)
         caseindex = (self.case_names).index(case)
-        ts_locs = self.adf.get_cam_info("cam_ts_loc")
-
         ts_loc = Path(ts_locs[caseindex])
         ts_filenames = f'{case}.*.{field}.*nc'
         ts_files = sorted(ts_loc.glob(ts_filenames))
         return ts_files
+
+    def load_timeseries_dataset(self, case, field, **kwargs):
+        """Return a data set for variable field."""
+        fils = self.get_timeseries_file(case, field)
+        if not fils:
+            warnings.warn(f"\t WARNING: Did not find time series file(s) for case: {case}, variable: {field}")
+            return None
+        return self.load_dataset(fils, type="tseries", **kwargs)
+
+    def load_timeseries_da(self, case, variablename, **kwargs):
+        """Return DataArray from time series file(s).
+        Uses defaults file to convert units.
+        """
+        fils = self.get_timeseries_file(case, variablename)
+        if not fils:
+            warnings.warn(f"\t WARNING: Did not find case time series file(s), variable: {variablename}")
+            return None
+        return self.load_da(fils, variablename, case, **kwargs)
 
     # Reference case (baseline/obs)
     def get_ref_timeseries_file(self, field):
@@ -112,69 +126,30 @@ def get_ref_timeseries_file(self, field):
             warnings.warn("\t WARNING: ADF does not currently expect observational time series files.")
             return None
         else:
-            ts_loc = Path(self.adf.get_baseline_info("cam_ts_loc"))
+            ts_loc = Path(self.adf.get_baseline_info("cam_ts_loc", required=True))
             ts_filenames = f'{self.ref_case_label}.*.{field}.*nc'
             ts_files = sorted(ts_loc.glob(ts_filenames))
             return ts_files
-
-
-    def load_timeseries_dataset(self, fils, **kwargs):
-        """Return DataSet from time series file(s) and assign time to midpoint of interval"""
-        if (len(fils) == 0):
-            warnings.warn("\t WARNING: Input file list is empty.")
-            return None
-        elif (len(fils) > 1):
-            ds = xr.open_mfdataset(fils, decode_times=False)
-        else:
-            sfil = str(fils[0])
-            if not Path(sfil).is_file():
-                warnings.warn(f"\t WARNING: Expecting to find file: {sfil}")
-                return None
-            ds = xr.open_dataset(sfil, decode_times=False)
-        if ds is None:
-            warnings.warn(f"\t WARNING: invalid data on load_dataset")
-        # assign time to midpoint of interval (even if it is already)
-        if 'time_bnds' in ds:
-            t = ds['time_bnds'].mean(dim='nbnd')
-            t.attrs = ds['time'].attrs
-            ds = ds.assign_coords({'time':t})
-        elif 'time_bounds' in ds:
-            t = ds['time_bounds'].mean(dim='nbnd')
-            t.attrs = ds['time'].attrs
-            ds = ds.assign_coords({'time':t})
-        else:
-            warnings.warn("\t INFO: Timeseries file does not have time bounds info.")
-        return xr.decode_cf(ds)
-
-    def load_timeseries_da(self, case, variablename, **kwargs):
-        """Return DataArray from time series file(s).
-           Uses defaults file to convert units.
-        """
-        add_offset, scale_factor = self.get_value_converters(case, variablename)
-        fils = self.get_timeseries_file(case, variablename)
+
+    def load_reference_timeseries_dataset(self, field, **kwargs):
+        """Return a data set for variable field."""
+        case = self.ref_case_label
+        fils = self.get_ref_timeseries_file(field)
         if not fils:
-            warnings.warn(f"\t WARNING: Did not find case time series file(s), variable: {variablename}")
+            warnings.warn(f"\t WARNING: Did not find time series file(s) for case: {case}, variable: {field}")
             return None
-        return self.load_da(fils, variablename, add_offset=add_offset, scale_factor=scale_factor,**kwargs)
+        return self.load_dataset(fils, type="tseries", **kwargs)
 
     def load_reference_timeseries_da(self, field, **kwargs):
         """Return a DataArray time series to be used as reference (aka baseline)
            for variable field.
         """
+        case = self.ref_case_label
         fils = self.get_ref_timeseries_file(field)
         if not fils:
             warnings.warn(f"\t WARNING: Did not find reference time series file(s), variable: {field}")
             return None
-        # Change the variable name from CAM standard to what is
-        # listed in variable defaults for this observation field
-        if self.adf.compare_obs:
-            field = self.ref_var_nam[field]
-            add_offset = 0
-            scale_factor = 1
-        else:
-            add_offset, scale_factor = self.get_value_converters(self.ref_case_label, field)
-
-        return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor, **kwargs)
+        return self.load_da(fils, field, case, **kwargs)
 
 #------------------
 
@@ -184,58 +159,66 @@ def load_reference_timeseries_da(self, field, **kwargs):
 #------------------
 
     # Test case(s)
-    def load_climo_da(self, case, variablename, **kwargs):
-        """Return DataArray from climo file"""
-        add_offset, scale_factor = self.get_value_converters(case, variablename)
-        fils = self.get_climo_file(case, variablename)
-        return self.load_da(fils, variablename, add_offset=add_offset, scale_factor=scale_factor, **kwargs)
-
-
-    def load_climo_dataset(self, case, field, **kwargs):
-        """Return a data set to be used as reference (aka baseline) for variable field."""
-        fils = self.get_climo_file(case, field)
-        if not fils:
-            return None
-        return self.load_dataset(fils, **kwargs)
-
-
     def get_climo_file(self, case, variablename):
         """Retrieve the climo file path(s) for variablename for a specific case."""
-        caseindex = (self.case_names).index(case) # the entry for specified case
         a = self.adf.get_cam_info("cam_climo_loc", required=True) # list of paths (could be multiple cases)
+        caseindex = (self.case_names).index(case) # the entry for specified case
         model_cl_loc = Path(a[caseindex])
         return sorted(model_cl_loc.glob(f"{case}_{variablename}_climo.nc"))
-
-
-    # Reference case (baseline/obs)
-    def load_reference_climo_da(self, case, variablename, **kwargs):
-        """Return DataArray from reference (aka baseline) climo file"""
-        add_offset, scale_factor = self.get_value_converters(case, variablename)
-        fils = self.get_reference_climo_file(variablename)
-        return self.load_da(fils, variablename, add_offset=add_offset, scale_factor=scale_factor, **kwargs)
-
-    def load_reference_climo_dataset(self, case, field, **kwargs):
-        """Return a data set to be used as reference (aka baseline) for variable field."""
-        fils = self.get_reference_climo_file(field)
+
+    def load_climo_dataset(self, case, variablename, **kwargs):
+        """Return Dataset for climo of field"""
+        fils = self.get_climo_file(case, variablename)
         if not fils:
+            warnings.warn(f"\t WARNING: Did not find climo file(s) for case: {case}, variable: {variablename}")
             return None
         return self.load_dataset(fils, **kwargs)
+
+    def load_climo_da(self, case, variablename, **kwargs):
+        """Return DataArray from climo file"""
+        fils = self.get_climo_file(case, variablename)
+        if not fils:
+            warnings.warn(f"\t WARNING: Did not find climo file(s) for case: {case}, variable: {variablename}")
+            return None
+        return self.load_da(fils, variablename, case, **kwargs)
 
+    # Reference case (baseline/obs)
     def get_reference_climo_file(self, var):
         """Return a list of files to be used as reference (aka baseline) for variable var."""
         if self.adf.compare_obs:
             fils = self.ref_var_loc.get(var, None)
             return [fils] if fils is not None else None
         ref_loc = self.adf.get_baseline_info("cam_climo_loc")
+        if not ref_loc:
+            return None
         # NOTE: originally had this looking for *_baseline.nc
         fils = sorted(Path(ref_loc).glob(f"{self.ref_case_label}_{var}_climo.nc"))
         if fils:
            return fils
        return None
+
+    def load_reference_climo_dataset(self, field, **kwargs):
+        """Return Dataset for climo of field"""
+        case = self.ref_case_label
+        fils = self.get_reference_climo_file(field)
+        if not fils:
+            warnings.warn(f"\t WARNING: Did not find climo file(s) for case: {case}, variable: {field}")
+            return None
+        return self.load_dataset(fils, **kwargs)
+
+    def load_reference_climo_da(self, field, **kwargs):
+        """Return DataArray from reference (aka baseline) climo file"""
+        case = self.ref_case_label
+        fils = self.get_reference_climo_file(field)
+        if not fils:
+            warnings.warn(f"\t WARNING: Did not find climo file(s) for case: {case}, variable: {field}")
+            return None
+        return self.load_da(fils, field, case, **kwargs)
 
 #------------------
+
     # Regridded files
 #------------------
 
@@ -258,17 +241,17 @@ def load_regrid_dataset(self, case, field, **kwargs):
 
     def load_regrid_da(self, case, field, **kwargs):
         """Return a data array to be used as reference (aka baseline) for variable field."""
-        add_offset, scale_factor = self.get_value_converters(case, field)
         fils = self.get_regrid_file(case, field)
         if not fils:
             warnings.warn(f"\t WARNING: Did not find regrid file(s) for case: {case}, variable: {field}")
             return None
-        return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor, **kwargs)
+        return self.load_da(fils, field, case, **kwargs)
 
 
     # Reference case (baseline/obs)
-    def get_ref_regrid_file(self, case, field):
+    def get_ref_regrid_file(self, field):
         """Return list of reference regridded files"""
+        case = self.ref_case_label
         if self.adf.compare_obs:
             obs_loc = self.ref_var_loc.get(field, None)
             if obs_loc:
@@ -281,43 +264,41 @@ def get_ref_regrid_file(self, field):
         return fils
 
 
-    def load_reference_regrid_dataset(self, case, field, **kwargs):
+    def load_reference_regrid_dataset(self, field, **kwargs):
         """Return a data set to be used as reference (aka baseline) for variable field."""
-        fils = self.get_ref_regrid_file(case, field)
+        case = self.ref_case_label
+        fils = self.get_ref_regrid_file(field)
         if not fils:
-            warnings.warn(f"\t DATASET WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
+            warnings.warn(f"\t WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
             return None
         return self.load_dataset(fils, **kwargs)
 
-    def load_reference_regrid_da(self, case, field, **kwargs):
+    def load_reference_regrid_da(self, field, **kwargs):
         """Return a data array to be used as reference (aka baseline) for variable field."""
-        add_offset, scale_factor = self.get_value_converters(case, field)
-        fils = self.get_ref_regrid_file(case, field)
+        case = self.ref_case_label
+        fils = self.get_ref_regrid_file(field)
         if not fils:
-            warnings.warn(f"\t DATAARRAY WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
+            warnings.warn(f"\t WARNING: Did not find regridded file(s) for case: {case}, variable: {field}")
             return None
         #Change the variable name from CAM standard to what is
         # listed in variable defaults for this observation field
         if self.adf.compare_obs:
             field = self.ref_var_nam[field]
-        return self.load_da(fils, field, add_offset=add_offset, scale_factor=scale_factor, **kwargs)
+        return self.load_da(fils, field, case, **kwargs)
 
 #------------------
 
-
 # DataSet and DataArray load
 #---------------------------
-# TODO, make uxarray options fo all of these fuctions.
-# What's the most robust way to handle this?
 
 # Load DataSet
-    def load_dataset(self, fils, **kwargs):
+    def load_dataset(self, fils, type=None, **kwargs):
         """Return xarray DataSet from file(s)"""
         unstructured_plotting = kwargs.get("unstructured_plotting",False)
-        if not fils:
+        if (len(fils) == 0):
             warnings.warn("\t WARNING: Input file list is empty.")
             return None
         elif (len(fils) > 1):
@@ -327,6 +308,7 @@ def load_dataset(self, fils, **kwargs):
             if not Path(sfil).is_file():
                 warnings.warn(f"\t WARNING: Expecting to find file: {sfil}")
                 return None
+            # Open unstructured data if requested
             if unstructured_plotting:
                 if "mesh_file" not in kwargs:
                     msg = "\t WARNING: Unstructured plotting is requested, but no available mesh file."
@@ -339,24 +321,50 @@ def load_dataset(self, fils, **kwargs):
             ds = xr.open_dataset(sfil)
         if ds is None:
             warnings.warn(f"\t WARNING: invalid data on load_dataset")
+        if type == "tseries":
+            # assign time to midpoint of interval (even if it is already)
+            if 'time_bnds' in ds:
+                t = ds['time_bnds'].mean(dim='nbnd')
+                t.attrs = ds['time'].attrs
+                ds = ds.assign_coords({'time':t})
+            # TODO check this for old land model output
+            elif 'time_bounds' in ds and 'hist_interval' in ds['time_bounds'].dims:
+                t = ds['time_bounds'].mean(dim='hist_interval')#.value
+                t.attrs = ds['time'].attrs
+                ds = ds.assign_coords({'time':t})
+            elif 'time_bounds' in ds:
+                t = ds['time_bounds'].mean(dim='nbnd')
+                t.attrs = ds['time'].attrs
+                ds = ds.assign_coords({'time':t})
+            else:
+                warnings.warn("\t INFO: dataset does not have time bounds info.")
         return ds
 
 # Load DataArray
-    def load_da(self, fils, variablename, **kwargs):
+    def load_da(self, fils, variablename, case, type=None, **kwargs):
         """Return xarray DataArray from files(s) w/ optional scale factor, offset, and/or new units"""
         ds = self.load_dataset(fils, **kwargs)
         if ds is None:
             warnings.warn(f"\t WARNING: Load failed for {variablename}")
             return None
         da = (ds[variablename]).squeeze()
-        scale_factor = kwargs.get('scale_factor', 1)
-        add_offset = kwargs.get('add_offset', 0)
+        units = da.attrs.get('units', '--')
+        add_offset, scale_factor = self.get_value_converters(case, variablename)
         da = da * scale_factor + add_offset
+        da.attrs['scale_factor'] = scale_factor
+        da.attrs['add_offset'] = add_offset
+        da = self.update_unit(variablename, da, units)
+        da.attrs['original_unit'] = units
+        return da
+
+    def update_unit(self, variablename, da, units):
         if variablename in self.adf.variable_defaults:
             vres = self.adf.variable_defaults[variablename]
-            da.attrs['units'] = vres.get("new_unit", da.attrs.get('units', 'none'))
+            da.attrs['units'] = vres.get("new_unit", units)
         else:
-            da.attrs['units'] = 'none'
+            da.attrs['units'] = '--'
         return da
 
 # Get variable conversion defaults, if applicable
@@ -389,4 +397,4 @@ def get_value_converters(self, case, variablename):
 
-
+
\ No newline at end of file
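The `type="tseries"` branch patch 5 folds into `load_dataset` re-centers the time coordinate: history output stamps each record at the end of its averaging interval, so the midpoint of `time_bnds`/`time_bounds` is assigned instead. A stand-alone illustration with a synthetic bounds array:

import numpy as np
import xarray as xr

starts = np.array([0.0, 31.0, 59.0])   # days since start of run
ends = np.array([31.0, 59.0, 90.0])
ds = xr.Dataset(
    {"time_bnds": (("time", "nbnd"), np.stack([starts, ends], axis=1))},
    coords={"time": ends},              # history files stamp the interval end
)
t = ds["time_bnds"].mean(dim="nbnd")    # midpoints: 15.5, 45.0, 74.5
ds = ds.assign_coords({"time": t})
print(ds["time"].values)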
From 7fb295ec7c455537a6a6469dda92149bdadfa34b Mon Sep 17 00:00:00 2001
From: wwieder
Date: Fri, 14 Nov 2025 08:56:03 -0700
Subject: [PATCH 6/7] updated load_reference_ds calls

---
 config_clm_structured_plots.yaml              | 44 ++++++++++++--------
 scripts/plotting/global_latlon_map.py         |  9 ++--
 scripts/plotting/polar_map.py                 |  6 +--
 scripts/plotting/regional_climatology.py      |  4 +-
 scripts/regridding/regrid_and_vert_interp.py  |  4 +-
 5 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/config_clm_structured_plots.yaml b/config_clm_structured_plots.yaml
index 4e9a577d4..12508b64d 100644
--- a/config_clm_structured_plots.yaml
+++ b/config_clm_structured_plots.yaml
@@ -99,6 +99,9 @@ diag_basic_info:
     #Uncomment and change path for custom variable defaults file
     defaults_file: lib/ldf_variable_defaults.yaml
 
+    # location of land regions YAML file (used by the regional climatology and time series plots)
+    regions_file: lib/regions_lnd.yaml
+
     #Longitude line on which to center all lat/lon maps.
     #If this config option is missing then the central
     #longitude will default to 180 degrees E.
@@ -137,13 +140,13 @@ diag_cam_climo:
     cam_overwrite_climo: false
 
     #Name of CAM case (or CAM run name):
-    cam_case_name: ctsm53065_54surfdata_PPEcal115_115_HIST
+    cam_case_name: ctsm5.4.CMIP7_ciso_ctsm5.3.075_f09_124_HIST
 
     #Case nickname
     #NOTE: if nickname starts with '0' - nickname must be in quotes!
    # ie '026a' as opposed to 026a
     #If missing or left blank, will default to cam_case_name
-    case_nickname: 'PPE_115_HIST'
+    case_nickname: 'ctsm5.4_124_HIST'
 
     #Location of CAM history (h0) files:
     cam_hist_loc: /glade/derecho/scratch/wwieder/archive/${diag_cam_climo.cam_case_name}/lnd/hist/
@@ -190,7 +193,7 @@ diag_cam_baseline_climo:
     # eg. cam.h0 or ocn.pop.h.ecosys.nday1 or hist_str: [cam.h2,cam.h0]
     # Only affects timeseries as everything else uses the created timeseries
     # Default:
-    hist_str: clm2.h0
+    hist_str: clm2.h0a
 
     #Calculate cam baseline climatologies?
     #If false, the climatology files will not be created:
@@ -201,13 +204,13 @@ diag_cam_baseline_climo:
     cam_overwrite_climo: false
 
     #Name of CAM baseline case:
-    cam_case_name: ctsm53041_54surfdata_PPEbaseline_101_HIST
+    cam_case_name: ctsm5.4_5.3.068_PPEcal115f09_118_HIST
 
     #Baseline case nickname
     #NOTE: if nickname starts with '0' - nickname must be in quotes!
    # ie '026a' as opposed to 026a
     #If missing or left blank, will default to cam_case_name
-    case_nickname: 'PPE_101_HIST'
+    case_nickname: 'ctsm5.4_118_HIST'
 
     #Location of CAM baseline history (h0) files:
     #Example test files
@@ -281,6 +284,7 @@ plotting_scripts:
   - global_latlon_map
   - global_mean_timeseries_lnd
   - polar_map
+  - regional_timeseries
  - regional_climatology

#List of variables that will be processed:
diag_var_list:
@@ -313,7 +317,13 @@ diag_var_list:
  - FAREA_BURNED
  - DSTFLXT
  - MEG_isoprene
-
+  - TWS
+  - GRAINC_TO_FOOD
+  - C13_GPP_pm
+  - C13_TOTVEGC_pm
+  - C14_GPP_pm
+  - C14_TOTVEGC_pm

region_list:
  - Global
  - N Hemisphere Land
  - S Hemisphere Land
  - Polar
  - Alaskan Arctic
  - Canadian Arctic
-  #- Greenland
+  - Greenland
  - Russian Arctic
  #- Antarctica
  - Alaska
-  #- Northwest Canada
-  #- Central Canada
+  - Northwest Canada
+  - Central Canada
  - Eastern Canada
  - Northern Europe
  - Western Siberia
  - Eastern Siberia
  - Western US
-  #- Central US
-  #- Eastern US
-  #- Europe
-  #- Mediterranean
-  #- Central America
+  - Central US
+  - Eastern US
+  - Europe
+  - Mediterranean
+  - Central America
  - Amazonia
  - Central Africa
  - Indonesia
  - Sahel
  - Southern Africa
  - India
-  #- Indochina
+  - Indochina
  #- Sahara Desert
-  #- Arabian Peninsula
-  #- Australia
+  - Arabian Peninsula
+  - Australia
  #- Central Asia ## Was Broken... probably because there were two?
  #- Mongolia
  #- Tibetan Plateau

diff --git a/scripts/plotting/global_latlon_map.py b/scripts/plotting/global_latlon_map.py
index 0bb4b2a9b..60b5b4479 100644
--- a/scripts/plotting/global_latlon_map.py
+++ b/scripts/plotting/global_latlon_map.py
@@ -195,17 +195,16 @@ def global_latlon_map(adfobj):
         if unstruct_plotting:
             mesh_file = adfobj.mesh_files["baseline_mesh_file"]
             kwargs["mesh_file"] = mesh_file
-            odata = adfobj.data.load_reference_climo_da(base_name, var, **kwargs)
+            odata = adfobj.data.load_reference_climo_da(var, **kwargs)
             unstruct_base = True
 
-            odataset = adfobj.data.load_reference_climo_dataset(base_name, var, **kwargs)
+            odataset = adfobj.data.load_reference_climo_dataset(var, **kwargs)
             area = odataset.area.isel(time=0)
             landfrac = odataset.landfrac.isel(time=0)
 
             # calculate weights
             wgt_base = area * landfrac / (area * landfrac).sum()
         else:
-            #odata = adfobj.data.load_reference_regrid_da(base_name, var, **kwargs)
-            odata = adfobj.data.load_reference_regrid_da(base_name, var)
+            odata = adfobj.data.load_reference_regrid_da(var)
 
         if odata is None:
             dmsg = f"\t WARNING: No regridded baseline file for {base_name} for variable `{var}`, global lat/lon mean plotting skipped."
@@ -632,7 +631,7 @@ def aod_latlon(adfobj):
     base_name = adfobj.data.ref_case_label
 
     # Gather reference variable data
-    ds_base = adfobj.data.load_reference_climo_da(base_name, var)
+    ds_base = adfobj.data.load_reference_climo_da(var)
     if ds_base is None:
         dmsg = f"\t WARNING: No baseline climo file for {base_name} for variable `{var}`, global lat/lon plots skipped."
         adfobj.debug_log(dmsg)

diff --git a/scripts/plotting/polar_map.py b/scripts/plotting/polar_map.py
index 8aa1f5b7a..8d87029e4 100644
--- a/scripts/plotting/polar_map.py
+++ b/scripts/plotting/polar_map.py
@@ -179,17 +179,17 @@ def polar_map(adfobj):
         if unstruct_plotting:
             mesh_file = adfobj.mesh_files["baseline_mesh_file"]
             kwargs["mesh_file"] = mesh_file
-            odata = adfobj.data.load_reference_climo_da(data_name, data_var, **kwargs)
+            odata = adfobj.data.load_reference_climo_da(data_var, **kwargs)
             #if ('ncol' in odata.dims) or ('lndgrid' in odata.dims):
             if 1==1:
                 unstruct_base = True
-                odataset = adfobj.data.load_reference_climo_dataset(data_name, data_var, **kwargs)
+                odataset = adfobj.data.load_reference_climo_dataset(data_var, **kwargs)
                 area = odataset.area.isel(time=0)
                 landfrac = odataset.landfrac.isel(time=0)
                 # calculate weights
                 wgt_base = area * landfrac / (area * landfrac).sum()
         else:
-            odata = adfobj.data.load_reference_regrid_da(data_name, data_var, **kwargs)
+            odata = adfobj.data.load_reference_regrid_da(data_var, **kwargs)
             if odata is None:
                 print("\t WARNING: Did not find any regridded reference climo files. Will try to skip.")
                 print(f"\t INFO: Data Location, dclimo_loc is {dclimo_loc}")

diff --git a/scripts/plotting/regional_climatology.py b/scripts/plotting/regional_climatology.py
index e6ee3f9c4..73d4d79d1 100644
--- a/scripts/plotting/regional_climatology.py
+++ b/scripts/plotting/regional_climatology.py
@@ -137,7 +137,7 @@ def regional_climatology(adfobj):
     for field in regional_climo_var_list:
         # Load the global climatology for this variable
         # TODO unit conversions are not handled consistently here
-        base_data[field] = adfobj.data.load_reference_climo_da(baseline_name, field, **kwargs)
+        base_data[field] = adfobj.data.load_reference_climo_da(field, **kwargs)
         case_data[field] = adfobj.data.load_climo_da(case_name, field, **kwargs)
 
         if type(base_data[field]) is type(None):
@@ -153,7 +153,7 @@ def regional_climatology(adfobj):
             #area_b = adfobj.data.load_reference_climo_da(baseline_name, 'area', **kwargs)
             #landfrac_b = adfobj.data.load_reference_climo_da(baseline_name, 'landfrac', **kwargs)
 
-            mdataset_base = adfobj.data.load_reference_climo_dataset(baseline_name, field, **kwargs)
+            mdataset_base = adfobj.data.load_reference_climo_dataset(field, **kwargs)
             area_b = mdataset_base.area.isel(time=0)
             landfrac_b = mdataset_base.landfrac.isel(time=0)

diff --git a/scripts/regridding/regrid_and_vert_interp.py b/scripts/regridding/regrid_and_vert_interp.py
index 44695d8a2..44b8fb99d 100644
--- a/scripts/regridding/regrid_and_vert_interp.py
+++ b/scripts/regridding/regrid_and_vert_interp.py
@@ -230,8 +230,8 @@ def regrid_and_vert_interp(adf):
                     #Write to debug log if enabled:
                     #adf.debug_log(f"regrid_example: tclim_fils (n={len(tclim_fils)}): {tclim_fils}")
-
-                    tclim_ds = adf.data.load_reference_climo_dataset(target, var)
+                    print(var)
+                    tclim_ds = adf.data.load_reference_climo_dataset(var)
                     if tclim_ds is None:
                         print(f"\t WARNING: regridding {var} failed, no climo file for case '{target}'. Continuing to next variable.")
                         continue
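Patch 6's new `regions_file` entry points at a YAML of named boxes that `get_region_boundaries()` (patch 2) unpacks into west/east/south/north plus an optional category. A hypothetical two-entry file in that `lat_bounds`/`lon_bounds`/`region_category` layout — the bounds values below are invented for illustration:

import yaml

regions_yaml = """
Amazonia:
  lat_bounds: [-15, 5]      # [south, north], hypothetical values
  lon_bounds: [-70, -50]    # [west, east], hypothetical values
  region_category: Tropics
Alaska:
  lat_bounds: [55, 72]
  lon_bounds: [-170, -140]
  region_category: High latitudes
"""
regions = yaml.safe_load(regions_yaml)
south, north = regions["Amazonia"]["lat_bounds"]
west, east = regions["Amazonia"]["lon_bounds"]
print(west, east, south, north)  # -70 -50 -15 5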
From 2fcb8b95d55966557b9a786facc968233f3a6cb3 Mon Sep 17 00:00:00 2001
From: wwieder
Date: Fri, 14 Nov 2025 11:42:08 -0700
Subject: [PATCH 7/7] add some LDF titles to websites

---
 lib/adf_web.py                                        | 6 +++---
 lib/website_templates/template.html                   | 6 +++---
 lib/website_templates/template_index.html             | 6 +++---
 lib/website_templates/template_mean_diag.html         | 6 +++---
 lib/website_templates/template_mean_tables.html       | 8 ++++----
 lib/website_templates/template_multi_case_index.html  | 6 +++---
 lib/website_templates/template_table.html             | 8 ++++----
 7 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/lib/adf_web.py b/lib/adf_web.py
index e39981f8f..fc971de8a 100644
--- a/lib/adf_web.py
+++ b/lib/adf_web.py
@@ -427,7 +427,7 @@ def jinja_enumerate(arg):
         #End if
 
         #Set main title for website:
-        main_title = "CAM Diagnostics"
+        main_title = "CLM Diagnostics"
 
         #List of seasons
         seasons = ["ANN","DJF","MAM","JJA","SON"]
@@ -718,7 +718,7 @@ def jinja_enumerate(arg):
     avail_external_packages = {'MDTF':'mdtf_html_path',
                                'CVDP':'cvdp_html_path'}
 
     #Construct index.html
-    index_title = "AMP Diagnostics Prototype"
+    index_title = "CLM Diagnostics"
     index_tmpl = jinenv.get_template('template_index.html')
     index_rndr = index_tmpl.render(title=index_title,
                                    case_name=web_data.case,
@@ -768,7 +768,7 @@ def jinja_enumerate(arg):
     #End for (model case loop)
 
     #Create multi-case site:
-    main_title = "ADF Diagnostics"
+    main_title = "CLM Diagnostics"
     main_tmpl = jinenv.get_template('template_multi_case_index.html')
     main_rndr = main_tmpl.render(title=main_title,
                                  case_sites=case_sites,

diff --git a/lib/website_templates/template.html b/lib/website_templates/template.html
index 3a06250cb..85dba9860 100644
--- a/lib/website_templates/template.html
+++ b/lib/website_templates/template.html
@@ -1,7 +1,7 @@
-    ADF {{var_title}}
+    CLM Diagnostics {{var_title}}
@@ -22,8 +22,8 @@
     • Links ▾
     • About

diff --git a/lib/website_templates/template_index.html b/lib/website_templates/template_index.html
index db49975e1..9e411c410 100644
--- a/lib/website_templates/template_index.html
+++ b/lib/website_templates/template_index.html
@@ -1,7 +1,7 @@
-    ADF Diagnostics
+    CLM Diagnostics
@@ -15,8 +15,8 @@
     • Links ▾
     • About

diff --git a/lib/website_templates/template_mean_diag.html b/lib/website_templates/template_mean_diag.html
index a62250af8..e748d93a7 100644
--- a/lib/website_templates/template_mean_diag.html
+++ b/lib/website_templates/template_mean_diag.html
@@ -1,7 +1,7 @@
-    ADF Mean Plots
+    LDF Mean Plots
@@ -22,8 +22,8 @@
     • Links ▾
     • About

diff --git a/lib/website_templates/template_mean_tables.html b/lib/website_templates/template_mean_tables.html
index 877d96e26..d6c53dee8 100644
--- a/lib/website_templates/template_mean_tables.html
+++ b/lib/website_templates/template_mean_tables.html
@@ -1,7 +1,7 @@
-    ADF Mean Tables
+    LDF Mean Tables
@@ -22,8 +22,8 @@
     • Links ▾
     • About
@@ -38,7 +38,7 @@
     Baseline Case:
-    AMWG Tables
+    LMWG Tables

diff --git a/lib/website_templates/template_multi_case_index.html b/lib/website_templates/template_multi_case_index.html
index 0ba930dec..4ff607e98 100644
--- a/lib/website_templates/template_multi_case_index.html
+++ b/lib/website_templates/template_multi_case_index.html
@@ -1,7 +1,7 @@
-    ADF diagnostics
+    CLM Diagnostics
@@ -12,8 +12,8 @@
     • Links ▾
     • About

diff --git a/lib/website_templates/template_table.html b/lib/website_templates/template_table.html
index 82158dd2c..581af913b 100644
--- a/lib/website_templates/template_table.html
+++ b/lib/website_templates/template_table.html
@@ -1,7 +1,7 @@
-    ADF Mean Tables
+    LDF Mean Tables
@@ -22,8 +22,8 @@
     • Links ▾
     • About
@@ -38,7 +38,7 @@
     Baseline Case:
-    AMWG Tables
+    LMWG Tables
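For completeness, the retitling in patch 7 flows through Jinja: `adf_web.py` loads each template with `jinenv.get_template(...)` and passes the new strings via `render(title=...)`. A toy render — the template string here is illustrative, not the real template.html:

from jinja2 import Template

# Stand-in for a template file; the real templates carry full page markup.
tmpl = Template("<title>{{ title }} {{ var_title }}</title>")
print(tmpl.render(title="CLM Diagnostics", var_title="GPP"))
# <title>CLM Diagnostics GPP</title>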