diff --git a/CHANGELOG.md b/CHANGELOG.md index 58f1af55a..0a7486d17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Changed * [743](https://github.com/dbekaert/RAiDER/pull/743) - Switched from HTTPS to DAP4 for retrieving MERRA2 data, and suppressed a warning for using DAP4 for GMAO data where doing so is not possible. +* []() - Changed GMAO and MERRA2 fetch implementations to use xarray, eliminating pydap as a dependency. ### Fixed * [759](https://github.com/dbekaert/RAiDER/pull/759) - Added a `browse` S3 tag for `.png` files when uploaded to AWS. diff --git a/docs/WeatherModels.md b/docs/WeatherModels.md index 8a48feb64..b21e110ec 100644 --- a/docs/WeatherModels.md +++ b/docs/WeatherModels.md @@ -71,7 +71,7 @@ ECMWF requires a license agreement to be able to access, download, and use their ## 4. NASA weather models (GMAO, MERRA2) -1. The Global Modeling and Assimilation Office (__[GMAO](https://www.nccs.nasa.gov/services/data-collections/coupled-products/geos5-forecast#:~:text=The%20Global%20Modeling%20and%20Assimilation,near%2Dreal%2Dtime%20production.)__) at NASA generates reanalysis weather models. GMAO datasets can also be accessed without a license agreement through the pyDAP interface implemented in RAiDER. GMAO has a horizontal grid spacing of approximately 33 km, and its projection is EPSG code 4326 (WGS-84). +1. The Global Modeling and Assimilation Office (__[GMAO](https://www.nccs.nasa.gov/services/data-collections/coupled-products/geos5-forecast#:~:text=The%20Global%20Modeling%20and%20Assimilation,near%2Dreal%2Dtime%20production.)__) at NASA generates reanalysis weather models. GMAO datasets can be accessed without a license agreement through the OPeNDAP interface implemented in RAiDER. GMAO has a horizontal grid spacing of approximately 33 km, and its projection is EPSG code 4326 (WGS-84). 2. The Modern-Era Retrospective analysis for Research and Applications, Version 2 (__[MERRA-2](https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/#:~:text=MERRA%2D2%20is%20the%20first,(say)%20Greenland%20and%20Antarctica.)__) provides data beginning in 1980. MERRA-2 is also produced by NASA and has a spatial resolution of about 50 km and a global projection (EPSG 4326, WGS-84). @@ -88,12 +88,4 @@ Reference: __[The Modern-Era Retrospective Analysis for Research and Application **Note**: the username and password represent the user's username and password. -4. Add the application `NASA GESDISC DATA ARCHIVE` by clicking on the `Applications->Authorized Apps` on the menu after logging into your Earthdata profile, and then scrolling down to the application `NASA GESDISC DATA ARCHIVE` to approve it. _This seems not required for GMAO for now, but recommended to do so for all OpenDAP-based weather models._ -5. Install the OpenDAP using pip: - - pip install pydap==3.2.1 - - - **Note**: this step has been included in the conda install of RAiDER, thus can be omitted if one uses the recommended conda install of RAiDER - - **Note**: PyDAP v3.2.1 is required for now (thus specified in the above pip install command) because the latest v3.2.2 (as of now) has a known [bug](https://colab.research.google.com/drive/1f_ss1Oa3VzgAOd_p8sgekdnLVE5NW6s5) in accessing and slicing the GMAO data. This bug is expected to be fixed in newer versions of PyDAP. +4. Add the application `NASA GESDISC DATA ARCHIVE` by clicking on the `Applications->Authorized Apps` on the menu after logging into your Earthdata profile, and then scrolling down to the application `NASA GESDISC DATA ARCHIVE` to approve it. _This seems not required for GMAO for now, but recommended to do so for all OPeNDAP-based weather models._ diff --git a/test/test_downloaders.py b/test/test_downloaders.py index 5eaa2d6f2..debfc53b1 100644 --- a/test/test_downloaders.py +++ b/test/test_downloaders.py @@ -13,26 +13,29 @@ from test import random_string -DATETIME = dt.datetime(2020, 1, 1, 0, 0, 0).replace(tzinfo=dt.timezone(offset=dt.timedelta())) BOUNDS = np.array([10, 10.2, -72, -72]) +DATETIME = dt.datetime(2020, 1, 1, 0, 0, 0).replace(tzinfo=dt.timezone(offset=dt.timedelta())) +DATETIME_GMAO_OLD = dt.datetime(2017, 1, 1, 0, 0, 0).replace(tzinfo=dt.timezone(offset=dt.timedelta())) @pytest.mark.long @pytest.mark.parametrize( - 'name,Model', + 'Model,time', [ - ('ERA5', ERA5), - ('ERA5T', ERA5T), - pytest.param('HRES', HRES, marks=pytest.mark.skip), # Paid access - ('GMAO', GMAO), - ('MERRA2', MERRA2), + pytest.param(ERA5, DATETIME, id='ERA5'), + pytest.param(ERA5T, DATETIME, id='ERA5T'), + pytest.param(HRES, DATETIME, id='HRES', marks=pytest.mark.skip), # Paid access + # HRRR: see test_weather_model.py + pytest.param(GMAO, DATETIME, id='GMAO new'), + pytest.param(GMAO, DATETIME_GMAO_OLD, id='GMAO old'), + pytest.param(MERRA2, DATETIME, id='MERRA2'), ], ) -def test_downloader(tmp_path: Path, name: str, Model: Type[WeatherModel]) -> None: - out_path = tmp_path / f'test_{name}.nc' +def test_downloader(tmp_path: Path, Model: Type[WeatherModel], time: dt.datetime) -> None: wm = Model() + out_path = tmp_path / f'test_{wm._Name}.nc' wm.set_latlon_bounds(BOUNDS) - wm.fetch(out_path, DATETIME) + wm.fetch(out_path, time) @pytest.mark.long diff --git a/test/test_util.py b/test/test_util.py index a0a0e5084..74f250305 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -33,7 +33,7 @@ transform_bbox, unproject, writeArrayToRaster, - writeWeatherVarsXarray, + write_weather_vars_to_ds, ) from test import TEST_DIR, pushd @@ -917,7 +917,7 @@ def test_UTM_to_WGS84_empty_input(): # Test writeWeatherVarsXarray -def test_writeWeatherVarsXarray(tmp_path): +def test_write_weather_vars_to_ds(tmp_path): """Test writing weather variables to an xarray dataset and NetCDF file.""" # Mock inputs lat = np.random.rand(91, 144) * 180 - 90 # Random latitudes between -90 and 90 @@ -938,7 +938,7 @@ def test_writeWeatherVarsXarray(tmp_path): out_path = tmp_path / "test_output.nc" # Call the function - writeWeatherVarsXarray(lat, lon, h, q, p, t, datetime_value, crs, out_path) + write_weather_vars_to_ds(lat, lon, h, q, p, t, datetime_value, crs, out_path) # Check that the file was created assert out_path.exists() diff --git a/test/test_weather_model.py b/test/test_weather_model.py index 276a6a07d..a992b3790 100644 --- a/test/test_weather_model.py +++ b/test/test_weather_model.py @@ -403,7 +403,7 @@ def test_ztd(model: MockWeatherModel) -> None: def test_get_bounds_indices() -> None: """Test bounds indices.""" - snwe = [-10, 10, -10, 10] + snwe = (-10, 10, -10, 10) ll = np.arange(-20, 20) lats, lons = np.meshgrid(ll, ll) xmin, xmax, ymin, ymax = get_bounds_indices(snwe, lats, lons) @@ -415,7 +415,7 @@ def test_get_bounds_indices() -> None: def test_get_bounds_indices_2() -> None: """Test bounds indices.""" - snwe = [-10, 10, 170, -170] + snwe = (-10, 10, 170, -170) l = np.arange(-20, 20) l2 = (((np.arange(160, 200) + 180) % 360) - 180) lats, lons = np.meshgrid(l, l2) @@ -425,7 +425,7 @@ def test_get_bounds_indices_2() -> None: def test_get_bounds_indices_2b() -> None: """Test bounds indices.""" - snwe = [-10, 10, 170, 190] + snwe = (-10, 10, 170, 190) l = np.arange(-20, 20) l2 = np.arange(160, 200) lats, lons = np.meshgrid(l, l2) @@ -438,7 +438,7 @@ def test_get_bounds_indices_2b() -> None: def test_get_bounds_indices_3() -> None: """Test bounds indices""" - snwe = [-10, 10, -10, 10] + snwe = (-10, 10, -10, 10) l = np.arange(-20, 20) l2 = (((np.arange(160, 200) + 180) % 360) - 180) lats, lons = np.meshgrid(l, l2) @@ -448,7 +448,7 @@ def test_get_bounds_indices_3() -> None: def test_get_bounds_indices_4() -> None: """Test bounds_indices.""" - snwe = [55, 60, 175, 185] + snwe = (55, 60, 175, 185) l = np.arange(55, 60, 1) l2 = np.arange(175, 185, 1) lats, lons = np.meshgrid(l, l2) diff --git a/tools/RAiDER/models/gmao.py b/tools/RAiDER/models/gmao.py index f1176ffe1..d1e00a1ca 100755 --- a/tools/RAiDER/models/gmao.py +++ b/tools/RAiDER/models/gmao.py @@ -1,11 +1,10 @@ import datetime as dt import shutil -import warnings from pathlib import Path import h5py import numpy as np -import pydap.client +import xarray as xr from pyproj import CRS from RAiDER.logger import logger @@ -13,7 +12,7 @@ LEVELS_137_HEIGHTS, ) from RAiDER.models.weatherModel import TIME_RES, WeatherModel -from RAiDER.utilFcns import requests_retry_session, round_date, writeWeatherVarsXarray +from RAiDER.utilFcns import requests_retry_session, round_date, write_weather_vars_to_ds class GMAO(WeatherModel): @@ -80,53 +79,42 @@ def _fetch(self, out: Path) -> None: if corrected_DT >= T0: # open the dataset and pull the data url = 'https://opendap.nccs.nasa.gov/dods/GEOS-5/fp/0.25_deg/assim/inst3_3d_asm_Nv' - # For warning from pydap when using HTTPS instead of DAP2 or DAP4: - # pydap is incompatible with DAP data from opendap.nccs.nasa.gov. See issue #736 - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - ds = pydap.client.open_url(url) - - q = ( - ds['qv'] - .array[ - time_ind, - ml_min : (ml_max + 1), - lat_min_ind : (lat_max_ind + 1), - lon_min_ind : (lon_max_ind + 1), - ] - .data[0] - ) - p = ( - ds['pl'] - .array[ - time_ind, ml_min : (ml_max + 1), lat_min_ind : (lat_max_ind + 1), lon_min_ind : (lon_max_ind + 1) - ] - .data[0] - ) - t = ( - ds['t'] - .array[ - time_ind, ml_min : (ml_max + 1), lat_min_ind : (lat_max_ind + 1), lon_min_ind : (lon_max_ind + 1) - ] - .data[0] - ) - h = ( - ds['h'] - .array[ - time_ind, ml_min : (ml_max + 1), lat_min_ind : (lat_max_ind + 1), lon_min_ind : (lon_max_ind + 1) - ] - .data[0] - ) + ds = xr.open_dataset(url, decode_times=False) + + q = ds['qv'][ + time_ind, + ml_min : (ml_max + 1), + lat_min_ind : (lat_max_ind + 1), + lon_min_ind : (lon_max_ind + 1), + ] + p = ds['pl'][ + time_ind, + ml_min : (ml_max + 1), + lat_min_ind : (lat_max_ind + 1), + lon_min_ind : (lon_max_ind + 1), + ] + t = ds['t'][ + time_ind, + ml_min : (ml_max + 1), + lat_min_ind : (lat_max_ind + 1), + lon_min_ind : (lon_max_ind + 1), + ] + h = ds['h'][ + time_ind, + ml_min : (ml_max + 1), + lat_min_ind : (lat_max_ind + 1), + lon_min_ind : (lon_max_ind + 1), + ] else: root = 'https://portal.nccs.nasa.gov/datashare/gmao/geos-fp/das/Y{}/M{:02d}/D{:02d}' - base = f'GEOS.fp.asm.inst3_3d_asm_Nv.{corrected_DT.strftime("%Y%m%d")}_{corrected_DT.hour:02}00.V01.nc4' - URL = f'{root.format(corrected_DT.year, corrected_DT.month, corrected_DT.day)}/{base}' + filename = f'GEOS.fp.asm.inst3_3d_asm_Nv.{corrected_DT.strftime("%Y%m%d")}_{corrected_DT.hour:02}00.V01.nc4' + url = f'{root.format(corrected_DT.year, corrected_DT.month, corrected_DT.day)}/{filename}' path = Path(f'{out.stem}_raw{out.suffix}') if not path.exists(): - logger.info('Fetching URL: %s', URL) + logger.info('Fetching URL: %s', url) session = requests_retry_session() - resp = session.get(URL, stream=True) + resp = session.get(url, stream=True) assert resp.ok, f'Could not access url for datetime: {corrected_DT}' with path.open('wb') as fout: shutil.copyfileobj(resp.raw, fout) @@ -140,15 +128,21 @@ def _fetch(self, out: Path) -> None: h = ds['H'][0, :, lat_min_ind : (lat_max_ind + 1), lon_min_ind : (lon_max_ind + 1)] path.unlink() - lats = np.arange((-90 + lat_min_ind * self._lat_res), (-90 + (lat_max_ind + 1) * self._lat_res), self._lat_res) + lats = np.arange( + -90 + lat_min_ind * self._lat_res, + -90 + (lat_max_ind + 1) * self._lat_res, + self._lat_res, + ) lons = np.arange( - (-180 + lon_min_ind * self._lon_res), (-180 + (lon_max_ind + 1) * self._lon_res), self._lon_res + -180 + lon_min_ind * self._lon_res, + -180 + (lon_max_ind + 1) * self._lon_res, + self._lon_res, ) lon, lat = np.meshgrid(lons, lats) try: # Note that lat/lon gets written twice for GMAO because they are the same as y/x - writeWeatherVarsXarray(lat, lon, h, q, p, t, self._time, self._proj, out) + write_weather_vars_to_ds(lat, lon, h, q, p, t, self._time, self._proj, out) except: logger.exception('Unable to save weathermodel to file:') raise @@ -164,7 +158,7 @@ def load_weather(self, f=None) -> None: self._load_model_level(f) def _load_model_level(self, filename) -> None: - """Get the variables from the GMAO link using OpenDAP.""" + """Get the variables from the GMAO link using OPeNDAP.""" # adding the import here should become absolute when transition to netcdf from netCDF4 import Dataset diff --git a/tools/RAiDER/models/hrrr.py b/tools/RAiDER/models/hrrr.py index 40f2b5ff5..50619a616 100644 --- a/tools/RAiDER/models/hrrr.py +++ b/tools/RAiDER/models/hrrr.py @@ -1,14 +1,15 @@ import datetime as dt -import os from pathlib import Path +from typing import List, Tuple, Union, cast +from RAiDER.types import BB, Array2D, FloatArray2D import geopandas as gpd +import herbie +import herbie.accessors import numpy as np import xarray as xr -from herbie import Herbie from pyproj import CRS, Transformer from shapely.geometry import Polygon, box -from typing import Optional, Union, List, Tuple from RAiDER.logger import logger from RAiDER.models.customExceptions import NoWeatherModelData @@ -29,13 +30,13 @@ def check_hrrr_dataset_availability(datetime: dt.datetime, model='hrrr') -> bool: """Note a file could still be missing within the models valid range.""" - herbie = Herbie( + h = herbie.Herbie( datetime, model=model, product='nat', fxx=0, ) - return herbie.grib_source is not None + return h.grib_source is not None def download_hrrr_file(ll_bounds, DATE, out: Path, model='hrrr', product='nat', fxx=0, verbose=False) -> None: @@ -54,7 +55,7 @@ def download_hrrr_file(ll_bounds, DATE, out: Path, model='hrrr', product='nat', Returns: None, writes data to a netcdf file """ - herbie = Herbie( + h = herbie.Herbie( DATE.strftime('%Y-%m-%d %H:%M'), model=model, product=product, @@ -66,7 +67,12 @@ def download_hrrr_file(ll_bounds, DATE, out: Path, model='hrrr', product='nat', # Iterate through the list of datasets try: - ds_list = herbie.xarray(':(SPFH|PRES|TMP|HGT):', verbose=verbose) + # cast: Herbie.xarray can return one Dataset or a list + ds_list = cast( + Union[xr.Dataset, List[xr.Dataset]], + h.xarray(':(SPFH|PRES|TMP|HGT):', verbose=verbose), + ) + assert isinstance(ds_list, list) except ValueError as e: logger.error(e) raise @@ -74,14 +80,14 @@ def download_hrrr_file(ll_bounds, DATE, out: Path, model='hrrr', product='nat', # Note order coord names are request for `test_HRRR_ztd` matters # when both coord names are retreived by Herbie is ds_list possibly in # Different orders on different machines; `hybrid` is what is expected for the test. - ds_list_filt_0 = [ds for ds in ds_list if 'hybrid' in ds._coord_names] - ds_list_filt_1 = [ds for ds in ds_list if 'isobaricInhPa' in ds._coord_names] - if ds_list_filt_0: + ds_list_filt_0 = [ds for ds in ds_list if 'hybrid' in ds.coords] + ds_list_filt_1 = [ds for ds in ds_list if 'isobaricInhPa' in ds.coords] + if len(ds_list_filt_0) > 0: ds_out = ds_list_filt_0[0] coord = 'hybrid' # I do not think that this coord name will result in successful processing nominally as variables are # gh,gribfile_projection for test_HRRR_ztd - elif ds_list_filt_1: + elif len(ds_list_filt_1) > 0: ds_out = ds_list_filt_1[0] coord = 'isobaricInhPa' else: @@ -91,22 +97,25 @@ def download_hrrr_file(ll_bounds, DATE, out: Path, model='hrrr', product='nat', try: x_min, x_max, y_min, y_max = get_bounds_indices( ll_bounds, - ds_out.latitude.to_numpy(), - ds_out.longitude.to_numpy(), + ds_out['latitude'].to_numpy(), + ds_out['longitude'].to_numpy(), ) except NoWeatherModelData as e: logger.error(e) - logger.error('lat/lon bounds: %s', ll_bounds) - logger.error('Weather model is {}'.format(model)) + logger.error(f'lat/lon bounds: {ll_bounds}') + logger.error(f'Weather model is {model}') raise # bookkeepping ds_out = ds_out.rename({'gh': 'z', coord: 'levels'}) + # Herbie injects brainworms into all its xarray Datasets + crs = cast(herbie.accessors.HerbieAccessor, ds_out.herbie).crs + # projection information ds_out['proj'] = 0 - for k, v in CRS.from_user_input(ds_out.herbie.crs).to_cf().items(): - ds_out.proj.attrs[k] = v + for k, v in CRS.from_user_input(crs).to_cf().items(): + ds_out['proj'].attrs[k] = v for var in ds_out.data_vars: ds_out[var].attrs['grid_mapping'] = 'proj' @@ -121,53 +130,50 @@ def download_hrrr_file(ll_bounds, DATE, out: Path, model='hrrr', product='nat', t = Transformer.from_crs(4326, proj, always_xy=True) xl, yl = t.transform(ds_out['longitude'].values, ds_out['latitude'].values) - W, E, S, N = np.nanmin(xl), np.nanmax(xl), np.nanmin(yl), np.nanmax(yl) + s, n, w, e = np.nanmin(yl), np.nanmax(yl), np.nanmin(xl), np.nanmax(xl) grid_x = 3000 # meters grid_y = 3000 # meters - xs = np.arange(W, E + grid_x / 2, grid_x) - ys = np.arange(S, N + grid_y / 2, grid_y) + xs = np.arange(w, e + grid_x / 2, grid_x) + ys = np.arange(s, n + grid_y / 2, grid_y) ds_out['x'] = xs ds_out['y'] = ys ds_sub = ds_out.isel(x=slice(x_min, x_max), y=slice(y_min, y_max)) - ds_sub.to_netcdf(out, engine='netcdf4') + ds_sub.to_netcdf(out) -def get_bounds_indices(SNWE, lats, lons): +def get_bounds_indices(snwe: BB.SNWE, lats: FloatArray2D, lons: FloatArray2D) -> BB.WSEN: """Convert SNWE lat/lon bounds to index bounds.""" # Unpack the bounds and find the relevent indices - S, N, W, E = SNWE + s, n, w, e = snwe # Need to account for crossing the international date line - if W < E: - m1 = (S <= lats) & (N >= lats) & (W <= lons) & (E >= lons) - else: + if w >= e: raise ValueError( - 'Longitude is either flipped or you are crossing the international date line;' - + 'if the latter please give me longitudes from 0-360' + 'Longitude is either flipped or you are crossing the international date line; ' + 'if the latter please give me longitudes from 0-360' ) - if np.sum(m1) == 0: + m1: Array2D[np.bool] = (s <= lats) & (n >= lats) & (w <= lons) & (e >= lons) + if np.sum(m1) == 0: # All False lons = np.mod(lons, 360) - W, E = np.mod([W, E], 360) - m1 = (S <= lats) & (N >= lats) & (W <= lons) & (E >= lons) - if np.sum(m1) == 0: + w, e = np.mod([w, e], 360) + # try again + m1 = (s <= lats) & (n >= lats) & (w <= lons) & (e >= lons) + if np.sum(m1) == 0: # All False raise NoWeatherModelData('Area of Interest has no overlap with the HRRR model available extent') # Y extent - shp = lats.shape - m1_y = np.argwhere(np.sum(m1, axis=1) != 0) - y_min = max(m1_y[0][0], 0) - y_max = min(m1_y[-1][0], shp[0]) - m1_y = None + m1_y: Array2D[np.integer] = np.argwhere(np.sum(m1, axis=1) != 0) + y_min: int = max(m1_y[0][0], 0) + y_max: int = min(m1_y[-1][0], lats.shape[0]) + del m1_y # big # X extent - m1_x = np.argwhere(np.sum(m1, axis=0) != 0) - x_min = max(m1_x[0][0], 0) - x_max = min(m1_x[-1][0], shp[1]) - m1_x = None - m1 = None + m1_x: Array2D[np.integer] = np.argwhere(np.sum(m1, axis=0) != 0) + x_min: int = max(m1_x[0][0], 0) + x_max: int = min(m1_x[-1][0], lats.shape[1]) return x_min, x_max, y_min, y_max diff --git a/tools/RAiDER/models/merra2.py b/tools/RAiDER/models/merra2.py index c35f0b4e9..c3c56990c 100755 --- a/tools/RAiDER/models/merra2.py +++ b/tools/RAiDER/models/merra2.py @@ -3,7 +3,6 @@ from pathlib import Path import numpy as np -import pydap.client import xarray as xr from pyproj import CRS @@ -12,7 +11,7 @@ LEVELS_137_HEIGHTS, ) from RAiDER.models.weatherModel import WeatherModel -from RAiDER.utilFcns import writeWeatherVarsXarray +from RAiDER.utilFcns import write_weather_vars_to_ds # Path to Netrc file, can be controlled by env var @@ -69,7 +68,11 @@ def __init__(self) -> None: self._proj = CRS.from_epsg(4326) def _fetch(self, out: Path) -> None: - """Fetch weather model data from GMAO: note we only extract the lat/lon bounds for this weather model; fetching data is not needed here as we don't actually download any data using OpenDAP.""" + """Fetch weather model data from GMAO. + + Note: we only extract the lat/lon bounds for this weather model; fetching data is not needed here as we don't + actually download any data using OPeNDAP. + """ time = self._time # check whether the file already exists @@ -100,28 +103,45 @@ def _fetch(self, out: Path) -> None: # open the dataset and pull the data url = ( - 'dap4://goldsmr5.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T3NVASM.5.12.4/' - + time.strftime('%Y/%m') - + '/MERRA2_' - + str(url_sub) - + '.tavg3_3d_asm_Nv.' - + time.strftime('%Y%m%d') - + '.nc4' + f'dap4://goldsmr5.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T3NVASM.5.12.4/' + f'{time.strftime('%Y/%m')}/' + f'MERRA2_{url_sub}.tavg3_3d_asm_Nv.{time.strftime('%Y%m%d')}.nc4' ) - stream = pydap.client.open_url(url) - - q = stream['QV'][0, :, lat_min_ind : lat_max_ind + 1, lon_min_ind : lon_max_ind + 1].data.squeeze() - p = stream['PL'][0, :, lat_min_ind : lat_max_ind + 1, lon_min_ind : lon_max_ind + 1].data.squeeze() - t = stream['T'][0, :, lat_min_ind : lat_max_ind + 1, lon_min_ind : lon_max_ind + 1].data.squeeze() - h = stream['H'][0, :, lat_min_ind : lat_max_ind + 1, lon_min_ind : lon_max_ind + 1].data.squeeze() + # pydap engine required for password-protected datasets + ds = xr.open_dataset(url, decode_times=False, engine='pydap') + + q = ds['QV'][ + 0, + :, + lat_min_ind : lat_max_ind + 1, + lon_min_ind : lon_max_ind + 1, + ].data.squeeze() + p = ds['PL'][ + 0, + :, + lat_min_ind : lat_max_ind + 1, + lon_min_ind : lon_max_ind + 1, + ].data.squeeze() + t = ds['T'][ + 0, + :, + lat_min_ind : lat_max_ind + 1, + lon_min_ind : lon_max_ind + 1, + ].data.squeeze() + h = ds['H'][ + 0, + :, + lat_min_ind : lat_max_ind + 1, + lon_min_ind : lon_max_ind + 1, + ].data.squeeze() try: - writeWeatherVarsXarray(lat, lon, h, q, p, t, time, self._proj, out_path=out) + write_weather_vars_to_ds(lat, lon, h, q, p, t, time, self._proj, out_path=out) except Exception as e: logger.debug(e) - logger.exception('MERRA-2: Unable to save weathermodel to file') - raise RuntimeError(f'MERRA-2 failed with the following error: {e}') + logger.exception('MERRA-2: Unable to save weather model query to file') + raise def load_weather(self, f=None, *args, **kwargs) -> None: """ @@ -134,7 +154,7 @@ def load_weather(self, f=None, *args, **kwargs) -> None: self._load_model_level(f) def _load_model_level(self, filename) -> None: - """Get the variables from the GMAO link using OpenDAP.""" + """Get the variables from the GMAO link using OPeNDAP.""" # adding the import here should become absolute when transition to netcdf ds = xr.load_dataset(filename) lons = ds['longitude'].values diff --git a/tools/RAiDER/models/ncmr.py b/tools/RAiDER/models/ncmr.py index 58338f398..3918017da 100755 --- a/tools/RAiDER/models/ncmr.py +++ b/tools/RAiDER/models/ncmr.py @@ -18,7 +18,7 @@ from RAiDER.utilFcns import ( read_NCMR_loginInfo, show_progress, - writeWeatherVarsXarray, + write_weather_vars_to_ds, ) @@ -193,7 +193,7 @@ def _download_ncmr_file(self, out: Path, date_time, bounding_box) -> None: ######################################################################################################################## try: - writeWeatherVarsXarray(lats, lons, hgt, q, p, t, self._time, self._proj, out_path=out) + write_weather_vars_to_ds(lats, lons, hgt, q, p, t, self._time, self._proj, out_path=out) except: logger.exception('Unable to save weathermodel to file') diff --git a/tools/RAiDER/models/template.py b/tools/RAiDER/models/template.py index e7b2cf7b0..8f0df1c7f 100644 --- a/tools/RAiDER/models/template.py +++ b/tools/RAiDER/models/template.py @@ -77,7 +77,7 @@ def _fetch(self, out) -> None: # download dataset of the custom weather model "ABCD" from a server and then save it to a file named out. # This function needs to be writen by the users. For download from the weather model server, the weather model # name, time and bounding box may be needed to retrieve the dataset; for cases where no file is actually - # downloaded, e.g. the GMAO and MERRA-2 models using OpenDAP, this function can be omitted leaving the data + # downloaded, e.g. the GMAO and MERRA-2 models using OPeNDAP, this function can be omitted leaving the data # retrieval to the following "load_weather" function. self._files = self._download_abcd_file(out, 'abcd', self._time, self._ll_bounds) @@ -91,7 +91,7 @@ def load_weather(self, filename) -> None: # read individual variables (in 3-D cube format with exactly the same dimension) from downloaded file # This function needs to be writen by the users. For downloaded file from the weather model server, # this function extracts the individual variables from the saved file named filename; - # for cases where no file is actually downloaded, e.g. the GMAO and MERRA-2 models using OpenDAP, + # for cases where no file is actually downloaded, e.g. the GMAO and MERRA-2 models using OPeNDAP, # this function retrieves the individual variables directly from the weblink of the weather model. lats, lons, xs, ys, t, q, p, hgt = self._makeDataCubes(filename) diff --git a/tools/RAiDER/types/__init__.py b/tools/RAiDER/types/__init__.py index 58d00736b..036cbdd98 100644 --- a/tools/RAiDER/types/__init__.py +++ b/tools/RAiDER/types/__init__.py @@ -1,10 +1,32 @@ -"""Types specific to RAiDER.""" +"""Helper types used throughout RAiDER.""" -from typing import Literal, Union +from typing import Literal, TypeVar, Union +import numpy as np from pyproj import CRS LookDir = Literal['right', 'left'] TimeInterpolationMethod = Literal['none', 'center_time', 'azimuth_time_grid'] CRSLike = Union[CRS, str, int] + + +# From numpy +_ScalarT_co = TypeVar("_ScalarT_co", bound=np.generic, covariant=True) + +# Helpers for type annotating the dimensions of a numpy array. +# When you use these, your type checker will alert you when an annotated array +# is used in a way inconsistent with its dimensions. +Array1D = np.ndarray[tuple[int], np.dtype[_ScalarT_co]] +Array2D = np.ndarray[tuple[int, int], np.dtype[_ScalarT_co]] +Array3D = np.ndarray[tuple[int, int, int], np.dtype[_ScalarT_co]] +# ... (repeat the pattern as needed for higher dimensions) + +# Any number of dimensions -- when ndim is not able to be known ahead of time +ArrayND = np.ndarray[tuple[int, ...], np.dtype[_ScalarT_co]] + + +FloatArray1D = Array1D[np.floating] +FloatArray2D = Array2D[np.floating] +FloatArray3D = Array3D[np.floating] +FloatArrayND = ArrayND[np.floating] diff --git a/tools/RAiDER/utilFcns.py b/tools/RAiDER/utilFcns.py index 0dee94634..8a538f04d 100644 --- a/tools/RAiDER/utilFcns.py +++ b/tools/RAiDER/utilFcns.py @@ -22,7 +22,7 @@ from RAiDER.constants import _g1 as G1 from RAiDER.llreader import AOI from RAiDER.logger import logger -from RAiDER.types import BB, RIO, CRSLike +from RAiDER.types import BB, RIO, CRSLike, FloatArray2D, FloatArray3D # Optional imports @@ -638,68 +638,76 @@ def requests_retry_session(retries: int=10, session=None): # noqa: ANN001, ANN2 return session -def writeWeatherVarsXarray( - lat: npt.NDArray[np.floating], - lon: npt.NDArray[np.floating], - h: npt.NDArray[np.floating], - q: npt.NDArray[np.floating], - p: npt.NDArray[np.floating], - t: npt.NDArray[np.floating], +def write_weather_vars_to_ds( + lat_range: FloatArray2D, + lon_range: FloatArray2D, + h: FloatArray3D, + q: FloatArray3D, + p: FloatArray3D, + t: FloatArray3D, datetime: dt.datetime, crs: CRS, out_path: Path, NoDataValue: int = -9999, - chunksize: Tuple[int, ...] = (1, 91, 144), + chunksize: Tuple[int, int, int] = (1, 91, 144), ) -> None: - assert len(h.shape) == 3, "Invalid h array dimensions" - assert len(q.shape) == 3, "Invalid q array dimensions" - assert len(p.shape) == 3, "Invalid p array dimensions" - assert len(t.shape) == 3, "Invalid t array dimensions" - dataset_dict = { - 'h': (('z', 'y', 'x'), h), - 'q': (('z', 'y', 'x'), q), - 'p': (('z', 'y', 'x'), p), - 't': (('z', 'y', 'x'), t), - } - assert len(lat.shape) == 2, "Invalid lats array dimensions" - assert len(lon.shape) == 2, "Invalid lons array dimensions" - dimension_dict = { - 'latitude': (('y', 'x'), lat), - 'longitude': (('y', 'x'), lon), - } - # I added datetime as an input to the function and just copied these two lines from merra2 for the attrs_dict - attrs_dict = { - 'datetime': datetime.strftime('%Y_%m_%dT%H_%M_%S'), - 'date_created': datetime.now().strftime('%Y_%m_%dT%H_%M_%S'), - 'NoDataValue': NoDataValue, - 'chunksize': chunksize, - # 'mapping_name': mapping_name, - } - - ds = xr.Dataset( - data_vars=dataset_dict, - coords=dimension_dict, - attrs=attrs_dict, - ) - - ds['h'].attrs['standard_name'] = 'mid_layer_heights' - ds['p'].attrs['standard_name'] = 'mid_level_pressure' - ds['q'].attrs['standard_name'] = 'specific_humidity' - ds['t'].attrs['standard_name'] = 'air_temperature' - - ds['h'].attrs['units'] = 'm' - ds['p'].attrs['units'] = 'Pa' - ds['q'].attrs['units'] = 'kg kg-1' - ds['t'].attrs['units'] = 'K' - - ds['proj'] = 0 - for k, v in crs.to_cf().items(): - ds.proj.attrs[k] = v - for var in ds.data_vars: - ds[var].attrs['grid_mapping'] = 'proj' - - ds.to_netcdf(out_path) - del ds + DIMS_3D = ('z', 'y', 'x') + DIMS_2D = ('y', 'x') + with xr.Dataset( + data_vars={ + 'h': xr.Variable( + dims=DIMS_3D, + data=h, + attrs={ + 'standard_name': 'mid_layer_heights', + 'units': 'm', + }, + ), + 'q': xr.Variable( + dims=DIMS_3D, + data=q, + attrs={ + 'standard_name': 'specific_humidity', + 'units': 'kg kg-1', + }, + ), + 'p': xr.Variable( + dims=DIMS_3D, + data=p, + attrs={ + 'standard_name': 'mid_level_pressure', + 'units': 'Pa', + }, + ), + 't': xr.Variable( + dims=DIMS_3D, + data=t, + attrs={ + 'standard_name': 'air_temperature', + 'units': 'K', + }, + ), + 'proj': 0, + }, + coords={ + 'latitude': xr.Variable(dims=DIMS_2D, data=lat_range), + 'longitude': xr.Variable(dims=DIMS_2D, data=lon_range), + }, + attrs={ + # I added datetime as an input to the function and just copied these + # two lines from merra2 for the attrs_dict + 'datetime': datetime.strftime('%Y_%m_%dT%H_%M_%S'), + 'date_created': dt.datetime.now().strftime('%Y_%m_%dT%H_%M_%S'), + 'NoDataValue': NoDataValue, + 'chunksize': chunksize, + # 'mapping_name': mapping_name, + }, + ) as ds: + for k, v in crs.to_cf().items(): + ds['proj'].attrs[k] = v + for var in ds.data_vars: + ds[var].attrs['grid_mapping'] = 'proj' + ds.to_netcdf(out_path) def convertLons(inLons: np.ndarray) -> np.ndarray: