Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Changed
* [743](https://github.com/dbekaert/RAiDER/pull/743) - Switched from HTTPS to DAP4 for retrieving MERRA2 data, and suppressed a warning for using DAP4 for GMAO data where doing so is not possible.
* []() - Changed GMAO and MERRA2 fetch implementations to use xarray, eliminating pydap as a dependency.

### Fixed
* [759](https://github.com/dbekaert/RAiDER/pull/759) - Added a `browse` S3 tag for `.png` files when uploaded to AWS.
Expand Down
12 changes: 2 additions & 10 deletions docs/WeatherModels.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ ECMWF requires a license agreement to be able to access, download, and use their

## 4. NASA weather models (GMAO, MERRA2)

1. The Global Modeling and Assimilation Office (__[GMAO](https://www.nccs.nasa.gov/services/data-collections/coupled-products/geos5-forecast#:~:text=The%20Global%20Modeling%20and%20Assimilation,near%2Dreal%2Dtime%20production.)__) at NASA generates reanalysis weather models. GMAO datasets can also be accessed without a license agreement through the pyDAP interface implemented in RAiDER. GMAO has a horizontal grid spacing of approximately 33 km, and its projection is EPSG code 4326 (WGS-84).
1. The Global Modeling and Assimilation Office (__[GMAO](https://www.nccs.nasa.gov/services/data-collections/coupled-products/geos5-forecast#:~:text=The%20Global%20Modeling%20and%20Assimilation,near%2Dreal%2Dtime%20production.)__) at NASA generates reanalysis weather models. GMAO datasets can be accessed without a license agreement through the OPeNDAP interface implemented in RAiDER. GMAO has a horizontal grid spacing of approximately 33 km, and its projection is EPSG code 4326 (WGS-84).

2. The Modern-Era Retrospective analysis for Research and Applications, Version 2 (__[MERRA-2](https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/#:~:text=MERRA%2D2%20is%20the%20first,(say)%20Greenland%20and%20Antarctica.)__) provides data beginning in 1980. MERRA-2 is also produced by NASA and has a spatial resolution of about 50 km and a global projection (EPSG 4326, WGS-84).

Expand All @@ -88,12 +88,4 @@ Reference: __[The Modern-Era Retrospective Analysis for Research and Application

**Note**: the username and password represent the user's username and password.

4. Add the application `NASA GESDISC DATA ARCHIVE` by clicking on the `Applications->Authorized Apps` on the menu after logging into your Earthdata profile, and then scrolling down to the application `NASA GESDISC DATA ARCHIVE` to approve it. _This seems not required for GMAO for now, but recommended to do so for all OpenDAP-based weather models._
5. Install the OpenDAP using pip:

pip install pydap==3.2.1


**Note**: this step has been included in the conda install of RAiDER, thus can be omitted if one uses the recommended conda install of RAiDER

**Note**: PyDAP v3.2.1 is required for now (thus specified in the above pip install command) because the latest v3.2.2 (as of now) has a known [bug](https://colab.research.google.com/drive/1f_ss1Oa3VzgAOd_p8sgekdnLVE5NW6s5) in accessing and slicing the GMAO data. This bug is expected to be fixed in newer versions of PyDAP.
4. Add the application `NASA GESDISC DATA ARCHIVE` by clicking on the `Applications->Authorized Apps` on the menu after logging into your Earthdata profile, and then scrolling down to the application `NASA GESDISC DATA ARCHIVE` to approve it. _This seems not required for GMAO for now, but recommended to do so for all OPeNDAP-based weather models._
23 changes: 13 additions & 10 deletions test/test_downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,29 @@
from test import random_string


DATETIME = dt.datetime(2020, 1, 1, 0, 0, 0).replace(tzinfo=dt.timezone(offset=dt.timedelta()))
BOUNDS = np.array([10, 10.2, -72, -72])
DATETIME = dt.datetime(2020, 1, 1, 0, 0, 0).replace(tzinfo=dt.timezone(offset=dt.timedelta()))
DATETIME_GMAO_OLD = dt.datetime(2017, 1, 1, 0, 0, 0).replace(tzinfo=dt.timezone(offset=dt.timedelta()))


@pytest.mark.long
@pytest.mark.parametrize(
'name,Model',
'Model,time',
[
('ERA5', ERA5),
('ERA5T', ERA5T),
pytest.param('HRES', HRES, marks=pytest.mark.skip), # Paid access
('GMAO', GMAO),
('MERRA2', MERRA2),
pytest.param(ERA5, DATETIME, id='ERA5'),
pytest.param(ERA5T, DATETIME, id='ERA5T'),
pytest.param(HRES, DATETIME, id='HRES', marks=pytest.mark.skip), # Paid access
# HRRR: see test_weather_model.py
pytest.param(GMAO, DATETIME, id='GMAO new'),
pytest.param(GMAO, DATETIME_GMAO_OLD, id='GMAO old'),
pytest.param(MERRA2, DATETIME, id='MERRA2'),
],
)
def test_downloader(tmp_path: Path, name: str, Model: Type[WeatherModel]) -> None:
out_path = tmp_path / f'test_{name}.nc'
def test_downloader(tmp_path: Path, Model: Type[WeatherModel], time: dt.datetime) -> None:
wm = Model()
out_path = tmp_path / f'test_{wm._Name}.nc'
wm.set_latlon_bounds(BOUNDS)
wm.fetch(out_path, DATETIME)
wm.fetch(out_path, time)


@pytest.mark.long
Expand Down
6 changes: 3 additions & 3 deletions test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
transform_bbox,
unproject,
writeArrayToRaster,
writeWeatherVarsXarray,
write_weather_vars_to_ds,
)
from test import TEST_DIR, pushd

Expand Down Expand Up @@ -917,7 +917,7 @@ def test_UTM_to_WGS84_empty_input():


# Test writeWeatherVarsXarray
def test_writeWeatherVarsXarray(tmp_path):
def test_write_weather_vars_to_ds(tmp_path):
"""Test writing weather variables to an xarray dataset and NetCDF file."""
# Mock inputs
lat = np.random.rand(91, 144) * 180 - 90 # Random latitudes between -90 and 90
Expand All @@ -938,7 +938,7 @@ def test_writeWeatherVarsXarray(tmp_path):
out_path = tmp_path / "test_output.nc"

# Call the function
writeWeatherVarsXarray(lat, lon, h, q, p, t, datetime_value, crs, out_path)
write_weather_vars_to_ds(lat, lon, h, q, p, t, datetime_value, crs, out_path)

# Check that the file was created
assert out_path.exists()
Expand Down
10 changes: 5 additions & 5 deletions test/test_weather_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def test_ztd(model: MockWeatherModel) -> None:

def test_get_bounds_indices() -> None:
"""Test bounds indices."""
snwe = [-10, 10, -10, 10]
snwe = (-10, 10, -10, 10)
ll = np.arange(-20, 20)
lats, lons = np.meshgrid(ll, ll)
xmin, xmax, ymin, ymax = get_bounds_indices(snwe, lats, lons)
Expand All @@ -415,7 +415,7 @@ def test_get_bounds_indices() -> None:

def test_get_bounds_indices_2() -> None:
"""Test bounds indices."""
snwe = [-10, 10, 170, -170]
snwe = (-10, 10, 170, -170)
l = np.arange(-20, 20)
l2 = (((np.arange(160, 200) + 180) % 360) - 180)
lats, lons = np.meshgrid(l, l2)
Expand All @@ -425,7 +425,7 @@ def test_get_bounds_indices_2() -> None:

def test_get_bounds_indices_2b() -> None:
"""Test bounds indices."""
snwe = [-10, 10, 170, 190]
snwe = (-10, 10, 170, 190)
l = np.arange(-20, 20)
l2 = np.arange(160, 200)
lats, lons = np.meshgrid(l, l2)
Expand All @@ -438,7 +438,7 @@ def test_get_bounds_indices_2b() -> None:

def test_get_bounds_indices_3() -> None:
"""Test bounds indices"""
snwe = [-10, 10, -10, 10]
snwe = (-10, 10, -10, 10)
l = np.arange(-20, 20)
l2 = (((np.arange(160, 200) + 180) % 360) - 180)
lats, lons = np.meshgrid(l, l2)
Expand All @@ -448,7 +448,7 @@ def test_get_bounds_indices_3() -> None:

def test_get_bounds_indices_4() -> None:
"""Test bounds_indices."""
snwe = [55, 60, 175, 185]
snwe = (55, 60, 175, 185)
l = np.arange(55, 60, 1)
l2 = np.arange(175, 185, 1)
lats, lons = np.meshgrid(l, l2)
Expand Down
90 changes: 42 additions & 48 deletions tools/RAiDER/models/gmao.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import datetime as dt
import shutil
import warnings
from pathlib import Path

import h5py
import numpy as np
import pydap.client
import xarray as xr
from pyproj import CRS

from RAiDER.logger import logger
from RAiDER.models.model_levels import (
LEVELS_137_HEIGHTS,
)
from RAiDER.models.weatherModel import TIME_RES, WeatherModel
from RAiDER.utilFcns import requests_retry_session, round_date, writeWeatherVarsXarray
from RAiDER.utilFcns import requests_retry_session, round_date, write_weather_vars_to_ds


class GMAO(WeatherModel):
Expand Down Expand Up @@ -80,53 +79,42 @@ def _fetch(self, out: Path) -> None:
if corrected_DT >= T0:
# open the dataset and pull the data
url = 'https://opendap.nccs.nasa.gov/dods/GEOS-5/fp/0.25_deg/assim/inst3_3d_asm_Nv'
# For warning from pydap when using HTTPS instead of DAP2 or DAP4:
# pydap is incompatible with DAP data from opendap.nccs.nasa.gov. See issue #736
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ds = pydap.client.open_url(url)

q = (
ds['qv']
.array[
time_ind,
ml_min : (ml_max + 1),
lat_min_ind : (lat_max_ind + 1),
lon_min_ind : (lon_max_ind + 1),
]
.data[0]
)
p = (
ds['pl']
.array[
time_ind, ml_min : (ml_max + 1), lat_min_ind : (lat_max_ind + 1), lon_min_ind : (lon_max_ind + 1)
]
.data[0]
)
t = (
ds['t']
.array[
time_ind, ml_min : (ml_max + 1), lat_min_ind : (lat_max_ind + 1), lon_min_ind : (lon_max_ind + 1)
]
.data[0]
)
h = (
ds['h']
.array[
time_ind, ml_min : (ml_max + 1), lat_min_ind : (lat_max_ind + 1), lon_min_ind : (lon_max_ind + 1)
]
.data[0]
)
ds = xr.open_dataset(url, decode_times=False)

q = ds['qv'][
time_ind,
ml_min : (ml_max + 1),
lat_min_ind : (lat_max_ind + 1),
lon_min_ind : (lon_max_ind + 1),
]
p = ds['pl'][
time_ind,
ml_min : (ml_max + 1),
lat_min_ind : (lat_max_ind + 1),
lon_min_ind : (lon_max_ind + 1),
]
t = ds['t'][
time_ind,
ml_min : (ml_max + 1),
lat_min_ind : (lat_max_ind + 1),
lon_min_ind : (lon_max_ind + 1),
]
h = ds['h'][
time_ind,
ml_min : (ml_max + 1),
lat_min_ind : (lat_max_ind + 1),
lon_min_ind : (lon_max_ind + 1),
]

else:
root = 'https://portal.nccs.nasa.gov/datashare/gmao/geos-fp/das/Y{}/M{:02d}/D{:02d}'
base = f'GEOS.fp.asm.inst3_3d_asm_Nv.{corrected_DT.strftime("%Y%m%d")}_{corrected_DT.hour:02}00.V01.nc4'
URL = f'{root.format(corrected_DT.year, corrected_DT.month, corrected_DT.day)}/{base}'
filename = f'GEOS.fp.asm.inst3_3d_asm_Nv.{corrected_DT.strftime("%Y%m%d")}_{corrected_DT.hour:02}00.V01.nc4'
url = f'{root.format(corrected_DT.year, corrected_DT.month, corrected_DT.day)}/{filename}'
path = Path(f'{out.stem}_raw{out.suffix}')
if not path.exists():
logger.info('Fetching URL: %s', URL)
logger.info('Fetching URL: %s', url)
session = requests_retry_session()
resp = session.get(URL, stream=True)
resp = session.get(url, stream=True)
assert resp.ok, f'Could not access url for datetime: {corrected_DT}'
with path.open('wb') as fout:
shutil.copyfileobj(resp.raw, fout)
Expand All @@ -140,15 +128,21 @@ def _fetch(self, out: Path) -> None:
h = ds['H'][0, :, lat_min_ind : (lat_max_ind + 1), lon_min_ind : (lon_max_ind + 1)]
path.unlink()

lats = np.arange((-90 + lat_min_ind * self._lat_res), (-90 + (lat_max_ind + 1) * self._lat_res), self._lat_res)
lats = np.arange(
-90 + lat_min_ind * self._lat_res,
-90 + (lat_max_ind + 1) * self._lat_res,
self._lat_res,
)
lons = np.arange(
(-180 + lon_min_ind * self._lon_res), (-180 + (lon_max_ind + 1) * self._lon_res), self._lon_res
-180 + lon_min_ind * self._lon_res,
-180 + (lon_max_ind + 1) * self._lon_res,
self._lon_res,
)
lon, lat = np.meshgrid(lons, lats)

try:
# Note that lat/lon gets written twice for GMAO because they are the same as y/x
writeWeatherVarsXarray(lat, lon, h, q, p, t, self._time, self._proj, out)
write_weather_vars_to_ds(lat, lon, h, q, p, t, self._time, self._proj, out)
except:
logger.exception('Unable to save weathermodel to file:')
raise
Expand All @@ -164,7 +158,7 @@ def load_weather(self, f=None) -> None:
self._load_model_level(f)

def _load_model_level(self, filename) -> None:
"""Get the variables from the GMAO link using OpenDAP."""
"""Get the variables from the GMAO link using OPeNDAP."""
# adding the import here should become absolute when transition to netcdf
from netCDF4 import Dataset

Expand Down
Loading