diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 70c9793..556717f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,6 +52,7 @@ jobs:
         pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=ldndctools tests/ | tee pytest-coverage.txt
 
     - name: Pytest coverage comment
+      continue-on-error: true
       uses: MishaKav/pytest-coverage-comment@main
       if: ${{ matrix.python-version == '3.10' }}
       with:
diff --git a/data/catalog.yml b/data/catalog.yml
index fab0c3d..c7a0654 100644
--- a/data/catalog.yml
+++ b/data/catalog.yml
@@ -4,6 +4,30 @@ plugins:
     - module: intake_geopandas
 
 sources:
+
+  soil_national:
+    name: 'SOIL national'
+    description: 'Default soil data for ldndctools (site file generation)'
+    driver: netcdf
+    parameters:
+      res:
+        default: 'LR'
+        allowed: ['LR', 'MR', 'HR']
+        description: 'Resolution (LR, MR or HR).'
+        type: str
+    args:
+      urlpath: 'simplecache::s3://ldndcdata/GLOBAL_WISESOIL_S1_{{res}}.nc'
+      storage_options:
+        s3:
+          anon: true
+          default_fill_cache: false
+          client_kwargs:
+            endpoint_url: 'https://s3.imk-ifu.kit.edu:{{port}}'
+            verify: False
+        simplecache:
+          cache_storage: '.cache'
+          same_names: true
+
   soil:
     name: 'SOIL'
     description: 'Default soil data for ldndctools (site file generation)'
@@ -21,7 +45,8 @@ sources:
           anon: true
           default_fill_cache: false
           client_kwargs:
-            endpoint_url: 'https://s3.imk-ifu.kit.edu:8082'
+            endpoint_url: 'https://s3.imk-ifu.kit.edu:{{port}}'
+            verify: False
         simplecache:
           cache_storage: '.cache'
           same_names: true
@@ -39,7 +64,7 @@ sources:
         description: 'Resolution (10, 50 or 110m).'
         type: int
     args:
-      urlpath: 'simplecache::https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/{{scale}}m/cultural/ne_{{scale}}m_admin_0_countries.zip'
+      urlpath: '/Users/kraus-d/projects/ldndctools-kraus/data/ne_50m_admin_0_countries.zip'
       use_fsspec: True
       storage_options:
         simplecache:
@@ -63,7 +88,7 @@ sources:
          anon: true
          default_fill_cache: false
          client_kwargs:
-           endpoint_url: 'https://s3.imk-ifu.kit.edu:8082'
+           endpoint_url: 'https://s3.imk-ifu.kit.edu:10443'
         simplecache:
           cache_storage: '.cache'
           same_names: true
@@ -73,12 +98,11 @@ sources:
     driver: zarr
     args:
       urlpath: 's3://era5land-zarr/data.zarr'
-      consolidated: True
       storage_options:
-        use_ssl: False
+        use_ssl: True
         anon: True
         client_kwargs:
-          endpoint_url: 'https://s3.imk-ifu.kit.edu:8082'
+          endpoint_url: 'https://s3.imk-ifu.kit.edu:10443'
           verify: False
@@ -100,7 +124,7 @@ sources:
          anon: true
          default_fill_cache: false
          client_kwargs:
-           endpoint_url: 'https://s3.imk-ifu.kit.edu:8082'
+           endpoint_url: 'https://s3.imk-ifu.kit.edu:10443'
         simplecache:
           cache_storage: '.cache'
           same_names: true
diff --git a/data/ldndctools.conf b/data/ldndctools.conf
index 603455b..6557650 100644
--- a/data/ldndctools.conf
+++ b/data/ldndctools.conf
@@ -4,8 +4,8 @@
 
 # author info
 info:
-    author: Christian Werner
-    email: christian.werner@kit.edu
+    author: David Kraus
+    email: david.kraus@kit.edu
     institution: IMK-IFU, Karlsruhe Institut of Technology, Garmisch-Partenkirchen, Germany
 
 # project info
diff --git a/examples/examples.conf b/examples/examples.conf
new file mode 100644
index 0000000..5090c3b
--- /dev/null
+++ b/examples/examples.conf
@@ -0,0 +1,26 @@
+# default ldndctools.conf file
+#
+#
+
+# author info
+info:
+    author: David Kraus
+    email: david.kraus@kit.edu
+
+# project info
+project:
+    dataset: test
+    version: 0.1
+    source: IMK-IFU, KIT
+
+soil: national
+output: file #stream
+outfile: test_out.xml
+bbox: 105,28,106,29 #[lon1,lat1,lon2,lat2]
+gui: False
+file: None
+interactive: False
+resolution: 'LR' #HR
+rcode: None
+storeconfig: False
+verbose: False
diff --git a/examples/readme.txt b/examples/readme.txt
new file mode 100644
index 0000000..c5032aa
--- /dev/null
+++ b/examples/readme.txt
@@ -0,0 +1,2 @@
+Run on your terminal:
+dlsc -c examples.conf
\ No newline at end of file
diff --git a/ldndctools/cdgen.py b/ldndctools/cdgen.py
index f9e8430..e87a1a2 100644
--- a/ldndctools/cdgen.py
+++ b/ldndctools/cdgen.py
@@ -14,7 +14,7 @@ import pandas as pd
 import urllib3
 import xarray as xr
 
-from dask.distributed import Client
+from dask.distributed import LocalCluster, Client
 from pydantic import ValidationError
 
 from ldndctools.misc.geohash import coords2geohash_dec
@@ -50,9 +50,22 @@ def subset_climate_data(
     with resources.path("data", "catalog.yml") as cat:
         catalog = intake.open_catalog(str(cat))
 
+    #load the data into a Dask Dataset
     ds = catalog["climate_era5land_hr"].to_dask()
-
+    ds = xr.open_zarr(
+        store="s3://era5land-zarr/data.zarr",
+        consolidated=True,
+        storage_options={
+            "anon": True,
+            "client_kwargs": {
+                "endpoint_url": "https://s3.imk-ifu.kit.edu:10443",
+                "verify": False
+            },
+            "use_ssl": True,
+        },
+    )
     if mask is not None:
+        #match the latitude and longitude coordinates of ds
         mask = mask.interp(lat=ds["lat"], lon=ds["lon"])
         ds = ds.where(mask>0)
 
@@ -161,7 +174,7 @@ def inner_func(x, lat, lon):
 
 def geohash_xr(mask: xr.DataArray) -> xr.DataArray:
     lon_xr = mask.lon.broadcast_like(mask)
     lat_xr = mask.lat.broadcast_like(mask)
-    data = xr.apply_ufunc(inner_func, mask, lat_xr, lon_xr, output_dtypes=[np.int64])
+    data = xr.apply_ufunc(inner_func, mask, lat_xr, lon_xr, output_dtypes=[np.int64], dask="allowed")
     assert data.dtype == np.int64
     return data
@@ -249,7 +262,14 @@ def conf():
 
 
 def main():
-    client = Client(dashboard_address=":1234")
+
+    # Create a LocalCluster with 2 workers and set the dashboard address
+    cluster = LocalCluster(n_workers=2, memory_limit="2GB", dashboard_address=":1234")
+
+    # Connect a Client to the LocalCluster
+    client = Client(cluster)
+
+    #client = Client(dashboard_address=":1234")
 
     print(f"NOTE: You can see progress at {platform.node()}:1234 if bokeh is installed")
 
@@ -258,6 +278,7 @@ def main():
     bbox = get_boundingbox(args.bbox)
     mask = get_mask(args.mask)
 
+    print("subset climate")
     # mask = xr.open_dataset("VN_MISC5_V2.nc")["rice_rot"]
     # mask = xr.where(mask > 0, 1, np.nan)
     ds = subset_climate_data(
@@ -268,6 +289,7 @@ def main():
         date_max=args.date_max,
     )
 
+    print("group")
     tavg_year = ds.tavg.groupby("time.year").mean(dim="time").mean(dim="year")
     tamp_year = ds.tavg.groupby("time.year").apply(amplitude).mean(dim="year")
     prec_year = ds.prec.groupby("time.year").sum(dim="time").mean(dim="year")
@@ -307,6 +329,7 @@ def main():
     # match coords (usually means take lat/ lon from ref dataset)
     ds = ds.assign_coords({"lat": stats.lat, "lon": stats.lon})
 
+    print("df stats")
     df_stats = (
         stats.to_dataframe()
         .dropna(subset=["tavg", "tamp", "wind"], how="all")
@@ -314,6 +337,7 @@ def main():
     )
 
     # ignore prec for now
+    print("lookup")
     lookup: Dict[int, ClimateSiteStats] = {}
     for _, row in df_stats.iterrows():
         lookup[int(row["geohash"])] = ClimateSiteStats(
diff --git a/ldndctools/cli/cli.py b/ldndctools/cli/cli.py
index c71bf61..4feb956 100644
--- a/ldndctools/cli/cli.py
+++ b/ldndctools/cli/cli.py
@@ -28,7 +28,7 @@ def __call__(self, parser, namespace, values=None, option_string=None):
         handlers = logging.getLogger().handlers
         for handler in handlers:
             if type(handler) is logging.StreamHandler:
-                handler.setLevel(logging.DEBUG)
+                handler.setLevel(logging.INFO)
         setattr(namespace, self.dest, True)
diff --git a/ldndctools/cli/selector.py b/ldndctools/cli/selector.py
index 4d125b0..61c31d4 100644
--- a/ldndctools/cli/selector.py
+++ b/ldndctools/cli/selector.py
@@ -220,14 +220,18 @@ def ask(self):
 
 
 class CoordinateSelection:
-    def __init__(self, infile, lon_col="lon", lat_col="lat", id_col="ID"):
-        df = pd.read_csv(infile, delim_whitespace=True)
-
-        self.lons = df[lon_col].values
-        self.lats = df[lat_col].values
-        self.ids = (
-            df[id_col].values if id_col in list(df.columns) else range(len(self.lats))
-        )
+
+    def __init__(self, infile=None, lon="lon", lat="lat", cid="ID"):
+        if infile:
+            df = pd.read_csv(infile, delim_whitespace=True)
+
+            self.lons = df[lon].values
+            self.lats = df[lat].values
+            self.ids = df[cid].values if cid in list(df.columns) else range(len(self.lats))
+        else:
+            self.lons = [lon]
+            self.lats = [lat]
+            self.ids = [cid]
 
     @property
     def selected(self):
diff --git a/ldndctools/dlsc.py b/ldndctools/dlsc.py
index d300c3c..ed5528c 100755
--- a/ldndctools/dlsc.py
+++ b/ldndctools/dlsc.py
@@ -28,29 +28,33 @@
 from ldndctools.misc.create_data import create_dataset
 from ldndctools.misc.types import BoundingBox, RES
 from ldndctools.sources.soil.soil_iscricwise import ISRICWISE_SoilDataset
+from ldndctools.sources.soil.soil_national import NATIONAL_SoilDataset
 
 log = logging.getLogger(__name__)
-
-log.setLevel("DEBUG")
+log.setLevel("INFO")
 
 NODATA = "-99.99"
 
 # TODO: there has to be a better way here...
 # also, tqdm takes no effect
-with resources.path("data", "") as dpath:
-    DPATH = Path(dpath)
+#with resources.path("data", "") as dpath:
+#    DPATH = Path(dpath)
 
 
-def main():
+def main( **kwargs):
     # parse args
     args = cli()
 
     # read config
-    cfg = get_config(args.config)
+    if 'config' in kwargs:
+        cfg = kwargs['config']
+    else:
+        cfg = get_config(args.config)
 
     # write config
-    if args.storeconfig:
-        set_config(cfg)
+    #if args.storeconfig:
+    #    set_config(cfg)
+
 
     # def _get_cfg_item(group, item, save="na"):
     #     return cfg[group].get(item, save)
@@ -65,9 +69,12 @@ def main():
     #     SOURCE=_get_cfg_item("project", "source"),
     # )
 
-    if (args.rcode is not None) or (args.file is not None):
+    if False: #(args.rcode is not None) or (args.file is not None):
         log.info("Non-interactive mode...")
         cfg["interactive"] = False
+    else:
+        pass
+        #log.info("Interactive mode...")
 
     # query environment or command flags for selection (non-interactive mode)
     args.rcode = os.environ.get("DLSC_REGION", args.rcode)
@@ -77,31 +84,37 @@ def main():
         log.error(f"Wrong resolution: {args.resolution}. Use HR, MR or LR.")
Use HR, MR or LR.") exit(-1) - res = RES[args.resolution] + res = RES[cfg["resolution"]] + bbox = None - if args.bbox: - x1, y1, x2, y2 = [float(x) for x in args.bbox.split(",")] + if ('lat' in cfg) and ('lon' in cfg): + cfg['bbox'] = [float(cfg["lon"]-1.0), float(cfg["lat"]-1.0), + float(cfg["lon"]+1.0), float(cfg["lat"]+1.0)] + log.info("Creating bounding box for coordinates specified.") + + if cfg["bbox"]: + if type( cfg["bbox"]) == list: + x1, y1, x2, y2 = cfg["bbox"] + else: + x1, y1, x2, y2 = [float(x) for x in cfg["bbox"].split(",")] try: bbox = BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2) except ValidationError: - print("Ilegal bounding box coordinates specified.") - print("required: x1,y1,x2,y2 [cond: x1 0: + found = True + delta += 0.1 + if delta > 10: + log.info("No valid data to process for this region/ bbox request.") + exit(1) + + cid = coords2geohash_dec( lat=sel["lat"].values.item(), lon=sel["lon"].values.item()) + site_xml = xmlwriter.write(progressbar=progressbar, status_widget=status_widget, id_selection=[cid]) + else: + site_xml = xmlwriter.write(progressbar=progressbar, status_widget=status_widget) else: + log.info("Incorrect selector.") + exit(1) # WARNING: THIS BRANCH IS DEFUNCT!!! soil.clip_mask_box( minx=min(selector.lons), diff --git a/ldndctools/misc/types.py b/ldndctools/misc/types.py index ced5896..95a00f8 100644 --- a/ldndctools/misc/types.py +++ b/ldndctools/misc/types.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Optional -from pydantic import BaseModel, confloat, conint, root_validator, ValidationError +from pydantic import BaseModel, confloat, conint, model_validator, ValidationError from pydantic.dataclasses import dataclass from pydantic.json import pydantic_encoder @@ -40,17 +40,15 @@ class BoundingBox: y1: confloat(ge=-90, le=90) = -90 y2: confloat(ge=-90, le=90) = 90 - @root_validator(allow_reuse=True) + @model_validator(mode='after') def check_x1_smaller_x2(cls, values): - x1, x2 = values.get("x1"), values.get("x2") - if x1 >= x2: + if values.x1 >= values.x2: raise ValidationError("x1 must be smaller x2") return values - @root_validator(allow_reuse=True) + @model_validator(mode='after') def check_y1_smaller_y2(cls, values): - y1, y2 = values.get("y1"), values.get("y2") - if y1 >= y2: + if values.y1 >= values.y2: raise ValidationError("y1 must be smaller y2") return values @@ -58,7 +56,7 @@ def check_y1_smaller_y2(cls, values): NODATA = -99.99 -# map from isiric-wise fields and units to ldndc +# map from isric-wise fields and units to ldndc # ldndcname, conversion, significant digits nmap = { "TOTC": ("corg", 0.001, 5), @@ -97,7 +95,7 @@ class LayerData(BaseModel): class Config: validate_assignment = True - # @root_validator + # @model_validator # def check_wcmin_smaller_wcmax(cls, values): # wcmin, wcmax = values.get("wcmin"), values.get("wcmax") # if None not in [wcmin, wcmax]: @@ -105,7 +103,7 @@ class Config: # raise ValidationError("wcmin must be smaller wcmax") # return values - @root_validator(allow_reuse=True) + #@model_validator(mode='after') def check_texture_is_plausible(cls, values): sand, silt, clay = values.get("sand"), values.get("silt"), values.get("clay") args = [a for a in [sand, silt, clay] if a is not None] diff --git a/ldndctools/sources/soil/soil_base.py b/ldndctools/sources/soil/soil_base.py index abd112f..c0356c5 100644 --- a/ldndctools/sources/soil/soil_base.py +++ b/ldndctools/sources/soil/soil_base.py @@ -85,10 +85,6 @@ def mask(self) -> Union[xr.DataArray, None]: @property def mask_3d(self) -> xr.DataArray: 
"""return 3d mask to clip soildata""" - lev_max_idx = self.layer_mask.max(skipna=True).astype(int).item() - mask = (self.layer_mask.values >= np.arange(lev_max_idx)[:, None, None]).astype( - int - ) for v in self.original.data_vars: if len(self.original[v].squeeze(drop=True).shape) == 3: @@ -100,6 +96,10 @@ def mask_3d(self) -> xr.DataArray: mask_3d = xr.ones_like( self.original[v].sel(lat=self.layer_mask.lat, lon=self.layer_mask.lon) ) + + lev_max_idx = np.shape(mask_3d)[0] #self.layer_mask.max(skipna=True).astype(int).item() + mask = (self.layer_mask.values >= np.arange(lev_max_idx)[:, None, None]).astype(int) + mask_3d[:] = mask mask_3d = mask_3d.where(mask_3d == 1) return mask_3d diff --git a/requirements.txt b/requirements.txt index 96988a1..d5071bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,10 +2,10 @@ aiohttp >= 3.8.1 boto3 >= 1.20.49 dask[distributed] >= 2022.2.0 geopandas >= 0.10.2 -git+https://github.com/rasterio/rasterio.git +rasterio #cython >= 0.29.22 #cytoolz >= 0.11.2 -git+https://github.com/Toblerity/Fiona.git # required for 3.10 compat +fiona intake >= 0.6.5 intake-geopandas >= 0.4.0 intake-xarray >= 0.6.0