cwerner · deekaey · Apr 18, 2023 · Apr 18, 2023 · Apr 18, 2023 · Apr 18, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -52,6 +52,7 @@ jobs:
           pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=ldndctools tests/ | tee pytest-coverage.txt
 
     - name: Pytest coverage comment
+      continue-on-error: true
       uses: MishaKav/pytest-coverage-comment@main
       if: ${{ matrix.python-version == '3.10' }}
       with:

diff --git a/data/catalog.yml b/data/catalog.yml
@@ -4,6 +4,30 @@ plugins:
     - module: intake_geopandas
 
 sources:
+
+  soil_national:
+    name: 'SOIL national'
+    description: 'Default soil data for ldndctools (site file generation)'
+    driver: netcdf
+    parameters:
+      res:
+        default: 'LR'
+        allowed: ['LR', 'MR', 'HR']
+        description: 'Resolution (LR, MR or HR).'
+        type: str
+    args:
+      urlpath: 'simplecache::s3://ldndcdata/GLOBAL_WISESOIL_S1_{{res}}.nc'
+      storage_options:
+        s3:
+          anon: true
+          default_fill_cache: false
+          client_kwargs:
+            endpoint_url: 'https://s3.imk-ifu.kit.edu:10443'  # literal port: `res` is the only declared parameter
+            verify: false  # NOTE(review): TLS certificate verification is disabled for this endpoint
+        simplecache:
+          cache_storage: '.cache'
+          same_names: true
+
   soil:
     name: 'SOIL'
     description: 'Default soil data for ldndctools (site file generation)'
@@ -21,7 +45,8 @@ sources:
           anon: true
           default_fill_cache: false
           client_kwargs:
-            endpoint_url: 'https://s3.imk-ifu.kit.edu:8082'
+            endpoint_url: 'https://s3.imk-ifu.kit.edu:10443'  # literal port: this patch declares no `port` parameter
+            verify: false  # NOTE(review): TLS certificate verification is disabled for this endpoint
         simplecache:
           cache_storage: '.cache'
           same_names: true
@@ -39,7 +64,7 @@ sources:
         description: 'Resolution (10, 50 or 110m).'
         type: int
     args:
       urlpath: 'simplecache::https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/{{scale}}m/cultural/ne_{{scale}}m_admin_0_countries.zip'
       use_fsspec: True
       storage_options:
         simplecache:
@@ -63,7 +88,7 @@ sources:
           anon: true
           default_fill_cache: false
           client_kwargs:
-            endpoint_url: 'https://s3.imk-ifu.kit.edu:8082'
+            endpoint_url: 'https://s3.imk-ifu.kit.edu:10443'
         simplecache:
           cache_storage: '.cache'
           same_names: true
@@ -73,12 +98,11 @@ sources:
     driver: zarr
     args:
       urlpath: 's3://era5land-zarr/data.zarr'
-      consolidated: True
       storage_options:
-        use_ssl: False
+        use_ssl: True
         anon: True
         client_kwargs:
-          endpoint_url: 'https://s3.imk-ifu.kit.edu:8082'
+          endpoint_url: 'https://s3.imk-ifu.kit.edu:10443'
           verify: False
 
 
@@ -100,7 +124,7 @@ sources:
           anon: true
           default_fill_cache: false
           client_kwargs:
-            endpoint_url: 'https://s3.imk-ifu.kit.edu:8082'
+            endpoint_url: 'https://s3.imk-ifu.kit.edu:10443'
         simplecache:
           cache_storage: '.cache'
           same_names: true
diff --git a/data/ldndctools.conf b/data/ldndctools.conf
@@ -4,8 +4,8 @@
 
 # author info
 info:
-    author: Christian Werner
-    email: christian.werner@kit.edu
+    author: David Kraus
+    email: david.kraus@kit.edu
     institution: IMK-IFU, Karlsruhe Institut of Technology, Garmisch-Partenkirchen, Germany
 
 # project info

diff --git a/examples/examples.conf b/examples/examples.conf
@@ -0,0 +1,26 @@
+# example configuration for `dlsc -c examples.conf`
+#
+#
+
+# author info
+info:
+    author: David Kraus
+    email: david.kraus@kit.edu
+
+# project info
+project:
+    dataset: test
+    version: 0.1
+    source: IMK-IFU, KIT
+
+soil: national
+output: file  # alternative: stream
+outfile: test_out.xml
+bbox: 105,28,106,29  # lon1,lat1,lon2,lat2
+gui: false
+file: null
+interactive: false
+resolution: 'LR'  # alternative: HR
+rcode: null
+storeconfig: false
+verbose: false
diff --git a/examples/readme.txt b/examples/readme.txt
@@ -0,0 +1,2 @@
+Run in your terminal, from the examples/ directory:
+dlsc -c examples.conf
diff --git a/ldndctools/cdgen.py b/ldndctools/cdgen.py
@@ -14,7 +14,7 @@
 import pandas as pd
 import urllib3
 import xarray as xr
-from dask.distributed import Client
+from dask.distributed import LocalCluster, Client
 from pydantic import ValidationError
 
 from ldndctools.misc.geohash import coords2geohash_dec
@@ -50,9 +50,22 @@ def subset_climate_data(
 
     with resources.path("data", "catalog.yml") as cat:
         catalog = intake.open_catalog(str(cat))
-        ds = catalog["climate_era5land_hr"].to_dask()
-
+        # Open the ERA5-Land zarr store directly as a dask-backed dataset.
+        # NOTE(review): duplicates the zarr source settings in data/catalog.yml; keep both in sync.
+        ds = xr.open_zarr(
+            store="s3://era5land-zarr/data.zarr",
+            consolidated=True,
+            storage_options={
+                "anon": True,
+                "client_kwargs": {
+                    "endpoint_url": "https://s3.imk-ifu.kit.edu:10443",
+                    "verify": False
+                },
+                "use_ssl": True,
+            },
+        )
         if mask is not None:
+            # interpolate the mask onto the lat/lon grid of ds
             mask = mask.interp(lat=ds["lat"], lon=ds["lon"])
             ds = ds.where(mask>0)
 
@@ -161,7 +174,7 @@ def inner_func(x, lat, lon):
 def geohash_xr(mask: xr.DataArray) -> xr.DataArray:
     lon_xr = mask.lon.broadcast_like(mask)
     lat_xr = mask.lat.broadcast_like(mask)
-    data = xr.apply_ufunc(inner_func, mask, lat_xr, lon_xr, output_dtypes=[np.int64])
+    data = xr.apply_ufunc(inner_func, mask, lat_xr, lon_xr, output_dtypes=[np.int64], dask="allowed")
     assert data.dtype == np.int64
     return data
 
@@ -249,7 +262,14 @@ def conf():
 
 
 def main():
-    client = Client(dashboard_address=":1234")
+
+    # Create a LocalCluster with 2 workers and set the dashboard address
+    cluster = LocalCluster(n_workers=2, memory_limit="2GB", dashboard_address=":1234")
+
+    # Connect a Client to the LocalCluster
+    client = Client(cluster)
+
+    #client = Client(dashboard_address=":1234")
 
     print(f"NOTE: You can see progress at {platform.node()}:1234 if bokeh is installed")
 
@@ -258,6 +278,7 @@ def main():
     bbox = get_boundingbox(args.bbox)
     mask = get_mask(args.mask)
 
+    print("subset climate")
     # mask = xr.open_dataset("VN_MISC5_V2.nc")["rice_rot"]
     # mask = xr.where(mask > 0, 1, np.nan)
     ds = subset_climate_data(
@@ -268,6 +289,7 @@ def main():
         date_max=args.date_max,
     )
 
+    print("group")
     tavg_year = ds.tavg.groupby("time.year").mean(dim="time").mean(dim="year")
     tamp_year = ds.tavg.groupby("time.year").apply(amplitude).mean(dim="year")
     prec_year = ds.prec.groupby("time.year").sum(dim="time").mean(dim="year")
@@ -307,13 +329,15 @@ def main():
     # match coords (usually means take lat/ lon from ref dataset)
     ds = ds.assign_coords({"lat": stats.lat, "lon": stats.lon})
 
+    print("df stats")
     df_stats = (
         stats.to_dataframe()
         .dropna(subset=["tavg", "tamp", "wind"], how="all")
         .reset_index()
     )
     # ignore prec for now
 
+    print("lookup")
     lookup: Dict[int, ClimateSiteStats] = {}
     for _, row in df_stats.iterrows():
         lookup[int(row["geohash"])] = ClimateSiteStats(

diff --git a/ldndctools/cli/cli.py b/ldndctools/cli/cli.py
@@ -28,7 +28,7 @@ def __call__(self, parser, namespace, values=None, option_string=None):
         handlers = logging.getLogger().handlers
         for handler in handlers:
             if type(handler) is logging.StreamHandler:
-                handler.setLevel(logging.DEBUG)
+                handler.setLevel(logging.INFO)
         setattr(namespace, self.dest, True)
 
 

diff --git a/ldndctools/cli/selector.py b/ldndctools/cli/selector.py
@@ -220,14 +220,18 @@ def ask(self):
 
 
 class CoordinateSelection:
-    def __init__(self, infile, lon_col="lon", lat_col="lat", id_col="ID"):
-        df = pd.read_csv(infile, delim_whitespace=True)
-
-        self.lons = df[lon_col].values
-        self.lats = df[lat_col].values
-        self.ids = (
-            df[id_col].values if id_col in list(df.columns) else range(len(self.lats))
-        )
+
+    def __init__(self, infile=None, lon="lon", lat="lat", cid="ID"):
+        if infile:
+            df = pd.read_csv(infile, delim_whitespace=True)
+
+            self.lons = df[lon].values
+            self.lats = df[lat].values
+            self.ids  = df[cid].values if cid in list(df.columns) else range(len(self.lats))
+        else:
+            self.lons = [lon]
+            self.lats = [lat]
+            self.ids  = [cid]
 
     @property
     def selected(self):