From 539a742f9e8cc4f8867eb1c3126b71ab5a7c3faa Mon Sep 17 00:00:00 2001
From: dvezinet
Date: Wed, 13 Nov 2024 23:09:23 +0000
Subject: [PATCH 01/25] [#159] Moving Bins to datastock

---
 datastock/_class03_Bins.py                  | 151 ++++++++
 datastock/_class03_binning.py               | 359 ++++++++++++++++++++
 datastock/_class03_checks.py                | 220 ++++++++++++
 datastock/{_class3.py => _class04_plots.py} |   0
 datastock/_class1_compute.py                |   2 +-
 5 files changed, 731 insertions(+), 1 deletion(-)
 create mode 100644 datastock/_class03_Bins.py
 create mode 100644 datastock/_class03_binning.py
 create mode 100644 datastock/_class03_checks.py
 rename datastock/{_class3.py => _class04_plots.py} (100%)

diff --git a/datastock/_class03_Bins.py b/datastock/_class03_Bins.py
new file mode 100644
index 0000000..aad4d2f
--- /dev/null
+++ b/datastock/_class03_Bins.py
@@ -0,0 +1,151 @@
+# -*- coding: utf-8 -*-
+
+
+# Built-in
+import copy
+
+
+# Common
+import numpy as np
+
+
+# local
+from ._class02_BSplines2D import BSplines2D as Previous
+from . import _class03_checks as _checks
+from . import _class03_binning as _binning
+
+
+__all__ = ['Bins']
+
+
+# #############################################################################
+# #############################################################################
+#
+# #############################################################################
+
+
+class Bins(Previous):
+
+    _which_bins = 'bins'
+    _ddef = copy.deepcopy(Previous._ddef)
+    _dshow = dict(Previous._dshow)
+
+    _dshow.update({
+        _which_bins: [
+            'nd',
+            'cents',
+            'shape',
+            'ref',
+        ],
+    })
+
+    # -----------------
+    # bins
+    # ------------------
+
+    def add_bins(
+        self,
+        key=None,
+        edges=None,
+        # custom names
+        key_ref=None,
+        key_cents=None,
+        key_res=None,
+        # attributes
+        **kwdargs,
+    ):
+        """ Add a set of bins, defined by their edges """
+
+        # --------------
+        # check inputs
+
+        key, dref, ddata, dobj = _checks.check(
+            coll=self,
+            key=key,
+            edges=edges,
+            # custom names
+            key_cents=key_cents,
+            key_ref=key_ref,
+            # attributes
+            **kwdargs,
+        )
+
+        # --------------
+        # update dict and crop if relevant
+
+        self.update(dobj=dobj, ddata=ddata, dref=dref)
+
+    def remove_bins(
+        self,
+        key=None,
+        propagate=None,
+    ):
+
+        _checks.remove_bins(
+            coll=self,
+            key=key,
+            propagate=propagate,
+        )
+
+    # -----------------
+    # binning tools
+    # ------------------
+
+    def binning(
+        self,
+        data=None,
+        data_units=None,
+        axis=None,
+        # binning
+        bins0=None,
+        bins1=None,
+        bin_data0=None,
+        bin_data1=None,
+        bin_units0=None,
+        # kind of binning
+        integrate=None,
+        statistic=None,
+        # options
+        safety_ratio=None,
+        dref_vector=None,
+        verb=None,
+        returnas=None,
+        # storing
+        store=None,
+        store_keys=None,
+    ):
+        """ Bin data along ref_key
+
+        Binning is treated here as an integral.
+        Hence, if:
+            - the data has units [ph/eV]
+            - the ref_key has units [eV]
+        then the binned data has units [ph].
+
+        Returns a dict with data and units per key.
+
+        """
+
+        return _binning.binning(
+            coll=self,
+            data=data,
+            data_units=data_units,
+            axis=axis,
+            # binning
+            bins0=bins0,
+            bins1=bins1,
+            bin_data0=bin_data0,
+            bin_data1=bin_data1,
+            bin_units0=bin_units0,
+            # kind of binning
+            integrate=integrate,
+            statistic=statistic,
+            # options
+            safety_ratio=safety_ratio,
+            dref_vector=dref_vector,
+            verb=verb,
+            returnas=returnas,
+            # storing
+            store=store,
+            store_keys=store_keys,
+        )
\ No newline at end of file
diff --git a/datastock/_class03_binning.py b/datastock/_class03_binning.py
new file mode 100644
index 0000000..20256e4
--- /dev/null
+++ b/datastock/_class03_binning.py
@@ -0,0 +1,359 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jan 5 20:14:40 2023
+
+@author: dvezinet
+"""
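+
+# A minimal usage sketch (with hypothetical keys 'emis' and 'lamb'), assuming
+# a Collection instance `coll` exposing the binning() wrapper defined in
+# _class03_Bins.py:
+#
+#     dout = coll.binning(
+#         data='emis',                    # key of the data to bin
+#         bin_data0='lamb',               # key of the quantity to bin along
+#         bins0=np.linspace(3.9e-10, 4e-10, 50),    # bin edges
+#         integrate=True,                 # treat binning as an integral
+#         returnas=True,
+#     )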
+
+
+import warnings
+
+
+import numpy as np
+import datastock as ds
+
+
+# ############################################################
+# ############################################################
+# binning (with optional interpolation)
+# ############################################################
+
+
+def binning(
+    coll=None,
+    data=None,
+    data_units=None,
+    axis=None,
+    # binning
+    bins0=None,
+    bins1=None,
+    bin_data0=None,
+    bin_data1=None,
+    bin_units0=None,
+    # kind of binning
+    integrate=None,
+    statistic=None,
+    # options
+    safety_ratio=None,
+    dref_vector=None,
+    ref_vector_strategy=None,
+    verb=None,
+    returnas=None,
+    # storing
+    store=None,
+    store_keys=None,
+):
+    """ Return the binned data, as a dict with data and units per key
+
+    If bin_data0 refers to a 1d bspline quantity, the data is first
+    interpolated on an oversampled grid, then binned (integrated).
+    Otherwise the binning is delegated directly to the generic routine.
+
+    """
+
+    # ----------
+    # checks
+
+    # keys
+    isbs, bin_data0 = _check_bs(
+        coll=coll,
+        bin_data0=bin_data0,
+        bin_data1=bin_data1,
+    )
+
+    # ----------
+    # trivial
+
+    nobin = False
+    if isbs:
+
+        # add ref and data
+        kr, kd, ddatan, nobin = _interpolate(
+            coll=coll,
+            data=data,
+            data_units=data_units,
+            # binning
+            bins0=bins0,
+            bin_data0=bin_data0,
+            # options
+            dref_vector=dref_vector,
+            verb=verb,
+            store=store,
+            store_keys=store_keys,
+        )
+
+        # safety check
+        if nobin is False:
+            lk = list(ddatan.keys())
+            data = [ddatan[k0]['data'] for k0 in lk]
+            bin_data0 = [ddatan[k0]['bin_data'] for k0 in lk]
+
+    # --------------------
+    # do the actual binning
+
+    if nobin is False:
+        dout = ds._class1_binning.binning(
+            coll=coll,
+            data=data,
+            data_units=data_units,
+            axis=axis,
+            # binning
+            bins0=bins0,
+            bins1=bins1,
+            bin_data0=bin_data0,
+            bin_data1=bin_data1,
+            bin_units0=bin_units0,
+            # kind of binning
+            integrate=integrate,
+            statistic=statistic,
+            # options
+            safety_ratio=safety_ratio,
+            dref_vector=dref_vector,
+            ref_vector_strategy=ref_vector_strategy,
+            verb=verb,
+            returnas=True,
+            # storing
+            store=store,
+            store_keys=store_keys,
+        )
+
+        # --------------------------------
+        # remove intermediate ref and data
+
+        if isbs is True:
+            for dd in data + bin_data0 + [kd]:
+                if dd in coll.ddata.keys():
+                    coll.remove_data(dd)
+            if kr in coll.dref.keys():
+                coll.remove_ref(kr)
+
+            for k0 in data:
+                k1 = [k1 for k1, v1 in ddatan.items() if v1['data'] == k0][0]
+                dout[k1] = dict(dout[k0])
+                del dout[k0]
+    else:
+        dout = nobin
+
+    # ----------
+    # return
+
+    if returnas is True:
+        return dout
+
+
+# ######################################################
+# ######################################################
+# check
+# ######################################################
+
+
+def _check_bs(
+    coll=None,
+    bin_data0=None,
+    bin_data1=None,
+):
+
+    wbs = coll._which_bsplines
+    lok_bs = [
+        k0 for k0, v0 in coll.dobj.get(wbs, {}).items()
+        if len(v0['ref']) == 1
+    ]
+    lok_dbs = [
+        k0 for k0, v0 in coll.ddata.items()
+        if v0.get(wbs) is not None
+        and len(v0[wbs]) == 1
+        and v0[wbs][0] in coll.dobj.get(wbs, {}).keys()
+        and len(coll.dobj[wbs][v0[wbs][0]]['ref']) == 1
+    ]
+
+    c0 = (
+        isinstance(bin_data0, str)
+        and bin_data1 is None
+        and bin_data0 in lok_dbs + lok_bs
+    )
+
+    if bin_data0 in lok_bs:
+        bin_data0 = coll.dobj[wbs][bin_data0]['apex'][0]
+
+    return c0, bin_data0
+
+
+# ######################################################
+# ######################################################
+# interpolate
+# ######################################################
+
+
+def _interpolate(
+    coll=None,
+    data=None,
+    data_units=None,
+    # binning
+    bins0=None,
+    bin_data0=None,
+    # options
+    dref_vector=None,
+    verb=None,
+    store=None,
+    store_keys=None,
+):
+
+    # ---------
+    # sampling
+
+    # mesh knots
+    wm = coll._which_mesh
+    wbs = coll._which_bsplines
+    key_bs = coll.ddata[bin_data0][wbs][0]
+    keym = coll.dobj[wbs][key_bs][wm]
+    kknots = coll.dobj[wm][keym]['knots'][0]
+
+    # resolution
+    vect = coll.ddata[kknots]['data']
+    res0 = np.abs(np.min(np.diff(vect)))
+
+    # ---------
+    # data
+
+    ddata = ds._class1_binning._check_data(
+        coll=coll,
+        data=data,
+        data_units=data_units,
+        store=True,
+    )
+    lkdata = list(ddata.keys())
+
+    # --------------------
+    # bins
+
+    dbins0 = ds._class1_binning._check_bins(
+        coll=coll,
+        lkdata=lkdata,
+        bins=bins0,
+        dref_vector=dref_vector,
+        store=store,
+    )
+
+    # ----------------------
+    # npts for interpolation
+
+    dv = np.abs(np.diff(vect))
+    dvmean = np.mean(dv) + np.std(dv)
+    db = np.mean(np.diff(dbins0[lkdata[0]]['edges']))
+    npts = (coll.dobj[wbs][key_bs]['deg'] + 3) * max(1, dvmean / db) + 3
+
+    # sample mesh, update dv
+    Dx0 = [dbins0[lkdata[0]]['edges'][0], dbins0[lkdata[0]]['edges'][-1]]
+    xx = coll.get_sample_mesh(
+        keym,
+        res=res0 / npts,
+        mode='abs',
+        Dx0=Dx0,
+    )['x0']['data']
+
+    if xx.size == 0:
+        nobins = _get_nobins(
+            coll=coll,
+            key_bs=key_bs,
+            ddata=ddata,
+            dbins0=dbins0,
+            store=store,
+            store_keys=store_keys,
+        )
+        return None, None, None, nobins
+
+    # -------------------
+    # add ref
+
+    kr = "ntemp"
+    kd = "xxtemp"
+
+    coll.add_ref(kr, size=xx.size)
+    coll.add_data(kd, data=xx, ref=kr, units=coll.ddata[kknots]['units'])
+
+    ddata_new = {}
+    for ii, (k0, v0) in enumerate(ddata.items()):
+
+        # interpolate bin_data
+        kbdn = f"kbdn{ii}_temp"
+        # try:
+        coll.interpolate(
+            keys=bin_data0,
+            ref_key=key_bs,
+            x0=kd,
+            val_out=0.,
+            returnas=False,
+            store=True,
+            inplace=True,
+            store_keys=kbdn,
+        )
+
+        # except Exception as err:
+        #     msg = (
+        #         err.args[0]
+        #         + "\n\n"
+        #         f"\t- k0 = {k0}\n"
+        #         f"\t- ii = {ii}\n"
+        #         f"\t- bin_data0 = {bin_data0}\n"
+        #         f"\t- key_bs = {key_bs}\n"
+        #         f"\t- kd = {kd}\n"
+        #         f"\t- xx.size: {xx.size}\n"
+        #         f"\t- kbdn = {kbdn}\n"
+        #     )
+        #     err.args = (msg,)
+        #     raise err
+
+        # interpolate_data
+        kdn = f"kbd{ii}_temp"
+        coll.interpolate(
+            keys=k0,
+            ref_key=key_bs,
+            x0=kd,
+            val_out=0.,
+            returnas=False,
+            store=True,
+            inplace=True,
+            store_keys=kdn,
+        )
+        ddata_new[k0] = {'bin_data': kbdn, 'data': kdn}
+
+    return kr, kd, ddata_new, False
+
+
+def _get_nobins(
+    coll=None,
+    key_bs=None,
+    ddata=None,
+    dbins0=None,
+    store=None,
+    store_keys=None,
+):
+
+    lk = list(ddata.keys())
+    wbs = coll._which_bsplines
+
+    if isinstance(store_keys, str):
+        store_keys = [store_keys]
+
+    dout = {}
+    for ii, k0 in enumerate(lk):
+
+        axis = ddata[k0]['ref'].index(coll.dobj[wbs][key_bs]['ref'][0])
+
+        shape = list(ddata[k0]['data'].shape)
+        nb = dbins0[k0]['edges'].size - 1
+        shape[axis] = nb
+
+        ref = list(ddata[k0]['ref'])
+        ref[axis] = dbins0[k0]['bin_ref'][0]
+
+        dout[store_keys[ii]] = {
+            'data': np.zeros(shape, dtype=float),
+            'ref': tuple(ref),
+            'units': ddata[k0]['units'],
+        }
+
+    if store is True:
+        for k0, v0 in dout.items():
+            coll.add_data(key=k0, **v0)
+
+    return dout
\ No newline at end of file
diff --git a/datastock/_class03_checks.py b/datastock/_class03_checks.py
new file mode 100644
index 0000000..4e38fe8
--- /dev/null
+++ b/datastock/_class03_checks.py
@@ -0,0 +1,220 @@
+# -*- coding: utf-8 -*-
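+# A minimal usage sketch (with a hypothetical key 'b0') of the bin-creation
+# check implemented below, assuming a Collection instance `coll` exposing the
+# add_bins() method defined in _class03_Bins.py:
+#
+#     import numpy as np
+#     coll.add_bins(key='b0', edges=np.linspace(0., 1., 11))
+#     # => dref:  'b0_nb' (size 10, i.e. edges.size - 1)
+#     # => ddata: 'b0_c'  (bin centers, 0.5*(edges[:-1] + edges[1:]))
+#     # => dobj['bins']['b0'] ('nd', 'edges', 'cents', 'ref', 'shape')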
+ + +# Common +import numpy as np +import datastock as ds + + +# ############################################################################# +# ############################################################################# +# bins generic check +# ############################################################################# + + +def check( + coll=None, + key=None, + edges=None, + # custom names + key_cents=None, + key_ref=None, + # additional attributes + **kwdargs, +): + + # -------- + # keys + + # key + key = ds._generic_check._obj_key( + d0=coll._dobj.get(coll._which_bins, {}), + short='b', + key=key, + ) + + # ------------ + # edges + + edges = ds._generic_check._check_flat1darray( + edges, 'edges', + dtype=float, + unique=True, + can_be_None=False, + ) + + nb = edges.size - 1 + cents = 0.5*(edges[:-1] + edges[1:]) + + # -------------------- + # safety check on keys + + # key_ref + defk = f"{key}_nb" + lout = [k0 for k0, v0 in coll.dref.items() if v0['size'] != nb] + key_ref = ds._generic_check._check_var( + key_ref, 'key_ref', + types=str, + default=defk, + excluded=lout, + ) + + # key_cents + defk = f"{key}_c" + lout = [ + k0 for k0, v0 in coll.ddata.items() + if not ( + v0['shape'] == (nb,) + and key_ref in coll.dref.keys() + and v0['ref'] == (key_ref,) + and v0['monot'] == (True,) + ) + ] + key_cents = ds._generic_check._check_var( + key_cents, 'key_cents', + types=str, + default=defk, + excluded=lout, + ) + + # -------------- + # to dict + + dref, ddata, dobj = _to_dict( + coll=coll, + key=key, + edges=edges, + nb=nb, + cents=cents, + # custom names + key_cents=key_cents, + key_ref=key_ref, + # attributes + **kwdargs, + ) + + return key, dref, ddata, dobj + + +# ############################################################## +# ############################################################### +# to_dict +# ############################################################### + + +def _to_dict( + coll=None, + key=None, + edges=None, + nb=None, + cents=None, + # custom names + key_cents=None, + key_ref=None, + # additional attributes + **kwdargs, +): + + # attributes + latt = ['dim', 'quant', 'name', 'units'] + dim, quant, name, units = [kwdargs.get(ss) for ss in latt] + + # ------------- + # prepare dict + + # dref + if key_ref not in coll.dref.keys(): + dref = { + key_ref: { + 'size': nb, + }, + } + else: + dref = None + + # ddata + if key_cents not in coll.ddata.keys(): + ddata = { + key_cents: { + 'data': cents, + 'units': units, + # 'source': None, + 'dim': dim, + 'quant': quant, + 'name': name, + 'ref': key_ref, + }, + } + else: + ddata = None + + # dobj + dobj = { + coll._which_bins: { + key: { + 'nd': '1d', + 'edges': edges, + 'cents': (key_cents,), + 'ref': (key_ref,), + 'shape': (nb,), + }, + }, + } + + # additional attributes + for k0, v0 in kwdargs.items(): + if k0 not in latt: + dobj[coll._which_bins][key][k0] = v0 + + return dref, ddata, dobj + + +# ############################################################## +# ############################################################### +# remove bins +# ############################################################### + + +def remove_bins(coll=None, key=None, propagate=None): + + # ---------- + # check + + # key + wbins = coll._which_bins + if wbins not in coll.dobj.keys(): + return + + if isinstance(key, str): + key = [key] + key = ds._generic_check._check_var_iter( + key, 'key', + types=(list, tuple), + types_iter=str, + allowed=coll.dobj.get(wbins, {}).keys(), + ) + + # propagate + propagate = ds._generic_check._check_var( + 
propagate, 'propagate', + types=bool, + default=True, + ) + + # --------- + # remove + + for k0 in key: + + # specific data + kdata = list(coll.dobj[wbins][k0]['cents']) + coll.remove_data(kdata, propagate=propagate) + + # specific ref + lref = list(coll.dobj[wbins][k0]['ref']) + for rr in lref: + if rr in coll.dref.keys(): + coll.remove_ref(rr, propagate=propagate) + + # obj + coll.remove_obj(which=wbins, key=k0, propagate=propagate) diff --git a/datastock/_class3.py b/datastock/_class04_plots.py similarity index 100% rename from datastock/_class3.py rename to datastock/_class04_plots.py diff --git a/datastock/_class1_compute.py b/datastock/_class1_compute.py index a137eea..a204115 100644 --- a/datastock/_class1_compute.py +++ b/datastock/_class1_compute.py @@ -1233,4 +1233,4 @@ def _extract_select( # lkey=[idq2dR], # return_all=True, # ) - # return out \ No newline at end of file + # return out From fd94263ffa37f3d28fe31283e82da04006a4004a Mon Sep 17 00:00:00 2001 From: dvezinet Date: Wed, 13 Nov 2024 23:16:22 +0000 Subject: [PATCH 02/25] [#159] Renaming ongoing --- datastock/_class03_Bins.py | 4 ++-- ....py => _class04_plot_BvsA_as_distribution.py} | 0 ...lot_as_array.py => _class04_plot_as_array.py} | 0 ...lines.py => _class04_plot_as_mobile_lines.py} | 0 ...rofile1d.py => _class04_plot_as_profile1d.py} | 0 ...elations.py => _class04_plot_correlations.py} | 0 datastock/_class04_plots.py | 16 ++++++++-------- 7 files changed, 10 insertions(+), 10 deletions(-) rename datastock/{_plot_BvsA_as_distribution.py => _class04_plot_BvsA_as_distribution.py} (100%) rename datastock/{_plot_as_array.py => _class04_plot_as_array.py} (100%) rename datastock/{_plot_as_mobile_lines.py => _class04_plot_as_mobile_lines.py} (100%) rename datastock/{_plot_as_profile1d.py => _class04_plot_as_profile1d.py} (100%) rename datastock/{_plot_correlations.py => _class04_plot_correlations.py} (100%) diff --git a/datastock/_class03_Bins.py b/datastock/_class03_Bins.py index aad4d2f..b119376 100644 --- a/datastock/_class03_Bins.py +++ b/datastock/_class03_Bins.py @@ -10,7 +10,7 @@ # local -from ._class02_BSplines2D import BSplines2D as Previous +from ._class2 import DataStock2 as Previous from . import _class03_checks as _checks from . 
import _class03_binning as _binning @@ -148,4 +148,4 @@ def binning( # storing store=store, store_keys=store_keys, - ) \ No newline at end of file + ) diff --git a/datastock/_plot_BvsA_as_distribution.py b/datastock/_class04_plot_BvsA_as_distribution.py similarity index 100% rename from datastock/_plot_BvsA_as_distribution.py rename to datastock/_class04_plot_BvsA_as_distribution.py diff --git a/datastock/_plot_as_array.py b/datastock/_class04_plot_as_array.py similarity index 100% rename from datastock/_plot_as_array.py rename to datastock/_class04_plot_as_array.py diff --git a/datastock/_plot_as_mobile_lines.py b/datastock/_class04_plot_as_mobile_lines.py similarity index 100% rename from datastock/_plot_as_mobile_lines.py rename to datastock/_class04_plot_as_mobile_lines.py diff --git a/datastock/_plot_as_profile1d.py b/datastock/_class04_plot_as_profile1d.py similarity index 100% rename from datastock/_plot_as_profile1d.py rename to datastock/_class04_plot_as_profile1d.py diff --git a/datastock/_plot_correlations.py b/datastock/_class04_plot_correlations.py similarity index 100% rename from datastock/_plot_correlations.py rename to datastock/_class04_plot_correlations.py diff --git a/datastock/_class04_plots.py b/datastock/_class04_plots.py index 34fe7f2..4511ec7 100644 --- a/datastock/_class04_plots.py +++ b/datastock/_class04_plots.py @@ -1,14 +1,14 @@ -from ._class2 import * -from . import _plot_as_array -from . import _plot_as_profile1d -from . import _plot_as_mobile_lines -from . import _plot_correlations -from . import _plot_BvsA_as_distribution +from ._class03_Bins import Bins as Previous +from . import _class04_plot_as_array as _plot_as_array +from . import _class04_plot_as_profile1d as _plot_as_profile1d +from . import _class04_plot_as_mobile_lines as _plot_as_mobile_lines +from . import _class04_plot_correlations as _plot_correlations +from . 
import _class04_plot_BvsA_as_distribution as _plot_BvsA_as_distribution -class DataStock3(DataStock2): +class Plots(Previous): """ Provide default interactive plots """ # ------------------- @@ -424,4 +424,4 @@ def plot_BvsA_as_distribution( __all__ = [ sorted([k0 for k0 in locals() if k0.startswith('DataStock')])[-1] -] \ No newline at end of file +] From 115cefe3b7caea5797159bfecb843c496fcc5dca Mon Sep 17 00:00:00 2001 From: dvezinet Date: Wed, 13 Nov 2024 23:17:20 +0000 Subject: [PATCH 03/25] [#159] Renaming again --- ...ution_check.py => _class04_plot_BvsA_as_distribution_check.py} | 0 datastock/{_plot_as_array_1d.py => _class04_plot_as_array_1d.py} | 0 .../{_plot_as_array_234d.py => _class04_plot_as_array_234d.py} | 0 datastock/{_plot_text.py => _class04_plot_text.py} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename datastock/{_plot_BvsA_as_distribution_check.py => _class04_plot_BvsA_as_distribution_check.py} (100%) rename datastock/{_plot_as_array_1d.py => _class04_plot_as_array_1d.py} (100%) rename datastock/{_plot_as_array_234d.py => _class04_plot_as_array_234d.py} (100%) rename datastock/{_plot_text.py => _class04_plot_text.py} (100%) diff --git a/datastock/_plot_BvsA_as_distribution_check.py b/datastock/_class04_plot_BvsA_as_distribution_check.py similarity index 100% rename from datastock/_plot_BvsA_as_distribution_check.py rename to datastock/_class04_plot_BvsA_as_distribution_check.py diff --git a/datastock/_plot_as_array_1d.py b/datastock/_class04_plot_as_array_1d.py similarity index 100% rename from datastock/_plot_as_array_1d.py rename to datastock/_class04_plot_as_array_1d.py diff --git a/datastock/_plot_as_array_234d.py b/datastock/_class04_plot_as_array_234d.py similarity index 100% rename from datastock/_plot_as_array_234d.py rename to datastock/_class04_plot_as_array_234d.py diff --git a/datastock/_plot_text.py b/datastock/_class04_plot_text.py similarity index 100% rename from datastock/_plot_text.py rename to datastock/_class04_plot_text.py From b54517034def071e464b7c64d530ad9bedce4baa Mon Sep 17 00:00:00 2001 From: dvezinet Date: Wed, 13 Nov 2024 23:45:57 +0000 Subject: [PATCH 04/25] [#159] Renaming ongoing --- datastock/{_class1.py => _class01.py} | 30 +++++++++---------- ..._class1_binning.py => _class01_binning.py} | 0 .../{_class1_check.py => _class01_check.py} | 0 ..._class1_compute.py => _class01_compute.py} | 0 .../{_class1_domain.py => _class01_domain.py} | 0 ...interpolate.py => _class01_interpolate.py} | 0 .../{_class1_show.py => _class01_show.py} | 0 ...1_uniformize.py => _class01_uniformize.py} | 0 datastock/_class03_Bins.py | 2 +- .../{_class04_plots.py => _class04_Plots.py} | 0 .../_class04_plot_BvsA_as_distribution.py | 8 ++--- datastock/_class04_plot_as_array.py | 8 ++--- datastock/_class04_plot_as_array_1d.py | 4 +-- datastock/_class04_plot_as_array_234d.py | 6 ++-- datastock/_class04_plot_as_mobile_lines.py | 14 ++++----- datastock/_class04_plot_as_profile1d.py | 12 ++++---- datastock/_class04_plot_correlations.py | 6 ++-- 17 files changed, 45 insertions(+), 45 deletions(-) rename datastock/{_class1.py => _class01.py} (97%) rename datastock/{_class1_binning.py => _class01_binning.py} (100%) rename datastock/{_class1_check.py => _class01_check.py} (100%) rename datastock/{_class1_compute.py => _class01_compute.py} (100%) rename datastock/{_class1_domain.py => _class01_domain.py} (100%) rename datastock/{_class1_interpolate.py => _class01_interpolate.py} (100%) rename datastock/{_class1_show.py => _class01_show.py} (100%) rename 
datastock/{_class1_uniformize.py => _class01_uniformize.py} (100%) rename datastock/{_class04_plots.py => _class04_Plots.py} (100%) diff --git a/datastock/_class1.py b/datastock/_class01.py similarity index 97% rename from datastock/_class1.py rename to datastock/_class01.py index 27e2304..0b94700 100644 --- a/datastock/_class1.py +++ b/datastock/_class01.py @@ -16,11 +16,11 @@ from . import _class1_check from . import _class1_show from ._class0 import * -from . import _class1_compute -from . import _class1_domain -from . import _class1_binning -from . import _class1_interpolate -from . import _class1_uniformize +from . import _class01_compute as _compute +from . import _class01_domain as _domain +from . import _class01_binning as _binning +from . import _class01_interpolate as _interpolate +from . import _class01_uniformize as _uniformize from . import _export_dataframe from . import _find_plateau @@ -427,7 +427,7 @@ def propagate_indices_per_ref( - 'index': set matching indices (default) - param: set matching monotonous quantities depending on ref """ - _class1_compute.propagate_indices_per_ref( + _compute.propagate_indices_per_ref( ref=ref, lref=lref, ldata=ldata, @@ -470,7 +470,7 @@ def extract( """ - return _class1_compute._extract_instance( + return _compute._extract_instance( self, keys=keys, # optional includes @@ -657,7 +657,7 @@ def get_ref_vector( """ - return _class1_uniformize.get_ref_vector( + return _uniformize.get_ref_vector( # ressources ddata=self._ddata, dref=self._dref, @@ -712,7 +712,7 @@ def get_ref_vector_common( """ - return _class1_uniformize.get_ref_vector_common( + return _uniformize.get_ref_vector_common( # ressources ddata=self._ddata, dref=self._dref, @@ -750,7 +750,7 @@ def uniformize( returnas=None, ): - return _class1_uniformize.uniformize( + return _uniformize.uniformize( coll=self, keys=keys, refs=refs, @@ -770,7 +770,7 @@ def get_domain_ref( """ Return a dict of index of valid steps based on desired domain """ - return _class1_domain.domain_ref(coll=self, domain=domain) + return _domain.domain_ref(coll=self, domain=domain) # --------------------- # Binning @@ -841,7 +841,7 @@ def binning( """ - return _class1_binning.binning( + return _binning.binning( coll=self, data=data, data_units=data_units, @@ -897,7 +897,7 @@ def interpolate( """ Interpolate keys in desired dimension """ - return _class1_interpolate.interpolate( + return _interpolate.interpolate( coll=self, # interpolation base keys=keys, @@ -935,7 +935,7 @@ def compute_correlations( verb=None, returnas=None, ): - return _class1_compute.correlations( + return _compute.correlations( data=data, ref=ref, correlations=correlations, @@ -1052,4 +1052,4 @@ def show_links(self): __all__ = [ sorted([k0 for k0 in locals() if k0.startswith('DataStock')])[-1] -] \ No newline at end of file +] diff --git a/datastock/_class1_binning.py b/datastock/_class01_binning.py similarity index 100% rename from datastock/_class1_binning.py rename to datastock/_class01_binning.py diff --git a/datastock/_class1_check.py b/datastock/_class01_check.py similarity index 100% rename from datastock/_class1_check.py rename to datastock/_class01_check.py diff --git a/datastock/_class1_compute.py b/datastock/_class01_compute.py similarity index 100% rename from datastock/_class1_compute.py rename to datastock/_class01_compute.py diff --git a/datastock/_class1_domain.py b/datastock/_class01_domain.py similarity index 100% rename from datastock/_class1_domain.py rename to datastock/_class01_domain.py diff --git 
a/datastock/_class1_interpolate.py b/datastock/_class01_interpolate.py similarity index 100% rename from datastock/_class1_interpolate.py rename to datastock/_class01_interpolate.py diff --git a/datastock/_class1_show.py b/datastock/_class01_show.py similarity index 100% rename from datastock/_class1_show.py rename to datastock/_class01_show.py diff --git a/datastock/_class1_uniformize.py b/datastock/_class01_uniformize.py similarity index 100% rename from datastock/_class1_uniformize.py rename to datastock/_class01_uniformize.py diff --git a/datastock/_class03_Bins.py b/datastock/_class03_Bins.py index b119376..b1c0ab1 100644 --- a/datastock/_class03_Bins.py +++ b/datastock/_class03_Bins.py @@ -10,7 +10,7 @@ # local -from ._class2 import DataStock2 as Previous +from ._class02 import DataStock2 as Previous from . import _class03_checks as _checks from . import _class03_binning as _binning diff --git a/datastock/_class04_plots.py b/datastock/_class04_Plots.py similarity index 100% rename from datastock/_class04_plots.py rename to datastock/_class04_Plots.py diff --git a/datastock/_class04_plot_BvsA_as_distribution.py b/datastock/_class04_plot_BvsA_as_distribution.py index 89a0723..1a37e02 100644 --- a/datastock/_class04_plot_BvsA_as_distribution.py +++ b/datastock/_class04_plot_BvsA_as_distribution.py @@ -16,9 +16,9 @@ # library-specific from . import _generic_check -from . import _plot_BvsA_as_distribution_check -from . import _plot_text -from . import _class1_compute +from . import _class04_plot_BvsA_as_distribution_check as _plot_BvsA_as_distribution_check +from . import _class04_plot_text as _plot_text +from . import _class01_compute __all__ = ['plot_BvsA_as_distribution'] @@ -187,7 +187,7 @@ def plot_BvsA_as_distribution( if ndim == 1: sli = lambda ind: ind else: - sli = _class1_compute._get_slice(laxis=[1-axis], ndim=2) + sli = _class01_compute._get_slice(laxis=[1-axis], ndim=2) # -------------- # Prepare data diff --git a/datastock/_class04_plot_as_array.py b/datastock/_class04_plot_as_array.py index 4b6c33f..7b86427 100644 --- a/datastock/_class04_plot_as_array.py +++ b/datastock/_class04_plot_as_array.py @@ -8,10 +8,10 @@ # library-specific from . import _generic_check -from . import _class1_compute +from . import _class01_compute from . import _generic_utils_plot as _uplot -from . import _plot_as_array_1d -from . import _plot_as_array_234d +from . import _class04_plot_as_array_1d as _plot_as_array_1d +from . import _class04_plot_as_array_234d as _plot_as_array_234d __all__ = ['plot_as_array'] @@ -869,7 +869,7 @@ def get_data_str(dk=None, coll2=None, key=None, ndim=None, dscale=None): dk[k1]['axis'] for k1 in lorder if k1 != k0 and dk[k1]['key'] is not None ] - dk[k0]['sli'] = _class1_compute._get_slice( + dk[k0]['sli'] = _class01_compute._get_slice( laxis=laxis, ndim=ndim, ) diff --git a/datastock/_class04_plot_as_array_1d.py b/datastock/_class04_plot_as_array_1d.py index 1b757c5..543a4a6 100644 --- a/datastock/_class04_plot_as_array_1d.py +++ b/datastock/_class04_plot_as_array_1d.py @@ -9,7 +9,7 @@ # library-specific from . import _generic_check -from . import _plot_text +from . 
import _class04_plot_text as _plot_text # ############################################################# @@ -295,4 +295,4 @@ def _label_axes( ax.set_xticks([]) ax.set_yticks([]) - return dax \ No newline at end of file + return dax diff --git a/datastock/_class04_plot_as_array_234d.py b/datastock/_class04_plot_as_array_234d.py index ecfaf15..e505af3 100644 --- a/datastock/_class04_plot_as_array_234d.py +++ b/datastock/_class04_plot_as_array_234d.py @@ -9,8 +9,8 @@ # library-specific from . import _generic_check -from . import _class1_compute -from . import _plot_text +from . import _class01_compute +from . import _class04_plot_text as _plot_text # ############################################################# @@ -82,7 +82,7 @@ def sliZ2(*args): elif ndim >= 3: # here slice X => slice in dim Y and vice-versa - sliZ2 = _class1_compute._get_slice( + sliZ2 = _class01_compute._get_slice( laxis=[dkeys[ss]['axis'] for ss in lorder], ndim=ndim, ) diff --git a/datastock/_class04_plot_as_mobile_lines.py b/datastock/_class04_plot_as_mobile_lines.py index c60cf4a..e5859a7 100644 --- a/datastock/_class04_plot_as_mobile_lines.py +++ b/datastock/_class04_plot_as_mobile_lines.py @@ -10,9 +10,9 @@ # library-specific from . import _generic_check -from . import _plot_text -from . import _class1_compute -from ._plot_as_array import _check_keyXYZ +from . import _class04_plot_text as _plot_text +from . import _class01_compute +from ._class04_plot_as_array import _check_keyXYZ from ._generic_utils_plot import _get_str_datadlab @@ -445,7 +445,7 @@ def _plot_as_mobile_lines2d( # prepare slicing # here slice X and Y alike => slice in dim Y and vice-versa - sli = _class1_compute._get_slice(laxis=[axisch], ndim=2) + sli = _class01_compute._get_slice(laxis=[axisch], ndim=2) # -------------- # plot - prepare @@ -688,7 +688,7 @@ def _plot_as_mobile_lines3d( # reshape into (nt, nch*(npts+1)) ntot = nch*(npts + 1) order = 'C' if axisch < axispts else 'F' - slibck = _class1_compute._get_slice(laxis=[axist], ndim=3) + slibck = _class01_compute._get_slice(laxis=[axist], ndim=3) bckx = np.array([ bckx[slibck(ii)].reshape((ntot,), order=order) for ii in range(nt) @@ -723,7 +723,7 @@ def _plot_as_mobile_lines3d( # prepare slicing # here slice X and Y alike => slice in dim Y and vice-versa - sli = _class1_compute._get_slice(laxis=[axist, axisch], ndim=3) + sli = _class01_compute._get_slice(laxis=[axist, axisch], ndim=3) # -------------- # plot - prepare @@ -962,4 +962,4 @@ def _plot_as_mobile_lines3d( bstr_dict=bstr_dict, ) - return coll, dax, dgroup \ No newline at end of file + return coll, dax, dgroup diff --git a/datastock/_class04_plot_as_profile1d.py b/datastock/_class04_plot_as_profile1d.py index df70e1e..f17b454 100644 --- a/datastock/_class04_plot_as_profile1d.py +++ b/datastock/_class04_plot_as_profile1d.py @@ -10,9 +10,9 @@ # library-specific from . import _generic_check -from . import _plot_text -from . import _class1_compute -from ._plot_as_array import _check_keyXYZ +from . import _class04_plot_text as _plot_text +from . 
import _class01_compute +from ._class04_plot_as_array import _check_keyXYZ from ._generic_utils_plot import _get_str_datadlab @@ -595,8 +595,8 @@ def _plot_as_profile1d( # prepare slicing # here slice X => slice in dim Y and vice-versa - slit = _class1_compute._get_slice(laxis=[1-axist], ndim=2) - sliX = _class1_compute._get_slice(laxis=[1-axisX], ndim=2) + slit = _class01_compute._get_slice(laxis=[1-axist], ndim=2) + sliX = _class01_compute._get_slice(laxis=[1-axisX], ndim=2) sliXt = _get_sliceXt(laxis=[axist], ndim=dataX.ndim) # -------------- @@ -971,4 +971,4 @@ def _plot_as_profile1d( bstr_dict=bstr_dict, ) - return coll, dax, dgroup \ No newline at end of file + return coll, dax, dgroup diff --git a/datastock/_class04_plot_correlations.py b/datastock/_class04_plot_correlations.py index d19be76..70eee1d 100644 --- a/datastock/_class04_plot_correlations.py +++ b/datastock/_class04_plot_correlations.py @@ -15,9 +15,9 @@ # library-specific from . import _generic_check -from . import _plot_BvsA_as_distribution_check -from . import _plot_text -from . import _class2_interactivity +from . import _class04_plot_BvsA_as_distribution_check as _plot_BvsA_as_distribution_check +from . import _class04_plot_text as _plot_text +from . import _class02_interactivity __all__ = ['plot_correlations'] From 06ee0a79c67d7fe192d3a26945fb4eea776ad15b Mon Sep 17 00:00:00 2001 From: dvezinet Date: Wed, 13 Nov 2024 23:53:59 +0000 Subject: [PATCH 05/25] [#159] Renaming almost done --- datastock/__init__.py | 4 +- datastock/_class.py | 3 -- datastock/{_class0.py => _class00.py} | 0 datastock/_class01.py | 38 +++++++++---------- datastock/{_class2.py => _class02.py} | 0 ...ractivity.py => _class02_interactivity.py} | 0 6 files changed, 21 insertions(+), 24 deletions(-) delete mode 100644 datastock/_class.py rename datastock/{_class0.py => _class00.py} (100%) rename datastock/{_class2.py => _class02.py} (100%) rename datastock/{_class2_interactivity.py => _class02_interactivity.py} (100%) diff --git a/datastock/__init__.py b/datastock/__init__.py index f3a72d8..1d81fdb 100644 --- a/datastock/__init__.py +++ b/datastock/__init__.py @@ -4,7 +4,7 @@ from . import _generic_check from ._generic_utils_plot import * -from ._class import DataStock +from ._class04 import Plots as Collection from ._saveload import load, get_files from ._direct_calls import * -from . import tests \ No newline at end of file +from . import tests diff --git a/datastock/_class.py b/datastock/_class.py deleted file mode 100644 index 77a9235..0000000 --- a/datastock/_class.py +++ /dev/null @@ -1,3 +0,0 @@ - - -from ._class3 import DataStock3 as DataStock diff --git a/datastock/_class0.py b/datastock/_class00.py similarity index 100% rename from datastock/_class0.py rename to datastock/_class00.py diff --git a/datastock/_class01.py b/datastock/_class01.py index 0b94700..80cce2b 100644 --- a/datastock/_class01.py +++ b/datastock/_class01.py @@ -11,11 +11,11 @@ # library-specific +from ._class00 import DataStock0 as Previous from . import _generic_check from . import _generic_utils -from . import _class1_check -from . import _class1_show -from ._class0 import * +from . import _class01_check as _check +from . import _class01_show as _show from . import _class01_compute as _compute from . import _class01_domain as _domain from . 
import _class01_binning as _binning @@ -32,7 +32,7 @@ ############################################# -class DataStock1(DataStock0): +class DataStock1(Previous): """ A generic class for handling data Provides methods for: @@ -117,7 +117,7 @@ def update( # Check consistency ( self._dref, self._ddata, self._dobj, self.__dlinks, - ) = _class1_check._consistency( + ) = _check._consistency( dobj=dobj, dobj0=self._dobj, ddata=ddata, ddata0=self._ddata, dref=dref, dref0=self._dref, @@ -173,7 +173,7 @@ def remove_ref(self, key=None, propagate=None): """ Remove a ref (or list of refs) and all associated data """ ( self._dref, self._ddata, self._dobj, self.__dlinks, - ) = _class1_check._remove_ref( + ) = _check._remove_ref( key=key, dref0=self._dref, ddata0=self._ddata, dobj0=self._dobj, @@ -189,7 +189,7 @@ def remove_data(self, key=None, propagate=True): """ Remove a data (or list of data) """ ( self._dref, self._ddata, self._dobj, self.__dlinks, - ) = _class1_check._remove_data( + ) = _check._remove_data( key=key, dref0=self._dref, ddata0=self._ddata, dobj0=self._dobj, @@ -205,7 +205,7 @@ def remove_obj(self, key=None, which=None, propagate=True): """ Remove a data (or list of data) """ ( self._dref, self._ddata, self._dobj, self.__dlinks, - ) = _class1_check._remove_obj( + ) = _check._remove_obj( key=key, which=which, propagate=propagate, @@ -247,7 +247,7 @@ def remove_all(self, excluded=None): def __check_which(self, which=None, return_dict=None): """ Check which in ['data'] + list(self._dobj.keys() """ - return _class1_check._check_which( + return _check._check_which( dref=self._dref, ddata=self._ddata, dobj=self._dobj, @@ -266,7 +266,7 @@ def get_lparam(self, which=None, for_show=None): which, dd = self.__check_which(which, return_dict=True) if which in ['ref', 'data']: for_show = False - return _class1_show._get_lparam(dd=dd, for_show=for_show) + return _show._get_lparam(dd=dd, for_show=for_show) def get_param( self, @@ -291,7 +291,7 @@ def get_param( """ which, dd = self.__check_which(which, return_dict=True) - return _class1_check._get_param( + return _check._get_param( dd=dd, dd_name=which, param=param, key=key, ind=ind, returnas=returnas, ) @@ -322,7 +322,7 @@ def set_param( """ which, dd = self.__check_which(which, return_dict=True) - param = _class1_check._set_param( + param = _check._set_param( dd=dd, dd_name=which, param=param, value=value, ind=ind, key=key, distribute=distribute, @@ -340,7 +340,7 @@ def add_param( ): """ Add a parameter, optionnally also set its value """ which, dd = self.__check_which(which, return_dict=True) - param = _class1_check._add_param( + param = _check._add_param( dd=dd, dd_name=which, param=param, @@ -358,7 +358,7 @@ def remove_param( ): """ Remove a parameter, none by default, all if param = 'all' """ which, dd = self.__check_which(which, return_dict=True) - _class1_check._remove_param( + _check._remove_param( dd=dd, dd_name=which, param=param, @@ -529,7 +529,7 @@ def select(self, which=None, log=None, returnas=None, **kwdargs): """ which, dd = self.__check_which(which, return_dict=True) - return _class1_check._select( + return _check._select( dd=dd, dd_name=which, log=log, returnas=returnas, **kwdargs, @@ -544,7 +544,7 @@ def _ind_tofrom_key( ): """ Return ind from key or key from ind for all data """ which, dd = self.__check_which(which, return_dict=True) - return _class1_check._ind_tofrom_key( + return _check._ind_tofrom_key( dd=dd, dd_name=which, ind=ind, key=key, returnas=returnas, ) @@ -963,7 +963,7 @@ def show( returnas=False, ): """ Summary 
description of the object content """ - return _class1_show.main( + return _show.main( coll=self, show_which=show_which, show=show, @@ -978,7 +978,7 @@ def show( ) def _get_show_obj(self, which=None): - return _class1_show._show_obj_def + return _show._show_obj_def def show_data(self): self.show(show_which=['ref', 'data']) @@ -1003,7 +1003,7 @@ def show_details( returnas=False, ): """ Summary description of the object content """ - return _class1_show.main_details( + return _show.main_details( coll=self, which=which, key=key, diff --git a/datastock/_class2.py b/datastock/_class02.py similarity index 100% rename from datastock/_class2.py rename to datastock/_class02.py diff --git a/datastock/_class2_interactivity.py b/datastock/_class02_interactivity.py similarity index 100% rename from datastock/_class2_interactivity.py rename to datastock/_class02_interactivity.py From c0f68f2be5be843fa6ea510b3da0326419af4902 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 12:21:17 +0000 Subject: [PATCH 06/25] [#159] Renaming continued --- datastock/__init__.py | 5 +++- datastock/_class02.py | 39 ++++++++++++++--------------- datastock/_class02_interactivity.py | 8 +++--- datastock/_class03_Bins.py | 1 + datastock/_class03_binning.py | 2 +- 5 files changed, 29 insertions(+), 26 deletions(-) diff --git a/datastock/__init__.py b/datastock/__init__.py index 1d81fdb..28029a9 100644 --- a/datastock/__init__.py +++ b/datastock/__init__.py @@ -1,10 +1,13 @@ +from . import _class1_compute +from . import _class1_compute +from . import _class1_compute from .version import __version__ from . import _generic_check from ._generic_utils_plot import * -from ._class04 import Plots as Collection +from ._class04_Plots import Plots as Collection from ._saveload import load, get_files from ._direct_calls import * from . import tests diff --git a/datastock/_class02.py b/datastock/_class02.py index 8d08e05..1c98e1b 100644 --- a/datastock/_class02.py +++ b/datastock/_class02.py @@ -13,9 +13,8 @@ from . import _generic_check -from ._class1 import * -from . import _class2_interactivity -from . import _class1_compute +from ._class01 import DataStock1 as Previous +from . 
import _class02_interactivity as _interactivity # ################################################################# @@ -24,7 +23,7 @@ # ################################################################# -class DataStock2(DataStock1): +class DataStock2(Previous): """ Handles matplotlib interactivity """ _LPAXES = ['axes', 'type'] @@ -311,7 +310,7 @@ def dinteractivity(self): # ------------------ def show_commands(self, verb=None, returnas=None): - return _class2_interactivity.show_commands( + return _interactivity.show_commands( verb=verb, returnas=returnas, ) @@ -363,7 +362,7 @@ def setup_interactivity( # ---------- # Check dgroup - dgroup, newgroup = _class2_interactivity._setup_dgroup( + dgroup, newgroup = _interactivity._setup_dgroup( dgroup=dgroup, dobj0=self._dobj, dref0=self._dref, @@ -372,7 +371,7 @@ def setup_interactivity( # ---------- # Check increment dict - dinc, newinc = _class2_interactivity._setup_dinc( + dinc, newinc = _interactivity._setup_dinc( dinc=dinc, lparam_ref=self.get_lparam(which='ref'), dref0=self._dref, @@ -381,7 +380,7 @@ def setup_interactivity( # ---------------------------------------------------------- # make sure all refs are known and are associated to a group - drefgroup = _class2_interactivity._setup_drefgroup( + drefgroup = _interactivity._setup_drefgroup( dref0=self._dref, dgroup=dgroup, ) @@ -451,7 +450,7 @@ def setup_interactivity( # -------------------------- # update mobile with group, group_vis and func - _class2_interactivity._setup_mobile( + _interactivity._setup_mobile( dmobile=self._dobj['mobile'], dref=self._dref, ddata=self._ddata, @@ -486,7 +485,7 @@ def setup_interactivity( # --------- # dkeys - dkeys = _class2_interactivity._setup_keys(dkeys=dkeys, dgroup=dgroup) + dkeys = _interactivity._setup_keys(dkeys=dkeys, dgroup=dgroup) # implement dict for ii, (k0, v0) in enumerate(dkeys.items()): @@ -532,7 +531,7 @@ def setup_interactivity( **dinter, ) - _class2_interactivity._set_dbck( + _interactivity._set_dbck( lax=self._dobj['axes'].keys(), daxes=self._dobj['axes'], dcanvas=self._dobj['canvas'], @@ -904,7 +903,7 @@ def _update_mobiles(self, lmobiles=None): # ---- update data of group objects ---- 0.15 s for k0 in lmobiles: - _class2_interactivity._update_mobile( + _interactivity._update_mobile( dmobile=self._dobj['mobile'], dref=self._dref, ddata=self._ddata, @@ -941,7 +940,7 @@ def _update_mobiles(self, lmobiles=None): # ---------------------- def resize(self, event): - _class2_interactivity._set_dbck( + _interactivity._set_dbck( lax=self._dobj['axes'].keys(), daxes=self._dobj['axes'], dcanvas=self._dobj['canvas'], @@ -955,7 +954,7 @@ def new_home(self, *args): v0['handle'].manager.toolbar.__class__, v0['handle'].manager.toolbar, ).home(*args) - _class2_interactivity._set_dbck( + _interactivity._set_dbck( lax=self._dobj['axes'].keys(), daxes=self._dobj['axes'], dcanvas=self._dobj['canvas'], @@ -1014,7 +1013,7 @@ def mouseclic(self, event): gax += self._dobj['axes'][kax]['groupy'] for gg in set([cur_groupx, cur_groupy]): if gg is not None and gg in gax: - out = _class2_interactivity._update_indices_nb( + out = _interactivity._update_indices_nb( group=gg, dgroup=self._dobj['group'], ctrl=ctrl, @@ -1055,7 +1054,7 @@ def mouseclic(self, event): and cur_refx in self._dobj['axes'][kax]['refx'] ) if c0x: - ix = _class2_interactivity._get_ix_for_refx_only_1or2d( + ix = _interactivity._get_ix_for_refx_only_1or2d( cur_data=cur_datax, cur_ref=cur_refx, eventdata=event.xdata, @@ -1072,7 +1071,7 @@ def mouseclic(self, event): and cur_refy in 
self._dobj['axes'][kax]['refy'] ) if c0y: - iy = _class2_interactivity._get_ix_for_refx_only_1or2d( + iy = _interactivity._get_ix_for_refx_only_1or2d( cur_data=cur_datay, cur_ref=cur_refy, eventdata=event.ydata, @@ -1142,7 +1141,7 @@ def mouserelease(self, event): ][0] for ax in lax ] - _class2_interactivity._set_dbck( + _interactivity._set_dbck( lax=lax, daxes=self._dobj['axes'], dcanvas=self._dobj['canvas'], @@ -1339,7 +1338,7 @@ def onkeypress(self, event): return # update nb of visible indices - out = _class2_interactivity._update_indices_nb( + out = _interactivity._update_indices_nb( group=group, dgroup=self._dobj['group'], ctrl=ctrl, @@ -1393,4 +1392,4 @@ def on_close(self, event): __all__ = [ sorted([k0 for k0 in locals() if k0.startswith('DataStock')])[-1] -] \ No newline at end of file +] diff --git a/datastock/_class02_interactivity.py b/datastock/_class02_interactivity.py index 9cc62e0..35c7c26 100644 --- a/datastock/_class02_interactivity.py +++ b/datastock/_class02_interactivity.py @@ -9,7 +9,7 @@ from . import _generic_check from . import _generic_utils -from . import _class1_compute +from . import _class01_compute _INCREMENTS = [1, 10] @@ -256,7 +256,7 @@ def _setup_mobile( # functions for slicing dmobile[k0]['func_slice'] = [ - _class1_compute._get_slice( + _class01_compute._get_slice( laxis=dmobile[k0]['axis'][ii], ndim=( 1 if dmobile[k0]['data'][ii] == 'index' @@ -442,7 +442,7 @@ def _get_ix_for_refx_only_1or2d( raise NotImplementedError() # get index of datax corresponding to clicked point - return _class1_compute._get_index_from_data( + return _class01_compute._get_index_from_data( data=cd, data_pick=np.r_[eventdata], monot=monot, @@ -588,4 +588,4 @@ def _update_mobile(k0=None, dmobile=None, dref=None, ddata=None): # ddata[dmobile[k0]['data'][ii]]['data'][ # dmobile[k0]['func_slice'][ii](iref[ii]) # ] - # ) \ No newline at end of file + # ) diff --git a/datastock/_class03_Bins.py b/datastock/_class03_Bins.py index b1c0ab1..355e368 100644 --- a/datastock/_class03_Bins.py +++ b/datastock/_class03_Bins.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from . import _class1_compute # Built-in diff --git a/datastock/_class03_binning.py b/datastock/_class03_binning.py index 20256e4..9685f06 100644 --- a/datastock/_class03_binning.py +++ b/datastock/_class03_binning.py @@ -356,4 +356,4 @@ def _get_nobins( for k0, v0 in dout.items(): coll.add_data(key=k0, **v0) - return dout \ No newline at end of file + return dout From 1c202e2a1fadda0d379f8cf6a349e3e284f7b081 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 12:23:14 +0000 Subject: [PATCH 07/25] [#159] Small fixes --- datastock/__init__.py | 5 ----- datastock/_class01_domain.py | 2 +- datastock/_class03_Bins.py | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/datastock/__init__.py b/datastock/__init__.py index 28029a9..7d99b7d 100644 --- a/datastock/__init__.py +++ b/datastock/__init__.py @@ -1,8 +1,3 @@ - -from . import _class1_compute -from . import _class1_compute -from . import _class1_compute - from .version import __version__ from . 
import _generic_check diff --git a/datastock/_class01_domain.py b/datastock/_class01_domain.py index d483833..de3b74b 100644 --- a/datastock/_class01_domain.py +++ b/datastock/_class01_domain.py @@ -244,4 +244,4 @@ def _set_ind_from_domain( ind = ind_in & (~ind_out) - return ind \ No newline at end of file + return ind diff --git a/datastock/_class03_Bins.py b/datastock/_class03_Bins.py index 355e368..b1c0ab1 100644 --- a/datastock/_class03_Bins.py +++ b/datastock/_class03_Bins.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from . import _class1_compute # Built-in From cb3b6234d50018ebbc82cc2709c9a6d841818193 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 14:24:31 +0000 Subject: [PATCH 08/25] [#159] Renaming continued --- datastock/_class04_Plots.py | 11 ----------- datastock/_direct_calls.py | 6 +++--- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/datastock/_class04_Plots.py b/datastock/_class04_Plots.py index 4511ec7..c6f7632 100644 --- a/datastock/_class04_Plots.py +++ b/datastock/_class04_Plots.py @@ -414,14 +414,3 @@ def plot_BvsA_as_distribution( # figsize=figsize, dmargin=dmargin, # wintit=wintit, tit=tit, # ) - - -# ############################################################################# -# ############################################################################# -# set __all__ -# ############################################################################# - - -__all__ = [ - sorted([k0 for k0 in locals() if k0.startswith('DataStock')])[-1] -] diff --git a/datastock/_direct_calls.py b/datastock/_direct_calls.py index eadc214..d5be8fc 100644 --- a/datastock/_direct_calls.py +++ b/datastock/_direct_calls.py @@ -4,7 +4,7 @@ # library-specific -from ._class import DataStock +from ._class04_Plots import Plots as Collection __all__ = [ @@ -39,7 +39,7 @@ def plot_as_array(data=None): # --------------------- # Instanciate datastock - st = DataStock() + st = Collection() st.add_data(key='data', data=data) return st.plot_as_array(inplace=True) @@ -67,7 +67,7 @@ def plot_BvsA_as_distribution(dataA=None, dataB=None): # --------------------- # Instanciate datastock - st = DataStock() + st = Collection() st.add_data(key='dataA', data=dataA) st.add_data(key='dataB', data=dataB) From d41fa02882fae7880d251c475bd03a1f7c941761 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 14:28:18 +0000 Subject: [PATCH 09/25] [#159] Renaming in tests --- datastock/tests/test_01_DataStock.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datastock/tests/test_01_DataStock.py b/datastock/tests/test_01_DataStock.py index 0ebd01e..15f1d6a 100644 --- a/datastock/tests/test_01_DataStock.py +++ b/datastock/tests/test_01_DataStock.py @@ -14,7 +14,7 @@ import matplotlib.pyplot as plt # datastock-specific -from .._class import DataStock +from .._class04_Plots import Plots as Collection from .._saveload import load @@ -210,7 +210,7 @@ class Test01_Instanciate(): @classmethod def setup_class(cls): - cls.st = DataStock() + cls.st = Collection() cls.nc = 5 cls.nx = 80 cls.lnt = [100, 90, 80, 120, 80] @@ -240,7 +240,7 @@ class Test02_Manipulate(): @classmethod def setup_class(cls): - cls.st = DataStock() + cls.st = Collection() cls.nc = 5 cls.nx = 80 cls.lnt = [100, 90, 80, 120, 80] @@ -609,4 +609,4 @@ def test23_saveload(self, verb=False): msg = st2.__eq__(self.st, returnas=str) if msg is not True: raise Exception(msg) - os.remove(pfe) \ No newline at end of file + os.remove(pfe) From b255f15cb9ecc5d8dd2d1cdc23662ee98f8364db Mon Sep 17 00:00:00 2001 
From: dvezinet Date: Thu, 14 Nov 2024 15:45:00 +0000 Subject: [PATCH 10/25] [#159] Renaming in tests --- datastock/tests/test_01_DataStock.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/datastock/tests/test_01_DataStock.py b/datastock/tests/test_01_DataStock.py index 15f1d6a..f1f82f7 100644 --- a/datastock/tests/test_01_DataStock.py +++ b/datastock/tests/test_01_DataStock.py @@ -6,7 +6,6 @@ # Built-in import os -import warnings # Standard @@ -70,7 +69,10 @@ def _add_data(st=None, nc=None, nx=None, lnt=None): ne = np.logspace(15, 21, 11) Te = np.logspace(1, 5, 21) - pec = np.exp(-(ne[:, None] - 1e18)**2/1e5**2 - (Te[None, :] - 5e3)**2/3e3**2) + pec = np.exp( + -(ne[:, None] - 1e18)**2/1e5**2 + - (Te[None, :] - 5e3)**2/3e3**2 + ) lt = [np.linspace(1, 10, nt) for nt in lnt] lprof = [(1 + np.cos(t)[:, None]) * x[None, :] for t in lt] @@ -392,9 +394,10 @@ def test09_binning(self): shape.insert(ax[0], nb) if dout[k0]['data'].shape != tuple(shape): + shstr = dout[k0]['data'].shape msg = ( "Mismatching shapes for case {ii}!\n" - f"\t- dout['{k0}']['data'].shape = {dout[k0]['data'].shape}\n" + f"\t- dout['{k0}']['data'].shape = {shstr}\n" f"\t- expected: {tuple(shape)}" ) raise Exception(msg) @@ -416,7 +419,7 @@ def test10_interpolate(self): zipall = zip(lk, lref, lax, llog, lgrid, lx0, lx1, ldom) for ii, (kk, rr, aa, lg, gg, x0, x1, dom) in enumerate(zipall): - domain = self.st.get_domain_ref(domain=dom) + _ = self.st.get_domain_ref(domain=dom) dout = self.st.interpolate( keys=kk, @@ -490,7 +493,7 @@ def test11_interpolate_common_refs(self): assert isinstance(dout[kk]['data'], np.ndarray) if not (dout[kk]['data'].shape == ss and dout[kk]['ref'] == ri): - lstr = [f'\t- {k0}: {v0}' for k0, v0 in dparams.items()] + # lstr = [f'\t- {k0}: {v0}' for k0, v0 in dparams.items()] msg = ( "Wrong interpolation shape / ref:\n" f"\t- ii: {ii}\n" @@ -512,7 +515,6 @@ def test11_interpolate_common_refs(self): ) raise Exception(msg) - # Not tested: float, store=True, inplace # ------------------------ @@ -553,7 +555,9 @@ def test17_plot_as_array_4d(self): del dax # def test18_plot_BvsA_as_distribution(self): - # dax = self.st.plot_BvsA_as_distribution(keyA='prof0', keyB='prof0-bis') + # dax = self.st.plot_BvsA_as_distribution( + # keyA='prof0', keyB='prof0-bis', + # ) # plt.close('all') # del dax From 7e7f71deed472a3dd178ba4e4cd878657c8885a0 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 16:04:17 +0000 Subject: [PATCH 11/25] [#159] Cleanup --- datastock/_class02.py | 110 +++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/datastock/_class02.py b/datastock/_class02.py index 1c98e1b..ce2fb98 100644 --- a/datastock/_class02.py +++ b/datastock/_class02.py @@ -149,7 +149,7 @@ def add_mobile( else: msg = ( f"In dmobile['{key}']:\n" - "Nb. of different dtypes must match nb of different data!\n" + "Nb. of diff. dtypes must match nb of diff. data!\n" f"\t- dtype: {dtype}\n" f"\t- data: {data}\n" ) @@ -192,15 +192,15 @@ def add_axes( # check refx, refy # if refx is None and refy is None: - # msg = f"Please provide at least refx or refy for axes {key}!" - # raise Exception(msg) + # msg = f"Please provide at least refx or refy for axes {key}!" 
+ # raise Exception(msg) if isinstance(refx, str): refx = [refx] if isinstance(refy, str): refy = [refy] - c0 =( + c0 = ( isinstance(refx, list) and all([rr in self._dref.keys() for rr in refx]) ) @@ -208,7 +208,7 @@ def add_axes( msg = "Arg refx must be a list of valid ref keys!" raise Exception(msg) - c0 =( + c0 = ( isinstance(refy, list) and all([rr in self._dref.keys() for rr in refy]) ) @@ -388,8 +388,9 @@ def setup_interactivity( # add indices to ref for k0, v0 in self._dref.items(): if drefgroup[k0] is not None: + zeros = np.zeros((dgroup[drefgroup[k0]]['nmax'],), dtype=int) self.add_indices_per_ref( - indices=np.zeros((dgroup[drefgroup[k0]]['nmax'],), dtype=int), + indices=zeros, ref=k0, distribute=False, ) @@ -459,7 +460,7 @@ def setup_interactivity( # -------------------- # axes mobile, refs and canvas - daxcan = dict.fromkeys(self._dobj['axes'].keys()) + # daxcan = dict.fromkeys(self._dobj['axes'].keys()) for k0, v0 in self._dobj['axes'].items(): # Update mobile @@ -599,43 +600,44 @@ def connect(self): if self._warn_ifnotInteractive(): return for k0, v0 in self._dobj['canvas'].items(): - keyp = v0['handle'].mpl_connect('key_press_event', self.onkeypress) - keyr = v0['handle'].mpl_connect('key_release_event', self.onkeypress) - butp = v0['handle'].mpl_connect('button_press_event', self.mouseclic) - res = v0['handle'].mpl_connect('resize_event', self.resize) - butr = v0['handle'].mpl_connect('button_release_event', self.mouserelease) - close = v0['handle'].mpl_connect('close_event', self.on_close) - draw = v0['handle'].mpl_connect('draw_event', self.on_draw) + hand = v0['handle'] + keyp = hand.mpl_connect('key_press_event', self.onkeypress) + keyr = hand.mpl_connect('key_release_event', self.onkeypress) + butp = hand.mpl_connect('button_press_event', self.mouseclic) + res = hand.mpl_connect('resize_event', self.resize) + butr = hand.mpl_connect('button_release_event', self.mouserelease) + close = hand.mpl_connect('close_event', self.on_close) + # draw = hand.mpl_connect('draw_event', self.on_draw) # Make sure resizing is doen before resize_event # works without re-initializing because not a Qt Action - v0['handle'].manager.toolbar.release = self.mouserelease + hand.manager.toolbar.release = self.mouserelease # v0['handle'].manager.toolbar.release_zoom = self.mouserelease # v0['handle'].manager.toolbar.release_pan = self.mouserelease # make sure home button triggers background update # requires re-initializing because home is a Qt Action # only created by toolbar.addAction() - v0['handle'].manager.toolbar.home = self.new_home + hand.manager.toolbar.home = self.new_home # if _init_toolbar() implemented (matplotlib > ) error = False - if hasattr(v0['handle'].manager.toolbar, '_init_toolbar'): + if hasattr(hand.manager.toolbar, '_init_toolbar'): try: - v0['handle'].manager.toolbar._init_toolbar() + hand.manager.toolbar._init_toolbar() except NotImplementedError: - v0['handle'].manager.toolbar.__init__( - v0['handle'], - v0['handle'].parent(), + hand.manager.toolbar.__init__( + hand, + hand.parent(), ) except Exception as err: error = err - elif hasattr(v0['handle'], 'parent'): + elif hasattr(hand, 'parent'): try: - v0['handle'].manager.toolbar.__init__( - v0['handle'], - v0['handle'].parent(), + hand.manager.toolbar.__init__( + hand, + hand.parent(), ) - except Exception as err: + except Exception: error = True else: error = True @@ -643,9 +645,8 @@ def connect(self): if error is not False: import platform import sys - import inspect - lstr0 = [f"\t- {k1}" for k1 in dir(v0['handle'])] 
- lstr1 = [f"\t- {k1}" for k1 in dir(v0['handle'].manager.toolbar)] + lstr0 = [f"\t- {k1}" for k1 in dir(hand)] + lstr1 = [f"\t- {k1}" for k1 in dir(hand.manager.toolbar)] msg = ( f"platform: {platform.platform()}\n" f"python: {sys.version}\n" @@ -656,7 +657,7 @@ def connect(self): + "\n".join(lstr1) ) if error is not True: - msg += '\n' + str(err) + msg += '\n' + str(error) warnings.warn(msg) self._dobj['canvas'][k0]['cid'] = { @@ -725,8 +726,8 @@ def _get_current_grouprefdata_from_kax(self, kax=None): # Get current group and ref groupx = self._dobj['axes'][kax]['groupx'] groupy = self._dobj['axes'][kax]['groupy'] - refx = self._dobj['axes'][kax]['refx'] - refy = self._dobj['axes'][kax]['refy'] + # refx = self._dobj['axes'][kax]['refx'] + # refy = self._dobj['axes'][kax]['refy'] # Get kinter kinter = list(self._dobj['interactivity'].keys())[0] @@ -801,7 +802,7 @@ def _getset_current_axref(self, event): types=str, allowed=lkax, ) - ax = self._dobj['axes'][kax]['handle'] + # ax = self._dobj['axes'][kax]['handle'] # Check axes is relevant and toolbar not active lc = [ @@ -830,8 +831,8 @@ def update_interactivity(self): cur_groupy = self._dobj['interactivity'][self.kinter]['cur_groupy'] cur_refx = self._dobj['interactivity'][self.kinter]['cur_refx'] cur_refy = self._dobj['interactivity'][self.kinter]['cur_refy'] - cur_datax = self._dobj['interactivity'][self.kinter]['cur_datax'] - cur_datay = self._dobj['interactivity'][self.kinter]['cur_datay'] + # cur_datax = self._dobj['interactivity'][self.kinter]['cur_datax'] + # cur_datay = self._dobj['interactivity'][self.kinter]['cur_datay'] # Propagate indices through refs if cur_refx is not None: @@ -870,7 +871,7 @@ def update_interactivity(self): ]) ] - self._update_mobiles(lmobiles=lmobiles) # 0.2 s + self._update_mobiles(lmobiles=lmobiles) # 0.2 s if self.debug: self.show_debug() @@ -912,20 +913,21 @@ def _update_mobiles(self, lmobiles=None): # --- Redraw all objects (due to background restore) --- 25 ms for k0, v0 in self._dobj['mobile'].items(): - v0['handle'].set_visible(v0['visible']) + hand = v0['handle'] + hand.set_visible(v0['visible']) try: - self._dobj['axes'][v0['axes']]['handle'].draw_artist(v0['handle']) + self._dobj['axes'][v0['axes']]['handle'].draw_artist(hand) except Exception as err: print() print(0, k0) # DB print(1, v0['axes']) # DB print(2, self._dobj['axes'][v0['axes']]['handle']) # DB - print(3, v0['handle']) # DB + print(3, hand) # DB print( 4, 'x and y data shapes: ', - [vv.shape for vv in v0['handle'].get_data()] + [vv.shape for vv in hand.get_data()] ) # DB - print(5, 'data: ', v0['handle'].get_data()) + print(5, 'data: ', hand.get_data()) print(err) # DB print() # DB @@ -1003,7 +1005,9 @@ def mouseclic(self, event): cur_datay = self._dobj['interactivity'][kinter]['cur_datay'] shift = self._dobj['key']['shift']['val'] - ctrl = any([self._dobj['key'][ss]['val'] for ss in ['control', 'ctrl']]) + ctrl = any([ + self._dobj['key'][ss]['val'] for ss in ['control', 'ctrl'] + ]) # Update number of indices (for visibility) gax = [] @@ -1119,15 +1123,15 @@ def mouserelease(self, event): if v0['handle'] == event.inaxes.figure.canvas ][0] mode = self._dobj['canvas'][can]['handle'].manager.toolbar.mode.lower() - c0 = 'pan' in mode + c0 = 'pan' in mode c1 = 'zoom' in mode if c0 or c1: kax = self._dobj['interactivity'][self.kinter]['cur_ax_panzoom'] if kax is None: msg = ( - "Make sure you release the mouse button on an axes !" - "\n Otherwise the background plot cannot be properly updated !" 
+ "Make sure you release the mouse button on an axes!" + "\n Otherwise background plot can't be properly updated!" ) raise Exception(msg) ax = self._dobj['axes'][kax]['handle'] @@ -1192,7 +1196,7 @@ def onkeypress(self, event): ln = np.r_[ngen, nmov, ngrp, nind] if np.any(ln > 1) or np.sum(ln) > 2: return - if np.sum(ln) == 2 and (ngrp == 1 or nind ==1 ): + if np.sum(ln) == 2 and (ngrp == 1 or nind == 1): return # only keep relevant keys @@ -1221,7 +1225,7 @@ def onkeypress(self, event): # group group = self._dobj['key'][event.key]['group'] cx = any([ - v0['groupx'] is not None and group in v0['groupx'] + v0['groupx'] is not None and group in v0['groupx'] for v0 in self._dobj['axes'].values() ]) if cx: @@ -1275,7 +1279,10 @@ def onkeypress(self, event): imax = self._dobj['group'][groupx]['nmaxcur'] ii = int(event.key) if ii > imax: - msg = "Set to current max index for group '{groupx}': {imax}" + msg = ( + f"Set to current max index for group '{groupx}':" + f" {imax}" + ) print(msg) ii = min(ii, imax) self._dobj['group'][groupx]['indcur'] = ii @@ -1285,7 +1292,10 @@ def onkeypress(self, event): imax = self._dobj['group'][groupy]['nmaxcur'] ii = int(event.key) if ii > imax: - msg = "Set to current max index for group '{groupy}': {imax}" + msg = ( + f"Set to current max index for group '{groupy}':" + f" {imax}" + ) print(msg) ii = min(ii, imax) self._dobj['group'][groupy]['indcur'] = ii @@ -1379,7 +1389,7 @@ def onkeypress(self, event): # ------------------- def on_close(self, event): - self.remove_all(excluded=['canvas']) # to avoid crash + self.remove_all(excluded=['canvas']) # to avoid crash print("\n---- CLOSING interactive figure ----") print(f"\tleft in dax: {self.get_nbytes()[0]/1000} Ko\n") From 50264bec568887285e0224bfea2680f45df413fc Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 16:20:02 +0000 Subject: [PATCH 12/25] [#159] Cleanup 2 --- datastock/_class01.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/datastock/_class01.py b/datastock/_class01.py index 80cce2b..8835c51 100644 --- a/datastock/_class01.py +++ b/datastock/_class01.py @@ -1,10 +1,6 @@ # -*- coding: utf-8 -*- -# Built-in -import copy - - # Common import numpy as np import astropy.units as asunits @@ -556,7 +552,14 @@ def _get_sort_index(self, which=None, param=None): return if param == 'key': - ind = np.argsort(list(dd.keys())) + if which == 'ref': + lk = list(self.dref.keys()) + elif which == 'data': + lk = list(self.ddata.keys()) + else: + lk = list(self.dobj.get(which, {}).keys()) + ind = np.argsort(lk) + elif isinstance(param, str): ind = np.argsort( self.get_param(param, which=which, returnas=np.ndarray)[param] @@ -564,6 +567,7 @@ def _get_sort_index(self, which=None, param=None): else: msg = "Arg param must be a valid str\n Provided: {}".format(param) raise Exception(msg) + return ind def sortby(self, param=None, order=None, which=None): @@ -640,7 +644,9 @@ def get_ref_vector( >>> st.add_data(key='t0', data=t0) >>> st.add_data(key='x', data=x) >>> st.add_data(key='xt', data=xt) - >>> hasref, hasvect, ref, key_vect, dind = st.get_ref_vector(key='xt', ref='nt', values=[2, 3, 3.1, 5]) + >>> hasref, hasvect, ref, key_vect, dind = st.get_ref_vector( + >>> key='xt', ref='nt', values=[2, 3, 3.1, 5], + >>> ) In the above example: - hasref = True: 'xt' has 'nt' has ref @@ -651,7 +657,7 @@ def get_ref_vector( 'key': [2, 3, 3.1, 5], # the desired time points 'ind': [2, 3, 3, 5], # the indices of t in t0 'indu': [2, 3, 5] # the unique indices of t in t0 - 
'indr': (3, 4), # bool array showing, for each indu, matching ind + 'indr': (3, 4), # bool array with ind for each indu 'indok': [True, False, ...] } @@ -820,7 +826,7 @@ def binning( bin_data0: the data used to compute binning indices, can be: - a str, key to a ddata item - a np.ndarray - _ a list of any of the above if each data has different size along axis + - a list of any of the above if each data has diff. size along axis bin_units: str only used if integrate = True and bin_data is a np.ndarray @@ -829,7 +835,7 @@ def binning( flag indicating whether binning is used for integration Implies that: Only usable for 1d binning (axis has to be a single index) - data is multiplied by the underlying bin_data0 step prior to binning + data is multiplied by bin_data0 step prior to binning statistic: str the statistic kwd feed to scipy.stats.binned_statistic() From 3bbee5c742a5c30734e89d6588b81676f699eab1 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 16:30:01 +0000 Subject: [PATCH 13/25] [#159] Cleanup 3 --- datastock/_class03_binning.py | 122 ++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 49 deletions(-) diff --git a/datastock/_class03_binning.py b/datastock/_class03_binning.py index 9685f06..123732b 100644 --- a/datastock/_class03_binning.py +++ b/datastock/_class03_binning.py @@ -6,9 +6,6 @@ """ -import warnings - - import numpy as np import datastock as ds @@ -55,7 +52,7 @@ def binning( # keys isbs, bin_data0 = _check_bs( - coll=coll, + coll=coll, bin_data0=bin_data0, bin_data1=bin_data1, ) @@ -65,7 +62,7 @@ def binning( nobin = False if isbs: - + # add ref and data kr, kd, ddatan, nobin = _interpolate( coll=coll, @@ -80,16 +77,16 @@ def binning( store=store, store_keys=store_keys, ) - + # safety check if nobin is False: lk = list(ddatan.keys()) data = [ddatan[k0]['data'] for k0 in lk] bin_data0 = [ddatan[k0]['bin_data'] for k0 in lk] - + # -------------------- # do the actua binning - + if nobin is False: dout = ds._class1_binning.binning( coll=coll, @@ -118,14 +115,14 @@ def binning( # -------------------------------- # remove intermediate ref and data - + if isbs is True: for dd in data + bin_data0 + [kd]: if dd in coll.ddata.keys(): coll.remove_data(dd) if kr in coll.dref.keys(): coll.remove_ref(kr) - + for k0 in data: k1 = [k1 for k1, v1 in ddatan.items() if v1['data'] == k0][0] dout[k1] = dict(dout[k0]) @@ -151,31 +148,58 @@ def _check_bs( bin_data0=None, bin_data1=None, ): - - wbs = coll._which_bsplines - lok_bs = [ - k0 for k0, v0 in coll.dobj.get(wbs, {}).items() - if len(v0['ref']) == 1 - ] - lok_dbs = [ - k0 for k0, v0 in coll.ddata.items() - if v0.get(wbs) is not None - and len(v0[wbs]) == 1 - and v0[wbs][0] in coll.dobj.get(wbs, {}).keys() - and len(coll.dobj[wbs][v0[wbs][0]]['ref']) == 1 - ] - - c0 = ( - isinstance(bin_data0, str) - and bin_data1 is None - and bin_data0 in lok_dbs + lok_bs - ) - - if bin_data0 in lok_bs: - bin_data0 = coll.dobj[wbs][bin_data0]['apex'][0] - + + # ---------------- + # Has bsplines + # ---------------- + + if hasattr(coll, '_which_bsplines'): + + # ---------------- + # list of bsplines + + wbs = coll._which_bsplines + lok_bs = [ + k0 for k0, v0 in coll.dobj.get(wbs, {}).items() + if len(v0['ref']) == 1 + ] + + # ---------------- + # list data with bsplines + + lok_dbs = [ + k0 for k0, v0 in coll.ddata.items() + if v0.get(wbs) is not None + and len(v0[wbs]) == 1 + and v0[wbs][0] in coll.dobj.get(wbs, {}).keys() + and len(coll.dobj[wbs][v0[wbs][0]]['ref']) == 1 + ] + + # ---------------- + # flag whether is 
bsplines + + c0 = ( + isinstance(bin_data0, str) + and bin_data1 is None + and bin_data0 in lok_dbs + lok_bs + ) + + # ----------------- + # adjust bin_data0 from key_bs to key_apex + + if bin_data0 in lok_bs: + bin_data0 = coll.dobj[wbs][bin_data0]['apex'][0] + + # ---------------- + # Does not have bsplines + # ---------------- + + else: + + c0 = False + return c0, bin_data0 - + # ###################################################### # ###################################################### @@ -213,7 +237,7 @@ def _interpolate( # --------- # sampling - + ddata = ds._class1_binning._check_data( coll=coll, data=data, @@ -221,7 +245,7 @@ def _interpolate( store=True, ) lkdata = list(ddata.keys()) - + # -------------------- # bins @@ -235,7 +259,7 @@ def _interpolate( # ---------------------- # npts for interpolation - + dv = np.abs(np.diff(vect)) dvmean = np.mean(dv) + np.std(dv) db = np.mean(np.diff(dbins0[lkdata[0]]['edges'])) @@ -263,10 +287,10 @@ def _interpolate( # ------------------- # add ref - + kr = "ntemp" kd = "xxtemp" - + coll.add_ref(kr, size=xx.size) coll.add_data(kd, data=xx, ref=kr, units=coll.ddata[kknots]['units']) @@ -301,7 +325,7 @@ def _interpolate( # ) # err.args = (msg,) # raise err - + # interpolate_data kdn = f"kbd{ii}_temp" coll.interpolate( @@ -327,33 +351,33 @@ def _get_nobins( store=None, store_keys=None, ): - + lk = list(ddata.keys()) wbs = coll._which_bsplines - + if isinstance(store_keys, str): store_keys = [store_keys] - + dout = {} for ii, k0 in enumerate(lk): - + axis = ddata[k0]['ref'].index(coll.dobj[wbs][key_bs]['ref'][0]) - + shape = list(ddata[k0]['data'].shape) nb = dbins0[k0]['edges'].size - 1 shape[axis] = nb - + ref = list(ddata[k0]['ref']) ref[axis] = dbins0[k0]['bin_ref'][0] - + dout[store_keys[ii]] = { 'data': np.zeros(shape, dtype=float), 'ref': tuple(ref), 'units': ddata[k0]['units'], } - + if store is True: for k0, v0 in dout.items(): coll.add_data(key=k0, **v0) - + return dout From f6563c8d5c38c8fea88693f579dd8d6521d6b13c Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 17:09:38 +0000 Subject: [PATCH 14/25] [#159] Cleanup 4 --- datastock/_class03_binning.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/datastock/_class03_binning.py b/datastock/_class03_binning.py index 123732b..f7664b5 100644 --- a/datastock/_class03_binning.py +++ b/datastock/_class03_binning.py @@ -7,7 +7,9 @@ import numpy as np -import datastock as ds + + +from . 
import _class01_binning # ############################################################ @@ -88,7 +90,7 @@ def binning( # do the actua binning if nobin is False: - dout = ds._class1_binning.binning( + dout = _class01_binning.binning( coll=coll, data=data, data_units=data_units, @@ -238,7 +240,7 @@ def _interpolate( # --------- # sampling - ddata = ds._class1_binning._check_data( + ddata = _class01_binning._check_data( coll=coll, data=data, data_units=data_units, @@ -249,7 +251,7 @@ def _interpolate( # -------------------- # bins - dbins0 = ds._class1_binning._check_bins( + dbins0 = _class01_binning._check_bins( coll=coll, lkdata=lkdata, bins=bins0, From 473a9b02360e6b5f3cd13c06a17e6563dc1265ec Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 17:16:09 +0000 Subject: [PATCH 15/25] [#159] clean-up 5 --- datastock/_class04_plot_as_array.py | 6 ++---- datastock/_saveload.py | 25 +++++++++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/datastock/_class04_plot_as_array.py b/datastock/_class04_plot_as_array.py index 7b86427..4e3acce 100644 --- a/datastock/_class04_plot_as_array.py +++ b/datastock/_class04_plot_as_array.py @@ -10,7 +10,7 @@ from . import _generic_check from . import _class01_compute from . import _generic_utils_plot as _uplot -from . import _class04_plot_as_array_1d as _plot_as_array_1d +from . import _class04_plot_as_array_1d as _plot_as_array_1d from . import _class04_plot_as_array_234d as _plot_as_array_234d @@ -126,8 +126,7 @@ def plot_as_array( # -------------------------------- if sameref: - from ._class import DataStock - cc = DataStock() + cc = coll.__class__() lk = ['keyX', 'keyY', 'keyZ', 'keyU'] lk = [k0 for k0 in lk if dkeys[k0]['ref'] is not None] for ii, k0 in enumerate(lk): @@ -603,7 +602,6 @@ def _check( else: dvminmax2[k1]['min'] = dvminmax[kk]['min'] - if dvminmax is None or dvminmax.get(kk, {}).get('max') is None: dvminmax2[k1]['max'] = nanmax + margin else: diff --git a/datastock/_saveload.py b/datastock/_saveload.py index 187991f..25167d9 100644 --- a/datastock/_saveload.py +++ b/datastock/_saveload.py @@ -125,8 +125,8 @@ def load( # cls if cls is None: - from ._class import DataStock - cls = DataStock + from ._class04_Plots import Plots as Collection + cls = Collection if not (type(cls) is type and hasattr(cls, 'from_dict')): msg = ( @@ -168,6 +168,10 @@ def load( # ---------- # reshape + # sparse types + lsparse = ['csc_', 'bsr_', 'coo_', 'csr_', 'dia_', 'dok_', 'lil_'] + + # loop dout = {} for k0, v0 in dflat.items(): @@ -201,7 +205,7 @@ def load( dout[k0] = None elif typ == 'ndarray': dout[k0] = dflat[k0] - elif any([ss in typ for ss in ['csc_', 'bsr_', 'coo_', 'csr_', 'dia_', 'dok_', 'lil_']]): + elif any([ss in typ for ss in lsparse]): assert typ in type(dflat[k0]).__name__ dout[k0] = dflat[k0] elif 'Unit' in typ: @@ -276,8 +280,10 @@ def get_files( lc = [ isinstance(dpfe, (str, tuple)), - isinstance(dpfe, list) and all([isinstance(pp, (str, tuple)) for pp in dpfe]), - isinstance(dpfe, dict) and all([isinstance(pp, str) for pp in dpfe.keys()]) + isinstance(dpfe, list) + and all([isinstance(pp, (str, tuple)) for pp in dpfe]), + isinstance(dpfe, dict) + and all([isinstance(pp, str) for pp in dpfe.keys()]) ] if not any(lc): @@ -288,7 +294,7 @@ def get_files( "\t\tkeys = valid path str\n" "\t\tvalues =\n" "\t\t\t- str: valid file names in the associated path\n" - "\t\t\t- str: pattern to be found in the files names in that path\n" + "\t\t\t- str: pattern to be found in the files names in path\n" "\t\t\t- list of 
str: list of the above (file names or patterns)\n" ) raise Exception(msg) @@ -406,7 +412,10 @@ def _get_files_from_path( lc = [ any([os.path.isfile(pfe) for pfe in lpfe if isinstance(pfe, str)]), - any([os.path.isfile(os.path.join(path, pfe)) for pfe in lpfe if isinstance(pfe, str)]), + any([ + os.path.isfile(os.path.join(path, pfe)) + for pfe in lpfe if isinstance(pfe, str) + ]), ] # --------------------- @@ -469,4 +478,4 @@ def _get_files_from_path( else: warnings.warn(msg) - return out \ No newline at end of file + return out From d3a9ac83cdd35c760c423280d5a6d01dc34af2fb Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 19:15:17 +0000 Subject: [PATCH 16/25] [#159] Restructuring binning --- datastock/_class01.py | 94 --- datastock/_class01_binning.py | 1113 --------------------------- datastock/_class03_Bins.py | 45 +- datastock/_class03_bin_vs_bs.py | 385 ++++++++++ datastock/_class03_binning.py | 1250 ++++++++++++++++++++++++------- 5 files changed, 1413 insertions(+), 1474 deletions(-) delete mode 100644 datastock/_class01_binning.py create mode 100644 datastock/_class03_bin_vs_bs.py diff --git a/datastock/_class01.py b/datastock/_class01.py index 8835c51..39d7ee0 100644 --- a/datastock/_class01.py +++ b/datastock/_class01.py @@ -14,7 +14,6 @@ from . import _class01_show as _show from . import _class01_compute as _compute from . import _class01_domain as _domain -from . import _class01_binning as _binning from . import _class01_interpolate as _interpolate from . import _class01_uniformize as _uniformize from . import _export_dataframe @@ -778,99 +777,6 @@ def get_domain_ref( return _domain.domain_ref(coll=self, domain=domain) - # --------------------- - # Binning - # --------------------- - - def binning( - self, - data=None, - data_units=None, - axis=None, - # binning - bins0=None, - bins1=None, - bin_data0=None, - bin_data1=None, - bin_units0=None, - # kind of binning - integrate=None, - statistic=None, - # options - safety_ratio=None, - dref_vector=None, - verb=None, - returnas=None, - # storing - store=None, - store_keys=None, - ): - """ Return the binned data - - data: the data on which to apply binning, can be - - a list of np.ndarray to be binned - (any dimension as long as they all have the same) - - a list of keys to ddata items sharing the same refs - - data_units: str only necessary if data is a list of arrays - - axis: int or array of int indices - the axis of data along which to bin - data will be flattened along all those axis priori to binning - If None, assumes bin_data is not variable and uses all its axis - - bins0: the bins (centers), can be - - a 1d vector of monotonous bins - - a int, used to compute a bins vector from max(data), min(data) - - bin_data0: the data used to compute binning indices, can be: - - a str, key to a ddata item - - a np.ndarray - - a list of any of the above if each data has diff. 
size along axis - - bin_units: str - only used if integrate = True and bin_data is a np.ndarray - - integrate: bool - flag indicating whether binning is used for integration - Implies that: - Only usable for 1d binning (axis has to be a single index) - data is multiplied by bin_data0 step prior to binning - - statistic: str - the statistic kwd feed to scipy.stats.binned_statistic() - automatically set to 'sum' if integrate = True - - store: bool - If True, will sotre the result in ddata - Only possible if all (data, bin_data and bin) are provided as keys - - """ - - return _binning.binning( - coll=self, - data=data, - data_units=data_units, - axis=axis, - # binning - bins0=bins0, - bins1=bins1, - bin_data0=bin_data0, - bin_data1=bin_data1, - bin_units0=bin_units0, - # kind of binning - integrate=integrate, - statistic=statistic, - # options - safety_ratio=safety_ratio, - dref_vector=dref_vector, - verb=verb, - returnas=returnas, - # storing - store=store, - store_keys=store_keys, - ) - # --------------------- # Interpolation # --------------------- diff --git a/datastock/_class01_binning.py b/datastock/_class01_binning.py deleted file mode 100644 index 302781d..0000000 --- a/datastock/_class01_binning.py +++ /dev/null @@ -1,1113 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Jan 5 20:14:40 2023 - -@author: dvezinet -""" - - -import itertools as itt - - -import numpy as np -# import astropy.units as asunits -import scipy.stats as scpst - - -# specific -from . import _generic_check -from . import _generic_utils - - -# Dict of statistic <=> ufunc -_DUFUNC = { - 'sum': np.add.reduceat, - 'max': np.maximum.reduceat, - 'min': np.minimum.reduceat, -} - - -# ############################################################ -# ############################################################ -# interpolate spectral -# ############################################################ - - -def binning( - coll=None, - data=None, - data_units=None, - axis=None, - # binning - bins0=None, - bins1=None, - bin_data0=None, - bin_data1=None, - bin_units0=None, - # kind of binning - integrate=None, - statistic=None, - # options - safety_ratio=None, - dref_vector=None, - ref_vector_strategy=None, - verb=None, - returnas=None, - # storing - store=None, - store_keys=None, -): - """ Return the binned data - - data: the data on which to apply binning, can be - - a list of np.ndarray to be binned - (any dimension as long as they all have the same) - - a list of keys to ddata items sharing the same refs - - data_units: str only necessary if data is a list of arrays - - axis: int or array of int indices - the axis of data along which to bin - data will be flattened along all those axis priori to binning - If None, assumes bin_data is not variable and uses all its axis - - bins0: the bins (centers), can be - - a 1d vector of monotonous bins - - a int, used to compute a bins vector from max(data), min(data) - - bin_data0: the data used to compute binning indices, can be: - - a str, key to a ddata item - - a np.ndarray - _ a list of any of the above if each data has different size along axis - - bin_units: str - only used if integrate = True and bin_data is a np.ndarray - - integrate: bool - flag indicating whether binning is used for integration - Implies that: - Only usable for 1d binning (axis has to be a single index) - data is multiplied by the underlying bin_data0 step prior to binning - - statistic: str - the statistic kwd feed to scipy.stats.binned_statistic() - automatically set to 'sum' if integrate = True - - store: 
bool - If True, will sotre the result in ddata - Only possible if all (data, bin_data and bin) are provided as keys - - """ - - # ---------- - # checks - - # keys - ( - ddata, dbins0, dbins1, axis, - statistic, dvariable, - dref_vector, - verb, store, returnas, - ) = _check(**locals()) - - # -------------- - # actual binning - - if dvariable['bin0'] is False and dvariable['bin1'] is False: - - dout = {k0: {'units': v0['units']} for k0, v0 in ddata.items()} - for k0, v0 in ddata.items(): - - # handle dbins1 - if dbins1 is None: - bins1, vect1, bin_ref1 = None, None, None - else: - bins1 = dbins1['edges'] - vect1 = dbins1['data'] - bin_ref1 = dbins1[k0].get('bin_ref') - - # compute - dout[k0]['data'], dout[k0]['ref'] = _bin_fixed_bin( - # data to bin - data=v0['data'], - data_ref=v0['ref'], - # binning quantities - vect0=dbins0[k0]['data'], - vect1=vect1, - # bins - bins0=dbins0[k0]['edges'], - bins1=bins1, - bin_ref0=dbins0[k0].get('bin_ref'), - bin_ref1=bin_ref1, - # axis - axis=axis, - # statistic - statistic=statistic, - # integration - variable_data=dvariable['data'], - ) - - else: - msg = ( - "Variable bin vectors not implemented yet!\n" - f"\t- axis: {axis}\n" - f"\t- bin_data0 variable: {dvariable['bin0']}\n" - f"\t- bin_data1 variable: {dvariable['bin1']}\n" - ) - raise NotImplementedError(msg) - - # -------------- - # storing - - if store is True: - - _store( - coll=coll, - dout=dout, - store_keys=store_keys, - ) - - # ------------- - # return - - if returnas is True: - return dout - - -# #################################### -# check -# #################################### - - -def _check( - coll=None, - data=None, - data_units=None, - axis=None, - # binning - bins0=None, - bins1=None, - bin_data0=None, - bin_data1=None, - bin_units0=None, - # kind of binning - integrate=None, - statistic=None, - # options - safety_ratio=None, - dref_vector=None, - ref_vector_strategy=None, - verb=None, - returnas=None, - # storing - store=None, - # non-used - **kwdargs -): - - # ----------------- - # store and verb - # ------------------- - - # verb - verb = _generic_check._check_var( - verb, 'verb', - types=bool, - default=True, - ) - - # ------------------ - # data: str vs array - # ------------------- - - ddata = _check_data( - coll=coll, - data=data, - data_units=data_units, - store=store, - ) - - ndim_data = list(ddata.values())[0]['data'].ndim - - # ----------------- - # check statistic - # ------------------- - - # statistic - if integrate is True: - statistic = 'sum' - else: - statistic = _generic_check._check_var( - statistic, 'statistic', - types=str, - default='sum', - ) - - # ----------- - # bins - # ------------ - - dbins0 = _check_bins( - coll=coll, - lkdata=list(ddata.keys()), - bins=bins0, - dref_vector=dref_vector, - store=store, - ) - if bins1 is not None: - dbins1 = _check_bins( - coll=coll, - lkdata=list(ddata.keys()), - bins=bins1, - dref_vector=dref_vector, - store=store, - ) - - # ----------- - # bins - # ------------ - - # dbins0 - dbins0, variable_bin0, axis = _check_bins_data( - coll=coll, - axis=axis, - ddata=ddata, - bin_data=bin_data0, - dbins=dbins0, - bin_units=bin_units0, - dref_vector=dref_vector, - safety_ratio=safety_ratio, - store=store, - ) - - # data vs axis - if np.any(axis > ndim_data - 1): - msg = f"axis too large\n{axis}" - raise Exception(msg) - - variable_data = len(axis) < ndim_data - - # dbins1 - if bin_data1 is not None: - dbins1, variable_bin1, axis = _check_bins_data( - coll=coll, - axis=axis, - ddata=ddata, - bin_data=bin_data1, - dbins=dbins1, - 
bin_units=None, - dref_vector=dref_vector, - safety_ratio=safety_ratio, - store=store, - ) - - if variable_bin0 != variable_bin1: - msg = "bin_data0 and bin_data1 have different shapes, todo" - raise NotImplementedError(msg) - - else: - dbins1 = None - variable_bin1 = False - - # ----------------- - # check integrate - # ------------------- - - # integrate - integrate = _generic_check._check_var( - integrate, 'integrate', - types=bool, - default=False, - ) - - # safety checks - if integrate is True: - - if bin_data1 is not None: - msg = ( - "If integrate = True, bin_data1 must be None!\n" - "\t- bin_data1: {bin_data1}\n" - ) - raise Exception(msg) - - if len(axis) > 1: - msg = ( - "If integrate is true, binning can only be done on one axis!\n" - f"\t- axis: {axis}\n" - ) - raise Exception(msg) - - - # ----------------------- - # additional safety check - - if integrate is True: - - if variable_bin0: - axbin = axis[0] - else: - axbin = 0 - - for k0, v0 in ddata.items(): - - ddata[k0]['units'] = v0['units'] * dbins0[k0]['units'] - if dbins0[k0]['data'].size == 0: - continue - - dv = np.diff(dbins0[k0]['data'], axis=axbin) - dv = np.concatenate( - (np.take(dv, [0], axis=axbin), dv), - axis=axbin, - ) - - # reshape - if variable_data != variable_bin0: - - if variable_data: - shape_dv = np.ones((ndim_data,), dtype=int) - shape_dv[axis[0]] = -1 - dv = dv.reshape(tuple(shape_dv)) - - if variable_bin0: - raise NotImplementedError() - - ddata[k0]['data'] = v0['data'] * dv - - # -------- - # variability dict - - dvariable = { - 'data': variable_data, - 'bin0': variable_bin0, - 'bin1': variable_bin1, - } - - # -------- - # returnas - - returnas = _generic_check._check_var( - returnas, 'returnas', - types=bool, - default=store is False, - ) - - return ( - ddata, dbins0, dbins1, axis, - statistic, dvariable, - dref_vector, - verb, store, returnas, - ) - - -def _check_data( - coll=None, - data=None, - data_units=None, - store=None, -): - # ----------- - # store - - store = _generic_check._check_var( - store, 'store', - types=bool, - default=False, - ) - - # --------------------- - # make sure it's a list - - if isinstance(data, (np.ndarray, str)): - data = [data] - assert isinstance(data, list) - - # ------------------------------------------------ - # identify case: str vs array, all with same ndim - - lc = [ - all([ - isinstance(dd, str) - and dd in coll.ddata.keys() - and coll.ddata[dd]['data'].ndim == coll.ddata[data[0]]['data'].ndim - for dd in data - ]), - all([ - isinstance(dd, np.ndarray) - and dd.ndim == data[0].ndim - for dd in data - ]), - ] - - # vs store - if store is True: - if not lc[0]: - msg = "If storing, all data, bin data and bins must be declared!" 
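# For context, the two input forms this check accepts, sketched with
# hypothetical names ('prof0', 'prof1' and 't' are assumed ddata keys,
# arr0/arr1/vect/cents assumed arrays):
#
#     # form 1: keys to existing ddata items (mandatory when store=True)
#     coll.binning(data=['prof0', 'prof1'], bin_data0='t', bins0=20)
#
#     # form 2: raw arrays of identical ndim (returned, cannot be stored)
#     coll.binning(
#         data=[arr0, arr1], bin_data0=vect, bins0=cents,
#         returnas=True, store=False,
#     )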
- raise Exception(msg) - - - # if none => err - if np.sum(lc) != 1: - msg = ( - "Arg data must be a list of either:\n" - "\t- keys to ddata with identical ref\n" - "\t- np.ndarrays with identical shape\n" - f"Provided:\n{data}" - ) - raise Exception(msg) - - # -------------------- - # sort cases - - # str => keys to existing data - if lc[0]: - ddata = { - k0: { - 'key': k0, - 'data': coll.ddata[k0]['data'], - 'ref': coll.ddata[k0]['ref'], - 'units': coll.ddata[k0]['units'], - } - for k0 in data - } - - # arrays - else: - ddata = { - ii: { - 'key': None, - 'data': data[ii], - 'ref': None, - 'units': data_units, - } - for ii in range(len(data)) - } - - return ddata - - -def _check_bins( - coll=None, - lkdata=None, - bins=None, - dref_vector=None, - store=None, -): - - dbins = {k0: {} for k0 in lkdata} - if np.isscalar(bins) and not isinstance(bins, str): - bins = int(bins) - - elif isinstance(bins, str): - lok_data = list(coll.ddata.keys()) - lok_ref = list(coll.dref.keys()) - if hasattr(coll, '_which_bins'): - wb = coll._which_bins - lok_bins = list(coll.dobj.get(wb, {}).keys()) - else: - lok_bins = [] - - bins = _generic_check._check_var( - bins, 'bins', - types=str, - allowed=lok_data + lok_ref + lok_bins, - ) - - else: - bins = _generic_check._check_flat1darray( - bins, 'bins', - dtype=float, - unique=True, - can_be_None=False, - ) - - # -------------- - # check vs store - - if store is True and not isinstance(bins, str): - msg = "With store=True, bins must be keys to coll.dobj['bins'] items!" - raise Exception(msg) - - # ---------------------------- - # compute bin edges if needed - - if isinstance(bins, str): - - if bins in lok_bins: - for k0 in lkdata: - dbins[k0]['bin_ref'] = coll.dobj[wb][bins]['ref'] - dbins[k0]['edges'] = coll.dobj[wb][bins]['edges'] - - else: - - if bins in lok_ref: - - if dref_vector is None: - dref_vector = {} - - bins = coll.get_ref_vector( - ref=bins, - **dref_vector, - )[3] - if bins is None: - msg = "No ref vector identified!" - raise Exception(msg) - - binc = coll.ddata[bins]['data'] - for k0 in lkdata: - dbins[k0]['bin_ref'] = coll.ddata[bins]['ref'] - dbins[k0]['edges'] = np.r_[ - binc[0] - 0.5*(binc[1] - binc[0]), - 0.5*(binc[1:] + binc[:-1]), - binc[-1] + 0.5*(binc[-1] - binc[-2]), - ] - - else: - - for k0 in lkdata: - bin_edges = np.r_[ - bins[0] - 0.5*(bins[1] - bins[0]), - 0.5*(bins[1:] + bins[:-1]), - bins[-1] + 0.5*(bins[-1] - bins[-2]), - ] - - dbins[k0]['edges'] = bin_edges - - return dbins - - -def _check_bins_data( - coll=None, - axis=None, - ddata=None, - bin_data=None, - dbins=None, - bin_units=None, - dref_vector=None, - store=None, - # if bsplines - strict=None, - safety_ratio=None, - deg=None, -): - - # -------------- - # options - # -------------- - - # check - strict = _generic_check._check_var( - strict, 'strict', - types=bool, - default=True, - ) - - # check - safety_ratio = float(_generic_check._check_var( - safety_ratio, 'safety_ratio', - types=(int, float), - default=1.5, - sign='>0.' 
- )) - - # ------------- - # bin_data - # -------------- - - # make list - if isinstance(bin_data, (str, np.ndarray)): - bin_data = [bin_data for ii in range(len(ddata))] - - # check consistency - if not (isinstance(bin_data, list) and len(bin_data) == len(ddata)): - msg = ( - "Arg bin_data must be a list of len() == len(data)\n" - f"\t- type(bin_data) = {type(bin_data)}\n" - ) - if isinstance(bin_data, list): - msg += ( - f"\t- len(data) = {len(ddata)}\n" - f"\t- len(bin_data) = {len(bin_data)}\n" - ) - raise Exception(msg) - - # ------------- - # case sorting - - lok_ref = list(coll.dref.keys()) - lok_data = [k0 for k0, v0 in coll.ddata.items()] - - lok = lok_data + lok_ref - lc = [ - all([isinstance(bb, str) and bb in lok for bb in bin_data]), - all([isinstance(bb, np.ndarray) for bb in bin_data]), - ] - if np.sum(lc) != 1: - msg = ( - "Arg bin_data must be a list of:\n" - f"\t- np.ndarrays\n" - f"\t- keys to coll.ddata items\n" - f"Provided:\n{bin_data}\n" - f"Available:\n{sorted(lok)}" - ) - raise Exception(msg) - - # -------------- - # check vs store - - if store is True and not lc[0]: - msg = "With store=True, all bin_data must be keys to ddata or ref" - raise Exception(msg) - - # case with all str - if lc[0]: - - if dref_vector is None: - dref_vector = {} - - # derive dbins - for ii, k0 in enumerate(ddata.keys()): - - # if ref => identify vector - if bin_data[ii] in lok_ref: - - key_vect = coll.get_ref_vector( - ref=bin_data[ii], - **dref_vector, - )[3] - - if key_vect is None: - msg = "bin_data '{bin_data[ii]}' has no reference vector!" - raise Exception(msg) - - bin_data[ii] = key_vect - - # fill dict - dbins[k0].update({ - 'key': bin_data[ii], - 'data': coll.ddata[bin_data[ii]]['data'], - 'ref': coll.ddata[bin_data[ii]]['ref'], - 'units': coll.ddata[bin_data[ii]]['units'], - }) - - else: - for ii, k0 in enumerate(ddata.keys()): - dbins[k0].update({ - 'key': None, - 'data': bin_data[ii], - 'ref': None, - 'units': bin_units, - }) - - # ----------------------------------- - # check nb of dimensions consistency - - ldim = list(set([v0['data'].ndim for v0 in dbins.values()])) - if len(ldim) > 1: - msg = ( - "All bin_data provided must have the same nb of dimensions!\n" - f"Provided: {ldim}" - ) - raise Exception(msg) - - # ------------------------- - # check dimensions vs axis - - # None => set to all bin (assuming variable_bin = False) - if axis is None: - for k0, v0 in dbins.items(): - - if ddata[k0]['ref'] is not None and v0['ref'] is not None: - seq_data = list(ddata[k0]['ref']) - seq_bin = v0['ref'] - - else: - seq_data = list(ddata[k0]['data'].shape) - seq_bin = v0['data'].shape - - # get start indices of subsequence seq_bin in sequence seq_data - laxis0 = list(_generic_utils.KnuthMorrisPratt(seq_data, seq_bin)) - if len(laxis0) != 1: - msg = ( - "Please specify axis, ambiguous results from ref / shape\n" - f"\t- data '{k0}': {seq_data}\n" - f"\t- bin '{v0['key']}': {seq_bin}\n" - f"=> laxis0 = {laxis0}\n" - ) - raise Exception(msg) - - axisi = laxis0[0] + np.arange(0, len(seq_bin)) - if axis is None: - axis = axisi - else: - assert axis == axisi - - # -------------- - # axis - # ------------------- - - axis = _generic_check._check_flat1darray( - axis, 'axis', - dtype=int, - unique=True, - can_be_None=False, - sign='>=0', - ) - - if np.any(np.diff(axis) > 1): - msg = f"axis must be adjacent indices!\n{axis}" - raise Exception(msg) - - # check - ndim_bin = ldim[0] - if ndim_bin < len(axis): - msg = ( - "bin_data seems to have insufficient number of dimensions!\n" - f"\t- axis: 
{axis}\n" - f"\t- ndim_bin: {ndim_bin}\n" - f"\t- bin_data: {bin_data}" - ) - raise Exception(msg) - - variable_bin = ndim_bin > len(axis) - - # ------------------------------- - # check vs data shape along axis - - ndim_data = list(ddata.values())[0]['data'].ndim - variable_data = len(axis) < ndim_data - for k0, v0 in dbins.items(): - - shape_data = ddata[k0]['data'].shape - shape_bin = v0['data'].shape - - if variable_bin == variable_data and shape_data != v0['data'].shape: - msg = ( - "variable_bin == variable_data => shapes should be the same!\n" - f"\t- variable_data = {variable_data}\n" - f"\t- variable_bin = {variable_bin}\n" - f"\t- axis = {axis}\n" - f"\t- data '{k0}' shape = {shape_data}\n" - f"\t- bin_data '{v0['key']}' shape = {v0['data'].shape}\n" - ) - raise Exception(msg) - - else: - if variable_data: - sh_var, sh_fix = shape_data, shape_bin - else: - sh_fix, sh_var = shape_data, shape_bin - - shape_axis = [ss for ii, ss in enumerate(sh_var) if ii in axis] - if sh_fix != tuple(shape_axis): - msg = ( - f"Wrong shapes: data '{k0}' vs bin_data '{v0['key']}'!\n" - f"\t- shape_data: {shape_data}\n" - f"\t- shape_bin: {shape_bin}\n" - f"\t- axis: {axis}\n" - ) - raise Exception(msg) - - # ---------------------------------------- - # safety check on bin sizes - # ---------------------------------------- - - if len(axis) == 1: - - for k0, v0 in dbins.items(): - - if variable_bin: - raise NotImplementedError() - else: - dv = np.abs(np.diff(v0['data'])) - - dvmean = np.mean(dv) + np.std(dv) - - if strict is True: - - lim = safety_ratio * dvmean - db = np.mean(np.diff(dbins[k0]['edges'])) - if db < lim: - msg = ( - f"Uncertain binning for bin_data '{v0['key']}':\n" - f"Binning steps ({db}) are < {safety_ratio} * bin_data ({lim}) step" - ) - raise Exception(msg) - - return dbins, variable_bin, axis - - -# #################################### -# #################################### -# binning -# #################################### - - -def _bin_fixed_bin( - data=None, - data_ref=None, - vect0=None, - vect1=None, - bins0=None, - bins1=None, - bin_ref0=None, - bin_ref1=None, - axis=None, - statistic=None, - # integration - variable_data=None, -): - - # ---------------------------- - # select only relevant indices - - indin = np.isfinite(vect0) - indin[indin] = (vect0[indin] >= bins0[0]) & (vect0[indin] < bins0[-1]) - if bins1 is not None: - indin[indin] = np.isfinite(vect1[indin]) - indin[indin] = (vect1[indin] >= bins1[0]) & (vect1[indin] < bins1[-1]) - - if not variable_data: - indin[indin] = np.isfinite(data[indin]) - - # ------------- - # prepare shape - - shape_data = data.shape - ind_other = np.arange(data.ndim) - nomit = len(axis) - 1 - ind_other_flat = np.r_[ind_other[:axis[0]], ind_other[axis[-1]+1:] - nomit] - ind_other = np.r_[ind_other[:axis[0]], ind_other[axis[-1]+1:]] - - shape_other = [ss for ii, ss in enumerate(shape_data) if ii not in axis] - - shape_val = list(shape_other) - shape_val.insert(axis[0], int(bins0.size - 1)) - if bins1 is not None: - shape_val.insert(axis[0] + 1, int(bins1.size - 1)) - val = np.zeros(shape_val, dtype=data.dtype) - - if not np.any(indin): - return val - - # ------------- - # subset - - # vect - vect0 = vect0[indin] - if bins1 is not None: - vect1 = vect1[indin] - - # data - sli = [slice(None) for ii in shape_other] - sli.insert(axis[0], indin) - - data = data[tuple(sli)] - - # --------------- - # custom - - if statistic == 'sum_smooth': - stat = 'mean' - else: - stat = statistic - - # ------------------ - # simple case - - if variable_data is 
False: - - if bins1 is None: - - # compute - val[...] = scpst.binned_statistic( - vect0, - data, - bins=bins0, - statistic=stat, - )[0] - - else: - val[...] = scpst.binned_statistic_2d( - vect0, - vect1, - data, - bins=[bins0, bins1], - statistic=stat, - )[0] - - # ------------------------------------------------------- - # variable data, but axis = int and ufunc exists (faster) - - elif len(axis) == 1 and stat in _DUFUNC.keys() and bins1 is None: - - if statistic == 'sum_smooth': - msg = "statistic 'sum_smooth' not properly handled here yet" - raise NotImplementedError(msg) - - # safety check - vect0s = np.sort(vect0) - if not np.allclose(vect0s, vect0): - msg = ( - "Non-sorted vect0 for binning 1d with ufunc!\n" - f"\t- axis: {axis}\n" - f"\t- shape_data: {shape_data}\n" - f"\t- shape_other: {shape_other}\n" - f"\t- shape_val: {shape_val}\n" - f"\t- vect0.shape: {vect0.shape}\n" - f"\t- vect0: {vect0}\n" - f"\t- vect0s: {vect0s}\n" - ) - raise Exception(msg) - - # get ufunc - ufunc = _DUFUNC[stat] - - # get indices - ind0 = np.searchsorted( - bins0, - vect0, - sorter=None, - ) - ind0[ind0 == 0] = 1 - - # ind - indu = np.unique(ind0 - 1) - - # cases - if indu.size == 1: - sli[axis[0]] = indu[0] - val[tuple(sli)] = np.nansum(data, axis=axis[0]) - - else: - - sli[axis[0]] = indu - - # neutralize nans - data[np.isnan(data)] = 0. - ind = np.r_[0, np.where(np.diff(ind0))[0] + 1] - - # sum - val[tuple(sli)] = ufunc(data, ind, axis=axis[0]) - - # ----------------------------------- - # other statistic with variable data - - else: - - # indices - linds = [range(nn) for nn in shape_other] - - # slice_data - sli = [0 for ii in shape_other] - sli.insert(axis[0], slice(None)) - sli = np.array(sli) - - if bins1 is None: - - for ind in itt.product(linds): - sli[ind_other_flat] = ind - - val[tuple(sli)] = scpst.binned_statistic( - vect0, - data[tuple(sli)], - bins=bins0, - statistic=stat, - )[0] - - if statistic == 'sum_smooth': - val[tuple(sli)] *= ( - np.nansum(data[tuple(sli)]) / np.nansum(val[tuple(sli)]) - ) - - else: - - sli_val = np.copy(sli) - sli_val = np.insert(axis[0] + 1, slice(None)) - - for ind in itt.product(linds): - - sli[ind_other_flat] = ind - sli_val[ind_other_flat] = ind - - val[tuple(sli_val)] = scpst.binned_statistic_2d( - vect0, - vect1, - data[tuple(sli)], - bins=[bins0, bins1], - statistic=stat, - )[0] - - if statistic == 'sum_smooth': - val[tuple(sli_val)] *= ( - np.nansum(data[tuple(sli)]) / np.nansum(val[tuple(sli_val)]) - ) - - # --------------- - # adjust custom - - if statistic == 'sum_smooth': - if variable_data is False: - val[...] 
*= np.nansum(data) / np.nansum(val)
-
-    # ------------
-    # references
-
-    if data_ref is not None:
-        ref = [
-            rr for ii, rr in enumerate(data_ref)
-            if ii not in axis
-        ]
-
-        if bin_ref0 is not None:
-            bin_ref0 = bin_ref0[0]
-        if bin_ref1 is not None:
-            bin_ref1 = bin_ref1[0]
-
-        ref.insert(axis[0], bin_ref0)
-        if bins1 is not None:
-            ref.insert(axis[0] + 1, bin_ref1)
-
-        ref = tuple(ref)
-    else:
-        ref = None
-
-    return val, ref
-
-# #######################################################
-# Store
-# #######################################################
-
-
-def _store(
-    coll=None,
-    dout=None,
-    store_keys=None,
-):
-
-
-    # ----------------
-    # check store_keys
-
-    if len(dout) == 1 and isinstance(store_keys, str):
-        store_keys = [store_keys]
-
-    ldef = [f"{k0}_binned" for k0 in dout.items()]
-    lex = list(coll.ddata.keys())
-    store_keys = _generic_check._check_var_iter(
-        store_keys, 'store_keys',
-        types=list,
-        types_iter=str,
-        default=ldef,
-        excluded=lex,
-    )
-
-    # -------------
-    # store
-
-    for ii, (k0, v0) in enumerate(dout.items()):
-        coll.add_data(
-            key=store_keys[ii],
-            data=v0['data'],
-            ref=v0['ref'],
-            units=v0['units'],
-        )
\ No newline at end of file
diff --git a/datastock/_class03_Bins.py b/datastock/_class03_Bins.py
index b1c0ab1..79d37b0 100644
--- a/datastock/_class03_Bins.py
+++ b/datastock/_class03_Bins.py
@@ -5,14 +5,10 @@
 import copy
 
 
-# Common
-import numpy as np
-
-
 # local
 from ._class02 import DataStock2 as Previous
 from . import _class03_checks as _checks
-from . import _class03_binning as _binning
+from . import _class03_bin_vs_bs as _bin_vs_bs
 
 
 __all__ = ['Bins']
@@ -124,9 +120,46 @@ def binning(
 
         return a dict with data and units per key
 
+        data: the data on which to apply binning, can be
+            - a list of np.ndarray to be binned
+                (any dimension as long as they all have the same)
+            - a list of keys to ddata items sharing the same refs
+
+        data_units: str only necessary if data is a list of arrays
+
+        axis: int or array of int indices
+            the axis of data along which to bin
+            data will be flattened along all those axes prior to binning
+            If None, assumes bin_data is not variable and uses all its axes
+
+        bins0: the bins (centers), can be
+            - a 1d vector of monotonic bins
+            - an int, used to compute a bins vector from max(data), min(data)
+
+        bin_data0: the data used to compute binning indices, can be:
+            - a str, key to a ddata item
+            - a np.ndarray
+            - a list of any of the above if each data has diff. size along axis
+
+        bin_units0: str
+            only used if integrate = True and bin_data is a np.ndarray
+
+        integrate: bool
+            flag indicating whether binning is used for integration
+            Implies that:
+                Only usable for 1d binning (axis has to be a single index)
+                data is multiplied by bin_data0 step prior to binning
+
+        statistic: str
+            the statistic kwd fed to scipy.stats.binned_statistic()
+            automatically set to 'sum' if integrate = True
+
+        store: bool
+            If True, will store the result in ddata
+            Only possible if all (data, bin_data and bin) are provided as keys
         """
 
-        return _binning.binning(
+        return _bin_vs_bs.main(
             coll=self,
             data=data,
             data_units=data_units,
diff --git a/datastock/_class03_bin_vs_bs.py b/datastock/_class03_bin_vs_bs.py
new file mode 100644
index 0000000..4e98937
--- /dev/null
+++ b/datastock/_class03_bin_vs_bs.py
@@ -0,0 +1,385 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jan 5 20:14:40 2023
+
+@author: dvezinet
+"""
+
+
+import numpy as np
+
+
+from . import _class03_binning as _binning
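# For context, a hypothetical usage sketch of the bspline path handled by
# this module, as reached from Bins.binning() above ('emiss' and 'bsE' are
# illustrative keys for a 1d-bspline-based quantity and its bspline):
#
#     dout = coll.binning(
#         data='emiss',       # data defined on a 1d bspline
#         bin_data0='bsE',    # bspline key => interpolate before binning
#         bins0=np.linspace(0., 10., 11),
#         integrate=True,
#         returnas=True,
#         store=False,
#     )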
+
+
+# ############################################################
+# ############################################################
+# main
+# ############################################################
+
+
+def main(
+    coll=None,
+    data=None,
+    data_units=None,
+    axis=None,
+    # binning
+    bins0=None,
+    bins1=None,
+    bin_data0=None,
+    bin_data1=None,
+    bin_units0=None,
+    # kind of binning
+    integrate=None,
+    statistic=None,
+    # options
+    safety_ratio=None,
+    dref_vector=None,
+    ref_vector_strategy=None,
+    verb=None,
+    returnas=None,
+    # storing
+    store=None,
+    store_keys=None,
+):
+    """ Bin data, with a dedicated pre-treatment for bspline-based bin_data
+
+    If bin_data0 refers to a 1d bspline (or to data defined on one),
+    data and bin_data0 are first interpolated on a fine sampling of the
+    underlying mesh, then fed to the standard binning routine
+    """
+
+    # ----------
+    # checks
+
+    # keys
+    isbs, bin_data0 = _check_bs(
+        coll=coll,
+        bin_data0=bin_data0,
+        bin_data1=bin_data1,
+    )
+
+    # ----------
+    # trivial
+
+    nobin = False
+    if isbs:
+
+        # add ref and data
+        kr, kd, ddatan, nobin = _interpolate(
+            coll=coll,
+            data=data,
+            data_units=data_units,
+            # binning
+            bins0=bins0,
+            bin_data0=bin_data0,
+            # options
+            dref_vector=dref_vector,
+            verb=verb,
+            store=store,
+            store_keys=store_keys,
+        )
+
+        # safety check
+        if nobin is False:
+            lk = list(ddatan.keys())
+            data = [ddatan[k0]['data'] for k0 in lk]
+            bin_data0 = [ddatan[k0]['bin_data'] for k0 in lk]
+
+    # ---------------------
+    # do the actual binning
+
+    if nobin is False:
+        dout = _binning.main(
+            coll=coll,
+            data=data,
+            data_units=data_units,
+            axis=axis,
+            # binning
+            bins0=bins0,
+            bins1=bins1,
+            bin_data0=bin_data0,
+            bin_data1=bin_data1,
+            bin_units0=bin_units0,
+            # kind of binning
+            integrate=integrate,
+            statistic=statistic,
+            # options
+            safety_ratio=safety_ratio,
+            dref_vector=dref_vector,
+            ref_vector_strategy=ref_vector_strategy,
+            verb=verb,
+            returnas=True,
+            # storing
+            store=store,
+            store_keys=store_keys,
+        )
+
+    # --------------------------------
+    # remove intermediate ref and data
+
+    if isbs is True:
+        for dd in data + bin_data0 + [kd]:
+            if dd in coll.ddata.keys():
+                coll.remove_data(dd)
+        if kr in coll.dref.keys():
+            coll.remove_ref(kr)
+
+        for k0 in data:
+            k1 = [k1 for k1, v1 in ddatan.items() if v1['data'] == k0][0]
+            dout[k1] = dict(dout[k0])
+            del dout[k0]
+    else:
+        dout = nobin
+
+    # ----------
+    # return
+
+    if returnas is True:
+        return dout
+
+
+# ######################################################
+# ######################################################
+# check
+# ######################################################
+
+
+def _check_bs(
+    coll=None,
+    bin_data0=None,
+    bin_data1=None,
+):
+
+    # ----------------
+    # Has bsplines
+    # ----------------
+
+    if hasattr(coll, '_which_bsplines'):
+
+        # ----------------
+        # list of bsplines
+
+        wbs = coll._which_bsplines
+        lok_bs = [
+            k0 for k0, v0 in coll.dobj.get(wbs, {}).items()
+            if len(v0['ref']) == 1
+        ]
+
+        # ----------------
+        # list data with bsplines
+
+        lok_dbs = [
+            k0 for k0, v0 in coll.ddata.items()
+            if v0.get(wbs) is not None
+            and len(v0[wbs]) == 1
+            and v0[wbs][0] in coll.dobj.get(wbs, {}).keys()
+            and len(coll.dobj[wbs][v0[wbs][0]]['ref']) == 1
+        ]
+
+        # ----------------------------------------
+        # flag whether bin_data0 is bspline-based
+
+        c0 = (
+            isinstance(bin_data0, str)
+            and bin_data1 is None
+            and bin_data0 in lok_dbs + lok_bs
+        )
+
+        # -----------------
+        # adjust bin_data0 from key_bs to key_apex
+
+        if bin_data0 in lok_bs:
+            bin_data0 = coll.dobj[wbs][bin_data0]['apex'][0]
+
+    # ----------------
+    # Does not have
bsplines + # ---------------- + + else: + + c0 = False + + return c0, bin_data0 + + +# ###################################################### +# ###################################################### +# interpolate +# ###################################################### + + +def _interpolate( + coll=None, + data=None, + data_units=None, + # binning + bins0=None, + bin_data0=None, + # options + dref_vector=None, + verb=None, + store=None, + store_keys=None, +): + + # --------- + # sampling + + # mesh knots + wm = coll._which_mesh + wbs = coll._which_bsplines + key_bs = coll.ddata[bin_data0][wbs][0] + keym = coll.dobj[wbs][key_bs][wm] + kknots = coll.dobj[wm][keym]['knots'][0] + + # resolution + vect = coll.ddata[kknots]['data'] + res0 = np.abs(np.min(np.diff(vect))) + + # --------- + # sampling + + ddata = _binning._check_data( + coll=coll, + data=data, + data_units=data_units, + store=True, + ) + lkdata = list(ddata.keys()) + + # -------------------- + # bins + + dbins0 = _binning._check_bins( + coll=coll, + lkdata=lkdata, + bins=bins0, + dref_vector=dref_vector, + store=store, + ) + + # ---------------------- + # npts for interpolation + + dv = np.abs(np.diff(vect)) + dvmean = np.mean(dv) + np.std(dv) + db = np.mean(np.diff(dbins0[lkdata[0]]['edges'])) + npts = (coll.dobj[wbs][key_bs]['deg'] + 3) * max(1, dvmean / db) + 3 + + # sample mesh, update dv + Dx0 = [dbins0[lkdata[0]]['edges'][0], dbins0[lkdata[0]]['edges'][-1]] + xx = coll.get_sample_mesh( + keym, + res=res0 / npts, + mode='abs', + Dx0=Dx0, + )['x0']['data'] + + if xx.size == 0: + nobins = _get_nobins( + coll=coll, + key_bs=key_bs, + ddata=ddata, + dbins0=dbins0, + store=store, + store_keys=store_keys, + ) + return None, None, None, nobins + + # ------------------- + # add ref + + kr = "ntemp" + kd = "xxtemp" + + coll.add_ref(kr, size=xx.size) + coll.add_data(kd, data=xx, ref=kr, units=coll.ddata[kknots]['units']) + + ddata_new = {} + for ii, (k0, v0) in enumerate(ddata.items()): + + # interpolate bin_data + kbdn = f"kbdn{ii}_temp" + # try: + coll.interpolate( + keys=bin_data0, + ref_key=key_bs, + x0=kd, + val_out=0., + returnas=False, + store=True, + inplace=True, + store_keys=kbdn, + ) + + # except Exception as err: + # msg = ( + # err.args[0] + # + "\n\n" + # f"\t- k0 = {k0}\n" + # f"\t- ii = {ii}\n" + # f"\t- bin_data0 = {bin_data0}\n" + # f"\t- key_bs = {key_bs}\n" + # f"\t- kd = {kd}\n" + # f"\t- xx.size: {xx.size}\n" + # f"\t- kbdn = {kbdn}\n" + # ) + # err.args = (msg,) + # raise err + + # interpolate_data + kdn = f"kbd{ii}_temp" + coll.interpolate( + keys=k0, + ref_key=key_bs, + x0=kd, + val_out=0., + returnas=False, + store=True, + inplace=True, + store_keys=kdn, + ) + ddata_new[k0] = {'bin_data': kbdn, 'data': kdn} + + return kr, kd, ddata_new, False + + +def _get_nobins( + coll=None, + key_bs=None, + ddata=None, + dbins0=None, + store=None, + store_keys=None, +): + + lk = list(ddata.keys()) + wbs = coll._which_bsplines + + if isinstance(store_keys, str): + store_keys = [store_keys] + + dout = {} + for ii, k0 in enumerate(lk): + + axis = ddata[k0]['ref'].index(coll.dobj[wbs][key_bs]['ref'][0]) + + shape = list(ddata[k0]['data'].shape) + nb = dbins0[k0]['edges'].size - 1 + shape[axis] = nb + + ref = list(ddata[k0]['ref']) + ref[axis] = dbins0[k0]['bin_ref'][0] + + dout[store_keys[ii]] = { + 'data': np.zeros(shape, dtype=float), + 'ref': tuple(ref), + 'units': ddata[k0]['units'], + } + + if store is True: + for k0, v0 in dout.items(): + coll.add_data(key=k0, **v0) + + return dout diff --git 
a/datastock/_class03_binning.py b/datastock/_class03_binning.py
index f7664b5..c3cbf94 100644
--- a/datastock/_class03_binning.py
+++ b/datastock/_class03_binning.py
@@ -6,19 +6,34 @@
 """
 
 
+import itertools as itt
+
+
 import numpy as np
+# import astropy.units as asunits
+import scipy.stats as scpst
 
+
+# specific
+from . import _generic_check
+from . import _generic_utils
 
-from . import _class01_binning
+
+# Dict of statistic <=> ufunc
+_DUFUNC = {
+    'sum': np.add.reduceat,
+    'max': np.maximum.reduceat,
+    'min': np.minimum.reduceat,
+}
 
 
 # ############################################################
 # ############################################################
-# interpolate spectral
+# main
 # ############################################################
 
 
-def binning(
+def main(
     coll=None,
     data=None,
    data_units=None,
@@ -42,344 +57,1057 @@ def binning(
     store=None,
     store_keys=None,
 ):
-    """ Return the spectrally interpolated coefs
+    """ Return the binned data
+
+    data: the data on which to apply binning, can be
+        - a list of np.ndarray to be binned
+            (any dimension as long as they all have the same)
+        - a list of keys to ddata items sharing the same refs
+
+    data_units: str only necessary if data is a list of arrays
+
+    axis: int or array of int indices
+        the axis of data along which to bin
+        data will be flattened along all those axes prior to binning
+        If None, assumes bin_data is not variable and uses all its axes
+
+    bins0: the bins (centers), can be
+        - a 1d vector of monotonic bins
+        - an int, used to compute a bins vector from max(data), min(data)
+
+    bin_data0: the data used to compute binning indices, can be:
+        - a str, key to a ddata item
+        - a np.ndarray
+        - a list of any of the above if each data has different size along axis
+
+    bin_units0: str
+        only used if integrate = True and bin_data is a np.ndarray
+
+    integrate: bool
+        flag indicating whether binning is used for integration
+        Implies that:
+            Only usable for 1d binning (axis has to be a single index)
+            data is multiplied by the underlying bin_data0 step before binning
+
+    statistic: str
+        the statistic kwd fed to scipy.stats.binned_statistic()
+        automatically set to 'sum' if integrate = True
+
+    store: bool
+        If True, will store the result in ddata
+        Only possible if all (data, bin_data and bin) are provided as keys
 
-    Either E xor Ebins can be provided
-        - E: return interpolated coefs
-        - Ebins: return binned (integrated) coefs
     """
 
     # ----------
     # checks
 
     # keys
-    isbs, bin_data0 = _check_bs(
+    (
+        ddata, dbins0, dbins1, axis,
+        statistic, dvariable,
+        dref_vector,
+        verb, store, returnas,
+    ) = _check(**locals())
+
+    # --------------
+    # actual binning
+
+    if dvariable['bin0'] is False and dvariable['bin1'] is False:
+
+        dout = {k0: {'units': v0['units']} for k0, v0 in ddata.items()}
+        for k0, v0 in ddata.items():
+
+            # handle dbins1
+            if dbins1 is None:
+                bins1, vect1, bin_ref1 = None, None, None
+            else:
+                bins1 = dbins1[k0]['edges']
+                vect1 = dbins1[k0]['data']
+                bin_ref1 = dbins1[k0].get('bin_ref')
+
+            # compute
+            dout[k0]['data'], dout[k0]['ref'] = _bin_fixed_bin(
+                # data to bin
+                data=v0['data'],
+                data_ref=v0['ref'],
+                # binning quantities
+                vect0=dbins0[k0]['data'],
+                vect1=vect1,
+                # bins
+                bins0=dbins0[k0]['edges'],
+                bins1=bins1,
+                bin_ref0=dbins0[k0].get('bin_ref'),
+                bin_ref1=bin_ref1,
+                # axis
+                axis=axis,
+                # statistic
+                statistic=statistic,
+                # integration
+                variable_data=dvariable['data'],
+            )
+
+    else:
+        msg = (
+            "Variable bin vectors not implemented yet!\n"
+            f"\t- axis: {axis}\n"
+            f"\t- bin_data0 variable:
{dvariable['bin0']}\n" + f"\t- bin_data1 variable: {dvariable['bin1']}\n" + ) + raise NotImplementedError(msg) + + # -------------- + # storing + + if store is True: + + _store( + coll=coll, + dout=dout, + store_keys=store_keys, + ) + + # ------------- + # return + + if returnas is True: + return dout + + +# #################################### +# check +# #################################### + + +def _check( + coll=None, + data=None, + data_units=None, + axis=None, + # binning + bins0=None, + bins1=None, + bin_data0=None, + bin_data1=None, + bin_units0=None, + # kind of binning + integrate=None, + statistic=None, + # options + safety_ratio=None, + dref_vector=None, + ref_vector_strategy=None, + verb=None, + returnas=None, + # storing + store=None, + # non-used + **kwdargs +): + + # ----------------- + # store and verb + # ------------------- + + # verb + verb = _generic_check._check_var( + verb, 'verb', + types=bool, + default=True, + ) + + # ------------------ + # data: str vs array + # ------------------- + + ddata = _check_data( coll=coll, - bin_data0=bin_data0, - bin_data1=bin_data1, + data=data, + data_units=data_units, + store=store, ) - # ---------- - # trivial + ndim_data = list(ddata.values())[0]['data'].ndim + + # ----------------- + # check statistic + # ------------------- - nobin = False - if isbs: + # statistic + if integrate is True: + statistic = 'sum' + else: + statistic = _generic_check._check_var( + statistic, 'statistic', + types=str, + default='sum', + ) - # add ref and data - kr, kd, ddatan, nobin = _interpolate( + # ----------- + # bins + # ------------ + + dbins0 = _check_bins( + coll=coll, + lkdata=list(ddata.keys()), + bins=bins0, + dref_vector=dref_vector, + store=store, + ) + if bins1 is not None: + dbins1 = _check_bins( coll=coll, - data=data, - data_units=data_units, - # binning - bins0=bins0, - bin_data0=bin_data0, - # options + lkdata=list(ddata.keys()), + bins=bins1, dref_vector=dref_vector, - verb=verb, store=store, - store_keys=store_keys, ) - # safety check - if nobin is False: - lk = list(ddatan.keys()) - data = [ddatan[k0]['data'] for k0 in lk] - bin_data0 = [ddatan[k0]['bin_data'] for k0 in lk] + # ----------- + # bins + # ------------ - # -------------------- - # do the actua binning + # dbins0 + dbins0, variable_bin0, axis = _check_bins_data( + coll=coll, + axis=axis, + ddata=ddata, + bin_data=bin_data0, + dbins=dbins0, + bin_units=bin_units0, + dref_vector=dref_vector, + safety_ratio=safety_ratio, + store=store, + ) - if nobin is False: - dout = _class01_binning.binning( + # data vs axis + if np.any(axis > ndim_data - 1): + msg = f"axis too large\n{axis}" + raise Exception(msg) + + variable_data = len(axis) < ndim_data + + # dbins1 + if bin_data1 is not None: + dbins1, variable_bin1, axis = _check_bins_data( coll=coll, - data=data, - data_units=data_units, axis=axis, - # binning - bins0=bins0, - bins1=bins1, - bin_data0=bin_data0, - bin_data1=bin_data1, - bin_units0=bin_units0, - # kind of binning - integrate=integrate, - statistic=statistic, - # options - safety_ratio=safety_ratio, + ddata=ddata, + bin_data=bin_data1, + dbins=dbins1, + bin_units=None, dref_vector=dref_vector, - ref_vector_strategy=ref_vector_strategy, - verb=verb, - returnas=True, - # storing + safety_ratio=safety_ratio, store=store, - store_keys=store_keys, ) - # -------------------------------- - # remove intermediate ref and data - - if isbs is True: - for dd in data + bin_data0 + [kd]: - if dd in coll.ddata.keys(): - coll.remove_data(dd) - if kr in coll.dref.keys(): - 
coll.remove_ref(kr)
+        if variable_bin0 != variable_bin1:
+            msg = "bin_data0 and bin_data1 have different shapes, not handled yet"
+            raise NotImplementedError(msg)
 
-        for k0 in data:
-            k1 = [k1 for k1, v1 in ddatan.items() if v1['data'] == k0][0]
-            dout[k1] = dict(dout[k0])
-            del dout[k0]
     else:
-        dout = nobin
+        dbins1 = None
+        variable_bin1 = False
 
-    # ----------
-    # return
+    # -----------------
+    # check integrate
+    # -------------------
 
-    if returnas is True:
-        return dout
+    # integrate
+    integrate = _generic_check._check_var(
+        integrate, 'integrate',
+        types=bool,
+        default=False,
+    )
+
+    # safety checks
+    if integrate is True:
+
+        if bin_data1 is not None:
+            msg = (
+                "If integrate = True, bin_data1 must be None!\n"
+                f"\t- bin_data1: {bin_data1}\n"
+            )
+            raise Exception(msg)
+
+        if len(axis) > 1:
+            msg = (
+                "If integrate is True, binning can only be done on one axis!\n"
+                f"\t- axis: {axis}\n"
+            )
+            raise Exception(msg)
+
+    # -----------------------
+    # additional safety check
+
+    if integrate is True:
+
+        if variable_bin0:
+            axbin = axis[0]
+        else:
+            axbin = 0
+
+        for k0, v0 in ddata.items():
+
+            ddata[k0]['units'] = v0['units'] * dbins0[k0]['units']
+            if dbins0[k0]['data'].size == 0:
+                continue
+            dv = np.diff(dbins0[k0]['data'], axis=axbin)
+            dv = np.concatenate(
+                (np.take(dv, [0], axis=axbin), dv),
+                axis=axbin,
+            )
 
-# ######################################################
-# ######################################################
-#               check
-# ######################################################
+            # reshape
+            if variable_data != variable_bin0:
+                if variable_data:
+                    shape_dv = np.ones((ndim_data,), dtype=int)
+                    shape_dv[axis[0]] = -1
+                    dv = dv.reshape(tuple(shape_dv))
 
-def _check_bs(
+                if variable_bin0:
+                    raise NotImplementedError()
+
+            ddata[k0]['data'] = v0['data'] * dv
+
+    # --------
+    # variability dict
+
+    dvariable = {
+        'data': variable_data,
+        'bin0': variable_bin0,
+        'bin1': variable_bin1,
+    }
+
+    # --------
+    # returnas
+
+    returnas = _generic_check._check_var(
+        returnas, 'returnas',
+        types=bool,
+        default=store is False,
+    )
+
+    return (
+        ddata, dbins0, dbins1, axis,
+        statistic, dvariable,
+        dref_vector,
+        verb, store, returnas,
+    )
+
+
+def _check_data(
     coll=None,
-    bin_data0=None,
-    bin_data1=None,
+    data=None,
+    data_units=None,
+    store=None,
 ):
+    # -----------
+    # store
 
-    # ----------------
-    # Has bsplines
-    # ----------------
+    store = _generic_check._check_var(
+        store, 'store',
+        types=bool,
+        default=False,
+    )
 
-    if hasattr(coll, '_which_bsplines'):
+    # ---------------------
+    # make sure it's a list
+
+    if isinstance(data, (np.ndarray, str)):
+        data = [data]
+    assert isinstance(data, list)
+
+    # ------------------------------------------------
+    # identify case: str vs array, all with same ndim
+
+    lc = [
+        all([
+            isinstance(dd, str)
+            and dd in coll.ddata.keys()
+            and coll.ddata[dd]['data'].ndim == coll.ddata[data[0]]['data'].ndim
+            for dd in data
+        ]),
+        all([
+            isinstance(dd, np.ndarray)
+            and dd.ndim == data[0].ndim
+            for dd in data
+        ]),
+    ]
+
+    # vs store
+    if store is True:
+        if not lc[0]:
+            msg = "If storing, all data, bin data and bins must be declared!"
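+            # e.g. (hypothetical keys): data=['prof0', 'prof1'] (keys to
+            # existing ddata items) can be stored, while a list of raw
+            # np.ndarrays cannot, since stored results need refs taken
+            # from already-registered ddata entries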
+ raise Exception(msg) + + # if none => err + if np.sum(lc) != 1: + msg = ( + "Arg data must be a list of either:\n" + "\t- keys to ddata with identical ref\n" + "\t- np.ndarrays with identical shape\n" + f"Provided:\n{data}" + ) + raise Exception(msg) - # ---------------- - # list of bsplines + # -------------------- + # sort cases + + # str => keys to existing data + if lc[0]: + ddata = { + k0: { + 'key': k0, + 'data': coll.ddata[k0]['data'], + 'ref': coll.ddata[k0]['ref'], + 'units': coll.ddata[k0]['units'], + } + for k0 in data + } - wbs = coll._which_bsplines - lok_bs = [ - k0 for k0, v0 in coll.dobj.get(wbs, {}).items() - if len(v0['ref']) == 1 - ] + # arrays + else: + ddata = { + ii: { + 'key': None, + 'data': data[ii], + 'ref': None, + 'units': data_units, + } + for ii in range(len(data)) + } - # ---------------- - # list data with bsplines + return ddata - lok_dbs = [ - k0 for k0, v0 in coll.ddata.items() - if v0.get(wbs) is not None - and len(v0[wbs]) == 1 - and v0[wbs][0] in coll.dobj.get(wbs, {}).keys() - and len(coll.dobj[wbs][v0[wbs][0]]['ref']) == 1 - ] - # ---------------- - # flag whether is bsplines +def _check_bins( + coll=None, + lkdata=None, + bins=None, + dref_vector=None, + store=None, +): - c0 = ( - isinstance(bin_data0, str) - and bin_data1 is None - and bin_data0 in lok_dbs + lok_bs + dbins = {k0: {} for k0 in lkdata} + if np.isscalar(bins) and not isinstance(bins, str): + bins = int(bins) + + elif isinstance(bins, str): + lok_data = list(coll.ddata.keys()) + lok_ref = list(coll.dref.keys()) + if hasattr(coll, '_which_bins'): + wb = coll._which_bins + lok_bins = list(coll.dobj.get(wb, {}).keys()) + else: + lok_bins = [] + + bins = _generic_check._check_var( + bins, 'bins', + types=str, + allowed=lok_data + lok_ref + lok_bins, ) - # ----------------- - # adjust bin_data0 from key_bs to key_apex + else: + bins = _generic_check._check_flat1darray( + bins, 'bins', + dtype=float, + unique=True, + can_be_None=False, + ) - if bin_data0 in lok_bs: - bin_data0 = coll.dobj[wbs][bin_data0]['apex'][0] + # -------------- + # check vs store - # ---------------- - # Does not have bsplines - # ---------------- + if store is True and not isinstance(bins, str): + msg = "With store=True, bins must be keys to coll.dobj['bins'] items!" + raise Exception(msg) - else: + # ---------------------------- + # compute bin edges if needed - c0 = False + if isinstance(bins, str): - return c0, bin_data0 + if bins in lok_bins: + for k0 in lkdata: + dbins[k0]['bin_ref'] = coll.dobj[wb][bins]['ref'] + dbins[k0]['edges'] = coll.dobj[wb][bins]['edges'] + else: -# ###################################################### -# ###################################################### -# interpolate -# ###################################################### + if bins in lok_ref: + if dref_vector is None: + dref_vector = {} -def _interpolate( + bins = coll.get_ref_vector( + ref=bins, + **dref_vector, + )[3] + if bins is None: + msg = "No ref vector identified!" 
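+                    # this ref has no identifiable 1d vector from which
+                    # bin edges could be derived; dref_vector (forwarded
+                    # to get_ref_vector()) may be used to disambiguate
+                    # which vector to pick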
+ raise Exception(msg) + + binc = coll.ddata[bins]['data'] + for k0 in lkdata: + dbins[k0]['bin_ref'] = coll.ddata[bins]['ref'] + dbins[k0]['edges'] = np.r_[ + binc[0] - 0.5*(binc[1] - binc[0]), + 0.5*(binc[1:] + binc[:-1]), + binc[-1] + 0.5*(binc[-1] - binc[-2]), + ] + + else: + + for k0 in lkdata: + bin_edges = np.r_[ + bins[0] - 0.5*(bins[1] - bins[0]), + 0.5*(bins[1:] + bins[:-1]), + bins[-1] + 0.5*(bins[-1] - bins[-2]), + ] + + dbins[k0]['edges'] = bin_edges + + return dbins + + +def _check_bins_data( coll=None, - data=None, - data_units=None, - # binning - bins0=None, - bin_data0=None, - # options + axis=None, + ddata=None, + bin_data=None, + dbins=None, + bin_units=None, dref_vector=None, - verb=None, store=None, - store_keys=None, + # if bsplines + strict=None, + safety_ratio=None, + deg=None, ): - # --------- - # sampling + # -------------- + # options + # -------------- - # mesh knots - wm = coll._which_mesh - wbs = coll._which_bsplines - key_bs = coll.ddata[bin_data0][wbs][0] - keym = coll.dobj[wbs][key_bs][wm] - kknots = coll.dobj[wm][keym]['knots'][0] + # check + strict = _generic_check._check_var( + strict, 'strict', + types=bool, + default=True, + ) - # resolution - vect = coll.ddata[kknots]['data'] - res0 = np.abs(np.min(np.diff(vect))) + # check + safety_ratio = float(_generic_check._check_var( + safety_ratio, 'safety_ratio', + types=(int, float), + default=1.5, + sign='>0.' + )) + + # ------------- + # bin_data + # -------------- + + # make list + if isinstance(bin_data, (str, np.ndarray)): + bin_data = [bin_data for ii in range(len(ddata))] + + # check consistency + if not (isinstance(bin_data, list) and len(bin_data) == len(ddata)): + msg = ( + "Arg bin_data must be a list of len() == len(data)\n" + f"\t- type(bin_data) = {type(bin_data)}\n" + ) + if isinstance(bin_data, list): + msg += ( + f"\t- len(data) = {len(ddata)}\n" + f"\t- len(bin_data) = {len(bin_data)}\n" + ) + raise Exception(msg) + + # ------------- + # case sorting + + lok_ref = list(coll.dref.keys()) + lok_data = [k0 for k0, v0 in coll.ddata.items()] + + lok = lok_data + lok_ref + lc = [ + all([isinstance(bb, str) and bb in lok for bb in bin_data]), + all([isinstance(bb, np.ndarray) for bb in bin_data]), + ] + if np.sum(lc) != 1: + msg = ( + "Arg bin_data must be a list of:\n" + f"\t- np.ndarrays\n" + f"\t- keys to coll.ddata items\n" + f"Provided:\n{bin_data}\n" + f"Available:\n{sorted(lok)}" + ) + raise Exception(msg) - # --------- - # sampling + # -------------- + # check vs store - ddata = _class01_binning._check_data( - coll=coll, - data=data, - data_units=data_units, - store=True, - ) - lkdata = list(ddata.keys()) + if store is True and not lc[0]: + msg = "With store=True, all bin_data must be keys to ddata or ref" + raise Exception(msg) - # -------------------- - # bins + # case with all str + if lc[0]: - dbins0 = _class01_binning._check_bins( - coll=coll, - lkdata=lkdata, - bins=bins0, - dref_vector=dref_vector, - store=store, - ) + if dref_vector is None: + dref_vector = {} - # ---------------------- - # npts for interpolation - - dv = np.abs(np.diff(vect)) - dvmean = np.mean(dv) + np.std(dv) - db = np.mean(np.diff(dbins0[lkdata[0]]['edges'])) - npts = (coll.dobj[wbs][key_bs]['deg'] + 3) * max(1, dvmean / db) + 3 - - # sample mesh, update dv - Dx0 = [dbins0[lkdata[0]]['edges'][0], dbins0[lkdata[0]]['edges'][-1]] - xx = coll.get_sample_mesh( - keym, - res=res0 / npts, - mode='abs', - Dx0=Dx0, - )['x0']['data'] - - if xx.size == 0: - nobins = _get_nobins( - coll=coll, - key_bs=key_bs, - 
ddata=ddata, - dbins0=dbins0, - store=store, - store_keys=store_keys, - ) - return None, None, None, nobins + # derive dbins + for ii, k0 in enumerate(ddata.keys()): - # ------------------- - # add ref - - kr = "ntemp" - kd = "xxtemp" - - coll.add_ref(kr, size=xx.size) - coll.add_data(kd, data=xx, ref=kr, units=coll.ddata[kknots]['units']) - - ddata_new = {} - for ii, (k0, v0) in enumerate(ddata.items()): - - # interpolate bin_data - kbdn = f"kbdn{ii}_temp" - # try: - coll.interpolate( - keys=bin_data0, - ref_key=key_bs, - x0=kd, - val_out=0., - returnas=False, - store=True, - inplace=True, - store_keys=kbdn, + # if ref => identify vector + if bin_data[ii] in lok_ref: + + key_vect = coll.get_ref_vector( + ref=bin_data[ii], + **dref_vector, + )[3] + + if key_vect is None: + msg = "bin_data '{bin_data[ii]}' has no reference vector!" + raise Exception(msg) + + bin_data[ii] = key_vect + + # fill dict + dbins[k0].update({ + 'key': bin_data[ii], + 'data': coll.ddata[bin_data[ii]]['data'], + 'ref': coll.ddata[bin_data[ii]]['ref'], + 'units': coll.ddata[bin_data[ii]]['units'], + }) + + else: + for ii, k0 in enumerate(ddata.keys()): + dbins[k0].update({ + 'key': None, + 'data': bin_data[ii], + 'ref': None, + 'units': bin_units, + }) + + # ----------------------------------- + # check nb of dimensions consistency + + ldim = list(set([v0['data'].ndim for v0 in dbins.values()])) + if len(ldim) > 1: + msg = ( + "All bin_data provided must have the same nb of dimensions!\n" + f"Provided: {ldim}" ) + raise Exception(msg) + + # ------------------------- + # check dimensions vs axis + + # None => set to all bin (assuming variable_bin = False) + if axis is None: + for k0, v0 in dbins.items(): + + if ddata[k0]['ref'] is not None and v0['ref'] is not None: + seq_data = list(ddata[k0]['ref']) + seq_bin = v0['ref'] + + else: + seq_data = list(ddata[k0]['data'].shape) + seq_bin = v0['data'].shape + + # get start indices of subsequence seq_bin in sequence seq_data + laxis0 = list(_generic_utils.KnuthMorrisPratt(seq_data, seq_bin)) + if len(laxis0) != 1: + msg = ( + "Please specify axis, ambiguous results from ref / shape\n" + f"\t- data '{k0}': {seq_data}\n" + f"\t- bin '{v0['key']}': {seq_bin}\n" + f"=> laxis0 = {laxis0}\n" + ) + raise Exception(msg) + + axisi = laxis0[0] + np.arange(0, len(seq_bin)) + if axis is None: + axis = axisi + else: + assert axis == axisi + + # -------------- + # axis + # ------------------- + + axis = _generic_check._check_flat1darray( + axis, 'axis', + dtype=int, + unique=True, + can_be_None=False, + sign='>=0', + ) - # except Exception as err: - # msg = ( - # err.args[0] - # + "\n\n" - # f"\t- k0 = {k0}\n" - # f"\t- ii = {ii}\n" - # f"\t- bin_data0 = {bin_data0}\n" - # f"\t- key_bs = {key_bs}\n" - # f"\t- kd = {kd}\n" - # f"\t- xx.size: {xx.size}\n" - # f"\t- kbdn = {kbdn}\n" - # ) - # err.args = (msg,) - # raise err - - # interpolate_data - kdn = f"kbd{ii}_temp" - coll.interpolate( - keys=k0, - ref_key=key_bs, - x0=kd, - val_out=0., - returnas=False, - store=True, - inplace=True, - store_keys=kdn, + if np.any(np.diff(axis) > 1): + msg = f"axis must be adjacent indices!\n{axis}" + raise Exception(msg) + + # check + ndim_bin = ldim[0] + if ndim_bin < len(axis): + msg = ( + "bin_data seems to have insufficient number of dimensions!\n" + f"\t- axis: {axis}\n" + f"\t- ndim_bin: {ndim_bin}\n" + f"\t- bin_data: {bin_data}" ) - ddata_new[k0] = {'bin_data': kbdn, 'data': kdn} + raise Exception(msg) - return kr, kd, ddata_new, False + variable_bin = ndim_bin > len(axis) + # 
------------------------------- + # check vs data shape along axis -def _get_nobins( - coll=None, - key_bs=None, - ddata=None, - dbins0=None, - store=None, - store_keys=None, + ndim_data = list(ddata.values())[0]['data'].ndim + variable_data = len(axis) < ndim_data + for k0, v0 in dbins.items(): + + shape_data = ddata[k0]['data'].shape + shape_bin = v0['data'].shape + + if variable_bin == variable_data and shape_data != v0['data'].shape: + msg = ( + "variable_bin == variable_data => shapes should be the same!\n" + f"\t- variable_data = {variable_data}\n" + f"\t- variable_bin = {variable_bin}\n" + f"\t- axis = {axis}\n" + f"\t- data '{k0}' shape = {shape_data}\n" + f"\t- bin_data '{v0['key']}' shape = {v0['data'].shape}\n" + ) + raise Exception(msg) + + else: + if variable_data: + sh_var, sh_fix = shape_data, shape_bin + else: + sh_fix, sh_var = shape_data, shape_bin + + shape_axis = [ss for ii, ss in enumerate(sh_var) if ii in axis] + if sh_fix != tuple(shape_axis): + msg = ( + f"Wrong shapes: data '{k0}' vs bin_data '{v0['key']}'!\n" + f"\t- shape_data: {shape_data}\n" + f"\t- shape_bin: {shape_bin}\n" + f"\t- axis: {axis}\n" + ) + raise Exception(msg) + + # ---------------------------------------- + # safety check on bin sizes + # ---------------------------------------- + + if len(axis) == 1: + + for k0, v0 in dbins.items(): + + if variable_bin: + raise NotImplementedError() + else: + dv = np.abs(np.diff(v0['data'])) + + dvmean = np.mean(dv) + np.std(dv) + + if strict is True: + + lim = safety_ratio * dvmean + db = np.mean(np.diff(dbins[k0]['edges'])) + if db < lim: + ss = f"{db}) are < {safety_ratio} * bin_data ({lim}" + msg = ( + f"Uncertain binning for bin_data '{v0['key']}':\n" + f"Binning steps ({ss}) step" + ) + raise Exception(msg) + + return dbins, variable_bin, axis + + +# #################################### +# #################################### +# binning +# #################################### + + +def _bin_fixed_bin( + data=None, + data_ref=None, + vect0=None, + vect1=None, + bins0=None, + bins1=None, + bin_ref0=None, + bin_ref1=None, + axis=None, + statistic=None, + # integration + variable_data=None, ): - lk = list(ddata.keys()) - wbs = coll._which_bsplines + # ---------------------------- + # select only relevant indices - if isinstance(store_keys, str): - store_keys = [store_keys] + indin = np.isfinite(vect0) + indin[indin] = (vect0[indin] >= bins0[0]) & (vect0[indin] < bins0[-1]) + if bins1 is not None: + indin[indin] = np.isfinite(vect1[indin]) + indin[indin] = (vect1[indin] >= bins1[0]) & (vect1[indin] < bins1[-1]) - dout = {} - for ii, k0 in enumerate(lk): + if not variable_data: + indin[indin] = np.isfinite(data[indin]) - axis = ddata[k0]['ref'].index(coll.dobj[wbs][key_bs]['ref'][0]) + # ------------- + # prepare shape - shape = list(ddata[k0]['data'].shape) - nb = dbins0[k0]['edges'].size - 1 - shape[axis] = nb + shape_data = data.shape + ind_other = np.arange(data.ndim) + nomit = len(axis) - 1 + ind_other_flat = np.r_[ind_other[:axis[0]], ind_other[axis[-1]+1:] - nomit] + ind_other = np.r_[ind_other[:axis[0]], ind_other[axis[-1]+1:]] - ref = list(ddata[k0]['ref']) - ref[axis] = dbins0[k0]['bin_ref'][0] + shape_other = [ss for ii, ss in enumerate(shape_data) if ii not in axis] - dout[store_keys[ii]] = { - 'data': np.zeros(shape, dtype=float), - 'ref': tuple(ref), - 'units': ddata[k0]['units'], - } + shape_val = list(shape_other) + shape_val.insert(axis[0], int(bins0.size - 1)) + if bins1 is not None: + shape_val.insert(axis[0] + 1, int(bins1.size - 1)) + val = 
np.zeros(shape_val, dtype=data.dtype)
+
+    if not np.any(indin):
+        # no point falls inside the bins => return zeros and no ref
+        # (a 2-tuple, since the caller unpacks (data, ref))
+        return val, None
+
+    # -------------
+    # subset
+
+    # vect
+    vect0 = vect0[indin]
+    if bins1 is not None:
+        vect1 = vect1[indin]
+
+    # data
+    sli = [slice(None) for ii in shape_other]
+    sli.insert(axis[0], indin)
+
+    data = data[tuple(sli)]
+
+    # ---------------
+    # custom
+
+    if statistic == 'sum_smooth':
+        stat = 'mean'
+    else:
+        stat = statistic
+
+    # ------------------
+    # simple case
+
+    if variable_data is False:
+
+        if bins1 is None:
+
+            # compute
+            val[...] = scpst.binned_statistic(
+                vect0,
+                data,
+                bins=bins0,
+                statistic=stat,
+            )[0]
+
+        else:
+            val[...] = scpst.binned_statistic_2d(
+                vect0,
+                vect1,
+                data,
+                bins=[bins0, bins1],
+                statistic=stat,
+            )[0]
+
+    # -------------------------------------------------------
+    # variable data, but axis = int and ufunc exists (faster)
+
+    elif len(axis) == 1 and stat in _DUFUNC.keys() and bins1 is None:
+
+        if statistic == 'sum_smooth':
+            msg = "statistic 'sum_smooth' not properly handled here yet"
+            raise NotImplementedError(msg)
+
+        # safety check
+        vect0s = np.sort(vect0)
+        if not np.allclose(vect0s, vect0):
+            msg = (
+                "Non-sorted vect0 for binning 1d with ufunc!\n"
+                f"\t- axis: {axis}\n"
+                f"\t- shape_data: {shape_data}\n"
+                f"\t- shape_other: {shape_other}\n"
+                f"\t- shape_val: {shape_val}\n"
+                f"\t- vect0.shape: {vect0.shape}\n"
+                f"\t- vect0: {vect0}\n"
+                f"\t- vect0s: {vect0s}\n"
+            )
+            raise Exception(msg)
+
+        # get ufunc
+        ufunc = _DUFUNC[stat]
+
+        # get indices
+        ind0 = np.searchsorted(
+            bins0,
+            vect0,
+            sorter=None,
+        )
+        ind0[ind0 == 0] = 1
+
+        # ind
+        indu = np.unique(ind0 - 1)
+
+        # cases
+        if indu.size == 1:
+            sli[axis[0]] = indu[0]
+            val[tuple(sli)] = np.nansum(data, axis=axis[0])
+
+        else:
+
+            sli[axis[0]] = indu
+
+            # neutralize nans
+            data[np.isnan(data)] = 0.
+            ind = np.r_[0, np.where(np.diff(ind0))[0] + 1]
+
+            # sum
+            val[tuple(sli)] = ufunc(data, ind, axis=axis[0])
+
+    # -----------------------------------
+    # other statistic with variable data
+
+    else:
+
+        # indices
+        linds = [range(nn) for nn in shape_other]
+
+        # slice_data
+        sli = [0 for ii in shape_other]
+        sli.insert(axis[0], slice(None))
+        sli = np.array(sli)
+
+        if bins1 is None:
+
+            for ind in itt.product(*linds):
+                sli[ind_other_flat] = ind
+
+                val[tuple(sli)] = scpst.binned_statistic(
+                    vect0,
+                    data[tuple(sli)],
+                    bins=bins0,
+                    statistic=stat,
+                )[0]
+
+                if statistic == 'sum_smooth':
+                    val[tuple(sli)] *= (
+                        np.nansum(data[tuple(sli)])
+                        / np.nansum(val[tuple(sli)])
+                    )
+
+        else:
+
+            sli_val = np.copy(sli)
+            sli_val = np.insert(sli_val, axis[0] + 1, slice(None))
+
+            for ind in itt.product(*linds):
+
+                sli[ind_other_flat] = ind
+                sli_val[ind_other_flat] = ind
+
+                val[tuple(sli_val)] = scpst.binned_statistic_2d(
+                    vect0,
+                    vect1,
+                    data[tuple(sli)],
+                    bins=[bins0, bins1],
+                    statistic=stat,
+                )[0]
+
+                if statistic == 'sum_smooth':
+                    val[tuple(sli_val)] *= (
+                        np.nansum(data[tuple(sli)])
+                        / np.nansum(val[tuple(sli_val)])
+                    )
+
+    # ---------------
+    # adjust custom
+
+    if statistic == 'sum_smooth':
+        if variable_data is False:
+            val[...] *= np.nansum(data) / np.nansum(val)
+
+    # ------------
+    # references
+
+    if data_ref is not None:
+        ref = [
+            rr for ii, rr in enumerate(data_ref)
+            if ii not in axis
+        ]
+
+        if bin_ref0 is not None:
+            bin_ref0 = bin_ref0[0]
+        if bin_ref1 is not None:
+            bin_ref1 = bin_ref1[0]
+
+        ref.insert(axis[0], bin_ref0)
+        if bins1 is not None:
+            ref.insert(axis[0] + 1, bin_ref1)
+
+        ref = tuple(ref)
+    else:
+        ref = None
+
+    return val, ref
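+
+# A minimal usage sketch (hypothetical shapes and values, not part of
+# the module's API):
+#
+#     import numpy as np
+#     data = np.random.random((10, 50))    # e.g. (nt, nx)
+#     vect0 = np.linspace(0., 1., 50)      # bin vector along the x axis
+#     bins0 = np.linspace(0., 1., 6)       # 6 edges => 5 bins
+#     val, ref = _bin_fixed_bin(
+#         data=data,
+#         data_ref=None,
+#         vect0=vect0,
+#         bins0=bins0,
+#         axis=np.r_[1],
+#         statistic='sum',
+#         variable_data=True,              # data has an extra (time) axis
+#     )
+#     # val.shape == (10, 5); ref is None since data_ref is None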
+
+# #######################################################
+# Store
+# #######################################################
+
+
+def _store(
+    coll=None,
+    dout=None,
+    store_keys=None,
+):
+
+    # ----------------
+    # check store_keys
+
+    if len(dout) == 1 and isinstance(store_keys, str):
+        store_keys = [store_keys]
+
+    ldef = [f"{k0}_binned" for k0 in dout.items()]
+    lex = list(coll.ddata.keys())
+    store_keys = _generic_check._check_var_iter(
+        store_keys, 'store_keys',
+        types=list,
+        types_iter=str,
+        default=ldef,
+        excluded=lex,
+    )
+
+    # -------------
+    # store
+
+    for ii, (k0, v0) in enumerate(dout.items()):
+        coll.add_data(
+            key=store_keys[ii],
+            data=v0['data'],
+            ref=v0['ref'],
+            units=v0['units'],
+        )
From 92d384db07cba9dac5fbc5b43fab04f4c5644c2d Mon Sep 17 00:00:00 2001
From: dvezinet
Date: Thu, 14 Nov 2024 21:18:36 +0000
Subject: [PATCH 17/25] [#159] Cleanup

---
 datastock/_class03_binning.py | 54 ++++++++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 10 deletions(-)

diff --git a/datastock/_class03_binning.py b/datastock/_class03_binning.py
index c3cbf94..f42284d 100644
--- a/datastock/_class03_binning.py
+++ b/datastock/_class03_binning.py
@@ -99,8 +99,9 @@ def main(
 
     """
 
-    # ----------
-    # checks
+    # ---------------------
+    # check inputs
+    # ---------------------
 
     # keys
     (
@@ -110,15 +111,18 @@
         verb, store, returnas,
     ) = _check(**locals())
 
-    # --------------
-    # actual binning
+    # -------------------------
+    # binning with fixed edges
+    # -------------------------
 
    if dvariable['bin0'] is False and dvariable['bin1'] is False:
 
         dout = {k0: {'units': v0['units']} for k0, v0 in ddata.items()}
         for k0, v0 in ddata.items():
 
+            # -------------
             # handle dbins1
+
             if dbins1 is None:
                 bins1, vect1, bin_ref1 = None, None, None
             else:
@@ -126,7 +130,9 @@
                 bins1 = dbins1[k0]['edges']
                 vect1 = dbins1[k0]['data']
                 bin_ref1 = dbins1[k0].get('bin_ref')
 
+            # ------------
             # compute
+
             dout[k0]['data'], dout[k0]['ref'] = _bin_fixed_bin(
                 # data to bin
                 data=v0['data'],
@@ -147,6 +153,10 @@
                 variable_data=dvariable['data'],
             )
 
+    # -------------------------
+    # binning with variable edges
+    # -------------------------
+
     else:
         msg = (
             "Variable bin vectors not implemented yet!\n"
@@ -156,8 +166,9 @@
         )
         raise NotImplementedError(msg)
 
-    # --------------
+    # ---------------------
     # storing
+    # ---------------------
 
     if store is True:
 
@@ -167,16 +178,18 @@
             store_keys=store_keys,
         )
 
-    # -------------
+    # ---------------------
     # return
+    # ---------------------
 
     if returnas is True:
         return dout
 
 
-# ####################################
-# check
-# ####################################
+# ################################################################
+# ################################################################
+# Check inputs
+# ################################################################
 
 
 def _check(
@@ -398,6 +411,12 @@ def _check(
     )
 
 
+# ################################################################
+# ################################################################
+# Check data
+# ################################################################
+
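+# (sketch) _check_data() normalizes the 'data' argument into a dict of
+#     {key_or_index: {'key': ..., 'data': ndarray, 'ref': ..., 'units': ...}}
+# so that data='prof0' (an existing ddata key, hypothetical here) and
+# data=np.arange(10) (a raw array, only valid with store=False) follow
+# the same downstream code path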
+ def _check_data( coll=None, data=None, @@ -427,7 +446,7 @@ def _check_data( all([ isinstance(dd, str) and dd in coll.ddata.keys() - and coll.ddata[dd]['data'].ndim == coll.ddata[data[0]]['data'].ndim + and coll.ddata[dd]['ref'] == coll.ddata[data[0]]['ref'] for dd in data ]), all([ @@ -483,6 +502,12 @@ def _check_data( return ddata +# ################################################################ +# ################################################################ +# Check bins +# ################################################################ + + def _check_bins( coll=None, lkdata=None, @@ -573,6 +598,12 @@ def _check_bins( return dbins +# ################################################################ +# ################################################################ +# Check bins data +# ################################################################ + + def _check_bins_data( coll=None, axis=None, @@ -1074,6 +1105,8 @@ def _bin_fixed_bin( return val, ref + +# ####################################################### # ####################################################### # Store # ####################################################### @@ -1093,6 +1126,7 @@ def _store( ldef = [f"{k0}_binned" for k0 in dout.items()] lex = list(coll.ddata.keys()) + store_keys = _generic_check._check_var_iter( store_keys, 'store_keys', types=list, From da7ef07be39a9a8c15c9c3d83334afe1f0f4a226 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Thu, 14 Nov 2024 21:26:29 +0000 Subject: [PATCH 18/25] [#159] Revamping tests --- datastock/tests/test_01_DataStock.py | 5 +++++ datastock/tests/test_inputs.py | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 datastock/tests/test_inputs.py diff --git a/datastock/tests/test_01_DataStock.py b/datastock/tests/test_01_DataStock.py index f1f82f7..70347d1 100644 --- a/datastock/tests/test_01_DataStock.py +++ b/datastock/tests/test_01_DataStock.py @@ -15,6 +15,8 @@ # datastock-specific from .._class04_Plots import Plots as Collection from .._saveload import load +from . 
import test_input as _input
+
 
 _PATH_HERE = os.path.dirname(__file__)
 
@@ -402,6 +404,9 @@ def test09_binning(self):
             )
             raise Exception(msg)
 
+    def test10_add_bins(self):
+        _input.add_bins(self.coll)
+
     def test10_interpolate(self):
 
         lk = ['y', 'y', 'prof0', 'prof0', 'prof0', '3d']
diff --git a/datastock/tests/test_inputs.py b/datastock/tests/test_inputs.py
new file mode 100644
index 0000000..cdabf98
--- /dev/null
+++ b/datastock/tests/test_inputs.py
@@ -0,0 +1,21 @@
+
+
+# ###############################################################
+# ###############################################################
+#
+# ###############################################################
+
+
+def add_bins(coll):
+
+    # ---------------
+    # check if needed
+
+    wbins = coll._which_bins
+    if coll.dobj.get(wbins) is not None:
+        return
+
+    # ---------------
+    # define bins
+
+    return
From fe87c5d5a4633581aa6613854030a6c6028f8694 Mon Sep 17 00:00:00 2001
From: dvezinet
Date: Thu, 14 Nov 2024 22:48:51 +0000
Subject: [PATCH 19/25] [#159] Revamping Bins to address 2d bins too + tests,
 TBF

---
 datastock/_class03_checks.py                | 257 ++++++++++++------
 .../tests/{test_inputs.py => test_input.py} |   6 +
 2 files changed, 184 insertions(+), 79 deletions(-)
 rename datastock/tests/{test_inputs.py => test_input.py} (78%)

diff --git a/datastock/_class03_checks.py b/datastock/_class03_checks.py
index 4e38fe8..934a0eb 100644
--- a/datastock/_class03_checks.py
+++ b/datastock/_class03_checks.py
@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 
 
-# Common
 import numpy as np
-import datastock as ds
+
+
+# Common
+from . import _generic_check
 
 
 # #############################################################################
@@ -17,17 +19,18 @@ def check(
     key=None,
     edges=None,
     # custom names
+    key_edges=None,
     key_cents=None,
     key_ref=None,
     # additional attributes
     **kwdargs,
 ):
 
-    # --------
-    # keys
-
+    # -------------
    # key
-    key = ds._generic_check._obj_key(
+    # -------------
+
+    key = _generic_check._obj_key(
        d0=coll._dobj.get(coll._which_bins, {}),
        short='b',
        key=key,
@@ -35,80 +38,155 @@
 
     # ------------
     # edges
+    # ------------
 
-    edges = ds._generic_check._check_flat1darray(
-        edges, 'edges',
-        dtype=float,
-        unique=True,
-        can_be_None=False,
-    )
+    # -----------------------
+    # first conformity check
+
+    lc = [
+        _check_edges_str(edges, coll),
+        _check_edges_array(edges),
+        isinstance(edges, tuple)
+        and len(edges) in (1, 2)
+        and all([
+            _check_edges_str(ee, coll) or _check_edges_array(ee)
+            for ee in edges
+        ])
+    ]
+
+    if np.sum(lc) != 1:
+        msg = (
+            f"For Bins '{key}', arg edges must be:\n"
+            "\t- a str pointing to an existing monotonic vector\n"
+            "\t- an array/list/tuple of unique increasing values\n"
+            "\t- a tuple of 1 or 2 of the above\n"
+            f"Provided:\n\t{edges}"
+        )
+        raise Exception(msg)
+
+    if lc[0] or lc[1]:
+        edges = (edges,)
 
-    nb = edges.size - 1
-    cents = 0.5*(edges[:-1] + edges[1:])
+    # ----------------------------
+    # make tuple of 1d flat arrays
 
-    # --------------------
-    # safety check on keys
+    edges_new = []
+    for ii, ee in enumerate(edges):
+        if isinstance(ee, str):
+            edges_new.append(ee)
+        else:
+            edges_new.append(_generic_check._check_flat1darray(
+                ee, f'edges[{ii}]',
+                dtype=float,
+                unique=True,
+                can_be_None=False,
+            ))
+    edges = edges_new
+    nd = f"{len(edges)}d"
+
+    # -----------------
+    # kwdargs
+    # -----------------
+
+    for k0, v0 in kwdargs.items():
+        if isinstance(v0, str) or v0 is None:
+            if nd == 1:
+                kwdargs[k0] = (v0,)
+            else:
+                kwdargs[k0] = (v0, v0)
+
+        c0 = (
+            isinstance(kwdargs[k0], tuple)
+            and
len(kwdargs[k0]) == nd + and all([isinstance(vv, str) or vv is None for vv in kwdargs[k0]]) + ) + if not c0: + msg = ( + f"Bins '{key}', arg kwdargs must be dict of data attributes\n" + "Where each attribute is provided as a tuple of " + "len() = len(edges)\n" + f"Provided:\n\t{kwdargs}" + ) + raise Exception(msg) + + # ----------------- + # other keys + # ----------------- + + # ----------------- # key_ref - defk = f"{key}_nb" - lout = [k0 for k0, v0 in coll.dref.items() if v0['size'] != nb] - key_ref = ds._generic_check._check_var( - key_ref, 'key_ref', - types=str, - default=defk, - excluded=lout, - ) - # key_cents - defk = f"{key}_c" - lout = [ - k0 for k0, v0 in coll.ddata.items() - if not ( - v0['shape'] == (nb,) - and key_ref in coll.dref.keys() - and v0['ref'] == (key_ref,) - and v0['monot'] == (True,) + dref = {} + ddata = {} + cents = [None for ii in edges] + for ii, ee in enumerate(edges): + + edges[ii], cents[ii] = _to_dict( + coll=coll, + key=key, + ii=ii, + edge=ee, + # custom names + key_cents=key_cents, + key_ref=key_ref, + # dict + dref=dref, + ddata=ddata, + # attributes + **{kk: vv[ii] for kk, vv in kwdargs.items()}, ) - ] - key_cents = ds._generic_check._check_var( - key_cents, 'key_cents', - types=str, - default=defk, - excluded=lout, - ) # -------------- - # to dict + # dobj + # -------------- - dref, ddata, dobj = _to_dict( - coll=coll, - key=key, - edges=edges, - nb=nb, - cents=cents, - # custom names - key_cents=key_cents, - key_ref=key_ref, - # attributes - **kwdargs, - ) + # dobj + dobj = { + coll._which_bins: { + key: { + 'nd': '1d', + 'edges': tuple(edges), + 'cents': (key_cents,), + 'ref': (key_ref,), + # 'shape': (nb,), + }, + }, + } return key, dref, ddata, dobj +def _check_edges_str(edges, coll): + return ( + isinstance(edges, str) + and edges in coll.ddata.keys() + and coll.ddata[edges]['monot'] == (True,) + ) + + +def _check_edges_array(edges): + return ( + ) + + # ############################################################## # ############################################################### # to_dict # ############################################################### +# TBF def _to_dict( coll=None, key=None, - edges=None, - nb=None, - cents=None, + ii=None, + ee=None, + # dict + dref=None, + ddata=None, # custom names + key_edge=None, key_cents=None, key_ref=None, # additional attributes @@ -122,21 +200,60 @@ def _to_dict( # ------------- # prepare dict + # ref + if isinstance(ee, str): + pass + else: + defk = f"{key}_ne{ii}" + lout = [k0 for k0, v0 in coll.dref.items()] + key_ref = _generic_check._check_var( + key_ref[ii], defk, + types=str, + default=defk, + excluded=lout, + ) + dref[key_ref] = {'size': ee.size} + + # + defk = f"{key}_e{ii}" + key_edge = _generic_check._check_var( + key_edge, defk, + types=str, + default=defk, + excluded=lout, + ) + ddata[key_edge] = { + 'data': ee, + 'ref': key_ref, + **kwdargs, + } + + defk = f"{key}_nc{ii}" + lout = [k0 for k0, v0 in coll.dref.items()] + key_ref = _generic_check._check_var( + key_ref, defk, + types=str, + default=defk, + excluded=lout, + ) + dref[key_ref] = {'size': ee.size - 1} + # dref if key_ref not in coll.dref.keys(): dref = { key_ref: { - 'size': nb, + 'size': ee.size, }, } else: dref = None # ddata + key_cent = None if key_cents not in coll.ddata.keys(): ddata = { key_cents: { - 'data': cents, + # 'data': cents, 'units': units, # 'source': None, 'dim': dim, @@ -148,25 +265,7 @@ def _to_dict( else: ddata = None - # dobj - dobj = { - coll._which_bins: { - key: { - 'nd': '1d', - 'edges': edges, - 
'cents': (key_cents,), - 'ref': (key_ref,), - 'shape': (nb,), - }, - }, - } - - # additional attributes - for k0, v0 in kwdargs.items(): - if k0 not in latt: - dobj[coll._which_bins][key][k0] = v0 - - return dref, ddata, dobj + return key_edge, key_cent # ############################################################## @@ -187,7 +286,7 @@ def remove_bins(coll=None, key=None, propagate=None): if isinstance(key, str): key = [key] - key = ds._generic_check._check_var_iter( + key = _generic_check._check_var_iter( key, 'key', types=(list, tuple), types_iter=str, @@ -195,7 +294,7 @@ def remove_bins(coll=None, key=None, propagate=None): ) # propagate - propagate = ds._generic_check._check_var( + propagate = _generic_check._check_var( propagate, 'propagate', types=bool, default=True, diff --git a/datastock/tests/test_inputs.py b/datastock/tests/test_input.py similarity index 78% rename from datastock/tests/test_inputs.py rename to datastock/tests/test_input.py index cdabf98..83c8381 100644 --- a/datastock/tests/test_inputs.py +++ b/datastock/tests/test_input.py @@ -1,5 +1,8 @@ +import numpy as np + + # ############################################################### # ############################################################### # @@ -18,4 +21,7 @@ def add_bins(coll): # --------------- # define bins + # linear uniform + coll.add_bins('bin0', edges=np.linspace(0, 1, 10), units='m') + return From 7b971a7ac5bddc02631d7f0f5397db676c8f60d4 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Fri, 15 Nov 2024 22:00:41 +0000 Subject: [PATCH 20/25] [#159] add_bins() operational --- datastock/_class03_Bins.py | 32 +++- datastock/_class03_checks.py | 282 +++++++++++++++++++++++++++-------- 2 files changed, 243 insertions(+), 71 deletions(-) diff --git a/datastock/_class03_Bins.py b/datastock/_class03_Bins.py index 79d37b0..cc5d04c 100644 --- a/datastock/_class03_Bins.py +++ b/datastock/_class03_Bins.py @@ -29,9 +29,11 @@ class Bins(Previous): _dshow.update({ _which_bins: [ 'nd', - 'cents', - 'shape', - 'ref', + 'shape_edges', + 'edges', + 'ref_edges', + 'is_linear', + 'is_log', ], }) @@ -44,13 +46,25 @@ def add_bins( key=None, edges=None, # custom names - key_ref=None, + key_edges=None, key_cents=None, - key_res=None, - # attributes + key_ref_edges=None, + key_ref_cents=None, + # additional attributes **kwdargs, ): - """ Add bin """ + """ Add bin + + Defined from edges, which can be: + - np.ndarray or tuple of 2 + - key to existing monotnous array or tuple of 2 + + key names are generated automatically + But can also be specified: + - for creation + - or for refering to existing data + + """ # -------------- # check inputs @@ -60,8 +74,10 @@ def add_bins( key=key, edges=edges, # custom names + key_edges=key_edges, key_cents=key_cents, - key_ref=key_ref, + key_ref_edges=key_ref_edges, + key_ref_cents=key_ref_cents, # attributes **kwdargs, ) diff --git a/datastock/_class03_checks.py b/datastock/_class03_checks.py index 934a0eb..4f5df57 100644 --- a/datastock/_class03_checks.py +++ b/datastock/_class03_checks.py @@ -21,7 +21,8 @@ def check( # custom names key_edges=None, key_cents=None, - key_ref=None, + key_ref_edges=None, + key_ref_cents=None, # additional attributes **kwdargs, ): @@ -70,17 +71,35 @@ def check( # ---------------------------- # make tuple of 1d flat arrays - edges_new = [] + edges_new = [None for ee in edges] for ii, ee in enumerate(edges): if isinstance(ee, str): - edges_new.append(ee) + edges_new[ii] = ee else: - edges_new.append(_generic_check._check_flat1darray( + edges_new[ii] = 
_generic_check._check_flat1darray( ee, f'edges[{ii}]', dtype=float, unique=True, can_be_None=False, - )) + ) + + # --------------------- + # safety check for NaNs + + for ii, ee in enumerate(edges_new): + if isinstance(ee, str): + ee = coll.ddata[ee]['data'] + + isnan = np.any(np.isnan(ee)) + if isnan: + msg = ( + f"Bins '{key}', provided edges have NaNs!\n" + f"\t- edges[{ii}]: {ee}" + ) + raise Exception(msg) + + # -------------- + # wrap up edges = edges_new nd = f"{len(edges)}d" @@ -91,21 +110,21 @@ def check( for k0, v0 in kwdargs.items(): if isinstance(v0, str) or v0 is None: - if nd == 1: + if nd == '1d': kwdargs[k0] = (v0,) else: kwdargs[k0] = (v0, v0) c0 = ( isinstance(kwdargs[k0], tuple) - and len(kwdargs[k0]) == nd + and len(kwdargs[k0]) == len(edges) and all([isinstance(vv, str) or vv is None for vv in kwdargs[k0]]) ) if not c0: msg = ( f"Bins '{key}', arg kwdargs must be dict of data attributes\n" "Where each attribute is provided as a tuple of " - "len() = len(edges)\n" + f"len() = len(edges) = ({len(edges)})\n" f"Provided:\n\t{kwdargs}" ) raise Exception(msg) @@ -114,22 +133,41 @@ def check( # other keys # ----------------- + key_edges = _check_keys_ref(key_edges, edges, key, 'key_edges') + key_cents = _check_keys_ref(key_cents, edges, key, 'key_cents') + key_ref_edges = _check_keys_ref(key_ref_edges, edges, key, 'key_ref_edges') + key_ref_cents = _check_keys_ref(key_ref_cents, edges, key, 'key_ref_cents') + + # ----------------- + # edges, cents + # ----------------- + # ----------------- # key_ref dref = {} ddata = {} - cents = [None for ii in edges] + shape_edges = [None for ee in edges] + is_linear = [None for ee in edges] + is_log = [None for ee in edges] + units = [None for ee in edges] for ii, ee in enumerate(edges): - - edges[ii], cents[ii] = _to_dict( + ( + key_edges[ii], key_cents[ii], + key_ref_edges[ii], key_ref_cents[ii], + shape_edges[ii], + is_linear[ii], is_log[ii], + units[ii], + ) = _to_dict( coll=coll, key=key, ii=ii, - edge=ee, + ee=ee, # custom names - key_cents=key_cents, - key_ref=key_ref, + key_edge=key_edges[ii], + key_cent=key_cents[ii], + key_ref_edge=key_ref_edges[ii], + key_ref_cent=key_ref_cents[ii], # dict dref=dref, ddata=ddata, @@ -137,6 +175,11 @@ def check( **{kk: vv[ii] for kk, vv in kwdargs.items()}, ) + # ------------- + # ref and shape + + shape_cents = tuple([ss - 1 for ss in shape_edges]) + # -------------- # dobj # -------------- @@ -145,11 +188,16 @@ def check( dobj = { coll._which_bins: { key: { - 'nd': '1d', - 'edges': tuple(edges), - 'cents': (key_cents,), - 'ref': (key_ref,), - # 'shape': (nb,), + 'nd': nd, + 'edges': tuple(key_edges), + 'cents': tuple(key_cents), + 'ref_edges': tuple(key_ref_edges), + 'ref_cents': tuple(key_ref_cents), + 'shape_edges': tuple(shape_edges), + 'shape_cents': tuple(shape_cents), + 'units': tuple(units), + 'is_linear': tuple(is_linear), + 'is_log': tuple(is_log), }, }, } @@ -167,55 +215,109 @@ def _check_edges_str(edges, coll): def _check_edges_array(edges): return ( + isinstance(edges, (list, tuple, np.ndarray)) + and np.array(edges).ndim == 1 + and np.array(edges).size > 1 ) +def _check_keys_ref(keys, edges, key, keys_name): + if keys is None: + keys = [None for ee in edges] + elif isinstance(keys, str): + keys = [keys for ee in edges] + elif isinstance(keys, (list, tuple)): + c0 = ( + len(keys) == len(edges) + and all([isinstance(ss, str) or ss is None for ss in keys]) + ) + if not c0: + msg = ( + f"Bins '{key}', arg '{keys_name}' should be either:\n" + "\t- None (automatically set)\n" + "\t- str 
to existing key\n" + "\t- tuple of the above of len() = {len(edges)}\n" + "Provided:\n\t{keys}" + ) + raise Exception(msg) + return keys + + # ############################################################## # ############################################################### # to_dict # ############################################################### -# TBF def _to_dict( coll=None, key=None, ii=None, ee=None, + # custom names + key_edge=None, + key_cent=None, + key_ref_edge=None, + key_ref_cent=None, # dict dref=None, ddata=None, - # custom names - key_edge=None, - key_cents=None, - key_ref=None, # additional attributes **kwdargs, ): + """ check key_edge, key_cents, key_ref_edge, key_ref_cent + + If new, append to dref and ddata + """ + # ------------- # attributes + # ------------- + latt = ['dim', 'quant', 'name', 'units'] dim, quant, name, units = [kwdargs.get(ss) for ss in latt] # ------------- - # prepare dict + # edges + # ------------- # ref if isinstance(ee, str): - pass + key_edge = ee + ee = coll.ddata[key_edge]['data'] + units = coll.ddata[key_edge]['units'] + else: + + # ------------------ + # key_ref_edge + defk = f"{key}_ne{ii}" lout = [k0 for k0, v0 in coll.dref.items()] - key_ref = _generic_check._check_var( - key_ref[ii], defk, + key_ref_edge = _generic_check._check_var( + key_ref_edge, defk, types=str, default=defk, - excluded=lout, ) - dref[key_ref] = {'size': ee.size} + if key_ref_edge in lout: + size = coll.dref[key_ref_edge]['size'] + c0 = size == ee.size + if not c0: + msg = ( + f"Bins '{key}', arg key_ref_edges[{ii}]" + " conflicts with existing ref:\n" + f"\t- coll.dref['{key_ref_edge}']['size'] = {size}" + f"\t- edges['{ii}'].size = {ee.size}\n" + ) + raise Exception(msg) + else: + dref[key_ref_edge] = {'size': ee.size} + + # --------------- + # key_edge - # defk = f"{key}_e{ii}" + lout = [k0 for k0, v0 in coll.ddata.items()] key_edge = _generic_check._check_var( key_edge, defk, types=str, @@ -224,48 +326,96 @@ def _to_dict( ) ddata[key_edge] = { 'data': ee, - 'ref': key_ref, + 'ref': key_ref_edge, **kwdargs, } + units = kwdargs.get('units') + + # shape + shape_edge = ee.size + + # ------------------ + # is_linear, is_log + # ------------------ + + is_log = ( + np.all(ee > 0.) 
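+        # a constant ratio between consecutive edges <=> log-spaced
+        # (geometric) edges; the positivity check above keeps the
+        # element-wise ratios below well-defined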
+ and np.allclose(ee[1:] / ee[:-1], ee[1]/ee[0], atol=0, rtol=1e-6) + ) + + is_linear = np.allclose(np.diff(ee), ee[1] - ee[0], atol=0, rtol=1e-6) + assert not (is_log and is_linear), ee + + # ------------ + # cents + # ------------ + + # ------------ + # key_ref_cent + defk = f"{key}_nc{ii}" lout = [k0 for k0, v0 in coll.dref.items()] - key_ref = _generic_check._check_var( - key_ref, defk, + key_ref_cent = _generic_check._check_var( + key_ref_cent, defk, types=str, default=defk, - excluded=lout, ) - dref[key_ref] = {'size': ee.size - 1} + if key_ref_cent in lout: + size = coll.dref[key_ref_cent]['size'] + c0 = size == (ee.size - 1) + if not c0: + msg = ( + f"Bins '{key}', arg key_ref_cents[{ii}]" + " conflicts with existing ref:\n" + f"\t- coll.dref['{key_ref_edge}']['size'] = {size}" + f"\t- edges['{ii}'].size - 1 = {ee.size-1}\n" + ) + raise Exception(msg) + else: + dref[key_ref_cent] = {'size': ee.size - 1} + + # ------------ + # key_cent + + defk = f"{key}_c{ii}" + lout = [k0 for k0, v0 in coll.ddata.items()] + key_cent = _generic_check._check_var( + key_cent, defk, + types=str, + default=defk, + ) + if key_cent in lout: + ref = coll.ddata[key_cent]['ref'] + c0 = ref == (key_ref_cent,) + if not c0: + msg = ( + f"Bins '{key}', arg key_ref_cents[{ii}]" + " conflicts with existing ref:\n" + f"\t- coll.ddata['{key_ref_cent}']['ref'] = {ref}" + f"\t- key_ref_cent = {key_ref_cent}\n" + ) + raise Exception(msg) - # dref - if key_ref not in coll.dref.keys(): - dref = { - key_ref: { - 'size': ee.size, - }, - } else: - dref = None - - # ddata - key_cent = None - if key_cents not in coll.ddata.keys(): - ddata = { - key_cents: { - # 'data': cents, - 'units': units, - # 'source': None, - 'dim': dim, - 'quant': quant, - 'name': name, - 'ref': key_ref, - }, + if is_log: + cents = np.sqrt(ee[:-1] * ee[1:]) + else: + cents = 0.5 * (ee[1:] + ee[:-1]) + + ddata[key_cent] = { + 'data': cents, + 'ref': (key_ref_cent,), + **kwdargs, } - else: - ddata = None - return key_edge, key_cent + return ( + key_edge, key_cent, + key_ref_edge, key_ref_cent, + shape_edge, + is_linear, is_log, + units, + ) # ############################################################## @@ -306,11 +456,17 @@ def remove_bins(coll=None, key=None, propagate=None): for k0 in key: # specific data - kdata = list(coll.dobj[wbins][k0]['cents']) + kdata = ( + coll.dobj[wbins][k0]['cents'] + + coll.dobj[wbins][k0]['edges'] + ) coll.remove_data(kdata, propagate=propagate) # specific ref - lref = list(coll.dobj[wbins][k0]['ref']) + lref = ( + coll.dobj[wbins][k0]['ref_cents'] + + coll.dobj[wbins][k0]['ref_edges'] + ) for rr in lref: if rr in coll.dref.keys(): coll.remove_ref(rr, propagate=propagate) From 896f29a93106a36c9c010a371a71cc8cbd0d3a60 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Fri, 15 Nov 2024 22:06:43 +0000 Subject: [PATCH 21/25] [#159] updating unit tests for add_bins() --- datastock/tests/test_01_DataStock.py | 1 - datastock/tests/test_input.py | 26 +++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/datastock/tests/test_01_DataStock.py b/datastock/tests/test_01_DataStock.py index 70347d1..91c906e 100644 --- a/datastock/tests/test_01_DataStock.py +++ b/datastock/tests/test_01_DataStock.py @@ -18,7 +18,6 @@ from . 
import test_input as _input - _PATH_HERE = os.path.dirname(__file__) _PATH_OUTPUT = os.path.join(_PATH_HERE, 'output') diff --git a/datastock/tests/test_input.py b/datastock/tests/test_input.py index 83c8381..0da4467 100644 --- a/datastock/tests/test_input.py +++ b/datastock/tests/test_input.py @@ -18,10 +18,30 @@ def add_bins(coll): if coll.dobj.get(wbins) is not None: return - # --------------- - # define bins + # ------------------------- + # define bins from scratch + # ------------------------- + + # linear uniform 1d + coll.add_bins('bin0', edges=np.linspace(0, 1, 10), units='m') + + # log uniform 1d + coll.add_bins(edges=np.logspace(0, 1, 10), units='eV') - # linear uniform + # non-uniform 1d + coll.add_bins(edges=np.r_[1, 2, 5, 10, 12, 20], units='s') + + # linear uniform 2d coll.add_bins('bin0', edges=np.linspace(0, 1, 10), units='m') + # log uniform 2d + coll.add_bins(edges=np.logspace(0, 1, 10), units='eV') + + # non-uniform 2d + coll.add_bins(edges=np.r_[1, 2, 5, 10, 12, 20], units='s') + + # ------------------------- + # define bins pre-existing + # ------------------------- + return From 6151bd1ae0cf80fea9bc719bd999325b6c9be2b9 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Fri, 15 Nov 2024 22:40:23 +0000 Subject: [PATCH 22/25] [#159] updating unit tests for add_bins() --- datastock/tests/test_01_DataStock.py | 61 +++------------------- datastock/tests/test_input.py | 78 ++++++++++++++++++++++++---- 2 files changed, 74 insertions(+), 65 deletions(-) diff --git a/datastock/tests/test_01_DataStock.py b/datastock/tests/test_01_DataStock.py index 91c906e..079b48b 100644 --- a/datastock/tests/test_01_DataStock.py +++ b/datastock/tests/test_01_DataStock.py @@ -350,63 +350,14 @@ def test08_domain_ref(self): lk = list(domain.keys()) assert all([isinstance(dout[k0]['ind'], np.ndarray) for k0 in lk]) - def test09_binning(self): - - bins = np.linspace(1, 5, 8) - lk = [ - ('y', 'nx', bins, 0, False, False, 'y_bin0'), - ('y', 'nx', bins, 0, True, False, 'y_bin1'), - ('y', 'nx', 'x', 0, False, True, 'y_bin2'), - ('y', 'nx', 'x', 0, True, True, 'y_bin3'), - ('prof0', 'x', 'nt0', 1, False, True, 'p0_bin0'), - ('prof0', 'x', 'nt0', 1, True, True, 'p0_bin1'), - ('prof0-bis', 'prof0', 'x', [0, 1], False, True, 'p1_bin0'), - ] - - for ii, (k0, kr, kb, ax, integ, store, kbin) in enumerate(lk): - dout = self.st.binning( - data=k0, - bin_data0=kr, - bins0=kb, - axis=ax, - integrate=integ, - store=store, - store_keys=kbin, - safety_ratio=0.95, - returnas=True, - ) - - if np.isscalar(ax): - ax = [ax] - - if isinstance(kb, str): - if kb in self.st.ddata: - nb = self.st.ddata[kb]['data'].size - else: - nb = self.st.dref[kb]['size'] - else: - nb = bins.size - - k0 = list(dout.keys())[0] - shape = [ - ss for ii, ss in enumerate(self.st.ddata[k0]['data'].shape) - if ii not in ax - ] - - shape.insert(ax[0], nb) - if dout[k0]['data'].shape != tuple(shape): - shstr = dout[k0]['data'].shape - msg = ( - "Mismatching shapes for case {ii}!\n" - f"\t- dout['{k0}']['data'].shape = {shstr}\n" - f"\t- expected: {tuple(shape)}" - ) - raise Exception(msg) - - def test10_add_bins(self): + def test09_add_bins(self): _input.add_bins(self.coll) - def test10_interpolate(self): + def test10_binning(self): + # _input.binning(self.coll) + pass + + def test11_interpolate(self): lk = ['y', 'y', 'prof0', 'prof0', 'prof0', '3d'] lref = [None, 'nx', 't0', ['nt0', 'nx'], ['t0', 'x'], ['t0', 'x']] diff --git a/datastock/tests/test_input.py b/datastock/tests/test_input.py index 0da4467..3a2a4b8 100644 --- 
a/datastock/tests/test_input.py +++ b/datastock/tests/test_input.py @@ -23,25 +23,83 @@ def add_bins(coll): # ------------------------- # linear uniform 1d - coll.add_bins('bin0', edges=np.linspace(0, 1, 10), units='m') + coll.add_bins('b1d_lin', edges=np.linspace(0, 1, 10), units='m') # log uniform 1d - coll.add_bins(edges=np.logspace(0, 1, 10), units='eV') + coll.add_bins('b1d_log', edges=np.logspace(0, 1, 10), units='eV') # non-uniform 1d - coll.add_bins(edges=np.r_[1, 2, 5, 10, 12, 20], units='s') + coll.add_bins('b2d_rand', edges=np.r_[1, 2, 5, 10, 12, 20], units='s') # linear uniform 2d - coll.add_bins('bin0', edges=np.linspace(0, 1, 10), units='m') - - # log uniform 2d - coll.add_bins(edges=np.logspace(0, 1, 10), units='eV') - - # non-uniform 2d - coll.add_bins(edges=np.r_[1, 2, 5, 10, 12, 20], units='s') + coll.add_bins( + 'b2d_lin', + edges=(np.linspace(0, 1, 10), np.linspace(0, 3, 20)), + units='m', + ) + + # log uniform mix 2d + coll.add_bins( + 'b2d_mix', + edges=(np.logspace(0, 1, 10), np.pi*np.r_[0, 0.5, 1, 1.2, 1.5, 2]), + units=('eV', 'rad'), + ) # ------------------------- # define bins pre-existing # ------------------------- return + + +def binning(coll): + bins = np.linspace(1, 5, 8) + lk = [ + ('y', 'nx', bins, 0, False, False, 'y_bin0'), + ('y', 'nx', bins, 0, True, False, 'y_bin1'), + ('y', 'nx', 'x', 0, False, True, 'y_bin2'), + ('y', 'nx', 'x', 0, True, True, 'y_bin3'), + ('prof0', 'x', 'nt0', 1, False, True, 'p0_bin0'), + ('prof0', 'x', 'nt0', 1, True, True, 'p0_bin1'), + ('prof0-bis', 'prof0', 'x', [0, 1], False, True, 'p1_bin0'), + ] + + for ii, (k0, kr, kb, ax, integ, store, kbin) in enumerate(lk): + dout = coll.binning( + data=k0, + bin_data0=kr, + bins0=kb, + axis=ax, + integrate=integ, + store=store, + store_keys=kbin, + safety_ratio=0.95, + returnas=True, + ) + + if np.isscalar(ax): + ax = [ax] + + if isinstance(kb, str): + if kb in coll.ddata: + nb = coll.ddata[kb]['data'].size + else: + nb = coll.dref[kb]['size'] + else: + nb = bins.size + + k0 = list(dout.keys())[0] + shape = [ + ss for ii, ss in enumerate(coll.ddata[k0]['data'].shape) + if ii not in ax + ] + + shape.insert(ax[0], nb) + if dout[k0]['data'].shape != tuple(shape): + shstr = dout[k0]['data'].shape + msg = ( + "Mismatching shapes for case {ii}!\n" + f"\t- dout['{k0}']['data'].shape = {shstr}\n" + f"\t- expected: {tuple(shape)}" + ) + raise Exception(msg) From a5d50dd752ed246c1016c834f19a66846e502a04 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Fri, 15 Nov 2024 22:49:51 +0000 Subject: [PATCH 23/25] [#159] updating unit tests for add_bins(), TBF --- datastock/_class03_checks.py | 1 + datastock/tests/test_01_DataStock.py | 102 +++++++++++++-------------- 2 files changed, 52 insertions(+), 51 deletions(-) diff --git a/datastock/_class03_checks.py b/datastock/_class03_checks.py index 4f5df57..9f9289c 100644 --- a/datastock/_class03_checks.py +++ b/datastock/_class03_checks.py @@ -216,6 +216,7 @@ def _check_edges_str(edges, coll): def _check_edges_array(edges): return ( isinstance(edges, (list, tuple, np.ndarray)) + and all([np.isscalar(ee) for ee in edges]) and np.array(edges).ndim == 1 and np.array(edges).size > 1 ) diff --git a/datastock/tests/test_01_DataStock.py b/datastock/tests/test_01_DataStock.py index 079b48b..38e09e2 100644 --- a/datastock/tests/test_01_DataStock.py +++ b/datastock/tests/test_01_DataStock.py @@ -213,7 +213,7 @@ class Test01_Instanciate(): @classmethod def setup_class(cls): - cls.st = Collection() + cls.coll = Collection() cls.nc = 5 cls.nx = 80 cls.lnt = [100, 90, 80, 
120, 80] @@ -223,13 +223,13 @@ def setup_class(cls): # ------------------------ def test01_add_ref(self): - _add_ref(st=self.st, nc=self.nc, nx=self.nx, lnt=self.lnt) + _add_ref(st=self.coll, nc=self.nc, nx=self.nx, lnt=self.lnt) def test02_add_data(self): - _add_data(st=self.st, nc=self.nc, nx=self.nx, lnt=self.lnt) + _add_data(st=self.coll, nc=self.nc, nx=self.nx, lnt=self.lnt) def test03_add_obj(self): - _add_obj(st=self.st, nc=self.nc) + _add_obj(st=self.coll, nc=self.nc) ####################################################### @@ -243,14 +243,14 @@ class Test02_Manipulate(): @classmethod def setup_class(cls): - cls.st = Collection() + cls.coll = Collection() cls.nc = 5 cls.nx = 80 cls.lnt = [100, 90, 80, 120, 80] - _add_ref(st=cls.st, nc=cls.nc, nx=cls.nx, lnt=cls.lnt) - _add_data(st=cls.st, nc=cls.nc, nx=cls.nx, lnt=cls.lnt) - _add_obj(st=cls.st, nc=cls.nc) + _add_ref(st=cls.coll, nc=cls.nc, nx=cls.nx, lnt=cls.lnt) + _add_data(st=cls.coll, nc=cls.nc, nx=cls.nx, lnt=cls.lnt) + _add_obj(st=cls.coll, nc=cls.nc) # ------------------------ # Add / remove @@ -258,17 +258,17 @@ def setup_class(cls): def test01_add_param(self): # create new 'campaign' parameter for data arrays - self.st.add_param('campaign', which='data') + self.coll.add_param('campaign', which='data') # tag each data with its campaign for ii in range(self.nc): - self.st.set_param( + self.coll.set_param( which='data', key=f't{ii}', param='campaign', value=f'c{ii}', ) - self.st.set_param( + self.coll.set_param( which='data', key=f'prof{ii}', param='campaign', @@ -276,38 +276,38 @@ def test01_add_param(self): ) def test02_remove_param(self): - self.st.add_param('blabla', which='campaign') - self.st.remove_param('blabla', which='campaign') + self.coll.add_param('blabla', which='campaign') + self.coll.remove_param('blabla', which='campaign') # ------------------------ # Selection / sorting # ------------------------ def test03_select(self): - key = self.st.select(which='data', units='s', returnas=str) + key = self.coll.select(which='data', units='s', returnas=str) assert key.tolist() == ['t0', 't1', 't2', 't3', 't4'] - out = self.st.select(dim='time', returnas=int) + out = self.coll.select(dim='time', returnas=int) assert len(out) == 5, out # test quantitative param selection - out = self.st.select(which='campaign', index=[2, 4]) + out = self.coll.select(which='campaign', index=[2, 4]) assert len(out) == 3 - out = self.st.select(which='campaign', index=(2, 4)) + out = self.coll.select(which='campaign', index=(2, 4)) assert len(out) == 2 def test04_sortby(self): - self.st.sortby(which='data', param='units') + self.coll.sortby(which='data', param='units') # ------------------------ # show # ------------------------ def test05_show(self): - self.st.show() - self.st.show_data() - self.st.show_obj() + self.coll.show() + self.coll.show_data() + self.coll.show_obj() # ------------------------ # Interpolate @@ -318,7 +318,7 @@ def test06_get_ref_vector(self): hasref, hasvector, ref, key_vector, values, dind, - ) = self.st.get_ref_vector( + ) = self.coll.get_ref_vector( key='prof0', ref='nx', values=[1, 2, 2.01, 3], @@ -329,7 +329,7 @@ def test06_get_ref_vector(self): assert dind['indr'].shape == (2, 4) def test07_get_ref_vector_common(self): - hasref, ref, key, val, dout = self.st.get_ref_vector_common( + hasref, ref, key, val, dout = self.coll.get_ref_vector_common( keys=['t0', 'prof0', 'prof1', 't3'], dim='time', ) @@ -342,10 +342,10 @@ def test08_domain_ref(self): 'y': [[0, 0.9], (0.1, 0.2)], 't0': {'domain': [2, 3]}, 't1': {'domain': 
[[2, 3], (2.5, 3), [4, 6]]}, - 't2': {'ind': self.st.ddata['t2']['data'] > 5}, + 't2': {'ind': self.coll.ddata['t2']['data'] > 5}, } - dout = self.st.get_domain_ref(domain=domain) + dout = self.coll.get_domain_ref(domain=domain) lk = list(domain.keys()) assert all([isinstance(dout[k0]['ind'], np.ndarray) for k0 in lk]) @@ -374,9 +374,9 @@ def test11_interpolate(self): zipall = zip(lk, lref, lax, llog, lgrid, lx0, lx1, ldom) for ii, (kk, rr, aa, lg, gg, x0, x1, dom) in enumerate(zipall): - _ = self.st.get_domain_ref(domain=dom) + _ = self.coll.get_domain_ref(domain=dom) - dout = self.st.interpolate( + dout = self.coll.interpolate( keys=kk, ref_key=rr, x0=x0, @@ -391,7 +391,7 @@ def test11_interpolate(self): assert isinstance(dout, dict) assert isinstance(dout[kk]['data'], np.ndarray) - shape = list(self.st.ddata[kk]['data'].shape) + shape = list(self.coll.ddata[kk]['data'].shape) x0s = np.array(x0).shape if gg is False else (len(x0), len(x1)) if dom is None: shape = tuple(np.r_[shape[:aa[0]], x0s, shape[aa[-1]+1:]]) @@ -409,10 +409,10 @@ def test11_interpolate_common_refs(self): llog = [False, True, False] # add data for common ref interpolation - nt0 = self.st.dref['nt0']['size'] - nt1 = self.st.dref['nt1']['size'] - nc = self.st.dref['nc']['size'] - self.st.add_data( + nt0 = self.coll.dref['nt0']['size'] + nt1 = self.coll.dref['nt1']['size'] + nc = self.coll.dref['nc']['size'] + self.coll.add_data( key='data_com', data=1. + np.random.random((nc, nt1, nt0))*2, ref=('nc', 'nt1', 'nt0'), @@ -429,7 +429,7 @@ def test11_interpolate_common_refs(self): zipall = zip(lk, lref, lax, llog, lx1, lrefc, ls, lr) for ii, (kk, rr, aa, lg, x1, refc, ss, ri) in enumerate(zipall): - dout, dparams = self.st.interpolate( + dout, dparams = self.coll.interpolate( keys=kk, ref_key=rr, x0='data_com', @@ -457,8 +457,8 @@ def test11_interpolate_common_refs(self): f"\t- x1: {x1}\n" f"\t- ref_com: {refc}\n" f"\t- log_log: {lg}\n" - f"\t- key['ref']: {self.st.ddata[kk]['ref']}\n" - f"\t- x0['ref']: {self.st.ddata['data_com']['ref']}\n" + f"\t- key['ref']: {self.coll.ddata[kk]['ref']}\n" + f"\t- x0['ref']: {self.coll.ddata['data_com']['ref']}\n" "\n" # + "\n".join(lstr) "\n" @@ -477,17 +477,17 @@ def test11_interpolate_common_refs(self): # ------------------------ def test12_plot_as_array_1d(self): - dax = self.st.plot_as_array(key='t0') + dax = self.coll.plot_as_array(key='t0') plt.close('all') del dax def test13_plot_as_array_2d(self): - dax = self.st.plot_as_array(key='prof0') + dax = self.coll.plot_as_array(key='prof0') plt.close('all') del dax def test14_plot_as_array_2d_log(self): - dax = self.st.plot_as_array( + dax = self.coll.plot_as_array( key='pec', keyX='ne', keyY='Te', dscale={'data': 'log'}, ) @@ -495,29 +495,29 @@ def test14_plot_as_array_2d_log(self): del dax def test15_plot_as_array_3d(self): - dax = self.st.plot_as_array(key='3d', dvminmax={'keyX': {'min': 0}}) + dax = self.coll.plot_as_array(key='3d', dvminmax={'keyX': {'min': 0}}) plt.close('all') del dax def test16_plot_as_array_3d_ZNonMonot(self): - dax = self.st.plot_as_array(key='3d', keyZ='y') + dax = self.coll.plot_as_array(key='3d', keyZ='y') plt.close('all') del dax def test17_plot_as_array_4d(self): - dax = self.st.plot_as_array(key='4d', dscale={'keyU': 'linear'}) + dax = self.coll.plot_as_array(key='4d', dscale={'keyU': 'linear'}) plt.close('all') del dax # def test18_plot_BvsA_as_distribution(self): - # dax = self.st.plot_BvsA_as_distribution( + # dax = self.coll.plot_BvsA_as_distribution( # keyA='prof0', keyB='prof0-bis', # ) # 
plt.close('all') # del dax def test19_plot_as_profile1d(self): - dax = self.st.plot_as_profile1d( + dax = self.coll.plot_as_profile1d( key='prof0', key_time='t0', keyX='prof0-bis', @@ -529,7 +529,7 @@ def test19_plot_as_profile1d(self): # def test20_plot_as_mobile_lines(self): # # 3d - # dax = self.st.plot_as_mobile_lines( + # dax = self.coll.plot_as_mobile_lines( # keyX='3d', # keyY='3d-bis', # key_time='t0', @@ -537,7 +537,7 @@ def test19_plot_as_profile1d(self): # ) # # 2d - # dax = self.st.plot_as_mobile_lines( + # dax = self.coll.plot_as_mobile_lines( # keyX='prof2', # keyY='prof2-bis', # key_chan='nx', @@ -551,21 +551,21 @@ def test19_plot_as_profile1d(self): # ------------------------ def test21_copy_equal(self): - st2 = self.st.copy() - assert st2 is not self.st + st2 = self.coll.copy() + assert st2 is not self.coll - msg = st2.__eq__(self.st, returnas=str) + msg = st2.__eq__(self.coll, returnas=str) if msg is not True: raise Exception(msg) def test22_get_nbytes(self): - nb, dnb = self.st.get_nbytes() + nb, dnb = self.coll.get_nbytes() def test23_saveload(self, verb=False): - pfe = self.st.save(path=_PATH_OUTPUT, verb=verb, return_pfe=True) + pfe = self.coll.save(path=_PATH_OUTPUT, verb=verb, return_pfe=True) st2 = load(pfe, verb=verb) # Just to check the loaded version works fine - msg = st2.__eq__(self.st, returnas=str) + msg = st2.__eq__(self.coll, returnas=str) if msg is not True: raise Exception(msg) os.remove(pfe) From 2d69b1f43ea87176d14f9cd02fbc7c2d33e07389 Mon Sep 17 00:00:00 2001 From: dvezinet Date: Fri, 15 Nov 2024 22:54:03 +0000 Subject: [PATCH 24/25] [#159] updating unit tests for add_bins(), TBF --- datastock/tests/test_01_DataStock.py | 45 +------------------- datastock/tests/test_input.py | 61 +++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/datastock/tests/test_01_DataStock.py b/datastock/tests/test_01_DataStock.py index 38e09e2..943b20e 100644 --- a/datastock/tests/test_01_DataStock.py +++ b/datastock/tests/test_01_DataStock.py @@ -358,50 +358,9 @@ def test10_binning(self): pass def test11_interpolate(self): + _input.interpolate(self.coll) - lk = ['y', 'y', 'prof0', 'prof0', 'prof0', '3d'] - lref = [None, 'nx', 't0', ['nt0', 'nx'], ['t0', 'x'], ['t0', 'x']] - lax = [[0], [0], [0], [0, 1], [0, 1], [1, 2]] - lgrid = [False, False, False, False, True, False] - llog = [False, False, False, True, False, False] - - x2d = np.array([[1.5, 2.5], [1, 2]]) - x3d = np.random.random((5, 4, 3)) - lx0 = [x2d, [1.5, 2.5], [1.5, 2.5], x2d, [1.5, 2.5], x3d] - lx1 = [None, None, None, x2d, [1.2, 2.3], x3d] - ldom = [None, None, {'nx': [1.5, 2]}, None, None, None] - - zipall = zip(lk, lref, lax, llog, lgrid, lx0, lx1, ldom) - for ii, (kk, rr, aa, lg, gg, x0, x1, dom) in enumerate(zipall): - - _ = self.coll.get_domain_ref(domain=dom) - - dout = self.coll.interpolate( - keys=kk, - ref_key=rr, - x0=x0, - x1=x1, - grid=gg, - deg=2, - deriv=None, - log_log=lg, - return_params=False, - domain=dom, - ) - - assert isinstance(dout, dict) - assert isinstance(dout[kk]['data'], np.ndarray) - shape = list(self.coll.ddata[kk]['data'].shape) - x0s = np.array(x0).shape if gg is False else (len(x0), len(x1)) - if dom is None: - shape = tuple(np.r_[shape[:aa[0]], x0s, shape[aa[-1]+1:]]) - else: - shape = tuple(np.r_[x0s, 39]) if ii == 2 else None - if dout[kk]['data'].shape != tuple(shape): - msg = str(dout[kk]['data'].shape, shape, kk, rr) - raise Exception(msg) - - def test11_interpolate_common_refs(self): + def test12_interpolate_common_refs(self): lk = 
['3d', '3d', '3d']
         lref = ['t0', ['nt0', 'nx'], ['nx']]
         lrefc = ['nc', 'nc', 'nt0']
diff --git a/datastock/tests/test_input.py b/datastock/tests/test_input.py
index 3a2a4b8..0d30e05 100644
--- a/datastock/tests/test_input.py
+++ b/datastock/tests/test_input.py
@@ -5,7 +5,7 @@
 # ###############################################################
 # ###############################################################
-#
+# Add bins
 # ###############################################################
 
 
@@ -52,6 +52,12 @@ def add_bins(coll):
     return
 
 
+# ###############################################################
+# ###############################################################
+# Binning
+# ###############################################################
+
+
 def binning(coll):
     bins = np.linspace(1, 5, 8)
     lk = [
@@ -103,3 +109,56 @@ def binning(coll):
             f"\t- expected: {tuple(shape)}"
         )
         raise Exception(msg)
+
+
+# ###############################################################
+# ###############################################################
+# Interpolate
+# ###############################################################
+
+
+def interpolate(coll):
+
+    lk = ['y', 'y', 'prof0', 'prof0', 'prof0', '3d']
+    lref = [None, 'nx', 't0', ['nt0', 'nx'], ['t0', 'x'], ['t0', 'x']]
+    lax = [[0], [0], [0], [0, 1], [0, 1], [1, 2]]
+    lgrid = [False, False, False, False, True, False]
+    llog = [False, False, False, True, False, False]
+
+    x2d = np.array([[1.5, 2.5], [1, 2]])
+    x3d = np.random.random((5, 4, 3))
+    lx0 = [x2d, [1.5, 2.5], [1.5, 2.5], x2d, [1.5, 2.5], x3d]
+    lx1 = [None, None, None, x2d, [1.2, 2.3], x3d]
+    ldom = [None, None, {'nx': [1.5, 2]}, None, None, None]
+
+    zipall = zip(lk, lref, lax, llog, lgrid, lx0, lx1, ldom)
+    for ii, (kk, rr, aa, lg, gg, x0, x1, dom) in enumerate(zipall):
+
+        _ = coll.get_domain_ref(domain=dom)
+
+        dout = coll.interpolate(
+            keys=kk,
+            ref_key=rr,
+            x0=x0,
+            x1=x1,
+            grid=gg,
+            deg=2,
+            deriv=None,
+            log_log=lg,
+            return_params=False,
+            domain=dom,
+        )
+
+        assert isinstance(dout, dict)
+        assert isinstance(dout[kk]['data'], np.ndarray)
+        shape = list(coll.ddata[kk]['data'].shape)
+        x0s = np.array(x0).shape if gg is False else (len(x0), len(x1))
+        if dom is None:
+            shape = tuple(np.r_[shape[:aa[0]], x0s, shape[aa[-1]+1:]])
+        else:
+            shape = tuple(np.r_[x0s, 39]) if ii == 2 else None
+        if dout[kk]['data'].shape != tuple(shape):
+            # str() takes a single object => pass one tuple, not 4 arguments
+            msg = str((dout[kk]['data'].shape, shape, kk, rr))
+            raise Exception(msg)
+
+    return
From f2c17e8c4f040be4f88f73a6f02a79b7858b8808 Mon Sep 17 00:00:00 2001
From: dvezinet
Date: Fri, 7 Mar 2025 23:38:19 +0000
Subject: [PATCH 25/25] [#159] progressing on Bins, TBF

---
 datastock/_class03_Bins.py    |  20 ++--
 datastock/_class03_binning.py | 180 +++++++++++++++++++++++++++-------
 datastock/tests/test_input.py |  16 +++
 3 files changed, 168 insertions(+), 48 deletions(-)

diff --git a/datastock/_class03_Bins.py b/datastock/_class03_Bins.py
index cc5d04c..5aa3efd 100644
--- a/datastock/_class03_Bins.py
+++ b/datastock/_class03_Bins.py
@@ -105,15 +105,13 @@ def remove_bins(
 
     def binning(
         self,
+        # data to be binned
         data=None,
-        data_units=None,
-        axis=None,
-        # binning
-        bins0=None,
-        bins1=None,
         bin_data0=None,
         bin_data1=None,
-        bin_units0=None,
+        axis=None,
+        # bins
+        bins=None,
         # kind of binning
         integrate=None,
         statistic=None,
@@ -177,15 +175,13 @@ def binning(
 
         return _bin_vs_bs.main(
             coll=self,
+            # data to be binned
             data=data,
-            data_units=data_units,
-            axis=axis,
-            # binning
-            bins0=bins0,
-            bins1=bins1,
             bin_data0=bin_data0,
             bin_data1=bin_data1,
-            bin_units0=bin_units0,
+            axis=axis,
+            # bins
+            bins=bins,
             # kind of binning
             integrate=integrate,
             statistic=statistic,
diff --git a/datastock/_class03_binning.py b/datastock/_class03_binning.py
index f42284d..e37911f 100644
--- a/datastock/_class03_binning.py
+++ b/datastock/_class03_binning.py
@@ -198,11 +198,9 @@ def _check(
     data_units=None,
     axis=None,
     # binning
-    bins0=None,
-    bins1=None,
+    bins=None,
     bin_data0=None,
     bin_data1=None,
-    bin_units0=None,
     # kind of binning
     integrate=None,
     statistic=None,
@@ -229,18 +227,45 @@ def _check(
         default=True,
    )
 
+    # store
+    store = _generic_check._check_var(
+        store, 'store',
+        types=bool,
+        default=False,
+    )
+
+    # -----------
+    # bins
+    # ------------
+
+    wb = coll._which_bins
+    lok_bins = list(coll.dobj.get(wb, {}).keys())
+    bins = _generic_check._check_var(
+        bins, 'bins',
+        types=str,
+        allowed=lok_bins,
+    )
+
     # ------------------
-    # data: str vs array
+    # data to be binned
     # -------------------
 
-    ddata = _check_data(
+    # unpack matches the 6 values returned by the new _check_data()
+    (
+        data, bin_data0, bin_data1, nd_bins, units0, units1,
+    ) = _check_data(
         coll=coll,
         data=data,
-        data_units=data_units,
-        store=store,
+        bin_data0=bin_data0,
+        bin_data1=bin_data1,
+        bins=bins,
+        axis=axis,
     )
 
-    ndim_data = list(ddata.values())[0]['data'].ndim
+    if data is None:
+        ndim_data = None
+    else:
+        ndim_data = coll.ddata[data]['data'].ndim
 
     # -----------------
     # check statistic
@@ -260,26 +285,6 @@ def _check(
     # bins
     # ------------
 
-    dbins0 = _check_bins(
-        coll=coll,
-        lkdata=list(ddata.keys()),
-        bins=bins0,
-        dref_vector=dref_vector,
-        store=store,
-    )
-    if bins1 is not None:
-        dbins1 = _check_bins(
-            coll=coll,
-            lkdata=list(ddata.keys()),
-            bins=bins1,
-            dref_vector=dref_vector,
-            store=store,
-        )
-
-    # -----------
-    # bins
-    # ------------
-
     # dbins0
     dbins0, variable_bin0, axis = _check_bins_data(
         coll=coll,
@@ -418,19 +423,121 @@ def _check_data(
+    coll=None,
+    data=None,
+    bin_data0=None,
+    bin_data1=None,
+    bins=None,
+    axis=None,
+):
+
+    # ---------------
+    # get bin features
+    # ---------------
+
+    wbins = coll._which_bins
+    nd_bins = int(coll.dobj[wbins][bins]['nd'][0])
+    units0 = coll.ddata[coll.dobj[wbins][bins]['edges'][0]]['units']
+    if nd_bins == 2:
+        units1 = coll.ddata[coll.dobj[wbins][bins]['edges'][1]]['units']
+    else:
+        units1 = None
+
+    # ---------------
+    # bin_data0
+    # ---------------
+
+    lok = list(coll.ddata.keys())
+    bin_data0 = _generic_check._check_var(
+        bin_data0, 'bin_data0',
+        types=str,
+        allowed=lok,
+    )
+
+    # check units
+    _check_units(
+        coll=coll,
+        bins=bins,
+        bin_data=bin_data0,
+        bin_data_name='bin_data0',
+        ii=0,
+        units_bins=units0,
+    )
+
+    bin_data_ref = coll.ddata[bin_data0]['ref']
+
+    # ---------------
+    # bin_data1
+    # ---------------
+
+    if nd_bins == 2:
+        lok = [
+            k0 for k0, v0 in coll.ddata.items()
+            if v0['ref'] == bin_data_ref
+        ]
+        bin_data1 = _generic_check._check_var(
+            bin_data1, 'bin_data1',
+            types=str,
+            allowed=lok,
+        )
+
+        # check units
+        _check_units(
+            coll=coll,
+            bins=bins,
+            bin_data=bin_data1,
+            bin_data_name='bin_data1',
+            ii=1,
+            units_bins=units1,
+        )
+
+    else:
+        bin_data1 = None
+
+    # ---------------
+    # data
+    # ---------------
+
+    if data is not None:
+
+        lok = [
+            k0 for k0, v0 in coll.ddata.items()
+            if tuple([rr for rr in v0['ref'] if rr in bin_data_ref]) == bin_data_ref
+        ]
+        data = _generic_check._check_var(
+            data, 'data',
+            types=str,
+            allowed=lok,
+        )
+
+    return data, bin_data0, bin_data1, nd_bins, units0, units1
+
+
+def _check_units(
+    coll=None,
+    bins=None,
+    bin_data=None,
+    bin_data_name=None,
+    ii=None,
+    units_bins=None,
+):
+    units = coll.ddata[bin_data]['units']
+    c0 = units is not None and units == units_bins
+    if not c0:
+        msg = (
+            "Binning oddity:\n"
+            "\t- detected: unmatching 'units' between bins and bin_data\n"
+            f"\t- Bins: '{bins}' (edges[{ii}])\n"
+            f"\t- Bins units: '{units_bins}'\n"
+            f"\t- {bin_data_name}: '{bin_data}'\n"
+            f"\t- {bin_data_name} units: '{units}'\n"
        )
+        warnings.warn(msg)
+    return
+
+
+# DEPRECATED
+def _check_data_old(
     coll=None,
     data=None,
     data_units=None,
     store=None,
 ):
 
-    # -----------
-    # store
-    store = _generic_check._check_var(
-        store, 'store',
-        types=bool,
-        default=False,
-    )
+    # ---------------
+    # trivial
+
+    if data is None:
+        return None, store
 
     # ---------------------
     # make sure it's a list
@@ -499,7 +606,7 @@ def _check_data(
         for ii in range(len(data))
     }
 
-    return ddata
+    return ddata, store
 
 
 # ################################################################
@@ -508,6 +615,7 @@ def _check_data(
 # ################################################################
 
 
+# DEPRECATED
 def _check_bins(
     coll=None,
     lkdata=None,
diff --git a/datastock/tests/test_input.py b/datastock/tests/test_input.py
index 0d30e05..035aab1 100644
--- a/datastock/tests/test_input.py
+++ b/datastock/tests/test_input.py
@@ -13,6 +13,7 @@ def add_bins(coll):
 
     # ---------------
     # check if needed
+    # ---------------
 
     wbins = coll._which_bins
     if coll.dobj.get(wbins) is not None:
@@ -59,6 +60,19 @@ def add_bins(coll):
 
 
 def binning(coll):
+
+    # ---------------
+    # check if needed
+    # ---------------
+
+    wbins = coll._which_bins
+    if coll.dobj.get(wbins) is None:
+        add_bins(coll)
+
+    # -------------------
+    # Binning
+    # -------------------
+
     bins = np.linspace(1, 5, 8)
     lk = [
         ('y', 'nx', bins, 0, False, False, 'y_bin0'),
@@ -110,6 +124,8 @@ def binning(coll):
     )
     raise Exception(msg)
 
+    return
+
 
 # ###############################################################
 # ###############################################################
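
For orientation, the API that patches 24 and 25 converge on declares bins once
via add_bins() and then refers to them by key in binning(). The following is a
minimal usage sketch, not part of the patches: it assumes Collection, add_ref()
and add_data() behave as in the test fixtures above, and it assumes add_bins()
forwards a units attribute to the stored edges through its **kwdargs, which
these patches do not confirm.

    import numpy as np
    from datastock import Collection

    coll = Collection()

    # a 1d abscissa carrying units, plus data to be binned along it
    coll.add_ref(key='nx', size=100)
    coll.add_data(key='x', data=np.linspace(0., 10., 100), ref='nx', units='eV')
    coll.add_data(
        key='spec',
        data=np.exp(-np.linspace(0., 10., 100)),
        ref='nx',
        units='ph/eV',
    )

    # declare bins once (units assumed to propagate to the stored edges)
    coll.add_bins(key='b0', edges=np.linspace(0., 10., 11), units='eV')

    # new-style call: bins referenced by key, bin_data0 gives the abscissa
    dout = coll.binning(
        data='spec',
        bin_data0='x',
        bins='b0',
        integrate=True,
        returnas=True,
    )

Note that the new _check_data() / _check_units() pair only warns, rather than
raises, when the units of bin_data0 do not match the units of the bins' edges,
so unit consistency remains the caller's responsibility.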