diff --git a/docs/crps_estimators.md b/docs/crps_estimators.md
index 26e7b1a..c7e768e 100644
--- a/docs/crps_estimators.md
+++ b/docs/crps_estimators.md
@@ -215,6 +215,26 @@
 $$

 Some examples are given below.

+More generally, for any positive definite kernel $k$, we have that
+
+$$ \mathbb{E} S_{k}(F_{M}, y) = \mathbb{E} S_{k}(F, y) + \frac{1}{2M} \left( \mathbb{E} k(x_{1}, x_{1}) - \mathbb{E} k(x_{1}, x_{2}) \right). $$
+
+Using this, a fair version of the kernel score is
+
+$$
+ S_{k}^{f}(F_{M}, y) = \frac{1}{M(M - 1)} \sum_{i=1}^{M-1} \sum_{j=i+1}^{M} k(x_{i}, x_{j}) + \frac{1}{2} k(y, y) - \frac{1}{M} \sum_{i=1}^{M} k(x_{i}, y).
+$$
+
+The first term on the right-hand side is simply half the mean of $k(x_i, x_j)$ over all $i,j = 1, \dots, M$ such that $i \neq j$.
+
+For kernels defined in terms of conditionally negative definite functions, as described above, we have that
+
+$$
+\mathbb{E} k(x_{1}, x_{1}) - \mathbb{E} k(x_{1}, x_{2}) = 2 \mathbb{E} \rho(x_1, x_{0}) - \mathbb{E} \left[ \rho(x_1, x_0) + \rho(x_2, x_{0}) - \rho(x_1, x_2) \right] = \mathbb{E} \rho(x_1, x_2),
+$$
+
+showing that we recover the previous results in this case.
+
 #### CRPS

diff --git a/scoringrules/_crps.py b/scoringrules/_crps.py
index 135ffe5..8cb74b5 100644
--- a/scoringrules/_crps.py
+++ b/scoringrules/_crps.py
@@ -14,7 +14,7 @@ def crps_ensemble(
     m_axis: int = -1,
     *,
     sorted_ensemble: bool = False,
-    estimator: str = "pwm",
+    estimator: str = "qd",
     backend: "Backend" = None,
 ) -> "Array":
     r"""Estimate the Continuous Ranked Probability Score (CRPS) for a finite ensemble.
@@ -130,7 +130,7 @@ def twcrps_ensemble(
     m_axis: int = -1,
     *,
     v_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None,
-    estimator: str = "pwm",
+    estimator: str = "qd",
     sorted_ensemble: bool = False,
     backend: "Backend" = None,
 ) -> "Array":
@@ -231,7 +231,6 @@ def owcrps_ensemble(
     m_axis: int = -1,
     *,
     w_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None,
-    estimator: tp.Literal["nrg"] = "nrg",
     backend: "Backend" = None,
 ) -> "Array":
     r"""Estimate the outcome-weighted CRPS (owCRPS) for a finite ensemble.
@@ -308,11 +307,6 @@
     B = backends.active if backend is None else backends[backend]
     obs, fct = map(B.asarray, (obs, fct))

-    if estimator != "nrg":
-        raise ValueError(
-            "Only the energy form of the estimator is available "
-            "for the outcome-weighted CRPS."
-        )
     if m_axis != -1:
         fct = B.moveaxis(fct, m_axis, -1)

@@ -325,9 +319,7 @@ def w_func(x):

     obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights))

     if backend == "numba":
-        return crps.estimator_gufuncs["ow" + estimator](
-            obs, fct, obs_weights, fct_weights
-        )
+        return crps.estimator_gufuncs["ownrg"](obs, fct, obs_weights, fct_weights)

     return crps.ow_ensemble(obs, fct, obs_weights, fct_weights, backend=backend)

@@ -341,7 +333,6 @@ def vrcrps_ensemble(
     m_axis: int = -1,
     *,
     w_func: tp.Callable[["ArrayLike"], "ArrayLike"] = None,
-    estimator: tp.Literal["nrg"] = "nrg",
     backend: "Backend" = None,
 ) -> "Array":
     r"""Estimate the vertically re-scaled CRPS (vrCRPS) for a finite ensemble.
@@ -415,11 +406,6 @@
     B = backends.active if backend is None else backends[backend]
     obs, fct = map(B.asarray, (obs, fct))

-    if estimator != "nrg":
-        raise ValueError(
-            "Only the energy form of the estimator is available "
-            "for the outcome-weighted CRPS."
- ) if m_axis != -1: fct = B.moveaxis(fct, m_axis, -1) @@ -432,9 +418,7 @@ def w_func(x): obs_weights, fct_weights = map(B.asarray, (obs_weights, fct_weights)) if backend == "numba": - return crps.estimator_gufuncs["vr" + estimator]( - obs, fct, obs_weights, fct_weights - ) + return crps.estimator_gufuncs["vrnrg"](obs, fct, obs_weights, fct_weights) return crps.vr_ensemble(obs, fct, obs_weights, fct_weights, backend=backend) diff --git a/scoringrules/_energy.py b/scoringrules/_energy.py index b4fd990..263deec 100644 --- a/scoringrules/_energy.py +++ b/scoringrules/_energy.py @@ -15,6 +15,7 @@ def es_ensemble( m_axis: int = -2, v_axis: int = -1, *, + estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": r"""Compute the Energy Score for a finite multivariate ensemble. @@ -39,6 +40,8 @@ def es_ensemble( v_axis : int The axis corresponding to the variables dimension on the forecasts array (or the observations array with an extra dimension on `m_axis`). Defaults to -1. + estimator : str + The energy score estimator to be used. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. @@ -63,9 +66,14 @@ def es_ensemble( obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) if backend == "numba": - return energy._energy_score_gufunc(obs, fct) + if estimator not in energy.estimator_gufuncs: + raise ValueError( + f"{estimator} is not a valid estimator. " + f"Must be one of {energy.estimator_gufuncs.keys()}" + ) + return energy.estimator_gufuncs[estimator](obs, fct) - return energy.nrg(obs, fct, backend=backend) + return energy.es(obs, fct, estimator=estimator, backend=backend) def twes_ensemble( @@ -76,6 +84,7 @@ def twes_ensemble( m_axis: int = -2, v_axis: int = -1, *, + estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": r"""Compute the Threshold-Weighted Energy Score (twES) for a finite multivariate ensemble. @@ -105,6 +114,8 @@ def twes_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int or tuple of int The axis corresponding to the variables dimension. Defaults to -1. + estimator : str + The energy score estimator to be used. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. @@ -114,7 +125,9 @@ def twes_ensemble( The computed Threshold-Weighted Energy Score. 
""" obs, fct = map(v_func, (obs, fct)) - return es_ensemble(obs, fct, m_axis=m_axis, v_axis=v_axis, backend=backend) + return es_ensemble( + obs, fct, m_axis=m_axis, v_axis=v_axis, estimator=estimator, backend=backend + ) def owes_ensemble( @@ -173,9 +186,9 @@ def owes_ensemble( obs_weights = B.apply_along_axis(w_func, obs, -1) if B.name == "numba": - return energy._owenergy_score_gufunc(obs, fct, obs_weights, fct_weights) + return energy.estimator_gufuncs["ownrg"](obs, fct, obs_weights, fct_weights) - return energy.ownrg(obs, fct, obs_weights, fct_weights, backend=backend) + return energy.owes(obs, fct, obs_weights, fct_weights, backend=backend) def vres_ensemble( @@ -235,6 +248,6 @@ def vres_ensemble( obs_weights = B.apply_along_axis(w_func, obs, -1) if backend == "numba": - return energy._vrenergy_score_gufunc(obs, fct, obs_weights, fct_weights) + return energy.estimator_gufuncs["vrnrg"](obs, fct, obs_weights, fct_weights) - return energy.vrnrg(obs, fct, obs_weights, fct_weights, backend=backend) + return energy.vres(obs, fct, obs_weights, fct_weights, backend=backend) diff --git a/scoringrules/_kernels.py b/scoringrules/_kernels.py index 3ea14c2..c78e923 100644 --- a/scoringrules/_kernels.py +++ b/scoringrules/_kernels.py @@ -60,24 +60,17 @@ def gksuv_ensemble( B = backends.active if backend is None else backends[backend] obs, fct = map(B.asarray, (obs, fct)) + if m_axis != -1: + fct = B.moveaxis(fct, m_axis, -1) + if backend == "numba": if estimator not in kernels.estimator_gufuncs: raise ValueError( f"{estimator} is not a valid estimator. " f"Must be one of {kernels.estimator_gufuncs.keys()}" ) - else: - if estimator not in ["fair", "nrg"]: - raise ValueError( - f"{estimator} is not a valid estimator. " - f"Must be one of ['fair', 'nrg']" - ) - - if m_axis != -1: - fct = B.moveaxis(fct, m_axis, -1) - - if backend == "numba": - return kernels.estimator_gufuncs[estimator](obs, fct) + else: + return kernels.estimator_gufuncs[estimator](obs, fct) return kernels.ensemble_uv(obs, fct, estimator, backend=backend) @@ -386,14 +379,14 @@ def gksmv_ensemble( backend = backend if backend is not None else backends._active obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) - if estimator not in kernels.estimator_gufuncs_mv: - raise ValueError( - f"{estimator} is not a valid estimator. " - f"Must be one of {kernels.estimator_gufuncs_mv.keys()}" - ) - if backend == "numba": - return kernels.estimator_gufuncs_mv[estimator](obs, fct) + if estimator not in kernels.estimator_gufuncs_mv: + raise ValueError( + f"{estimator} is not a valid estimator. " + f"Must be one of {kernels.estimator_gufuncs_mv.keys()}" + ) + else: + return kernels.estimator_gufuncs_mv[estimator](obs, fct) return kernels.ensemble_mv(obs, fct, estimator, backend=backend) diff --git a/scoringrules/_variogram.py b/scoringrules/_variogram.py index b60db8e..fb2b118 100644 --- a/scoringrules/_variogram.py +++ b/scoringrules/_variogram.py @@ -15,7 +15,8 @@ def vs_ensemble( m_axis: int = -2, v_axis: int = -1, *, - p: float = 1.0, + p: float = 0.5, + estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": r"""Compute the Variogram Score for a finite multivariate ensemble. @@ -42,6 +43,8 @@ def vs_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + estimator : str + The variogram score estimator to be used. backend: str The name of the backend used for computations. 
Defaults to 'numba' if available, else 'numpy'. @@ -69,9 +72,12 @@ def vs_ensemble( obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) if backend == "numba": - return variogram._variogram_score_gufunc(obs, fct, p) + if estimator == "nrg": + return variogram._variogram_score_nrg_gufunc(obs, fct, p) + elif estimator == "fair": + return variogram._variogram_score_fair_gufunc(obs, fct, p) - return variogram.vs(obs, fct, p, backend=backend) + return variogram.vs(obs, fct, p, estimator=estimator, backend=backend) def twvs_ensemble( @@ -82,7 +88,8 @@ def twvs_ensemble( m_axis: int = -2, v_axis: int = -1, *, - p: float = 1.0, + p: float = 0.5, + estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": r"""Compute the Threshold-Weighted Variogram Score (twVS) for a finite multivariate ensemble. @@ -111,6 +118,8 @@ def twvs_ensemble( The axis corresponding to the ensemble dimension. Defaults to -2. v_axis : int The axis corresponding to the variables dimension. Defaults to -1. + estimator : str + The variogram score estimator to be used. backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. @@ -137,7 +146,9 @@ def twvs_ensemble( array([5.94996894, 4.72029765, 6.08947229]) """ obs, fct = map(v_func, (obs, fct)) - return vs_ensemble(obs, fct, m_axis, v_axis, p=p, backend=backend) + return vs_ensemble( + obs, fct, m_axis, v_axis, p=p, estimator=estimator, backend=backend + ) def owvs_ensemble( @@ -148,7 +159,7 @@ def owvs_ensemble( m_axis: int = -2, v_axis: int = -1, *, - p: float = 1.0, + p: float = 0.5, backend: "Backend" = None, ) -> "Array": r"""Compute the Outcome-Weighted Variogram Score (owVS) for a finite multivariate ensemble. @@ -223,7 +234,7 @@ def vrvs_ensemble( m_axis: int = -2, v_axis: int = -1, *, - p: float = 1.0, + p: float = 0.5, backend: "Backend" = None, ) -> "Array": r"""Compute the Vertically Re-scaled Variogram Score (vrVS) for a finite multivariate ensemble. 
diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index 132dede..5e0e710 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -299,6 +299,10 @@ def size(self, x: "Array") -> int: def indices(self, x: "Array") -> int: """Return an array representing the indices of a grid.""" + @abc.abstractmethod + def roll(self, x: "Array", shift: int = 1, axis: int = -1) -> int: + """Roll elements of an array along a given axis.""" + @abc.abstractmethod def inv(self, x: "Array") -> "Array": """Return the inverse of a matrix.""" diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index 4d52429..279781a 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -271,6 +271,9 @@ def size(self, x: "Array") -> int: def indices(self, dimensions: tuple) -> "Array": return jnp.indices(dimensions) + def roll(self, x: "Array", shift: int = 1, axis: int = -1) -> "Array": + return jnp.roll(x, shift=shift, axis=axis) + def inv(self, x: "Array") -> "Array": return jnp.linalg.inv(x) diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 577cd2d..03f78d5 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -267,6 +267,9 @@ def size(self, x: "NDArray") -> int: def indices(self, dimensions: tuple) -> "NDArray": return np.indices(dimensions) + def roll(self, x: "NDArray", shift: int = 1, axis: int = -1) -> "NDArray": + return np.roll(x, shift=shift, axis=axis) + def inv(self, x: "NDArray") -> "NDArray": return np.linalg.inv(x) diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py index c017933..70060c3 100644 --- a/scoringrules/backend/tensorflow.py +++ b/scoringrules/backend/tensorflow.py @@ -306,6 +306,9 @@ def indices(self, dimensions: tuple) -> "Tensor": indices = tf.stack(index_grids) return indices + def roll(self, x: "Tensor", shift: int = 1, axis: int = -1) -> "Tensor": + return tf.roll(x, shift=shift, axis=axis) + def inv(self, x: "Tensor") -> "Tensor": return tf.linalg.inv(x) diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index ef10c09..1ef9831 100644 --- a/scoringrules/backend/torch.py +++ b/scoringrules/backend/torch.py @@ -289,6 +289,9 @@ def indices(self, dimensions: tuple) -> "Tensor": indices = torch.stack(index_grids) return indices + def roll(self, x: "Tensor", shift: int = 1, axis: int = -1) -> "Tensor": + return torch.roll(x, shifts=shift, dims=axis) + def inv(self, x: "Tensor") -> "Tensor": return torch.linalg.inv(x) diff --git a/scoringrules/core/crps/_approx.py b/scoringrules/core/crps/_approx.py index 8a40abd..3f282a2 100644 --- a/scoringrules/core/crps/_approx.py +++ b/scoringrules/core/crps/_approx.py @@ -19,12 +19,18 @@ def ensemble( out = _crps_ensemble_pwm(obs, fct, backend=backend) elif estimator == "fair": out = _crps_ensemble_fair(obs, fct, backend=backend) + elif estimator == "qd": + out = _crps_ensemble_qd(obs, fct, backend=backend) + elif estimator == "akr": + out = _crps_ensemble_akr(obs, fct, backend=backend) + elif estimator == "akr_circperm": + out = _crps_ensemble_akr_circperm(obs, fct, backend=backend) + elif estimator == "int": + out = _crps_ensemble_int(obs, fct, backend=backend) else: raise ValueError( - f"{estimator} can only be used with `numpy` " - "backend and needs `numba` to be installed" + f"{estimator} not a valid estimator, must be one of 'nrg', 'fair', 'pwm', 'qd', 'akr', 'akr_circperm' and 'int'." 
) - return out @@ -65,6 +71,58 @@ def _crps_ensemble_pwm( return expected_diff + β_0 - 2.0 * β_1 +def _crps_ensemble_akr( + obs: "Array", fct: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the approximate kernel representation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + e_1 = B.sum(B.abs(obs[..., None] - fct), axis=-1) / M + e_2 = B.sum(B.abs(fct - B.roll(fct, shift=1, axis=-1)), -1) / M + return e_1 - 0.5 * e_2 + + +def _crps_ensemble_akr_circperm( + obs: "Array", fct: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the AKR with cyclic permutation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + e_1 = B.sum(B.abs(obs[..., None] - fct), axis=-1) / M + shift = M // 2 + e_2 = B.sum(B.abs(fct - B.roll(fct, shift=shift, axis=-1)), -1) / M + return e_1 - 0.5 * e_2 + + +def _crps_ensemble_qd(obs: "Array", fct: "Array", backend: "Backend" = None) -> "Array": + """CRPS estimator based on the quantile decomposition form.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + alpha = B.arange(1, M + 1) - 0.5 + below = (fct <= obs[..., None]) * alpha * (obs[..., None] - fct) + above = (fct > obs[..., None]) * (M - alpha) * (fct - obs[..., None]) + out = B.sum(below + above, axis=-1) / (M**2) + return 2 * out + + +def _crps_ensemble_int( + obs: "Array", fct: "Array", backend: "Backend" = None +) -> "Array": + """CRPS estimator based on the integral representation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + y_pos = B.mean((fct <= obs[..., None]) * 1.0, axis=-1, keepdims=True) + fct_cdf = B.zeros(fct.shape) + B.arange(1, M + 1) / M + fct_cdf = B.concat((fct_cdf, y_pos), axis=-1) + fct_cdf = B.sort(fct_cdf, axis=-1) + fct_exp = B.concat((fct, obs[..., None]), axis=-1) + fct_exp = B.sort(fct_exp, axis=-1) + fct_dif = fct_exp[..., 1:] - fct_exp[..., :M] + obs_cdf = (obs[..., None] <= fct_exp) * 1.0 + out = fct_dif * (fct_cdf[..., :M] - obs_cdf[..., :M]) ** 2 + return B.sum(out, axis=-1) + + def quantile_pinball( obs: "Array", fct: "Array", alpha: "Array", backend: "Backend" = None ) -> "Array": diff --git a/scoringrules/core/energy/__init__.py b/scoringrules/core/energy/__init__.py index b9bc987..6144c79 100644 --- a/scoringrules/core/energy/__init__.py +++ b/scoringrules/core/energy/__init__.py @@ -1,23 +1,15 @@ +from ._score import es_ensemble as es +from ._score import owes_ensemble as owes +from ._score import vres_ensemble as vres + try: - from ._gufuncs import ( - _energy_score_gufunc, - _owenergy_score_gufunc, - _vrenergy_score_gufunc, - ) + from ._gufuncs import estimator_gufuncs except ImportError: - _energy_score_gufunc = None - _owenergy_score_gufunc = None - _vrenergy_score_gufunc = None - -from ._score import es_ensemble as nrg -from ._score import owes_ensemble as ownrg -from ._score import vres_ensemble as vrnrg + estimator_gufuncs = None __all__ = [ - "nrg", - "ownrg", - "vrnrg", - "_energy_score_gufunc", - "_owenergy_score_gufunc", - "_vrenergy_score_gufunc", + "es", + "owes", + "vres", + "estimator_gufuncs", ] diff --git a/scoringrules/core/energy/_gufuncs.py b/scoringrules/core/energy/_gufuncs.py index 13aae9c..bd38fba 100644 --- a/scoringrules/core/energy/_gufuncs.py +++ b/scoringrules/core/energy/_gufuncs.py @@ -12,7 +12,7 @@ "(d),(m,d)->()", cache=True, ) -def _energy_score_gufunc( +def _energy_score_nrg_gufunc( obs: np.ndarray, fct: 
np.ndarray, out: np.ndarray, @@ -30,6 +30,65 @@ def _energy_score_gufunc( out[0] = e_1 / M - 0.5 / (M**2) * e_2 +@guvectorize( + [ + "void(float32[:], float32[:,:], float32[:])", + "void(float64[:], float64[:,:], float64[:])", + ], + "(d),(m,d)->()", + cache=True, +) +def _energy_score_fair_gufunc( + obs: np.ndarray, + fct: np.ndarray, + out: np.ndarray, +): + """Compute the fair Energy Score for a finite ensemble.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(fct[i] - obs)) + for j in range(i + 1, M): + e_2 += 2 * float(np.linalg.norm(fct[i] - fct[j])) + + out[0] = e_1 / M - 0.5 / (M * (M - 1)) * e_2 + + +@lazy_gufunc_wrapper_mv +@guvectorize("(d),(m,d)->()") +def _energy_score_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): + """Compute the Energy Score for a finite ensemble using the approximate kernel representation.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + e_1 += float(np.linalg.norm(fct[i] - obs)) + e_2 += float(np.linalg.norm(fct[i] - fct[i - 1])) + + out[0] = e_1 / M - 0.5 * 1 / M * e_2 + + +@lazy_gufunc_wrapper_mv +@guvectorize("(d),(m,d)->()") +def _energy_score_akr_circperm_gufunc( + obs: np.ndarray, fct: np.ndarray, out: np.ndarray +): + """Compute the Energy Score for a finite ensemble using the AKR with cyclic permutation.""" + M = fct.shape[0] + + e_1 = 0.0 + e_2 = 0.0 + for i in range(M): + sigma_i = int((i + (M // 2)) % M) + e_1 += float(np.linalg.norm(fct[i] - obs)) + e_2 += float(np.linalg.norm(fct[i] - fct[sigma_i])) + + out[0] = e_1 / M - 0.5 * 1 / M * e_2 + + @lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(m)->()") def _owenergy_score_gufunc( @@ -80,3 +139,22 @@ def _vrenergy_score_gufunc( wabs_y = np.linalg.norm(obs) * ow out[0] = e_1 / M - 0.5 * e_2 / (M**2) + (wabs_x - wabs_y) * (wbar - ow) + + +estimator_gufuncs = { + "akr_circperm": _energy_score_akr_circperm_gufunc, + "akr": _energy_score_akr_gufunc, + "fair": _energy_score_fair_gufunc, + "nrg": _energy_score_nrg_gufunc, + "ownrg": _owenergy_score_gufunc, + "vrnrg": _vrenergy_score_gufunc, +} + +__all__ = [ + "_energy_score_akr_circperm_gufunc", + "_energy_score_akr_gufunc", + "_energy_score_fair_gufunc", + "_energy_score_nrg_gufunc", + "_owenergy_score_gufunc", + "_vrenergy_score_gufunc", +] diff --git a/scoringrules/core/energy/_score.py b/scoringrules/core/energy/_score.py index e264c4b..ad9f63c 100644 --- a/scoringrules/core/energy/_score.py +++ b/scoringrules/core/energy/_score.py @@ -6,12 +6,32 @@ from scoringrules.core.typing import Array, Backend -def es_ensemble(obs: "Array", fct: "Array", backend=None) -> "Array": +def es_ensemble( + obs: "Array", fct: "Array", estimator: str = "nrg", backend=None +) -> "Array": """ Compute the energy score based on a finite ensemble. The ensemble and variables axes are on the second last and last dimensions respectively. """ + if estimator == "nrg": + out = _es_ensemble_nrg(obs, fct, backend=backend) + elif estimator == "fair": + out = _es_ensemble_fair(obs, fct, backend=backend) + elif estimator == "akr": + out = _es_ensemble_akr(obs, fct, backend=backend) + elif estimator == "akr_circperm": + out = _es_ensemble_akr_circperm(obs, fct, backend=backend) + else: + raise ValueError( + f"For the energy score, {estimator} must be one of 'nrg', 'fair', 'akr', and 'akr_circperm'." 
+ ) + + return out + + +def _es_ensemble_nrg(obs: "Array", fct: "Array", backend: "Backend" = None) -> "Array": + """Compute the Energy Score for a finite ensemble.""" B = backends.active if backend is None else backends[backend] M: int = fct.shape[-2] @@ -20,6 +40,52 @@ def es_ensemble(obs: "Array", fct: "Array", backend=None) -> "Array": spread_norm = B.norm(B.expand_dims(fct, -3) - B.expand_dims(fct, -2), -1) E_2 = B.sum(spread_norm, (-2, -1)) / (M**2) + + return E_1 - 0.5 * E_2 + + +def _es_ensemble_fair(obs: "Array", fct: "Array", backend: "Backend" = None) -> "Array": + """Compute the fair Energy Score for a finite ensemble.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + E_1 = B.sum(err_norm, -1) / M + + spread_norm = B.norm(B.expand_dims(fct, -3) - B.expand_dims(fct, -2), -1) + E_2 = B.sum(spread_norm, (-2, -1)) / (M * (M - 1)) + + return E_1 - 0.5 * E_2 + + +def _es_ensemble_akr(obs: "Array", fct: "Array", backend: "Backend" = None) -> "Array": + """Compute the Energy Score for a finite ensemble using the approximate kernel representation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + E_1 = B.sum(err_norm, -1) / M + + spread_norm = B.norm(fct - B.roll(fct, shift=1, axis=-2), -1) + E_2 = B.sum(spread_norm, -1) / M + + return E_1 - 0.5 * E_2 + + +def _es_ensemble_akr_circperm( + obs: "Array", fct: "Array", backend: "Backend" = None +) -> "Array": + """Compute the Energy Score for a finite ensemble using the AKR with cyclic permutation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + E_1 = B.sum(err_norm, -1) / M + + shift = M // 2 + spread_norm = B.norm(fct - B.roll(fct, shift=shift, axis=-2), -1) + E_2 = B.sum(spread_norm, -1) / M + return E_1 - 0.5 * E_2 diff --git a/scoringrules/core/kernels/_approx.py b/scoringrules/core/kernels/_approx.py index 1d581bf..76f8bce 100644 --- a/scoringrules/core/kernels/_approx.py +++ b/scoringrules/core/kernels/_approx.py @@ -29,19 +29,90 @@ def ensemble_uv( backend: "Backend" = None, ) -> "Array": """Compute a kernel score for a finite ensemble.""" + if estimator == "nrg": + out = _ks_ensemble_nrg_uv(obs, fct, backend=backend) + elif estimator == "fair": + out = _ks_ensemble_fair_uv(obs, fct, backend=backend) + elif estimator == "akr": + out = _ks_ensemble_akr_uv(obs, fct, backend=backend) + elif estimator == "akr_circperm": + out = _ks_ensemble_akr_circperm_uv(obs, fct, backend=backend) + else: + raise ValueError( + f"{estimator} not a valid estimator, must be one of 'nrg', 'fair', 'akr', and 'akr_circperm'." 
+ ) + + return out + + +def _ks_ensemble_nrg_uv( + obs: "ArrayLike", fct: "Array", backend: "Backend" = None +) -> "Array": + """Standard version of the kernel score.""" B = backends.active if backend is None else backends[backend] M: int = fct.shape[-1] - e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend), axis=-1) / M + e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend), axis=-1) e_2 = B.sum( gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend), axis=(-1, -2), - ) / (M**2) - e_3 = gauss_kern_uv(obs, obs) + ) + e_3 = gauss_kern_uv(obs, obs, backend=backend) - if estimator == "nrg": - out = e_1 - 0.5 * e_2 - 0.5 * e_3 - elif estimator == "fair": - out = e_1 - 0.5 * e_2 * (M / (M - 1)) - 0.5 * e_3 + out = e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3 + + return -out + + +def _ks_ensemble_fair_uv( + obs: "ArrayLike", fct: "Array", backend: "Backend" = None +) -> "Array": + """Fair version of the kernel score.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend), axis=-1) + e_2 = B.sum( + gauss_kern_uv(fct[..., None], fct[..., None, :], backend=backend), + axis=(-1, -2), + ) + e_2 -= B.sum(gauss_kern_uv(fct, fct, backend=backend), axis=-1) + e_3 = gauss_kern_uv(obs, obs, backend=backend) + + out = e_1 / M - 0.5 * e_2 / (M * (M - 1)) - 0.5 * e_3 + + return -out + + +def _ks_ensemble_akr_uv( + obs: "ArrayLike", fct: "Array", backend: "Backend" = None +) -> "Array": + """Approximate kernel representation estimator of the kernel score.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend), axis=-1) + e_2 = B.sum( + gauss_kern_uv(fct, B.roll(fct, shift=1, axis=-1), backend=backend), axis=-1 + ) + e_3 = gauss_kern_uv(obs, obs, backend=backend) + + out = e_1 / M - 0.5 * e_2 / M - 0.5 * e_3 + + return -out + + +def _ks_ensemble_akr_circperm_uv( + obs: "ArrayLike", fct: "Array", backend: "Backend" = None +) -> "Array": + """AKR estimator of the kernel score with cyclic permutation.""" + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-1] + e_1 = B.sum(gauss_kern_uv(obs[..., None], fct, backend=backend), axis=-1) + shift = M // 2 + e_2 = B.sum( + gauss_kern_uv(fct, B.roll(fct, shift=shift, axis=-1), backend=backend), axis=-1 + ) + e_3 = gauss_kern_uv(obs, obs, backend=backend) + + out = e_1 / M - 0.5 * e_2 / M - 0.5 * e_3 return -out @@ -56,22 +127,90 @@ def ensemble_mv( The ensemble and variables axes are on the second last and last dimensions respectively. """ + if estimator == "nrg": + out = _ks_ensemble_nrg_mv(obs, fct, backend=backend) + elif estimator == "fair": + out = _ks_ensemble_fair_mv(obs, fct, backend=backend) + elif estimator == "akr": + out = _ks_ensemble_akr_mv(obs, fct, backend=backend) + elif estimator == "akr_circperm": + out = _ks_ensemble_akr_circperm_mv(obs, fct, backend=backend) + else: + raise ValueError( + f"{estimator} not a valid estimator, must be one of 'nrg', 'fair', 'akr', and 'akr_circperm'." 
+ ) + + return out + + +def _ks_ensemble_nrg_mv( + obs: "ArrayLike", fct: "Array", backend: "Backend" = None +) -> "Array": B = backends.active if backend is None else backends[backend] M: int = fct.shape[-2] - e_1 = ( - B.sum(gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend), axis=-1) / M + e_1 = B.sum(gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend), axis=-1) + e_2 = B.sum( + gauss_kern_mv(B.expand_dims(fct, -3), B.expand_dims(fct, -2), backend=backend), + axis=(-2, -1), ) + e_3 = gauss_kern_mv(obs, obs, backend=backend) + + out = e_1 / M - 0.5 * e_2 / (M**2) - 0.5 * e_3 + + return -out + + +def _ks_ensemble_fair_mv( + obs: "ArrayLike", fct: "Array", backend: "Backend" = None +) -> "Array": + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + e_1 = B.sum(gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend), axis=-1) e_2 = B.sum( gauss_kern_mv(B.expand_dims(fct, -3), B.expand_dims(fct, -2), backend=backend), axis=(-2, -1), - ) / (M**2) - e_3 = gauss_kern_mv(obs, obs) + ) + e_2 -= B.sum(gauss_kern_mv(fct, fct, backend=backend), axis=-1) + e_3 = gauss_kern_mv(obs, obs, backend=backend) - if estimator == "nrg": - out = e_1 - 0.5 * e_2 - 0.5 * e_3 - elif estimator == "fair": - out = e_1 - 0.5 * e_2 * (M / (M - 1)) - 0.5 * e_3 + out = e_1 / M - 0.5 * e_2 / (M * (M - 1)) - 0.5 * e_3 + + return -out + + +def _ks_ensemble_akr_mv( + obs: "ArrayLike", fct: "Array", backend: "Backend" = None +) -> "Array": + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + e_1 = B.sum(gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend), axis=-1) + e_2 = B.sum( + gauss_kern_mv(fct, B.roll(fct, shift=1, axis=-2), backend=backend), axis=-1 + ) + e_3 = gauss_kern_mv(obs, obs, backend=backend) + + out = e_1 / M - 0.5 * e_2 / M - 0.5 * e_3 + + return -out + + +def _ks_ensemble_akr_circperm_mv( + obs: "ArrayLike", fct: "Array", backend: "Backend" = None +) -> "Array": + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + + e_1 = B.sum(gauss_kern_mv(B.expand_dims(obs, -2), fct, backend=backend), axis=-1) + shift = M // 2 + e_2 = B.sum( + gauss_kern_mv(fct, B.roll(fct, shift=shift, axis=-2), backend=backend), axis=-1 + ) + e_3 = gauss_kern_mv(obs, obs, backend=backend) + + out = e_1 / M - 0.5 * e_2 / M - 0.5 * e_3 return -out diff --git a/scoringrules/core/kernels/_gufuncs.py b/scoringrules/core/kernels/_gufuncs.py index 91c8c86..90fe8fa 100644 --- a/scoringrules/core/kernels/_gufuncs.py +++ b/scoringrules/core/kernels/_gufuncs.py @@ -55,15 +55,58 @@ def _ks_ensemble_uv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarra e_1 = 0 e_2 = 0 - for x_i in fct: - e_1 += _gauss_kern_uv(x_i, obs) - for x_j in fct: - e_2 += _gauss_kern_uv(x_i, x_j) + for i in range(M): + e_1 += _gauss_kern_uv(fct[i], obs) + for j in range(i + 1, M): # important to start from i + 1 and not i + e_2 += 2 * _gauss_kern_uv(fct[j], fct[i]) e_3 = _gauss_kern_uv(obs, obs) out[0] = -(e_1 / M - 0.5 * e_2 / (M * (M - 1)) - 0.5 * e_3) +@lazy_gufunc_wrapper_uv +@guvectorize("(),(n)->()") +def _ks_ensemble_uv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): + """Approximate kernel representation estimator of the kernel score.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0 + e_2 = 0 + for i in range(M): + e_1 += _gauss_kern_uv(fct[i], obs) + e_2 += _gauss_kern_uv(fct[i], fct[i - 1]) + e_3 = _gauss_kern_uv(obs, obs) + + out[0] = -(e_1 / M - 0.5 * e_2 / M - 
0.5 * e_3) + + +@lazy_gufunc_wrapper_uv +@guvectorize("(),(n)->()") +def _ks_ensemble_uv_akr_circperm_gufunc( + obs: np.ndarray, fct: np.ndarray, out: np.ndarray +): + """AKR estimator of the kernel score with cyclic permutation.""" + M = fct.shape[-1] + + if np.isnan(obs): + out[0] = np.nan + return + + e_1 = 0 + e_2 = 0 + for i in range(M): + sigma_i = int((i + 1 + ((M - 1) / 2)) % M) + e_1 += _gauss_kern_uv(fct[i], obs) + e_2 += _gauss_kern_uv(fct[i], fct[sigma_i]) + e_3 = _gauss_kern_uv(obs, obs) + + out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) + + @lazy_gufunc_wrapper_uv @guvectorize("(),(n),(),(n)->()") def _owks_ensemble_uv_gufunc( @@ -149,11 +192,46 @@ def _ks_ensemble_mv_fair_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarra e_2 = 0.0 for i in range(M): e_1 += float(_gauss_kern_mv(fct[i], obs)) - for j in range(M): + for j in range(i + 1, M): e_2 += float(_gauss_kern_mv(fct[i], fct[j])) e_3 = float(_gauss_kern_mv(obs, obs)) - out[0] = -(e_1 / M - 0.5 * e_2 / (M * (M - 1)) - 0.5 * e_3) + out[0] = -(e_1 / M - e_2 / (M * (M - 1)) - 0.5 * e_3) + + +@lazy_gufunc_wrapper_mv +@guvectorize("(d),(m,d)->()") +def _ks_ensemble_mv_akr_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): + """Approximate kernel representation estimator of the multivariate kernel score.""" + M = fct.shape[0] + + e_1 = 0 + e_2 = 0 + for i in range(M): + e_1 += float(_gauss_kern_mv(fct[i], obs)) + e_2 += float(_gauss_kern_mv(fct[i], fct[i - 1])) + e_3 = float(_gauss_kern_mv(obs, obs)) + + out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) + + +@lazy_gufunc_wrapper_mv +@guvectorize("(d),(m,d)->()") +def _ks_ensemble_mv_akr_circperm_gufunc( + obs: np.ndarray, fct: np.ndarray, out: np.ndarray +): + """AKR estimator of the multivariate kernel score with cyclic permutation.""" + M = fct.shape[0] + + e_1 = 0 + e_2 = 0 + for i in range(M): + sigma_i = int((i + 1 + ((M - 1) / 2)) % M) + e_1 += float(_gauss_kern_mv(fct[i], obs)) + e_2 += float(_gauss_kern_mv(fct[i], fct[sigma_i])) + e_3 = float(_gauss_kern_mv(obs, obs)) + + out[0] = -(e_1 / M - 0.5 * e_2 / M - 0.5 * e_3) @lazy_gufunc_wrapper_mv @@ -205,6 +283,8 @@ def _vrks_ensemble_mv_gufunc( estimator_gufuncs = { + "akr": _ks_ensemble_uv_akr_gufunc, + "akr_circperm": _ks_ensemble_uv_akr_circperm_gufunc, "fair": _ks_ensemble_uv_fair_gufunc, "nrg": _ks_ensemble_uv_nrg_gufunc, "ow": _owks_ensemble_uv_gufunc, @@ -212,6 +292,8 @@ def _vrks_ensemble_mv_gufunc( } estimator_gufuncs_mv = { + "akr": _ks_ensemble_mv_akr_gufunc, + "akr_circperm": _ks_ensemble_mv_akr_circperm_gufunc, "fair": _ks_ensemble_mv_fair_gufunc, "nrg": _ks_ensemble_mv_nrg_gufunc, "ow": _owks_ensemble_mv_gufunc, @@ -219,10 +301,14 @@ def _vrks_ensemble_mv_gufunc( } __all__ = [ + "_ks_ensemble_uv_akr_gufunc", + "_ks_ensemble_uv_akr_circperm_gufunc", "_ks_ensemble_uv_fair_gufunc", "_ks_ensemble_uv_nrg_gufunc", "_owks_ensemble_uv_gufunc", "_vrks_ensemble_uv_gufunc", + "_ks_ensemble_mv_akr_gufunc", + "_ks_ensemble_mv_akr_circperm_gufunc", "_ks_ensemble_mv_fair_gufunc", "_ks_ensemble_mv_nrg_gufunc", "_owks_ensemble_mv_gufunc", diff --git a/scoringrules/core/variogram/__init__.py b/scoringrules/core/variogram/__init__.py index 1e74527..26a37d4 100644 --- a/scoringrules/core/variogram/__init__.py +++ b/scoringrules/core/variogram/__init__.py @@ -1,12 +1,14 @@ try: from ._gufuncs import ( + _variogram_score_nrg_gufunc, + _variogram_score_fair_gufunc, _owvariogram_score_gufunc, - _variogram_score_gufunc, _vrvariogram_score_gufunc, ) except ImportError: + _variogram_score_nrg_gufunc = None + 
_variogram_score_fair_gufunc = None _owvariogram_score_gufunc = None - _variogram_score_gufunc = None _vrvariogram_score_gufunc = None from ._score import owvs_ensemble as owvs @@ -17,7 +19,8 @@ "vs", "owvs", "vrvs", - "_variogram_score_gufunc", + "_variogram_score_nrg_gufunc", + "_variogram_score_fair_gufunc", "_owvariogram_score_gufunc", "_vrvariogram_score_gufunc", ] diff --git a/scoringrules/core/variogram/_gufuncs.py b/scoringrules/core/variogram/_gufuncs.py index e3eb1f4..1402d28 100644 --- a/scoringrules/core/variogram/_gufuncs.py +++ b/scoringrules/core/variogram/_gufuncs.py @@ -11,7 +11,7 @@ ], "(d),(m,d),()->()", ) -def _variogram_score_gufunc(obs, fct, p, out): +def _variogram_score_nrg_gufunc(obs, fct, p, out): M = fct.shape[-2] D = fct.shape[-1] out[0] = 0.0 @@ -25,6 +25,30 @@ def _variogram_score_gufunc(obs, fct, p, out): out[0] += (vobs - vfct) ** 2 +@lazy_gufunc_wrapper_mv +@guvectorize("(d),(m,d),()->()") +def _variogram_score_fair_gufunc(obs, fct, p, out): + M = fct.shape[-2] + D = fct.shape[-1] + + e_1 = 0.0 + e_2 = 0.0 + for k in range(M): + for i in range(D): + for j in range(D): + rho1 = abs(fct[k, i] - fct[k, j]) ** p + rho2 = abs(obs[i] - obs[j]) ** p + e_1 += (rho1 - rho2) ** 2 + for m in range(k + 1, M): + for i in range(D): + for j in range(D): + rho1 = abs(fct[k, i] - fct[k, j]) ** p + rho2 = abs(fct[m, i] - fct[m, j]) ** p + e_2 += 2 * ((rho1 - rho2) ** 2) + + out[0] = e_1 / M - 0.5 * e_2 / (M * (M - 1)) + + @lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),(),(),(m)->()") def _owvariogram_score_gufunc(obs, fct, p, ow, fw, out): diff --git a/scoringrules/core/variogram/_score.py b/scoringrules/core/variogram/_score.py index 8850cb5..f6d091b 100644 --- a/scoringrules/core/variogram/_score.py +++ b/scoringrules/core/variogram/_score.py @@ -9,17 +9,37 @@ def vs_ensemble( obs: "Array", # (... D) fct: "Array", # (... M D) - p: float = 1, + p: float = 0.5, + estimator: str = "nrg", backend: "Backend" = None, ) -> "Array": """Compute the Variogram Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] M: int = fct.shape[-2] - fct_diff = B.expand_dims(fct, -2) - B.expand_dims(fct, -1) # (... M D D) - vfct = B.sum(B.abs(fct_diff) ** p, axis=-3) / M # (... D D) - obs_diff = B.expand_dims(obs, -2) - B.expand_dims(obs, -1) # (... D D) - vobs = B.abs(obs_diff) ** p # (... D D) - return B.sum((vobs - vfct) ** 2, axis=(-2, -1)) # (...) + + fct_diff = ( + B.abs(B.expand_dims(fct, -2) - B.expand_dims(fct, -1)) ** p + ) # (... M D D) + obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) + + if estimator == "nrg": + vfct = B.sum(fct_diff, axis=-3) / M # (... D D) + out = B.sum((obs_diff - vfct) ** 2, axis=(-2, -1)) # (...) + + elif estimator == "fair": + E_1 = (fct_diff - B.expand_dims(obs_diff, axis=-3)) ** 2 # (... M D D) + E_1 = B.sum(E_1, axis=(-2, -1)) # (... M) + E_1 = B.sum(E_1, axis=-1) / M # (...) + + E_2 = ( + B.expand_dims(fct_diff, -3) - B.expand_dims(fct_diff, -4) + ) ** 2 # (... M M D D) + E_2 = B.sum(E_2, axis=(-2, -1)) # (... M M) + E_2 = B.sum(E_2, axis=(-2, -1)) / (M * (M - 1)) # (...) 
+ + out = E_1 - 0.5 * E_2 + + return out def owvs_ensemble( @@ -27,34 +47,25 @@ def owvs_ensemble( fct: "Array", ow: "Array", fw: "Array", - p: float = 1, + p: float = 0.5, backend: "Backend" = None, ) -> "Array": """Compute the Outcome-Weighted Variogram Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] - M: int = fct.shape[-2] - wbar = B.mean(fw, axis=-1) - - fct_diff = B.expand_dims(fct, -2) - B.expand_dims(fct, -1) # (... M D D) - fct_diff = B.abs(fct_diff) ** p # (... M D D) + M = fct.shape[-2] + wbar = B.sum(fw, -1) / M - obs_diff = B.expand_dims(obs, -2) - B.expand_dims(obs, -1) # (... D D) - obs_diff = B.abs(obs_diff) ** p # (... D D) - del obs, fct - - E_1 = (fct_diff - B.expand_dims(obs_diff, -3)) ** 2 # (... M D D) - E_1 = B.sum(E_1, axis=(-2, -1)) # (... M) - E_1 = B.sum(E_1 * fw * B.expand_dims(ow, -1), axis=-1) / (M * wbar) # (...) + fct_diff = ( + B.abs(B.expand_dims(fct, -2) - B.expand_dims(fct, -1)) ** p + ) # (... M D D) + obs_diff = B.abs(B.expand_dims(obs, -2) - B.expand_dims(obs, -1)) ** p # (... D D) - fct_diff_spread = B.expand_dims(fct_diff, -3) - B.expand_dims( - fct_diff, -4 - ) # (... M M D D) - fw_prod = B.expand_dims(fw, -2) * B.expand_dims(fw, -1) # (... M M) - E_2 = B.sum(fct_diff_spread**2, axis=(-2, -1)) # (... M M) - E_2 *= fw_prod * B.expand_dims(ow, (-2, -1)) # (... M M) - E_2 = B.sum(E_2, axis=(-2, -1)) / (M**2 * wbar**2) # (...) + vfct = B.sum(fct_diff * B.expand_dims(fw, (-2, -1)), axis=-3) / ( + M * B.expand_dims(wbar, (-2, -1)) + ) # (... D D) + out = B.sum(((obs_diff - vfct) ** 2), axis=(-2, -1)) * ow # (...) - return E_1 - 0.5 * E_2 + return out def vrvs_ensemble( @@ -62,7 +73,7 @@ def vrvs_ensemble( fct: "Array", ow: "Array", fw: "Array", - p: float = 1, + p: float = 0.5, backend: "Backend" = None, ) -> "Array": """Compute the Vertically Re-scaled Variogram Score for a multivariate finite ensemble.""" diff --git a/tests/test_crps.py b/tests/test_crps.py index d61672b..3970cc1 100644 --- a/tests/test_crps.py +++ b/tests/test_crps.py @@ -20,15 +20,9 @@ def test_crps_ensemble(estimator, backend): fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] # test exceptions - if backend in ["numpy", "jax", "torch", "tensorflow"]: - if estimator not in ["nrg", "fair", "pwm"]: - with pytest.raises(ValueError): - sr.crps_ensemble(obs, fct, estimator=estimator, backend=backend) - return - if backend == "numba": - with pytest.raises(ValueError): - est = "undefined_estimator" - sr.crps_ensemble(obs, fct, estimator=est, backend=backend) + with pytest.raises(ValueError): + est = "undefined_estimator" + sr.crps_ensemble(obs, fct, estimator=est, backend=backend) # test shapes res = sr.crps_ensemble(obs, fct, estimator=estimator, backend=backend) @@ -55,6 +49,25 @@ def test_crps_ensemble(estimator, backend): assert not np.any(res - 0.0 > 0.0001) +@pytest.mark.parametrize("backend", BACKENDS) +def test_crps_estimators(backend): + obs = np.random.randn(N) + mu = obs + np.random.randn(N) * 0.3 + sigma = abs(np.random.randn(N)) * 0.5 + fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] + + # equality of estimators + res_nrg = sr.crps_ensemble(obs, fct, estimator="nrg", backend=backend) + res_fair = sr.crps_ensemble(obs, fct, estimator="fair", backend=backend) + res_pwm = sr.crps_ensemble(obs, fct, estimator="pwm", backend=backend) + res_qd = sr.crps_ensemble(obs, fct, estimator="qd", backend=backend) + res_int = sr.crps_ensemble(obs, fct, estimator="int", 
backend=backend) + + assert np.allclose(res_nrg, res_qd) + assert np.allclose(res_nrg, res_int) + assert np.allclose(res_fair, res_pwm) + + @pytest.mark.parametrize("backend", BACKENDS) def test_crps_quantile(backend): # test shapes diff --git a/tests/test_energy.py b/tests/test_energy.py index 90c74d9..8cc4848 100644 --- a/tests/test_energy.py +++ b/tests/test_energy.py @@ -8,26 +8,56 @@ N = 20 N_VARS = 3 +ESTIMATORS = ["nrg", "fair", "akr", "akr_circperm"] + +@pytest.mark.parametrize("estimator", ESTIMATORS) @pytest.mark.parametrize("backend", BACKENDS) -def test_energy_score(backend): - # test exceptions TODO: add more checks in the code +def test_energy_score(estimator, backend): + # test exceptions + + # incorrect dimensions with pytest.raises(ValueError): obs = np.random.randn(N, N_VARS) fct = np.random.randn(N, ENSEMBLE_SIZE, N_VARS - 1) - sr.es_ensemble(obs, fct, backend=backend) + sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) + + # undefined estimator + with pytest.raises(ValueError): + fct = np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + est = "undefined_estimator" + sr.es_ensemble(obs, fct, estimator=est, backend=backend) + + # test output - # test shapes + # shapes obs = np.random.randn(N, N_VARS) fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) - res = sr.es_ensemble(obs, fct, backend=backend) + res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) assert res.shape == (N,) - # test correctness + # correctness fct = np.array( - [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + [ + [0.79546742, 0.4777960, 0.2164079, 0.5409873], + [0.02461368, 0.7584595, 0.3181810, 0.1319422], + ] ).T obs = np.array([0.2743836, 0.8146400]) - res = sr.es_ensemble(obs, fct, backend=backend) - expected = 0.334542 # TODO: test this against scoringRules - np.testing.assert_allclose(res, expected, atol=1e-6) + + if estimator == "nrg": + res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) + expected = 0.3949793 + np.testing.assert_allclose(res, expected, atol=1e-6) + elif estimator == "fair": + res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) + expected = 0.3274585 + np.testing.assert_allclose(res, expected, atol=1e-6) + elif estimator == "akr": + res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) + expected = 0.3522818 + np.testing.assert_allclose(res, expected, atol=1e-6) + elif estimator == "akr_circperm": + res = sr.es_ensemble(obs, fct, estimator=estimator, backend=backend) + expected = 0.2778118 + np.testing.assert_allclose(res, expected, atol=1e-6) diff --git a/tests/test_kernels.py b/tests/test_kernels.py index 6c42122..259204d 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -9,7 +9,7 @@ N = 10 N_VARS = 3 -ESTIMATORS = ["nrg", "fair"] +ESTIMATORS = ["nrg", "fair", "akr", "akr_circperm"] @pytest.mark.parametrize("estimator", ESTIMATORS) @@ -20,46 +20,50 @@ def test_gksuv(estimator, backend): sigma = abs(np.random.randn(N)) * 0.3 fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] - # non-negative values - res = sr.gksuv_ensemble(obs, fct, estimator=estimator, backend=backend) - res = np.asarray(res) - assert not np.any(res < 0.0) + # undefined estimator + with pytest.raises(ValueError): + est = "undefined_estimator" + sr.gksuv_ensemble(obs, fct, estimator=est, backend=backend) # m_axis keyword - res = sr.gksuv_ensemble( - obs, - np.random.randn(ENSEMBLE_SIZE, N), - m_axis=0, - estimator=estimator, - backend=backend, - ) 
- res = np.asarray(res) - assert not np.any(res < 0.0) + res = sr.gksuv_ensemble(obs, fct, estimator=estimator, backend=backend) + res2 = sr.gksuv_ensemble(obs, fct.T, m_axis=0, estimator=estimator, backend=backend) + assert np.allclose(res, res2) if estimator == "nrg": + # non-negative values + res = sr.gksuv_ensemble(obs, fct, estimator=estimator, backend=backend) + res = np.asarray(res) + assert not np.any(res < 0.0) + # approx zero when perfect forecast perfect_fct = obs[..., None] + np.random.randn(N, ENSEMBLE_SIZE) * 0.00001 res = sr.gksuv_ensemble(obs, perfect_fct, estimator=estimator, backend=backend) res = np.asarray(res) assert not np.any(res - 0.0 > 0.0001) - # test correctness - obs, fct = 11.6, np.array([9.8, 8.7, 11.9, 12.1, 13.4]) + # test correctness + obs, fct = 11.6, np.array([9.8, 8.7, 11.9, 12.1, 13.4]) + + if estimator == "nrg": res = sr.gksuv_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.2490516 assert np.isclose(res, expected) elif estimator == "fair": - # test correctness - obs, fct = 11.6, np.array([9.8, 8.7, 11.9, 12.1, 13.4]) res = sr.gksuv_ensemble(obs, fct, estimator=estimator, backend=backend) - expected = 0.2987752 + expected = 0.1737752 assert np.isclose(res, expected) - # test exceptions - with pytest.raises(ValueError): - est = "undefined_estimator" - sr.gksuv_ensemble(obs, fct, estimator=est, backend=backend) + elif estimator == "akr": + res = sr.gksuv_ensemble(obs, fct, estimator=estimator, backend=backend) + expected = 0.2464915 + assert np.isclose(res, expected) + + elif estimator == "akr_circperm": + res = sr.gksuv_ensemble(obs, fct, estimator=estimator, backend=backend) + expected = 0.1010588 + assert np.isclose(res, expected) @pytest.mark.parametrize("estimator", ESTIMATORS) @@ -68,6 +72,11 @@ def test_gksmv(estimator, backend): obs = np.random.randn(N, N_VARS) fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + # undefined estimator + with pytest.raises(ValueError): + est = "undefined_estimator" + sr.gksmv_ensemble(obs, fct, estimator=est, backend=backend) + res = sr.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) if backend in ["numpy", "numba"]: @@ -75,8 +84,8 @@ def test_gksmv(estimator, backend): elif backend == "jax": assert "jax" in res.__module__ + # approx zero when perfect forecast if estimator == "nrg": - # approx zero when perfect forecast perfect_fct = ( np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) * 0.00001 @@ -85,29 +94,31 @@ def test_gksmv(estimator, backend): res = np.asarray(res) assert not np.any(res - 0.0 > 0.0001) - # test correctness - obs = np.array([11.6, -23.1]) - fct = np.array( - [[9.8, 8.7, 11.9, 12.1, 13.4], [-24.8, -18.5, -29.9, -18.3, -21.0]] - ).transpose() + # test correctness + obs = np.array([11.6, -23.1]) + fct = np.array( + [[9.8, 8.7, 11.9, 12.1, 13.4], [-24.8, -18.5, -29.9, -18.3, -21.0]] + ).transpose() + + if estimator == "nrg": res = sr.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) expected = 0.5868737 assert np.isclose(res, expected) elif estimator == "fair": - # test correctness - obs = np.array([11.6, -23.1]) - fct = np.array( - [[9.8, 8.7, 11.9, 12.1, 13.4], [-24.8, -18.5, -29.9, -18.3, -21.0]] - ).transpose() res = sr.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) - expected = 0.6120162 + expected = 0.4870162 assert np.isclose(res, expected) - # test exceptions - with pytest.raises(ValueError): - est = "undefined_estimator" - sr.gksmv_ensemble(obs, fct, estimator=est, 
backend=backend) + elif estimator == "akr": + res = sr.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) + expected = 0.4874259 + assert np.isclose(res, expected) + + elif estimator == "akr_circperm": + res = sr.gksmv_ensemble(obs, fct, estimator=estimator, backend=backend) + expected = 0.4866066 + assert np.isclose(res, expected) @pytest.mark.parametrize("estimator", ESTIMATORS) @@ -212,43 +223,6 @@ def v_func2(x): ) np.testing.assert_allclose(res, 0.0089314, rtol=1e-6) - elif estimator == "fair": - res = np.mean( - np.float64( - sr.twgksuv_ensemble( - obs, fct, v_func=v_func1, estimator=estimator, backend=backend - ) - ) - ) - np.testing.assert_allclose(res, 0.130842, rtol=1e-6) - - res = np.mean( - np.float64( - sr.twgksuv_ensemble( - obs, fct, a=-1.0, estimator=estimator, backend=backend - ) - ) - ) - np.testing.assert_allclose(res, 0.130842, rtol=1e-6) - - res = np.mean( - np.float64( - sr.twgksuv_ensemble( - obs, fct, v_func=v_func2, estimator=estimator, backend=backend - ) - ) - ) - np.testing.assert_allclose(res, 0.1283745, rtol=1e-6) - - res = np.mean( - np.float64( - sr.twgksuv_ensemble( - obs, fct, b=1.85, estimator=estimator, backend=backend - ) - ) - ) - np.testing.assert_allclose(res, 0.1283745, rtol=1e-6) - @pytest.mark.parametrize("backend", BACKENDS) def test_twgksmv(backend): diff --git a/tests/test_variogram.py b/tests/test_variogram.py index 9f62f06..fb4cbf1 100644 --- a/tests/test_variogram.py +++ b/tests/test_variogram.py @@ -9,13 +9,16 @@ N = 100 N_VARS = 30 +ESTIMATORS = ["nrg", "fair"] + +@pytest.mark.parametrize("estimator", ESTIMATORS) @pytest.mark.parametrize("backend", BACKENDS) -def test_variogram_score(backend): +def test_variogram_score(estimator, backend): obs = np.random.randn(N, N_VARS) fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) - res = sr.vs_ensemble(obs, fct, backend=backend) + res = sr.vs_ensemble(obs, fct, estimator=estimator, backend=backend) if backend in ["numpy", "numba"]: assert isinstance(res, np.ndarray) @@ -23,12 +26,15 @@ def test_variogram_score(backend): assert "jax" in res.__module__ +@pytest.mark.parametrize("estimator", ESTIMATORS) @pytest.mark.parametrize("backend", BACKENDS) -def test_variogram_score_permuted_dims(backend): +def test_variogram_score_permuted_dims(estimator, backend): obs = np.random.randn(N, N_VARS) fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) - res = sr.vs_ensemble(obs, fct, v_axis=-1, m_axis=-2, backend=backend) + res = sr.vs_ensemble( + obs, fct, v_axis=-1, m_axis=-2, estimator=estimator, backend=backend + ) if backend in ["numpy", "numba"]: assert isinstance(res, np.ndarray) @@ -39,13 +45,21 @@ def test_variogram_score_permuted_dims(backend): @pytest.mark.parametrize("backend", BACKENDS) def test_variogram_score_correctness(backend): fct = np.array( - [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] - ) - + [ + [0.79546742, 0.4777960, 0.2164079, 0.5409873], + [0.02461368, 0.7584595, 0.3181810, 0.1319422], + ] + ).T obs = np.array([0.2743836, 0.8146400]) - res = sr.vs_ensemble(obs, fct.T, p=0.5, backend=backend) - np.testing.assert_allclose(res, 0.05083489, rtol=1e-5) + res = sr.vs_ensemble(obs, fct, p=0.5, estimator="nrg", backend=backend) + np.testing.assert_allclose(res, 0.04114727, rtol=1e-5) + + res = sr.vs_ensemble(obs, fct, p=0.5, estimator="fair", backend=backend) + np.testing.assert_allclose(res, 0.01407421, rtol=1e-5) + + res = sr.vs_ensemble(obs, fct, p=1.0, estimator="nrg", backend=backend) + 
np.testing.assert_allclose(res, 0.04480374, rtol=1e-5) - res = sr.vs_ensemble(obs, fct.T, p=1.0, backend=backend) - np.testing.assert_allclose(res, 0.04856365, rtol=1e-5) + res = sr.vs_ensemble(obs, fct, p=1.0, estimator="fair", backend=backend) + np.testing.assert_allclose(res, 0.004730382, rtol=1e-5) diff --git a/tests/test_wcrps.py b/tests/test_wcrps.py index e39d02d..5201a6d 100644 --- a/tests/test_wcrps.py +++ b/tests/test_wcrps.py @@ -7,14 +7,11 @@ M = 11 N = 20 +ESTIMATORS = ["nrg", "fair", "pwm", "qd", "akr", "akr_circperm"] + @pytest.mark.parametrize("backend", BACKENDS) def test_owcrps_ensemble(backend): - # test exceptions - with pytest.raises(ValueError): - est = "not_nrg" - sr.owcrps_ensemble(1, 1.1, w_func=lambda x: x, estimator=est, backend=backend) - # test shapes obs = np.random.randn(N) res = sr.owcrps_ensemble(obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0) @@ -27,11 +24,6 @@ def test_owcrps_ensemble(backend): @pytest.mark.parametrize("backend", BACKENDS) def test_vrcrps_ensemble(backend): - # test exceptions - with pytest.raises(ValueError): - est = "not_nrg" - sr.vrcrps_ensemble(1, 1.1, w_func=lambda x: x, estimator=est, backend=backend) - # test shapes obs = np.random.randn(N) res = sr.vrcrps_ensemble(obs, np.random.randn(N, M), w_func=lambda x: x * 0.0 + 1.0) @@ -42,28 +34,29 @@ def test_vrcrps_ensemble(backend): assert res.shape == (N,) +@pytest.mark.parametrize("estimator", ESTIMATORS) @pytest.mark.parametrize("backend", BACKENDS) -def test_twcrps_vs_crps(backend): +def test_twcrps_vs_crps(estimator, backend): obs = np.random.randn(N) mu = obs + np.random.randn(N) * 0.1 sigma = abs(np.random.randn(N)) * 0.3 fct = np.random.randn(N, M) * sigma[..., None] + mu[..., None] - res = sr.crps_ensemble(obs, fct, backend=backend, estimator="nrg") + res = sr.crps_ensemble(obs, fct, estimator=estimator, backend=backend) # no argument given - resw = sr.twcrps_ensemble(obs, fct, estimator="nrg", backend=backend) + resw = sr.twcrps_ensemble(obs, fct, estimator=estimator, backend=backend) np.testing.assert_allclose(res, resw, rtol=1e-10) # a and b resw = sr.twcrps_ensemble( - obs, fct, a=float("-inf"), b=float("inf"), estimator="nrg", backend=backend + obs, fct, a=float("-inf"), b=float("inf"), estimator=estimator, backend=backend ) np.testing.assert_allclose(res, resw, rtol=1e-10) # v_func as identity function resw = sr.twcrps_ensemble( - obs, fct, v_func=lambda x: x, estimator="nrg", backend=backend + obs, fct, v_func=lambda x: x, estimator=estimator, backend=backend ) np.testing.assert_allclose(res, resw, rtol=1e-10) @@ -78,19 +71,17 @@ def test_owcrps_vs_crps(backend): res = sr.crps_ensemble(obs, fct, backend=backend, estimator="nrg") # no argument given - resw = sr.owcrps_ensemble(obs, fct, estimator="nrg", backend=backend) + resw = sr.owcrps_ensemble(obs, fct, backend=backend) np.testing.assert_allclose(res, resw, rtol=1e-5) # a and b resw = sr.owcrps_ensemble( - obs, fct, a=float("-inf"), b=float("inf"), estimator="nrg", backend=backend + obs, fct, a=float("-inf"), b=float("inf"), backend=backend ) np.testing.assert_allclose(res, resw, rtol=1e-5) # w_func as identity function - resw = sr.owcrps_ensemble( - obs, fct, w_func=lambda x: x * 0.0 + 1.0, estimator="nrg", backend=backend - ) + resw = sr.owcrps_ensemble(obs, fct, w_func=lambda x: x * 0.0 + 1.0, backend=backend) np.testing.assert_allclose(res, resw, rtol=1e-5) @@ -104,19 +95,17 @@ def test_vrcrps_vs_crps(backend): res = sr.crps_ensemble(obs, fct, backend=backend, estimator="nrg") # no argument given - 
resw = sr.vrcrps_ensemble(obs, fct, estimator="nrg", backend=backend) + resw = sr.vrcrps_ensemble(obs, fct, backend=backend) np.testing.assert_allclose(res, resw, rtol=1e-5) # a and b resw = sr.vrcrps_ensemble( - obs, fct, a=float("-inf"), b=float("inf"), estimator="nrg", backend=backend + obs, fct, a=float("-inf"), b=float("inf"), backend=backend ) np.testing.assert_allclose(res, resw, rtol=1e-5) # w_func as identity function - resw = sr.vrcrps_ensemble( - obs, fct, w_func=lambda x: x * 0.0 + 1.0, estimator="nrg", backend=backend - ) + resw = sr.vrcrps_ensemble(obs, fct, w_func=lambda x: x * 0.0 + 1.0, backend=backend) np.testing.assert_allclose(res, resw, rtol=1e-5) diff --git a/tests/test_wenergy.py b/tests/test_wenergy.py index 5dca3fa..16f20aa 100644 --- a/tests/test_wenergy.py +++ b/tests/test_wenergy.py @@ -48,7 +48,7 @@ def test_vres_vs_es(backend): lambda x: backends[backend].mean(x) * 0.0 + 1.0, backend=backend, ) - np.testing.assert_allclose(res, resw, rtol=1e-10) + np.testing.assert_allclose(res, resw, rtol=1e-6) @pytest.mark.parametrize("backend", BACKENDS) diff --git a/tests/test_wvariogram.py b/tests/test_wvariogram.py index 688db83..8e80171 100644 --- a/tests/test_wvariogram.py +++ b/tests/test_wvariogram.py @@ -1,6 +1,7 @@ import numpy as np import pytest + import scoringrules as sr from scoringrules.backend import backends @@ -23,9 +24,7 @@ def test_owvs_vs_vs(backend): lambda x: backends[backend].mean(x) * 0.0 + 1.0, backend=backend, ) - np.testing.assert_allclose( - res, resw, rtol=1e-3 - ) # TODO: not sure why tolerance must be so high + np.testing.assert_allclose(res, resw, rtol=1e-3) @pytest.mark.parametrize("backend", BACKENDS) @@ -50,7 +49,7 @@ def test_vrvs_vs_vs(backend): lambda x: backends[backend].mean(x) * 0.0 + 1.0, backend=backend, ) - np.testing.assert_allclose(res, resw, rtol=5e-4) + np.testing.assert_allclose(res, resw, atol=1e-6) @pytest.mark.parametrize("backend", BACKENDS)
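Note on the fair kernel score added to `docs/crps_estimators.md`: the estimator $S_{k}^{f}(F_{M}, y)$ can be written directly in NumPy for an arbitrary positive definite kernel. The sketch below is illustrative only; the helper name `fair_kernel_score` and the Gaussian kernel are assumptions made for the example and are not part of the library, whose own Gaussian-kernel implementation of this estimator is exposed as `gksuv_ensemble(obs, fct, estimator="fair")`.

```python
import numpy as np

def fair_kernel_score(y, x, kern):
    """Fair kernel score S_k^f(F_M, y) for a scalar outcome y and a 1-D ensemble x of size M."""
    M = x.shape[0]
    K = kern(x[:, None], x[None, :])                 # pairwise k(x_i, x_j)
    e_xx = (K.sum() - np.trace(K)) / (M * (M - 1))   # mean of k(x_i, x_j) over i != j
    e_xy = kern(x, y).mean()                         # mean of k(x_i, y)
    return 0.5 * e_xx + 0.5 * kern(y, y) - e_xy

# illustrative kernel choice for the example
gauss = lambda a, b: np.exp(-0.5 * (a - b) ** 2)
rng = np.random.default_rng(1)
ens = rng.normal(size=20)
print(fair_kernel_score(0.3, ens, gauss))
```

Averaging this quantity over many ensembles drawn from $F$ recovers $S_{k}(F, y)$ without the $O(1/M)$ bias of the plug-in ("nrg") estimator, which is the point of the fair form.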