From eb9508b66dc7dae37dd86302ce006cf5bd00de10 Mon Sep 17 00:00:00 2001 From: = <=> Date: Fri, 31 May 2024 12:48:49 +0200 Subject: [PATCH 01/12] Add gufunc for energy score iid and k-band estimators --- scoringrules/core/energy/_gufuncs.py | 61 ++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/scoringrules/core/energy/_gufuncs.py b/scoringrules/core/energy/_gufuncs.py index 56342f7..a4829aa 100644 --- a/scoringrules/core/energy/_gufuncs.py +++ b/scoringrules/core/energy/_gufuncs.py @@ -89,3 +89,64 @@ def _vrenergy_score_gufunc( wabs_y = np.linalg.norm(obs) * ow out[0] = e_1 / M - 0.5 * e_2 / (M**2) + (wabs_x - wabs_y) * (wbar - ow) + + +@guvectorize( + [ + "void(float32[:], float32[:,:], int32, float32[:])", + "void(float64[:], float64[:,:], int64, float64[:])", + ], + "(d), (m, d), ()->()", +) +def _energy_score_iid_gufunc( + obs: np.ndarray, + fct: np.ndarray, + seed: int, + out: np.ndarray, +): + """IID estimator for the energy score.""" + ## Set the seed + np.random.seed(seed) + + M = fct.shape[0] + K = int(M / 2) + e_1 = 0 + e_2 = 0 + perm_fct = np.random.permutation(fct) + for i in range(M): + e_1 += float(np.linalg.norm(perm_fct[i] - obs)) + for i in range(K): + e_2 += float(np.linalg.norm(perm_fct[i] - perm_fct[-i])) + out[0] = e_1 / M - 0.5 / K * e_2 + + +@guvectorize( + [ + "void(float32[:], float32[:,:], int32, int32, float32[:])", + "void(float64[:], float64[:,:], int64, int64, float64[:])", + ], + "(d), (m, d), (), ()->()", +) +def _energy_score_kband_gufunc( + obs: np.ndarray, + fct: np.ndarray, + K: int, + seed: int, + out: np.ndarray, +): + """K-Band estimator for the energy score.""" + ## Set the seed + np.random.seed(seed) + + M = fct.shape[0] + e_1 = 0 + e_2 = 0 + idx = np.random.permutation(np.arange(M, dtype=np.int64)) + + for i in idx: + e_1 += float(np.linalg.norm(fct[i] - obs)) + for k in range(K): + idx_rolled = np.roll(idx, k + 1) + for i, j in zip(idx, idx_rolled): # noqa + e_2 += float(np.linalg.norm(fct[i] - fct[j])) + out[0] = e_1 / M - 0.5 / (K * M) * e_2 From d06b068a9a2761e751c373f8fb24c0195ecec64c Mon Sep 17 00:00:00 2001 From: = <=> Date: Fri, 31 May 2024 12:52:35 +0200 Subject: [PATCH 02/12] Add IID estimator score backend function --- scoringrules/core/energy/_score.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/scoringrules/core/energy/_score.py b/scoringrules/core/energy/_score.py index 3a05419..859cea2 100644 --- a/scoringrules/core/energy/_score.py +++ b/scoringrules/core/energy/_score.py @@ -73,3 +73,28 @@ def vrenergy_score( rhobar = B.sum(B.norm(fct, -1) * fw, -1) / M # (...) E_3 = (rhobar - B.norm(obs, -1) * ow) * (wbar - ow) # (...) return E_1 - 0.5 * E_2 + E_3 + + +def _energy_score_iid( + obs: "Array", fct: "Array", seed: int = 123, backend=None +) -> "Array": + """ + Compute the energy score based on a finite ensemble. + + The ensemble and variables axes are on the second last and last dimensions respectively. + """ + B = backends.active if backend is None else backends[backend] + M: int = fct.shape[-2] + K: int = int(M // 2) + + ## TODO + # Permutate the sample once to ensure that + # some potential structure in the sample does not affect the + # estimator of spread_norm + + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + E_1 = B.sum(err_norm, -1) / M + + spread_norm = B.norm(fct[..., :K, :] - fct[..., K:, :], -1) + E_2 = B.sum(spread_norm, -1) / K + return E_1 - 0.5 * E_2 From 0c5bb3530341e5edc30c98d6ece6b7e03ed46761 Mon Sep 17 00:00:00 2001 From: = <=> Date: Fri, 14 Jun 2024 22:13:54 +0200 Subject: [PATCH 03/12] Add roll and permutation --- scoringrules/backend/base.py | 8 ++++++++ scoringrules/backend/jax.py | 8 ++++++++ scoringrules/backend/numpy.py | 7 +++++++ scoringrules/backend/tensorflow.py | 18 ++++++++++++++++++ scoringrules/backend/torch.py | 12 ++++++++++++ 5 files changed, 53 insertions(+) diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index 8f7641d..33ea065 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -45,6 +45,14 @@ def max( ) -> "Array": """Return the maximum value of an input array ``x``.""" + @abc.abstractmethod + def roll(self, x: "Array", shift: int, axis: int | None = None) -> "Array": + """Roll the values of array ``x`` by shift along axis.""" + + @abc.abstractmethod + def shuffle(self, x: "Array", axis: int | None = 0) -> "Array": + """Shuffle the values of array ``x`` by along axis.""" + @abc.abstractmethod def moveaxis( self, diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index b112f11..0376152 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -30,6 +30,7 @@ def __init__(self): jax = import_module("jax") jnp = import_module("jax.numpy") jsp = import_module("jax.scipy") + self.key = jax.random.key(1) def asarray(self, obj: "ArrayLike", /, *, dtype: Dtype | None = None) -> "Array": return jnp.asarray(obj, dtype=dtype) @@ -49,6 +50,13 @@ def max( ) -> "Array": return jnp.max(x, axis=axis, keepdims=keepdims) + def roll(self, x: "Array", shift: int, axis: int | None = None) -> "Array": + return jnp.roll(x, shift=shift, axis=axis) + + def shuffle(self, x: "Array", axis: int = 0) -> "Array": + self.key, subkey = jnp.split(self.key) + return jax.random.permutation(subkey, x, axis=axis, independent=False) + def moveaxis( self, x: "Array", diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 1c633d6..1557d6b 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -48,6 +48,13 @@ def max( ) -> "NDArray": return np.max(x, axis=axis, keepdims=keepdims) + def roll(self, x: "NDArray", shift: int, axis: int | None = None) -> "NDArray": + return np.roll(x, shift=shift, axis=axis) + + def shuffle(self, x: "NDArray", axis: int = 0) -> "NDArray": + generator = np.random.default_rng() + return generator.permutation(x, axis=axis) + def moveaxis( self, x: "NDArray", diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py index 28d62d2..a205675 100644 --- a/scoringrules/backend/tensorflow.py +++ b/scoringrules/backend/tensorflow.py @@ -54,6 +54,24 @@ def max( ) -> "Tensor": return tf.math.reduce_max(x, axis=axis, keepdims=keepdims) + def roll( + self, + x: "Tensor", + shift: int, + axis: int | None = None, + ) -> "Tensor": + return tf.roll(x, shift=shift, axis=axis) + + def shuffle(self, x: "Tensor", axis: int = 0) -> "Tensor": + # tensorflow does not allow to shuffle along a certain axis + return tf.experimental.numpy.moveaxis( + tf.random.shuffle( + tf.experimental.numpy.moveaxis(x, source=axis, destination=0) + ), + source=0, + destination=axis, + ) + def moveaxis( self, x: "Tensor", diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index 52d2d22..955c666 100644 --- a/scoringrules/backend/torch.py +++ b/scoringrules/backend/torch.py @@ -48,6 +48,18 @@ def max( self, x: "Tensor", axis: int | tuple[int, ...] | None, keepdims: bool = False ) -> "Tensor": return torch.max(x, axis=axis, keepdim=keepdims) + + def roll(self, x: "Tensor", shift: int, axis: int | None = None) -> "Tensor": + torch.roll(x, shifts=shift, dims=axis) + + def shuffle(self, x: "Tensor", axis: int = 0) -> "Tensor": + return torch.moveaxis( + torch.moveaxis(x, source=axis, destination=0)[ + torch.randperm(x.shape[axis]) + ], + source=0, + destination=axis, + ) def moveaxis( self, From 54cff931a61738d5ccdbb73431ea1531aa1ca1ab Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 19:03:14 +0200 Subject: [PATCH 04/12] Set seed on function call for base, numpy, jax. --- scoringrules/backend/base.py | 8 ++++++-- scoringrules/backend/jax.py | 8 ++++---- scoringrules/backend/numpy.py | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index 33ea065..05725a4 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -50,8 +50,12 @@ def roll(self, x: "Array", shift: int, axis: int | None = None) -> "Array": """Roll the values of array ``x`` by shift along axis.""" @abc.abstractmethod - def shuffle(self, x: "Array", axis: int | None = 0) -> "Array": - """Shuffle the values of array ``x`` by along axis.""" + def shuffle(self, x: "Array", axis: int | None = 0, seed: int = 42) -> "Array": + """ + Shuffle the values of array ``x`` by along axis. + + Note that the seed is set on the function call. Hence, calling the function multiple times with the default seed will result in identical outputs. + """ @abc.abstractmethod def moveaxis( diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index 0376152..7e83a06 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -30,7 +30,6 @@ def __init__(self): jax = import_module("jax") jnp = import_module("jax.numpy") jsp = import_module("jax.scipy") - self.key = jax.random.key(1) def asarray(self, obj: "ArrayLike", /, *, dtype: Dtype | None = None) -> "Array": return jnp.asarray(obj, dtype=dtype) @@ -53,9 +52,10 @@ def max( def roll(self, x: "Array", shift: int, axis: int | None = None) -> "Array": return jnp.roll(x, shift=shift, axis=axis) - def shuffle(self, x: "Array", axis: int = 0) -> "Array": - self.key, subkey = jnp.split(self.key) - return jax.random.permutation(subkey, x, axis=axis, independent=False) + def shuffle(self, x: "Array", axis: int = 0, seed: int = 42) -> "Array": + return jax.random.permutation( + jax.random.key(42), x, axis=axis, independent=False + ) def moveaxis( self, diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 1557d6b..ccab310 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -51,8 +51,8 @@ def max( def roll(self, x: "NDArray", shift: int, axis: int | None = None) -> "NDArray": return np.roll(x, shift=shift, axis=axis) - def shuffle(self, x: "NDArray", axis: int = 0) -> "NDArray": - generator = np.random.default_rng() + def shuffle(self, x: "NDArray", axis: int = 0, seed: int = 42) -> "NDArray": + generator = np.random.default_rng(seed) return generator.permutation(x, axis=axis) def moveaxis( From e1773f984392b48259c1b1c1d6bb502617dc819c Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 19:07:50 +0200 Subject: [PATCH 05/12] Add seed to shuffle for tensorflow and torch --- scoringrules/backend/tensorflow.py | 4 +++- scoringrules/backend/torch.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py index a205675..e499995 100644 --- a/scoringrules/backend/tensorflow.py +++ b/scoringrules/backend/tensorflow.py @@ -62,8 +62,10 @@ def roll( ) -> "Tensor": return tf.roll(x, shift=shift, axis=axis) - def shuffle(self, x: "Tensor", axis: int = 0) -> "Tensor": + def shuffle(self, x: "Tensor", axis: int = 0, seed: int = 42) -> "Tensor": # tensorflow does not allow to shuffle along a certain axis + # Hence we move the axis to shuffle in the 0st spot, shuffle, move back + tf.random.set_seed(seed) return tf.experimental.numpy.moveaxis( tf.random.shuffle( tf.experimental.numpy.moveaxis(x, source=axis, destination=0) diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index 955c666..d0acad0 100644 --- a/scoringrules/backend/torch.py +++ b/scoringrules/backend/torch.py @@ -48,11 +48,12 @@ def max( self, x: "Tensor", axis: int | tuple[int, ...] | None, keepdims: bool = False ) -> "Tensor": return torch.max(x, axis=axis, keepdim=keepdims) - + def roll(self, x: "Tensor", shift: int, axis: int | None = None) -> "Tensor": torch.roll(x, shifts=shift, dims=axis) - def shuffle(self, x: "Tensor", axis: int = 0) -> "Tensor": + def shuffle(self, x: "Tensor", axis: int = 0, seed: int = 42) -> "Tensor": + torch.manual_seed(seed=seed) return torch.moveaxis( torch.moveaxis(x, source=axis, destination=0)[ torch.randperm(x.shape[axis]) From d5c6a5d08607ba6c2f54b86f4ac0e9ffc64cca20 Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 21:30:51 +0200 Subject: [PATCH 06/12] Add backend ES iid and k-band --- scoringrules/core/energy/_score.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/scoringrules/core/energy/_score.py b/scoringrules/core/energy/_score.py index 859cea2..6c90418 100644 --- a/scoringrules/core/energy/_score.py +++ b/scoringrules/core/energy/_score.py @@ -75,9 +75,7 @@ def vrenergy_score( return E_1 - 0.5 * E_2 + E_3 -def _energy_score_iid( - obs: "Array", fct: "Array", seed: int = 123, backend=None -) -> "Array": +def energy_score_iid(obs: "Array", fct: "Array", backend=None) -> "Array": """ Compute the energy score based on a finite ensemble. @@ -87,14 +85,22 @@ def _energy_score_iid( M: int = fct.shape[-2] K: int = int(M // 2) - ## TODO - # Permutate the sample once to ensure that - # some potential structure in the sample does not affect the - # estimator of spread_norm - err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) E_1 = B.sum(err_norm, -1) / M spread_norm = B.norm(fct[..., :K, :] - fct[..., K:, :], -1) - E_2 = B.sum(spread_norm, -1) / K - return E_1 - 0.5 * E_2 + E_2 = B.sum(spread_norm, -1) + return E_1 - 0.5 / K * E_2 + + +def energy_score_kband(obs: "Array", fct: "Array", K: int, backend=None) -> "Array": + B = backends.active if backend is None else backends[backend] + + M = fct.shape[-2] + err_norm = B.norm(fct - B.expand_dims(obs, -2), -1) + + E_1 = B.sum(err_norm, axis=-1) / M + E_2 = 0 + for k in range(1, K + 1): + E_2 += B.mean(B.norm(fct - B.roll(fct, shift=k, axis=-2), axis=-1), axis=-1) + return E_1 - 0.5 / K * E_2 From 54ed2fdbdd8558b1373e5d9c1fe12bc6696d4ff0 Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 21:32:12 +0200 Subject: [PATCH 07/12] Remove shuffle from gufuncs --- scoringrules/core/energy/_gufuncs.py | 41 +++++++++++----------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/scoringrules/core/energy/_gufuncs.py b/scoringrules/core/energy/_gufuncs.py index a4829aa..3bdc94f 100644 --- a/scoringrules/core/energy/_gufuncs.py +++ b/scoringrules/core/energy/_gufuncs.py @@ -93,60 +93,51 @@ def _vrenergy_score_gufunc( @guvectorize( [ - "void(float32[:], float32[:,:], int32, float32[:])", - "void(float64[:], float64[:,:], int64, float64[:])", + "void(float32[:], float32[:,:], float32[:])", + "void(float64[:], float64[:,:], float64[:])", ], - "(d), (m, d), ()->()", + "(d), (m, d)->()", ) def _energy_score_iid_gufunc( obs: np.ndarray, fct: np.ndarray, - seed: int, out: np.ndarray, ): """IID estimator for the energy score.""" - ## Set the seed - np.random.seed(seed) - M = fct.shape[0] - K = int(M / 2) + K = int(M // 2) e_1 = 0 e_2 = 0 - perm_fct = np.random.permutation(fct) for i in range(M): - e_1 += float(np.linalg.norm(perm_fct[i] - obs)) + e_1 += float(np.linalg.norm(fct[i] - obs)) for i in range(K): - e_2 += float(np.linalg.norm(perm_fct[i] - perm_fct[-i])) + e_2 += float(np.linalg.norm(fct[i] - fct[K + i])) out[0] = e_1 / M - 0.5 / K * e_2 @guvectorize( [ - "void(float32[:], float32[:,:], int32, int32, float32[:])", - "void(float64[:], float64[:,:], int64, int64, float64[:])", + "void(float32[:], float32[:,:], int32, float32[:])", + "void(float64[:], float64[:,:], int64, float64[:])", ], - "(d), (m, d), (), ()->()", + "(d), (m, d), ()->()", ) def _energy_score_kband_gufunc( obs: np.ndarray, fct: np.ndarray, K: int, - seed: int, out: np.ndarray, ): """K-Band estimator for the energy score.""" - ## Set the seed - np.random.seed(seed) - M = fct.shape[0] + e_1 = 0 e_2 = 0 - idx = np.random.permutation(np.arange(M, dtype=np.int64)) - for i in idx: + for i in range(M): e_1 += float(np.linalg.norm(fct[i] - obs)) - for k in range(K): - idx_rolled = np.roll(idx, k + 1) - for i, j in zip(idx, idx_rolled): # noqa - e_2 += float(np.linalg.norm(fct[i] - fct[j])) - out[0] = e_1 / M - 0.5 / (K * M) * e_2 + for k in range(1, K + 1): + idx_rolled = np.roll(np.arange(M), k) + for i in range(M): + e_2 += float(np.linalg.norm(fct[i] - fct[idx_rolled[i]])) + out[0] = e_1 / M - 0.5 / (M * K) * e_2 From c1baf9205ecf1f8e28c9e90e3fa5a0c02f02a663 Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 21:37:12 +0200 Subject: [PATCH 08/12] Add to init --- scoringrules/core/energy/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scoringrules/core/energy/__init__.py b/scoringrules/core/energy/__init__.py index af64592..f54273f 100644 --- a/scoringrules/core/energy/__init__.py +++ b/scoringrules/core/energy/__init__.py @@ -1,9 +1,13 @@ from ._gufuncs import ( _energy_score_gufunc, + _energy_score_iid_gufunc, + _energy_score_kband_gufunc, _owenergy_score_gufunc, _vrenergy_score_gufunc, ) from ._score import energy_score as nrg +from ._score import energy_score_iid as iidnrg +from ._score import energy_score_kband as kbnrg from ._score import owenergy_score as ownrg from ._score import vrenergy_score as vrnrg @@ -11,7 +15,11 @@ "nrg", "ownrg", "vrnrg", + "iidnrg", + "kbnrg", "_energy_score_gufunc", "_owenergy_score_gufunc", "_vrenergy_score_gufunc", + "_energy_score_kband_gufunc", + "_energy_score_iid_gufunc", ] From 8d8f37e629bfce9d21ee019f3e295a717aa669d6 Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 22:18:29 +0200 Subject: [PATCH 09/12] Add energy score estimators to public API. --- scoringrules/__init__.py | 4 ++ scoringrules/_energy.py | 132 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index 9cf57ce..b2dd279 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -14,6 +14,8 @@ ) from scoringrules._energy import ( energy_score, + energy_score_iid, + energy_score_kband, owenergy_score, twenergy_score, vrenergy_score, @@ -48,6 +50,8 @@ "brier_score", "error_spread_score", "energy_score", + "energy_score_iid", + "energy_score_kband", "owenergy_score", "twenergy_score", "vrenergy_score", diff --git a/scoringrules/_energy.py b/scoringrules/_energy.py index b1a8c01..e3f841b 100644 --- a/scoringrules/_energy.py +++ b/scoringrules/_energy.py @@ -238,3 +238,135 @@ def vrenergy_score( return energy.vrnrg( observations, forecasts, obs_weights, fct_weights, backend=backend ) + + +def energy_score_iid( + observations: "Array", + forecasts: "Array", + /, + m_axis: int = -2, + v_axis: int = -1, + *, + shuffle: bool = True, + seed: int = 42, + backend: "Backend" = None, +) -> "Array": + r"""Compute the Energy Score for a finite multivariate ensemble via the IID estimator. + + The Energy Score is a multivariate scoring rule expressed as + + $$\text{ES}^\text{iid}(F_{ens}, \mathbf{y})= \frac{1}{M} \sum_{m=1}^{M} \| \mathbf{x}_{m} - + \mathbf{y} \| - \frac{1}{K} \sum_{m=1}^{K} \| \mathbf{x}_{m} - \mathbf{x}_{K+m} \| $$ + + where $\mathbf{X}$ are independent samples from $F$, $K = \text{floor}(M / 2)$ + and $||\cdot||$ is the euclidean norm over the input dimensions (the variables). + + To avoid the influence of possible structure during the generation of the samples, we + shuffle the forecast array once before we calculate the $ES^\text{iid}$. + + Parameters + ---------- + observations: Array + The observed values, where the variables dimension is by default the last axis. + forecasts: Array + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + m_axis: int + The axis corresponding to the ensemble dimension on the forecasts array. Defaults to -2. + v_axis: int + The axis corresponding to the variables dimension on the forecasts array (or the observations + array with an extra dimension on `m_axis`). Defaults to -1. + shuffle: bool + Whether to shuffle the forecasts once along the ensemble axis `m_axis`. + seed: int + The random seed for shuffling. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + energy_score: Array of shape (...) + The computed Energy Score. + """ + backend = backend if backend is not None else backends._active + + B = backends.active if backend is None else backends[backend] + + observations, forecasts = multivariate_array_check( + observations, forecasts, m_axis, v_axis, backend=backend + ) + + if shuffle: + forecasts = B.shuffle(forecasts, axis=m_axis, seed=seed) + + if backend == "numba": + return energy._energy_score_iid_gufunc(observations, forecasts) + + return energy.iidnrg(observations, forecasts, backend=backend) + + +def energy_score_kband( + observations: "Array", + forecasts: "Array", + K: int, + /, + m_axis: int = -2, + v_axis: int = -1, + *, + shuffle: bool = True, + seed: int = 42, + backend: "Backend" = None, +) -> "Array": + r"""Compute the Energy Score for a finite multivariate ensemble via the k-Band estimator. + + The Energy Score is a multivariate scoring rule expressed as + + $$\text{ES}^{k-text{band}}(F_{ens}, \mathbf{y})= \frac{1}{M} \sum_{m=1}^{M} \| \mathbf{x}_{m} - + \mathbf{y} \| - \frac{1}{MK} \sum_{m=1}^{M} \sum_{k=1}^{K} \| \mathbf{x}_{m} - \mathbf{x}_{m+k} \| $$ + + where $\mathbf{X}$ are independent samples from $F$, $K$ is a user-choosen integer + and $||\cdot||$ is the euclidean norm over the input dimensions (the variables). Let us note + that we define $\mathbf{x}_{M+k} = \mathbf{x}_{k}$. + + To avoid the influence of possible structure during the generation of the samples, we + shuffle the forecast array once before we calculate the $ES^\text{iid}$. + + Parameters + ---------- + observations: Array + The observed values, where the variables dimension is by default the last axis. + forecasts: Array + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + K : int + The number of resamples taken to estimate the energy score. + m_axis: int + The axis corresponding to the ensemble dimension on the forecasts array. Defaults to -2. + v_axis: int + The axis corresponding to the variables dimension on the forecasts array (or the observations + array with an extra dimension on `m_axis`). Defaults to -1. + shuffle: bool + Whether to shuffle the forecasts once along the ensemble axis `m_axis`. + seed: int + The random seed for shuffling. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + energy_score: Array of shape (...) + The computed Energy Score. + """ + B = backends.active if backend is None else backends[backend] + + observations, forecasts = multivariate_array_check( + observations, forecasts, m_axis, v_axis, backend=backend + ) + + if shuffle: + forecasts = B.shuffle(forecasts, axis=m_axis, seed=seed) + + if backend == "numba": + return energy._energy_score_kband_gufunc(observations, forecasts, K) + + return energy.kbnrg(observations, forecasts, backend=backend) From 996421be87a301870cb0c5e6bbd97cb6b7d60ffe Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 22:21:31 +0200 Subject: [PATCH 10/12] Add docs for energy estimators --- docs/api/energy.md | 12 +++++++++++- docs/refs.bib | 7 +++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/api/energy.md b/docs/api/energy.md index 9a91cab..6db2220 100644 --- a/docs/api/energy.md +++ b/docs/api/energy.md @@ -20,10 +20,12 @@ In this case, the expectations in the definition of the energy score can be repl sample means over the ensemble members, yielding the following representation of the energy score when evaluating an ensemble forecast $F_{ens}$ with $M$ members. +Next to the full version of the energy score, `scoringrules` provides [weighted versions](#weighted-versions) and [estimators with lower computational complexity](#estimators-for-the-energy-score) for the energy score. + ::: scoringrules.energy_score -

Weighted versions

+## Weighted versions The energy score provides a measure of overall forecast performance. However, it is often the case that certain outcomes are of more interest than others, making it desirable to @@ -85,3 +87,11 @@ energy scores can easily be computed for ensemble forecasts by replacing the expectations with sample means over the ensemble members.

+ +## Estimators for the Energy Score + +The following two estimators can be used if computational power is scarce respectively the dimension of the forecasting setting is large (Ziel and Berk, 2019)[@ziel2019multivariate]. Note that, on the flip-side, lower computational complexity goes hand-in-hand with lower precision for the approximation of the ES. + +::: scoringrules.energy_score_iid + +::: scoringrules.energy_score_kband diff --git a/docs/refs.bib b/docs/refs.bib index 122dfe5..82b81a2 100644 --- a/docs/refs.bib +++ b/docs/refs.bib @@ -91,3 +91,10 @@ @article{winkler1972decision year={1972}, publisher={Taylor \& Francis} } + +@article{ziel2019multivariate, + title={Multivariate forecasting evaluation: On sensitive and strictly proper scoring rules}, + author={Ziel, Florian and Berk, Kevin}, + journal={arXiv preprint arXiv:1910.07325}, + year={2019} +} From 5a7b1b93ad43adb04a7adeeb190a261a2bdb8dcc Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 22:24:40 +0200 Subject: [PATCH 11/12] Fix typo --- scoringrules/_energy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scoringrules/_energy.py b/scoringrules/_energy.py index e3f841b..2f77f27 100644 --- a/scoringrules/_energy.py +++ b/scoringrules/_energy.py @@ -321,7 +321,7 @@ def energy_score_kband( The Energy Score is a multivariate scoring rule expressed as - $$\text{ES}^{k-text{band}}(F_{ens}, \mathbf{y})= \frac{1}{M} \sum_{m=1}^{M} \| \mathbf{x}_{m} - + $$\text{ES}^{k-\text{band}}(F_{ens}, \mathbf{y})= \frac{1}{M} \sum_{m=1}^{M} \| \mathbf{x}_{m} - \mathbf{y} \| - \frac{1}{MK} \sum_{m=1}^{M} \sum_{k=1}^{K} \| \mathbf{x}_{m} - \mathbf{x}_{m+k} \| $$ where $\mathbf{X}$ are independent samples from $F$, $K$ is a user-choosen integer From 9cb05aad6ff0329b9ee95ae4a293eb79ac55f56d Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Tue, 18 Jun 2024 22:51:31 +0200 Subject: [PATCH 12/12] Add test for energy score estimators --- scoringrules/_energy.py | 2 +- tests/test_energy.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/scoringrules/_energy.py b/scoringrules/_energy.py index 2f77f27..be7e028 100644 --- a/scoringrules/_energy.py +++ b/scoringrules/_energy.py @@ -369,4 +369,4 @@ def energy_score_kband( if backend == "numba": return energy._energy_score_kband_gufunc(observations, forecasts, K) - return energy.kbnrg(observations, forecasts, backend=backend) + return energy.kbnrg(observations, forecasts, K, backend=backend) diff --git a/tests/test_energy.py b/tests/test_energy.py index 413ad58..81d745d 100644 --- a/tests/test_energy.py +++ b/tests/test_energy.py @@ -1,7 +1,7 @@ import jax import numpy as np import pytest -from scoringrules._energy import energy_score +from scoringrules._energy import energy_score, energy_score_iid, energy_score_kband from .conftest import BACKENDS @@ -21,3 +21,32 @@ def test_energy_score(backend): assert isinstance(res, np.ndarray) elif backend == "jax": assert isinstance(res, jax.Array) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_energy_score_iid(backend): + M = 1000 + D = 5 + N = 100 + + generator = np.random.default_rng() + fct = generator.normal(0, 1, (N, M, D)) + obs = np.zeros((N, D)) + res_iid = energy_score_iid(obs, fct) + res_full = energy_score(obs, fct) + assert np.all(1 - res_iid / res_full < 0.10), "Error is to full really big" + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_energy_score_kband(backend): + M = 1000 + D = 5 + N = 100 + K = 100 + + generator = np.random.default_rng() + fct = generator.normal(0, 1, (N, M, D)) + obs = np.zeros((N, D)) + res_iid = energy_score_kband(obs, fct, K) + res_full = energy_score(obs, fct) + assert np.all(1 - res_iid / res_full < 0.05), "Error is to full really big"