From 64fb9e079aee4703eae04aae1d82997888491793 Mon Sep 17 00:00:00 2001 From: simon-hirsch Date: Wed, 18 Sep 2024 14:09:36 +0200 Subject: [PATCH 01/16] Initial version of the DSS --- scoringrules/__init__.py | 2 ++ scoringrules/_dawid_sebastiani.py | 49 +++++++++++++++++++++++++++++++ scoringrules/backend/numpy.py | 14 +++++++++ scoringrules/core/dss/__init__.py | 10 +++++++ scoringrules/core/dss/_gufuncs.py | 19 ++++++++++++ scoringrules/core/dss/_score.py | 21 +++++++++++++ 6 files changed, 115 insertions(+) create mode 100644 scoringrules/_dawid_sebastiani.py create mode 100644 scoringrules/core/dss/__init__.py create mode 100644 scoringrules/core/dss/_gufuncs.py create mode 100644 scoringrules/core/dss/_score.py diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index 6e27063..ddeabda 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -43,6 +43,7 @@ twenergy_score, vrenergy_score, ) +from scoringrules._dawid_sebastiani import dawid_sebastiani_score from scoringrules._error_spread import error_spread_score from scoringrules._interval import interval_score, weighted_interval_score from scoringrules._logs import ( @@ -113,6 +114,7 @@ "crps_quantile", "crps_t", "crps_uniform", + "dawid_sebastiani_score", "owcrps_ensemble", "twcrps_ensemble", "vrcrps_ensemble", diff --git a/scoringrules/_dawid_sebastiani.py b/scoringrules/_dawid_sebastiani.py new file mode 100644 index 0000000..27f647e --- /dev/null +++ b/scoringrules/_dawid_sebastiani.py @@ -0,0 +1,49 @@ +import typing as tp + +from scoringrules.core import dss +from scoringrules.core.utils import multivariate_array_check + +if tp.TYPE_CHECKING: + from scoringrules.core.typing import Array, Backend + + +def dawid_sebastiani_score( + observations: "Array", + forecasts: "Array", + /, + m_axis: int = -2, + v_axis: int = -1, + *, + backend: "Backend" = None, +) -> "Array": + r"""Compute the Dawid-Sebastiani-Score for a finite multivariate ensemble. + + ## TO-DO ## + + Parameters + ---------- + forecasts: Array + The predicted forecast ensemble, where the ensemble dimension is by default + represented by the second last axis and the variables dimension by the last axis. + observations: Array + The observed values, where the variables dimension is by default the last axis. + m_axis: int + The axis corresponding to the ensemble dimension. Defaults to -2. + v_axis: int + The axis corresponding to the variables dimension. Defaults to -1. + backend: str + The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. + + Returns + ------- + ds_score: Array + The computed Dawid-Sebastiani-Score. + """ + observations, forecasts = multivariate_array_check( + observations, forecasts, m_axis, v_axis, backend=backend + ) + + if backend == "numba": + return dss._dss_gufunc(observations, forecasts) + + return dss._dss(observations, forecasts, backend=backend) diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 694b934..75c0cb0 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -224,6 +224,20 @@ def where(self, condition: "NDArray", x1: "NDArray", x2: "NDArray") -> "NDArray" def size(self, x: "NDArray") -> int: return x.size + def inv(self, x: "NDArray") -> "NDArray": + return np.linalg.inv(x) + + def cov(self, x: "NDArray") -> "NDArray": + # TODO: This deserves some axis parameter probably? 
+ if len(x.shape) == 2: + return np.cov(x) + else: + centered = x - x.mean(axis=-2, keepdims=True) + return (centered.transpose(0, 2, 1) @ centered) / (x.shape[-2] - 1) + + def det(self, x: "NDArray") -> "NDArray": + return np.linalg.det(x) + class NumbaBackend(NumpyBackend): """Numba backend.""" diff --git a/scoringrules/core/dss/__init__.py b/scoringrules/core/dss/__init__.py new file mode 100644 index 0000000..13de158 --- /dev/null +++ b/scoringrules/core/dss/__init__.py @@ -0,0 +1,10 @@ +try: + from ._gufuncs import ( + _dss_gufunc, + ) +except ImportError: + _dss_gufunc = None + +from ._score import _ds_score as _dss + +__all__ = ["_dss_gufunc", "_dss"] diff --git a/scoringrules/core/dss/_gufuncs.py b/scoringrules/core/dss/_gufuncs.py new file mode 100644 index 0000000..1697c12 --- /dev/null +++ b/scoringrules/core/dss/_gufuncs.py @@ -0,0 +1,19 @@ +import numpy as np +from numba import guvectorize + + +@guvectorize( + [ + "void(float32[:], float32[:,:], float32[:])", + "void(float64[:], float64[:,:], float64[:])", + ], + "(d),(m,d)->()", +) +def _dss_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): + M = fct.shape[0] + bias = obs - (np.sum(fct, axis=0) / M) + cov = np.cov(fct, rowvar=False).astype(bias.dtype) + prec = np.linalg.inv(cov).astype(bias.dtype) + log_det = np.log(np.linalg.det(cov)) + bias_precision = bias.T @ prec @ bias + out[0] = log_det + bias_precision diff --git a/scoringrules/core/dss/_score.py b/scoringrules/core/dss/_score.py new file mode 100644 index 0000000..58acf0d --- /dev/null +++ b/scoringrules/core/dss/_score.py @@ -0,0 +1,21 @@ +import typing as tp + +from scoringrules.backend import backends + +if tp.TYPE_CHECKING: + from scoringrules.core.typing import Array, Backend + + +def _ds_score( + obs: "Array", # (... D) + fct: "Array", # (... 
M D) + backend: "Backend" = None, +) -> "Array": + """Compute the Dawid Sebastiani score Score for a multivariate finite ensemble.""" + B = backends.active if backend is None else backends[backend] + cov = B.cov(fct) + precision = B.inv(cov) + log_det = B.log(B.det(cov)) + bias = obs - B.mean(fct, axis=-2) + bias_precision = B.squeeze(bias[:, None, :] @ precision @ bias[:, :, None]) + return log_det + bias_precision From 8fe8bcdf9a71d4616b90be991ad691d76cb9be94 Mon Sep 17 00:00:00 2001 From: sallen12 Date: Mon, 4 Aug 2025 15:46:22 +0200 Subject: [PATCH 02/16] add inv, cov, and det functions for non-numpy backends --- scoringrules/backend/base.py | 12 ++++++++++++ scoringrules/backend/jax.py | 9 +++++++++ scoringrules/backend/numpy.py | 8 ++------ scoringrules/backend/tensorflow.py | 15 +++++++++++++++ scoringrules/backend/torch.py | 10 ++++++++++ 5 files changed, 48 insertions(+), 6 deletions(-) diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index 10d6200..13ab813 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -297,3 +297,15 @@ def size(self, x: "Array") -> int: @abc.abstractmethod def indices(self, x: "Array") -> int: """Return an array representing the indices of a grid.""" + + @abc.abstractmethod + def inv(self, x: "Array") -> "Array": + """Return the inverse of a matrix.""" + + @abc.abstractmethod + def cov(self, x: "Array", rowvar: bool, bias: bool) -> "Array": + """Return the covariance matrix from a sample.""" + + @abc.abstractmethod + def det(self, x: "Array") -> "Array": + """Return the determinant of a matrix.""" diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index 80332cb..3eed4bb 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -269,6 +269,15 @@ def size(self, x: "Array") -> int: def indices(self, dimensions: tuple) -> "Array": return jnp.indices(dimensions) + def inv(self, x: "Array") -> "Array": + return jnp.linalg.inv(x) + + def cov(self, x: "Array", rowvar: bool = True, bias: bool = False) -> "Array": + return jnp.cov(x, rowavar=rowvar, bias=bias) + + def det(self, x: "Array") -> "Array": + return jnp.linalg.det(x) + if __name__ == "__main__": B = JaxBackend() diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index 6e5e928..b30bc31 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -268,12 +268,8 @@ def indices(self, dimensions: tuple) -> "NDArray": def inv(self, x: "NDArray") -> "NDArray": return np.linalg.inv(x) - def cov(self, x: "NDArray") -> "NDArray": - if len(x.shape) == 2: - return np.cov(x) - else: - centered = x - x.mean(axis=-2, keepdims=True) - return (centered.transpose(0, 2, 1) @ centered) / (x.shape[-2] - 1) + def cov(self, x: "NDArray", rowvar: bool = True, bias: bool = False) -> "NDArray": + return np.cov(x, rowvar=rowvar, bias=bias) def det(self, x: "NDArray") -> "NDArray": return np.linalg.det(x) diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py index 9143607..6bf3a8b 100644 --- a/scoringrules/backend/tensorflow.py +++ b/scoringrules/backend/tensorflow.py @@ -304,6 +304,21 @@ def indices(self, dimensions: tuple) -> "Tensor": indices = tf.stack(index_grids) return indices + def inv(self, x: "Tensor") -> "Tensor": + return tf.linalg.inv(x) + + def cov(self, x: "Tensor", rowvar: bool = True, bias: bool = False) -> "Tensor": + if not rowvar: + x = tf.transpose(x) + x = x - tf.reduce_mean(x, axis=1, keepdims=True) + correction = tf.cast(tf.shape(x)[1], x.dtype) 
- 1.0 + if bias: + correction += 1.0 + return tf.matmul(x, x, transpose_b=True) / correction + + def det(self, x: "Tensor") -> "Tensor": + return tf.linalg.det(x) + if __name__ == "__main__": B = TensorflowBackend() diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index 3c7528b..877a036 100644 --- a/scoringrules/backend/torch.py +++ b/scoringrules/backend/torch.py @@ -286,3 +286,13 @@ def indices(self, dimensions: tuple) -> "Tensor": index_grids = torch.meshgrid(*ranges, indexing="ij") indices = torch.stack(index_grids) return indices + + def inv(self, x: "Tensor") -> "Tensor": + return torch.linalg.inv(x) + + def cov(self, x: "Tensor", rowvar: bool = True, bias: bool = False) -> "Tensor": + correction = 0 if bias else 1 + return torch.cov(x, rowvar=rowvar, correction=correction) + + def det(self, x: "Tensor") -> "Tensor": + return torch.linalg.det(x) From a61492f5571e50c1dab501cf01c823effa851414 Mon Sep 17 00:00:00 2001 From: sallen12 Date: Mon, 4 Aug 2025 16:54:06 +0200 Subject: [PATCH 03/16] add loop across all batch dimensions in Dawid Sebastiani score --- scoringrules/__init__.py | 4 +- .../{_dawid_sebastiani.py => _dss.py} | 29 ++++++------ scoringrules/core/dss/__init__.py | 8 ++-- scoringrules/core/dss/_gufuncs.py | 25 +++++----- scoringrules/core/dss/_score.py | 47 +++++++++++++++---- 5 files changed, 70 insertions(+), 43 deletions(-) rename scoringrules/{_dawid_sebastiani.py => _dss.py} (69%) diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index c5fcd32..b8760a0 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -51,7 +51,7 @@ twes_ensemble, vres_ensemble, ) -from scoringrules._dawid_sebastiani import dawid_sebastiani_score +from scoringrules._dss import dss_ensemble from scoringrules._error_spread import error_spread_score from scoringrules._interval import interval_score, weighted_interval_score from scoringrules._logs import ( @@ -164,7 +164,6 @@ def wrapper(*args, **kwargs): "crps_quantile", "crps_t", "crps_uniform", - "dawid_sebastiani_score", "owcrps_ensemble", "twcrps_ensemble", "vrcrps_ensemble", @@ -198,6 +197,7 @@ def wrapper(*args, **kwargs): "rps_score", "log_score", "rls_score", + "dss_ensemble", "error_spread_score", "es_ensemble", "owes_ensemble", diff --git a/scoringrules/_dawid_sebastiani.py b/scoringrules/_dss.py similarity index 69% rename from scoringrules/_dawid_sebastiani.py rename to scoringrules/_dss.py index 27f647e..e8fe87f 100644 --- a/scoringrules/_dawid_sebastiani.py +++ b/scoringrules/_dss.py @@ -1,5 +1,6 @@ import typing as tp +from scoringrules.backend import backends from scoringrules.core import dss from scoringrules.core.utils import multivariate_array_check @@ -7,13 +8,14 @@ from scoringrules.core.typing import Array, Backend -def dawid_sebastiani_score( - observations: "Array", - forecasts: "Array", +def dss_ensemble( + obs: "Array", + fct: "Array", /, m_axis: int = -2, v_axis: int = -1, *, + bias: bool = False, backend: "Backend" = None, ) -> "Array": r"""Compute the Dawid-Sebastiani-Score for a finite multivariate ensemble. @@ -22,16 +24,16 @@ def dawid_sebastiani_score( Parameters ---------- - forecasts: Array + obs : array_like + The observed values, where the variables dimension is by default the last axis. + fct : array_like The predicted forecast ensemble, where the ensemble dimension is by default represented by the second last axis and the variables dimension by the last axis. 
- observations: Array - The observed values, where the variables dimension is by default the last axis. - m_axis: int + m_axis : int The axis corresponding to the ensemble dimension. Defaults to -2. - v_axis: int + v_axis : int or tuple of int The axis corresponding to the variables dimension. Defaults to -1. - backend: str + backend : str The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'. Returns @@ -39,11 +41,10 @@ def dawid_sebastiani_score( ds_score: Array The computed Dawid-Sebastiani-Score. """ - observations, forecasts = multivariate_array_check( - observations, forecasts, m_axis, v_axis, backend=backend - ) + backend = backend if backend is not None else backends._active + obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) if backend == "numba": - return dss._dss_gufunc(observations, forecasts) + return dss._dss_gufunc(obs, fct, bias) - return dss._dss(observations, forecasts, backend=backend) + return dss.dss(obs, fct, bias, backend=backend) diff --git a/scoringrules/core/dss/__init__.py b/scoringrules/core/dss/__init__.py index 13de158..23e5b26 100644 --- a/scoringrules/core/dss/__init__.py +++ b/scoringrules/core/dss/__init__.py @@ -1,10 +1,8 @@ try: - from ._gufuncs import ( - _dss_gufunc, - ) + from ._gufuncs import _dss_gufunc except ImportError: _dss_gufunc = None -from ._score import _ds_score as _dss +from ._score import ds_score as dss -__all__ = ["_dss_gufunc", "_dss"] +__all__ = ["dss", "_dss_gufunc"] diff --git a/scoringrules/core/dss/_gufuncs.py b/scoringrules/core/dss/_gufuncs.py index 1697c12..29d31fa 100644 --- a/scoringrules/core/dss/_gufuncs.py +++ b/scoringrules/core/dss/_gufuncs.py @@ -1,19 +1,16 @@ import numpy as np from numba import guvectorize +from scoringrules.core.utils import lazy_gufunc_wrapper_mv -@guvectorize( - [ - "void(float32[:], float32[:,:], float32[:])", - "void(float64[:], float64[:,:], float64[:])", - ], - "(d),(m,d)->()", -) -def _dss_gufunc(obs: np.ndarray, fct: np.ndarray, out: np.ndarray): - M = fct.shape[0] - bias = obs - (np.sum(fct, axis=0) / M) - cov = np.cov(fct, rowvar=False).astype(bias.dtype) - prec = np.linalg.inv(cov).astype(bias.dtype) + +@lazy_gufunc_wrapper_mv +@guvectorize("(d),(m,d)->()") +def _dss_gufunc(obs: np.ndarray, fct: np.ndarray, bias: bool, out: np.ndarray): + ens_mean = np.mean(fct, axis=-2) + obs_cent = obs - ens_mean + cov = np.cov(fct, rowvar=False, bias=bias) + prec = np.linalg.inv(cov) log_det = np.log(np.linalg.det(cov)) - bias_precision = bias.T @ prec @ bias - out[0] = log_det + bias_precision + bias_precision = np.transpose(obs_cent) @ prec @ obs_cent + out[0] = bias_precision + log_det diff --git a/scoringrules/core/dss/_score.py b/scoringrules/core/dss/_score.py index 58acf0d..bc282ec 100644 --- a/scoringrules/core/dss/_score.py +++ b/scoringrules/core/dss/_score.py @@ -6,16 +6,47 @@ from scoringrules.core.typing import Array, Backend -def _ds_score( +def ds_score( obs: "Array", # (... D) fct: "Array", # (... 
M D) + bias: bool = False, backend: "Backend" = None, ) -> "Array": - """Compute the Dawid Sebastiani score Score for a multivariate finite ensemble.""" + """Compute the Dawid Sebastiani Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] - cov = B.cov(fct) - precision = B.inv(cov) - log_det = B.log(B.det(cov)) - bias = obs - B.mean(fct, axis=-2) - bias_precision = B.squeeze(bias[:, None, :] @ precision @ bias[:, :, None]) - return log_det + bias_precision + + *batch_dims, _, D = fct.shape + out = B.zeros(batch_dims) + + # nested loop over all batch dimensions + def recursive_loop(current_index, depth): + if depth == len(batch_dims): + fct_i = fct[tuple(current_index)] # (M, D) + obs_i = obs[tuple(current_index)] + score = ds_score_mat(obs_i, fct_i, bias) # shape (D, D) + out[tuple(current_index)] = score + return + + for i in range(batch_dims[depth]): + recursive_loop(current_index + [i], depth + 1) + + recursive_loop([], 0) + return out + + +def ds_score_mat( + obs: "Array", # (D) + fct: "Array", # (M D) + bias: bool = False, + backend: "Backend" = None, +) -> "Array": + """Compute the Dawid Sebastiani Score for one multivariate finite ensemble.""" + B = backends.active if backend is None else backends[backend] + cov = B.cov(fct, rowvar=False, bias=bias) # (D D) + precision = B.inv(cov) # (D D) + log_det = B.log(B.det(cov)) # () + obs_cent = obs - B.mean(fct, axis=-2) # (D) + bias_precision = B.squeeze( + B.expand_dims(obs_cent, axis=-2) @ precision @ B.expand_dims(obs_cent, axis=-1) + ) # () + return bias_precision + log_det From f550dc9154436ad292174bedef8e20a95a800de6 Mon Sep 17 00:00:00 2001 From: sallen12 Date: Mon, 4 Aug 2025 17:33:05 +0200 Subject: [PATCH 04/16] add tests for Dawid-Sebastiani score --- tests/test_dss.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/test_dss.py diff --git a/tests/test_dss.py b/tests/test_dss.py new file mode 100644 index 0000000..38af91d --- /dev/null +++ b/tests/test_dss.py @@ -0,0 +1,41 @@ +import numpy as np +import pytest +import scoringrules as sr + +from .conftest import BACKENDS + +ENSEMBLE_SIZE = 11 +N = 20 +N_VARS = 3 + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_ds_score(backend): + with pytest.raises(ValueError): + obs = np.random.randn(N, N_VARS) + fct = np.random.randn(N, ENSEMBLE_SIZE, N_VARS - 1) + sr.dss_ensemble(obs, fct, backend=backend) + + # test shapes + obs = np.random.randn(N, N_VARS) + fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) + res = sr.dss_ensemble(obs, fct, backend=backend) + assert res.shape == (N,) + + # test correctness + fct = np.array( + [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] + ).T + obs = np.array([0.2743836, 0.8146400]) + res = sr.dss_ensemble(obs, fct, backend=backend) + expected = -3.161553 + np.testing.assert_allclose(res, expected, atol=1e-6) + + # test correctness + fct = np.random.randn(2, 3, 100, 4) + obs = np.random.randn(2, 3, 4) + res = sr.dss_ensemble(obs, fct, backend=backend) + for i in range(2): + for j in range(3): + res_ij = sr.dss_ensemble(obs[i, j, :], fct[i, j, :, :], backend=backend) + np.testing.assert_allclose(res[i, j], res_ij, atol=1e-6) From 563bc530677adfab4f1befb56546c259bb2c2abb Mon Sep 17 00:00:00 2001 From: sallen12 Date: Mon, 4 Aug 2025 18:17:10 +0200 Subject: [PATCH 05/16] fix bugs in dss functions --- scoringrules/_dss.py | 2 +- scoringrules/backend/torch.py | 4 +++- 
scoringrules/core/dss/_gufuncs.py | 5 +++-- scoringrules/core/dss/_score.py | 6 +++--- tests/conftest.py | 2 +- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/scoringrules/_dss.py b/scoringrules/_dss.py index e8fe87f..dd5a35f 100644 --- a/scoringrules/_dss.py +++ b/scoringrules/_dss.py @@ -20,7 +20,7 @@ def dss_ensemble( ) -> "Array": r"""Compute the Dawid-Sebastiani-Score for a finite multivariate ensemble. - ## TO-DO ## + Parameters ---------- diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index 877a036..d818ed7 100644 --- a/scoringrules/backend/torch.py +++ b/scoringrules/backend/torch.py @@ -292,7 +292,9 @@ def inv(self, x: "Tensor") -> "Tensor": def cov(self, x: "Tensor", rowvar: bool = True, bias: bool = False) -> "Tensor": correction = 0 if bias else 1 - return torch.cov(x, rowvar=rowvar, correction=correction) + if not rowvar: + x = torch.transpose(x, -2, -1) + return torch.cov(x, correction=correction) def det(self, x: "Tensor") -> "Tensor": return torch.linalg.det(x) diff --git a/scoringrules/core/dss/_gufuncs.py b/scoringrules/core/dss/_gufuncs.py index 29d31fa..34fbc39 100644 --- a/scoringrules/core/dss/_gufuncs.py +++ b/scoringrules/core/dss/_gufuncs.py @@ -5,9 +5,10 @@ @lazy_gufunc_wrapper_mv -@guvectorize("(d),(m,d)->()") +@guvectorize("(d),(m,d),()->()") def _dss_gufunc(obs: np.ndarray, fct: np.ndarray, bias: bool, out: np.ndarray): - ens_mean = np.mean(fct, axis=-2) + M = fct.shape[0] + ens_mean = np.sum(fct, axis=0) / M obs_cent = obs - ens_mean cov = np.cov(fct, rowvar=False, bias=bias) prec = np.linalg.inv(cov) diff --git a/scoringrules/core/dss/_score.py b/scoringrules/core/dss/_score.py index bc282ec..3c9f0a5 100644 --- a/scoringrules/core/dss/_score.py +++ b/scoringrules/core/dss/_score.py @@ -15,15 +15,15 @@ def ds_score( """Compute the Dawid Sebastiani Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] - *batch_dims, _, D = fct.shape + batch_dims = fct.shape[:-2] out = B.zeros(batch_dims) # nested loop over all batch dimensions def recursive_loop(current_index, depth): if depth == len(batch_dims): fct_i = fct[tuple(current_index)] # (M, D) - obs_i = obs[tuple(current_index)] - score = ds_score_mat(obs_i, fct_i, bias) # shape (D, D) + obs_i = obs[tuple(current_index)] # (D) + score = ds_score_mat(obs_i, fct_i, bias, backend=backend) # () out[tuple(current_index)] = score return diff --git a/tests/conftest.py b/tests/conftest.py index 94c91d4..43a21a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,7 @@ from scoringrules.backend import backends DATA_DIR = Path(__file__).parent / "data" -RUN_TESTS = ["numpy", "numba", "jax", "torch"] +RUN_TESTS = ["numpy", "numba", "torch"] BACKENDS = [b for b in backends.available_backends if b in RUN_TESTS] if os.getenv("SR_TEST_OUTPUT", "False").lower() in ("true", "1", "t"): From 0654ea88b4dc501469c8b70b7d2c0e0d6965fd78 Mon Sep 17 00:00:00 2001 From: sallen12 Date: Mon, 4 Aug 2025 18:22:34 +0200 Subject: [PATCH 06/16] update docstring of Dawid Sebastiani score --- scoringrules/_dss.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/scoringrules/_dss.py b/scoringrules/_dss.py index dd5a35f..748d2ce 100644 --- a/scoringrules/_dss.py +++ b/scoringrules/_dss.py @@ -20,7 +20,13 @@ def dss_ensemble( ) -> "Array": r"""Compute the Dawid-Sebastiani-Score for a finite multivariate ensemble. + The Dawid-Sebastiani Score for an ensemble forecast is defined as + .. 
math::
+        \text{DSS}(F_{ens}, \mathbf{y}) = (\mathbf{y} - \bar{\mathbf{x}})^{\top} \Sigma^{-1} (\mathbf{y} - \bar{\mathbf{x}}) + \log \det(\Sigma)
+
+    where :math:`\bar{\mathbf{x}}` is the mean of the ensemble members (along each dimension),
+    and :math:`\Sigma` is the sample covariance matrix estimated from the ensemble members.
 
     Parameters
     ----------
@@ -33,13 +39,16 @@ def dss_ensemble(
         The axis corresponding to the ensemble dimension. Defaults to -2.
     v_axis : int or tuple of int
         The axis corresponding to the variables dimension. Defaults to -1.
+    bias : bool
+        Logical specifying whether the biased or unbiased estimator of the covariance matrix
+        should be used to calculate the score. Default is the unbiased estimator (`bias=False`).
     backend : str
         The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
 
     Returns
     -------
-    ds_score: Array
-        The computed Dawid-Sebastiani-Score.
+    score: Array
+        The computed Dawid-Sebastiani Score.
     """
     backend = backend if backend is not None else backends._active
     obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend)

From 92cc04b77deda553626eedfa44e91d76cb9be94 Mon Sep 17 00:00:00 2001
From: sallen12
Date: Mon, 4 Aug 2025 18:46:59 +0200
Subject: [PATCH 07/16] add bias argument to standard deviation in backends

---
 scoringrules/backend/base.py | 1 +
 scoringrules/backend/jax.py | 4 +++-
 scoringrules/backend/numpy.py | 4 +++-
 scoringrules/backend/tensorflow.py | 4 +++-
 scoringrules/backend/torch.py | 4 +++-
 5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py
index 13ab813..7d6a321 100644
--- a/scoringrules/backend/base.py
+++ b/scoringrules/backend/base.py
@@ -50,6 +50,7 @@ def std(
         /,
         *,
         axis: int | tuple[int, ...] | None = None,
+        bias: bool = False,
         keepdims: bool = False,
     ) -> "Array":
         """Calculate the standard deviation of the input array ``x``."""
diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py
index 3eed4bb..ab66d26 100644
--- a/scoringrules/backend/jax.py
+++ b/scoringrules/backend/jax.py
@@ -53,9 +53,11 @@ def std(
         /,
         *,
         axis: int | tuple[int, ...] | None = None,
+        bias: bool = False,
         keepdims: bool = False,
     ) -> "Array":
-        return jnp.std(x, ddof=1, axis=axis, keepdims=keepdims)
+        ddof = 0 if bias else 1
+        return jnp.std(x, ddof=ddof, axis=axis, keepdims=keepdims)
 
     def quantile(
         self,
diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py
index b30bc31..c59222b 100644
--- a/scoringrules/backend/numpy.py
+++ b/scoringrules/backend/numpy.py
@@ -53,9 +53,11 @@ def std(
         /,
         *,
         axis: int | tuple[int, ...] | None = None,
+        bias: bool = False,
         keepdims: bool = False,
     ) -> "NDArray":
-        return np.std(x, ddof=1, axis=axis, keepdims=keepdims)
+        ddof = 0 if bias else 1
+        return np.std(x, ddof=ddof, axis=axis, keepdims=keepdims)
 
     def quantile(
         self,
diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py
index 6bf3a8b..5321300 100644
--- a/scoringrules/backend/tensorflow.py
+++ b/scoringrules/backend/tensorflow.py
@@ -58,10 +58,12 @@ def std(
         /,
         *,
         axis: int | tuple[int, ...] | None = None,
+        bias: bool = False,
         keepdims: bool = False,
     ) -> "Tensor":
         n = x.shape.num_elements() if axis is None else x.shape[axis]
-        resc = self.sqrt(n / (n - 1))
+        correction = 0.0 if bias else 1.0
+        resc = self.sqrt(n / (n - correction))
         return tf.math.reduce_std(x, axis=axis, keepdims=keepdims) * resc
 
     def quantile(
         self,
diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py
index d818ed7..679601e 100644
--- a/scoringrules/backend/torch.py
+++ b/scoringrules/backend/torch.py
@@ -54,9 +54,11 @@ def std(
         /,
         *,
         axis: int | tuple[int, ...] | None = None,
+        bias: bool = False,
         keepdims: bool = False,
     ) -> "Tensor":
-        return torch.std(x, correction=1, axis=axis, keepdim=keepdims)
+        correction = 0 if bias else 1
+        return torch.std(x, correction=correction, axis=axis, keepdim=keepdims)
 
     def quantile(
         self,

From 05fdceac707f65c2e8e97b68d372e2c0a1b1affb Mon Sep 17 00:00:00 2001
From: sallen12
Date: Mon, 4 Aug 2025 18:47:20 +0200
Subject: [PATCH 08/16] add univariate Dawid Sebastiani score

---
 scoringrules/_dss.py | 56 +++++++++++++++++++++++++++++--
 scoringrules/core/dss/__init__.py | 9 ++---
 scoringrules/core/dss/_gufuncs.py | 15 +++++++--
 scoringrules/core/dss/_score.py | 22 ++++++++++--
 4 files changed, 90 insertions(+), 12 deletions(-)

diff --git a/scoringrules/_dss.py b/scoringrules/_dss.py
index 748d2ce..d92fa4c 100644
--- a/scoringrules/_dss.py
+++ b/scoringrules/_dss.py
@@ -8,7 +8,57 @@
     from scoringrules.core.typing import Array, Backend
 
 
-def dss_ensemble(
+def dssuv_ensemble(
+    obs: "Array",
+    fct: "Array",
+    /,
+    m_axis: int = -2,
+    *,
+    bias: bool = False,
+    backend: "Backend" = None,
+) -> "Array":
+    r"""Compute the Dawid-Sebastiani-Score for a finite univariate ensemble.
+
+    The Dawid-Sebastiani Score for an ensemble forecast is defined as
+
+    .. math::
+        \text{DSS}(F_{ens}, y) = \frac{(y - \bar{x})^2}{\sigma^2} + 2 \log \sigma
+
+    where :math:`\bar{x}` and :math:`\sigma` are the mean and standard deviation of the ensemble members.
+
+    Parameters
+    ----------
+    obs : array_like
+        The observed values.
+    fct : array_like, shape (..., m)
+        The predicted forecast ensemble, where the ensemble dimension is by default
+        represented by the last axis.
+    m_axis : int
+        The axis corresponding to the ensemble. Default is the last axis.
+    bias : bool
+        Logical specifying whether the biased or unbiased estimator of the standard deviation
+        should be used to calculate the score. Default is the unbiased estimator (`bias=False`).
+    backend : str
+        The name of the backend used for computations. Defaults to 'numba' if available, else 'numpy'.
+
+    Returns
+    -------
+    score: Array
+        The computed Dawid-Sebastiani Score.
+ """ + B = backends.active if backend is None else backends[backend] + obs, fct = map(B.asarray, (obs, fct)) + + if m_axis != -1: + fct = B.moveaxis(fct, m_axis, -1) + + if backend == "numba": + return dss._dss_uv_gufunc(obs, fct, bias) + + return dss.ds_score_uv(obs, fct, bias, backend=backend) + + +def dssmv_ensemble( obs: "Array", fct: "Array", /, @@ -54,6 +104,6 @@ def dss_ensemble( obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) if backend == "numba": - return dss._dss_gufunc(obs, fct, bias) + return dss._dss_mv_gufunc(obs, fct, bias) - return dss.dss(obs, fct, bias, backend=backend) + return dss.ds_score_mv(obs, fct, bias, backend=backend) diff --git a/scoringrules/core/dss/__init__.py b/scoringrules/core/dss/__init__.py index 23e5b26..fb2997e 100644 --- a/scoringrules/core/dss/__init__.py +++ b/scoringrules/core/dss/__init__.py @@ -1,8 +1,9 @@ try: - from ._gufuncs import _dss_gufunc + from ._gufuncs import _dss_mv_gufunc, _dss_uv_gufunc except ImportError: - _dss_gufunc = None + _dss_uv_gufunc = None + _dss_mv_gufunc = None -from ._score import ds_score as dss +from ._score import ds_score_uv, ds_score_mv -__all__ = ["dss", "_dss_gufunc"] +__all__ = ["ds_score_uv", "_dss_uv_gufunc", "ds_score_mv", "_dss_mv_gufunc"] diff --git a/scoringrules/core/dss/_gufuncs.py b/scoringrules/core/dss/_gufuncs.py index 34fbc39..ff10d8e 100644 --- a/scoringrules/core/dss/_gufuncs.py +++ b/scoringrules/core/dss/_gufuncs.py @@ -1,12 +1,23 @@ import numpy as np from numba import guvectorize -from scoringrules.core.utils import lazy_gufunc_wrapper_mv +from scoringrules.core.utils import lazy_gufunc_wrapper_uv, lazy_gufunc_wrapper_mv + + +@lazy_gufunc_wrapper_uv +@guvectorize("(),(n),()->()") +def _dss_uv_gufunc(obs: np.ndarray, fct: np.ndarray, bias: bool, out: np.ndarray): + ens_mean = np.mean(fct) + correction = 0 if bias else 1 + sig = np.std(fct, ddof=correction) + log_sig = 2 * np.log(sig) + bias_precision = ((obs - ens_mean) / sig) ** 2 + out[0] = bias_precision + log_sig @lazy_gufunc_wrapper_mv @guvectorize("(d),(m,d),()->()") -def _dss_gufunc(obs: np.ndarray, fct: np.ndarray, bias: bool, out: np.ndarray): +def _dss_mv_gufunc(obs: np.ndarray, fct: np.ndarray, bias: bool, out: np.ndarray): M = fct.shape[0] ens_mean = np.sum(fct, axis=0) / M obs_cent = obs - ens_mean diff --git a/scoringrules/core/dss/_score.py b/scoringrules/core/dss/_score.py index 3c9f0a5..77f8f23 100644 --- a/scoringrules/core/dss/_score.py +++ b/scoringrules/core/dss/_score.py @@ -6,7 +6,23 @@ from scoringrules.core.typing import Array, Backend -def ds_score( +def ds_score_uv( + obs: "Array", + fct: "Array", + bias: bool = False, + backend: "Backend" = None, +) -> "Array": + """Compute the Dawid Sebastiani Score for a univariate finite ensemble.""" + B = backends.active if backend is None else backends[backend] + ens_mean = B.mean(fct, axis=-1) + obs_cent = obs - ens_mean + sig = B.std(fct, axis=-1, bias=bias) + bias_precision = ((obs_cent - ens_mean) / sig) ** 2 + log_sig = 2 * B.log(sig) + return bias_precision + log_sig + + +def ds_score_mv( obs: "Array", # (... D) fct: "Array", # (... 
M D) bias: bool = False, @@ -23,7 +39,7 @@ def recursive_loop(current_index, depth): if depth == len(batch_dims): fct_i = fct[tuple(current_index)] # (M, D) obs_i = obs[tuple(current_index)] # (D) - score = ds_score_mat(obs_i, fct_i, bias, backend=backend) # () + score = ds_score_mv_mat(obs_i, fct_i, bias, backend=backend) # () out[tuple(current_index)] = score return @@ -34,7 +50,7 @@ def recursive_loop(current_index, depth): return out -def ds_score_mat( +def ds_score_mv_mat( obs: "Array", # (D) fct: "Array", # (M D) bias: bool = False, From 9a4f64e67d53f9879b4c9cf738545affa2b12708 Mon Sep 17 00:00:00 2001 From: sallen12 Date: Mon, 4 Aug 2025 19:02:10 +0200 Subject: [PATCH 09/16] add test for univariate Dawid Sebastiani score --- scoringrules/__init__.py | 5 +++-- scoringrules/_dss.py | 2 +- scoringrules/core/dss/_score.py | 3 +-- tests/test_dss.py | 36 +++++++++++++++++++++++++++------ 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/scoringrules/__init__.py b/scoringrules/__init__.py index b8760a0..0307a7c 100644 --- a/scoringrules/__init__.py +++ b/scoringrules/__init__.py @@ -51,7 +51,7 @@ twes_ensemble, vres_ensemble, ) -from scoringrules._dss import dss_ensemble +from scoringrules._dss import dssuv_ensemble, dssmv_ensemble from scoringrules._error_spread import error_spread_score from scoringrules._interval import interval_score, weighted_interval_score from scoringrules._logs import ( @@ -197,7 +197,8 @@ def wrapper(*args, **kwargs): "rps_score", "log_score", "rls_score", - "dss_ensemble", + "dssuv_ensemble", + "dssmv_ensemble", "error_spread_score", "es_ensemble", "owes_ensemble", diff --git a/scoringrules/_dss.py b/scoringrules/_dss.py index d92fa4c..10acb23 100644 --- a/scoringrules/_dss.py +++ b/scoringrules/_dss.py @@ -12,7 +12,7 @@ def dssuv_ensemble( obs: "Array", fct: "Array", /, - m_axis: int = -2, + m_axis: int = -1, *, bias: bool = False, backend: "Backend" = None, diff --git a/scoringrules/core/dss/_score.py b/scoringrules/core/dss/_score.py index 77f8f23..781a21b 100644 --- a/scoringrules/core/dss/_score.py +++ b/scoringrules/core/dss/_score.py @@ -15,9 +15,8 @@ def ds_score_uv( """Compute the Dawid Sebastiani Score for a univariate finite ensemble.""" B = backends.active if backend is None else backends[backend] ens_mean = B.mean(fct, axis=-1) - obs_cent = obs - ens_mean sig = B.std(fct, axis=-1, bias=bias) - bias_precision = ((obs_cent - ens_mean) / sig) ** 2 + bias_precision = ((obs - ens_mean) / sig) ** 2 log_sig = 2 * B.log(sig) return bias_precision + log_sig diff --git a/tests/test_dss.py b/tests/test_dss.py index 38af91d..41f69f6 100644 --- a/tests/test_dss.py +++ b/tests/test_dss.py @@ -10,16 +10,40 @@ @pytest.mark.parametrize("backend", BACKENDS) -def test_ds_score(backend): +def test_ds_score_uv(backend): + obs = np.random.randn(N) + mu = obs + np.random.randn(N) * 0.1 + sigma = abs(np.random.randn(N)) * 0.3 + fct = np.random.randn(N, ENSEMBLE_SIZE) * sigma[..., None] + mu[..., None] + + # m_axis keyword + res = sr.dssuv_ensemble( + obs, + np.random.randn(ENSEMBLE_SIZE, N), + m_axis=0, + backend=backend, + ) + res = np.asarray(res) + assert res.shape == (N,) + + # test correctness + obs, fct = 11.6, np.array([9.8, 8.7, 11.9, 12.1, 13.4]) + res = sr.dssuv_ensemble(obs, fct, bias=True, backend=backend) + expected = 1.115645 + assert np.isclose(res, expected) + + +@pytest.mark.parametrize("backend", BACKENDS) +def test_ds_score_mv(backend): with pytest.raises(ValueError): obs = np.random.randn(N, N_VARS) fct = np.random.randn(N, ENSEMBLE_SIZE, 
N_VARS - 1) - sr.dss_ensemble(obs, fct, backend=backend) + sr.dssmv_ensemble(obs, fct, backend=backend) # test shapes obs = np.random.randn(N, N_VARS) fct = np.expand_dims(obs, axis=-2) + np.random.randn(N, ENSEMBLE_SIZE, N_VARS) - res = sr.dss_ensemble(obs, fct, backend=backend) + res = sr.dssmv_ensemble(obs, fct, backend=backend) assert res.shape == (N,) # test correctness @@ -27,15 +51,15 @@ def test_ds_score(backend): [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]] ).T obs = np.array([0.2743836, 0.8146400]) - res = sr.dss_ensemble(obs, fct, backend=backend) + res = sr.dssmv_ensemble(obs, fct, backend=backend) expected = -3.161553 np.testing.assert_allclose(res, expected, atol=1e-6) # test correctness fct = np.random.randn(2, 3, 100, 4) obs = np.random.randn(2, 3, 4) - res = sr.dss_ensemble(obs, fct, backend=backend) + res = sr.dssmv_ensemble(obs, fct, backend=backend) for i in range(2): for j in range(3): - res_ij = sr.dss_ensemble(obs[i, j, :], fct[i, j, :, :], backend=backend) + res_ij = sr.dssmv_ensemble(obs[i, j, :], fct[i, j, :, :], backend=backend) np.testing.assert_allclose(res[i, j], res_ij, atol=1e-6) From 9fd90aa449fccd6018917d00e5234027fb69c909 Mon Sep 17 00:00:00 2001 From: sallen12 Date: Tue, 5 Aug 2025 10:36:50 +0200 Subject: [PATCH 10/16] fix bug in Dawid-Sebastiani score gufunc --- scoringrules/core/dss/_gufuncs.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scoringrules/core/dss/_gufuncs.py b/scoringrules/core/dss/_gufuncs.py index ff10d8e..d5d76b9 100644 --- a/scoringrules/core/dss/_gufuncs.py +++ b/scoringrules/core/dss/_gufuncs.py @@ -8,8 +8,11 @@ @guvectorize("(),(n),()->()") def _dss_uv_gufunc(obs: np.ndarray, fct: np.ndarray, bias: bool, out: np.ndarray): ens_mean = np.mean(fct) - correction = 0 if bias else 1 - sig = np.std(fct, ddof=correction) + fact = len(fct) - 1.0 + if bias: + fact += 1.0 + var = np.sum((fct - ens_mean) ** 2) / fact + sig = np.sqrt(var) log_sig = 2 * np.log(sig) bias_precision = ((obs - ens_mean) / sig) ** 2 out[0] = bias_precision + log_sig From b663feabd65e26f980b113d1eeaca631dd107b49 Mon Sep 17 00:00:00 2001 From: sallen12 Date: Tue, 5 Aug 2025 10:56:26 +0200 Subject: [PATCH 11/16] re-add jax backend to conftest --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 43a21a3..94c91d4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,7 @@ from scoringrules.backend import backends DATA_DIR = Path(__file__).parent / "data" -RUN_TESTS = ["numpy", "numba", "torch"] +RUN_TESTS = ["numpy", "numba", "jax", "torch"] BACKENDS = [b for b in backends.available_backends if b in RUN_TESTS] if os.getenv("SR_TEST_OUTPUT", "False").lower() in ("true", "1", "t"): From 0a64f0619a632a49353a52cd1debcad6ed90f01d Mon Sep 17 00:00:00 2001 From: sallen12 Date: Tue, 5 Aug 2025 10:59:31 +0200 Subject: [PATCH 12/16] remove bug in jax cov function --- scoringrules/backend/jax.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index ab66d26..427de7f 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -275,7 +275,7 @@ def inv(self, x: "Array") -> "Array": return jnp.linalg.inv(x) def cov(self, x: "Array", rowvar: bool = True, bias: bool = False) -> "Array": - return jnp.cov(x, rowavar=rowvar, bias=bias) + return jnp.cov(x, rowvar=rowvar, bias=bias) def det(self, x: "Array") -> "Array": return jnp.linalg.det(x) From 
a31536532813a9ab5ce7f35ba26b5a6bbeaf4921 Mon Sep 17 00:00:00 2001 From: sallen12 Date: Tue, 5 Aug 2025 11:26:27 +0200 Subject: [PATCH 13/16] add reshape functions to backends --- scoringrules/backend/base.py | 4 ++++ scoringrules/backend/jax.py | 3 +++ scoringrules/backend/numpy.py | 3 +++ scoringrules/backend/tensorflow.py | 3 +++ scoringrules/backend/torch.py | 3 +++ 5 files changed, 16 insertions(+) diff --git a/scoringrules/backend/base.py b/scoringrules/backend/base.py index 7d6a321..132dede 100644 --- a/scoringrules/backend/base.py +++ b/scoringrules/backend/base.py @@ -310,3 +310,7 @@ def cov(self, x: "Array", rowvar: bool, bias: bool) -> "Array": @abc.abstractmethod def det(self, x: "Array") -> "Array": """Return the determinant of a matrix.""" + + @abc.abstractmethod + def reshape(self, x: "Array", shape: int | tuple[int, ...]) -> "Array": + """Reshape an array to a new ``shape``.""" diff --git a/scoringrules/backend/jax.py b/scoringrules/backend/jax.py index 427de7f..4d52429 100644 --- a/scoringrules/backend/jax.py +++ b/scoringrules/backend/jax.py @@ -280,6 +280,9 @@ def cov(self, x: "Array", rowvar: bool = True, bias: bool = False) -> "Array": def det(self, x: "Array") -> "Array": return jnp.linalg.det(x) + def reshape(self, x: "Array", shape: int | tuple[int, ...]) -> "Array": + return jnp.reshape(x, shape) + if __name__ == "__main__": B = JaxBackend() diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index c59222b..b4298a0 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -276,6 +276,9 @@ def cov(self, x: "NDArray", rowvar: bool = True, bias: bool = False) -> "NDArray def det(self, x: "NDArray") -> "NDArray": return np.linalg.det(x) + def reshape(self, x: "NDArray", shape: int | tuple[int, ...]) -> "NDArray": + return np.reshape(x, shape=shape) + class NumbaBackend(NumpyBackend): """Numba backend.""" diff --git a/scoringrules/backend/tensorflow.py b/scoringrules/backend/tensorflow.py index 5321300..c017933 100644 --- a/scoringrules/backend/tensorflow.py +++ b/scoringrules/backend/tensorflow.py @@ -321,6 +321,9 @@ def cov(self, x: "Tensor", rowvar: bool = True, bias: bool = False) -> "Tensor": def det(self, x: "Tensor") -> "Tensor": return tf.linalg.det(x) + def reshape(self, x: "Tensor", shape: int | tuple[int, ...]) -> "Tensor": + return tf.reshape(x, shape) + if __name__ == "__main__": B = TensorflowBackend() diff --git a/scoringrules/backend/torch.py b/scoringrules/backend/torch.py index 679601e..ef10c09 100644 --- a/scoringrules/backend/torch.py +++ b/scoringrules/backend/torch.py @@ -300,3 +300,6 @@ def cov(self, x: "Tensor", rowvar: bool = True, bias: bool = False) -> "Tensor": def det(self, x: "Tensor") -> "Tensor": return torch.linalg.det(x) + + def reshape(self, x: "Tensor", shape: int | tuple[int, ...]) -> "Tensor": + return torch.reshape(x, shape) From 9ad9bc989f96b7350e60d2537864b8824fa6b1fc Mon Sep 17 00:00:00 2001 From: sallen12 Date: Tue, 5 Aug 2025 11:33:25 +0200 Subject: [PATCH 14/16] generalise multivariate Dawid-Sebastiani score to support jax backend --- scoringrules/backend/numpy.py | 2 +- scoringrules/core/dss/_score.py | 25 +++++++++++-------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/scoringrules/backend/numpy.py b/scoringrules/backend/numpy.py index b4298a0..577cd2d 100644 --- a/scoringrules/backend/numpy.py +++ b/scoringrules/backend/numpy.py @@ -277,7 +277,7 @@ def det(self, x: "NDArray") -> "NDArray": return np.linalg.det(x) def reshape(self, x: "NDArray", shape: 
int | tuple[int, ...]) -> "NDArray": - return np.reshape(x, shape=shape) + return np.reshape(x, shape) class NumbaBackend(NumpyBackend): diff --git a/scoringrules/core/dss/_score.py b/scoringrules/core/dss/_score.py index 781a21b..13996f9 100644 --- a/scoringrules/core/dss/_score.py +++ b/scoringrules/core/dss/_score.py @@ -30,23 +30,20 @@ def ds_score_mv( """Compute the Dawid Sebastiani Score for a multivariate finite ensemble.""" B = backends.active if backend is None else backends[backend] - batch_dims = fct.shape[:-2] - out = B.zeros(batch_dims) + batch_shape = fct.shape[:-2] + M, D = fct.shape[-2:] - # nested loop over all batch dimensions - def recursive_loop(current_index, depth): - if depth == len(batch_dims): - fct_i = fct[tuple(current_index)] # (M, D) - obs_i = obs[tuple(current_index)] # (D) - score = ds_score_mv_mat(obs_i, fct_i, bias, backend=backend) # () - out[tuple(current_index)] = score - return + fct_flat = B.reshape(fct, (-1, M, D)) # (... M D) + obs_flat = B.reshape(obs, (-1, D)) # (... D) - for i in range(batch_dims[depth]): - recursive_loop(current_index + [i], depth + 1) + # list to collect scores for each batch + scores = [ + ds_score_mv_mat(obs_i, fct_i, bias=bias, backend=backend) + for obs_i, fct_i in zip(obs_flat, fct_flat) + ] - recursive_loop([], 0) - return out + # reshape to original batch shape + return B.reshape(B.stack(scores), batch_shape) def ds_score_mv_mat( From c1c0cee798d98ae8e57eb8b3291d13f7bcc9966b Mon Sep 17 00:00:00 2001 From: sallen12 Date: Tue, 5 Aug 2025 11:38:34 +0200 Subject: [PATCH 15/16] adjust tolerance of owgksuv tests --- tests/test_kernels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_kernels.py b/tests/test_kernels.py index b02f511..6c42122 100644 --- a/tests/test_kernels.py +++ b/tests/test_kernels.py @@ -303,19 +303,19 @@ def test_owgksuv(backend): # no argument given resw = sr.owgksuv_ensemble(obs, fct, backend=backend) - np.testing.assert_allclose(res, resw, rtol=1e-2) + np.testing.assert_allclose(res, resw, atol=1e-6) # a and b resw = sr.owgksuv_ensemble( obs, fct, a=float("-inf"), b=float("inf"), backend=backend ) - np.testing.assert_allclose(res, resw, rtol=1e-2) + np.testing.assert_allclose(res, resw, atol=1e-6) # w_func as identity function resw = sr.owgksuv_ensemble( obs, fct, w_func=lambda x: x * 0.0 + 1.0, backend=backend ) - np.testing.assert_allclose(res, resw, rtol=1e-2) + np.testing.assert_allclose(res, resw, atol=1e-6) # test correctness fct = np.array( From c62cae157246e665e0f766fad4e6974bab6e86ce Mon Sep 17 00:00:00 2001 From: sallen12 Date: Mon, 1 Sep 2025 09:32:13 +0200 Subject: [PATCH 16/16] remove redundant code in multivariate dss --- scoringrules/_dss.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scoringrules/_dss.py b/scoringrules/_dss.py index 10acb23..391b38c 100644 --- a/scoringrules/_dss.py +++ b/scoringrules/_dss.py @@ -100,7 +100,6 @@ def dssmv_ensemble( score: Array The computed Dawid-Sebastiani Score. """ - backend = backend if backend is not None else backends._active obs, fct = multivariate_array_check(obs, fct, m_axis, v_axis, backend=backend) if backend == "numba":
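
Taken together, the series exposes two public entry points, `dssuv_ensemble` and `dssmv_ensemble`. As a quick orientation for reviewers, the following is a minimal usage sketch, assuming the patched package is installed and importable as `scoringrules`; shapes follow the docstrings (observations of shape (..., D) and ensembles of shape (..., M, D) for the multivariate score):

    import numpy as np
    import scoringrules as sr

    rng = np.random.default_rng(42)

    # univariate: 20 observations, 11-member ensembles along the last axis
    obs_uv = rng.standard_normal(20)
    fct_uv = obs_uv[:, None] + rng.standard_normal((20, 11))
    print(sr.dssuv_ensemble(obs_uv, fct_uv).shape)   # (20,)

    # multivariate: 20 observations of 3 variables, 11-member ensembles
    obs_mv = rng.standard_normal((20, 3))
    fct_mv = obs_mv[:, None, :] + rng.standard_normal((20, 11, 3))
    print(sr.dssmv_ensemble(obs_mv, fct_mv).shape)   # (20,)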
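
The reference values hard-coded in tests/test_dss.py (patches 04 and 09) follow directly from the formulas added to the docstrings in patches 06 and 08. The NumPy sketch below reproduces them by hand; it is an illustrative check only, independent of the code added in this series, and uses the same conventions as the tests (ensemble members along the first axis of `fct`, variables along the last):

    import numpy as np

    # univariate DSS with the biased variance estimator (bias=True in test_ds_score_uv)
    obs = 11.6
    fct = np.array([9.8, 8.7, 11.9, 12.1, 13.4])
    mu, sig = fct.mean(), fct.std()          # np.std defaults to ddof=0, i.e. the biased estimator
    dss_uv = ((obs - mu) / sig) ** 2 + 2 * np.log(sig)
    print(round(dss_uv, 6))                  # ~1.115645, the expected value in the test

    # multivariate DSS with the unbiased sample covariance (the default, test_ds_score_mv)
    fct = np.array(
        [[0.79546742, 0.4777960, 0.2164079], [0.02461368, 0.7584595, 0.3181810]]
    ).T                                      # shape (M, D) = (3, 2): three members, two variables
    obs = np.array([0.2743836, 0.8146400])
    cov = np.cov(fct, rowvar=False)          # (D, D) sample covariance with ddof=1
    err = obs - fct.mean(axis=0)             # error of the ensemble mean
    dss_mv = err @ np.linalg.inv(cov) @ err + np.log(np.linalg.det(cov))
    print(round(dss_mv, 6))                  # ~-3.161553, the expected value in the test

For larger ensembles and dimensions, `np.linalg.slogdet` would be the numerically safer choice, but the plain determinant keeps the sketch aligned with the expressions used in `_score.py` and `_gufuncs.py`.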