From c668ad405885451691f223f252116f0817f46221 Mon Sep 17 00:00:00 2001
From: Malte Benedikt Kuehl <malte.kuehl@clin.au.dk>
Date: Thu, 22 Jan 2026 12:37:02 +0100
Subject: [PATCH 1/2] Fix pandas 3 data type and copy/write bugs

---
 pydeseq2/dds.py           | 16 +++++++-------
 pydeseq2/ds.py            | 44 +++++++++++++++++++++------------------
 pydeseq2/grid_search.py   |  4 ++--
 pydeseq2/preprocessing.py |  2 +-
 pydeseq2/utils.py         | 10 ++++-----
 pyproject.toml            |  2 +-
 6 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 58d1f1c1..aee7ac44 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -657,7 +657,7 @@ def fit_size_factors(
             # Calculate logcounts for x > 0 and take the mean for each gene
             log_counts = np.zeros_like(self.X, dtype=float)
             np.log(self.X, out=log_counts, where=self.X != 0)
-            logmeans = log_counts.mean(0)
+            logmeans = log_counts.mean(axis=0)
 
             # Determine which genes are usable (finite logmeans)
             self.filtered_genes = (~np.isinf(logmeans)) & (logmeans > 0)
@@ -705,7 +705,7 @@ def sizeFactor(x):
             raise ValueError("Counts matrix 'X' is None, cannot fit size factors.")
 
         end = time.time()
-        self.var["_normed_means"] = self.layers["normed_counts"].mean(0)
+        self.var["_normed_means"] = self.layers["normed_counts"].mean(axis=0)
 
         if not self.quiet:
             print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)
@@ -793,7 +793,9 @@ def fit_genewise_dispersions(self, vst=False) -> None:
             dispersions_, self.min_disp, self.max_disp
         )
 
-        self.var["_genewise_converged"] = np.full(self.n_vars, np.nan)
+        self.var["_genewise_converged"] = pd.array(
+            [pd.NA] * self.n_vars, dtype="boolean"
+        )
         self.var.loc[self.var["non_zero"], "_genewise_converged"] = l_bfgs_b_converged_
 
     def fit_dispersion_trend(self, vst: bool = False) -> None:
@@ -919,7 +921,7 @@ def fit_MAP_dispersions(self) -> None:
             dispersions_, self.min_disp, self.max_disp
         )
 
-        self.var["_MAP_converged"] = np.full(self.n_vars, np.nan)
+        self.var["_MAP_converged"] = pd.array([pd.NA] * self.n_vars, dtype="boolean")
         self.var.loc[self.var["non_zero"], "_MAP_converged"] = l_bfgs_b_converged_
 
         # Filter outlier genes for which we won't apply shrinkage
@@ -980,7 +982,7 @@ def fit_LFC(self) -> None:
         self.obsm["_mu_LFC"] = mu_
         self.obsm["_hat_diagonals"] = hat_diagonals_
 
-        self.var["_LFC_converged"] = np.full(self.n_vars, np.nan)
+        self.var["_LFC_converged"] = pd.array([pd.NA] * self.n_vars, dtype="boolean")
         self.var.loc[self.var["non_zero"], "_LFC_converged"] = converged_
 
     def calculate_cooks(self) -> None:
@@ -1098,7 +1100,7 @@ def cooks_outlier(self):
 
         cooks_outlier[cooks_outlier] = (
             self.X[:, cooks_outlier] > self.X[:, cooks_outlier][pos, np.arange(len(pos))]
-        ).sum(0) < 3
+        ).sum(axis=0) < 3
 
         if self.low_memory:
             del self.layers["cooks"]
@@ -1423,7 +1425,7 @@ def _refit_without_outliers(
             sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"]
         elif sub_dds.uns["disp_function_type"] == "mean":
             sub_dds.uns["mean_disp"] = self.uns["mean_disp"]
-        sub_dds.var["_normed_means"] = sub_dds.layers["normed_counts"].mean(0)
+        sub_dds.var["_normed_means"] = sub_dds.layers["normed_counts"].mean(axis=0)
         # Reshape in case there's a single gene to refit
         sub_dds.var["fitted_dispersions"] = sub_dds.disp_function(
             sub_dds.var["_normed_means"]
diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py
index 14491567..6e22fde7 100644
--- a/pydeseq2/ds.py
+++ b/pydeseq2/ds.py
@@ -411,29 +411,33 @@ def lfc_shrink(self, coeff: str, adapt: bool = True) -> None:
         if not self.quiet:
             print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)
 
-        self.LFC.iloc[:, coeff_idx].update(
-            pd.Series(
-                np.array(lfcs)[:, coeff_idx],
-                index=self.dds.non_zero_genes,
-            )
+        new_lfc_values = np.array(lfcs)[:, coeff_idx]
+        new_se_values = np.array(
+            [
+                np.sqrt(np.abs(inv_hess[coeff_idx, coeff_idx]))
+                for inv_hess in inv_hessians
+            ]
         )
-
-        self.SE.update(
-            pd.Series(
-                np.array(
-                    [
-                        np.sqrt(np.abs(inv_hess[coeff_idx, coeff_idx]))
-                        for inv_hess in inv_hessians
-                    ]
-                ),
-                index=self.dds.non_zero_genes,
+        nan_mask = ~np.isfinite(new_lfc_values) | ~np.isfinite(new_se_values)
+
+        if nan_mask.any():
+            warnings.warn(
+                f"{nan_mask.sum()} gene(s) had NaN/infinite values during LFC shrinkage,"
+                " their LFCs and SEs were not updated.",
+                UserWarning,
+                stacklevel=2,
             )
-        )
 
-        self._LFC_shrink_converged = pd.Series(np.nan, index=self.dds.var_names)
-        self._LFC_shrink_converged.update(
-            pd.Series(l_bfgs_b_converged_, index=self.dds.non_zero_genes)
+        # Only update genes with valid (non-NaN) shrinkage results
+        valid_genes = self.dds.non_zero_genes[~nan_mask]
+        self.LFC.loc[valid_genes, coeff] = new_lfc_values[~nan_mask]
+        self.SE.loc[valid_genes] = new_se_values[~nan_mask]
+
+        self._LFC_shrink_converged = pd.Series(
+            pd.array([pd.NA] * len(self.dds.var_names), dtype="boolean"),
+            index=self.dds.var_names,
         )
+        self._LFC_shrink_converged.loc[self.dds.non_zero_genes] = l_bfgs_b_converged_
 
         # Set a flag to indicate that LFCs were shrunk
         self.shrunk_LFCs = True
@@ -511,7 +515,7 @@ def _independent_filtering(self) -> None:
             U2 = self.p_values[use]
             if not U2.empty:
                 result.loc[use, i] = false_discovery_control(U2, method="bh")
-        num_rej = (result < self.alpha).sum(0).to_numpy().astype(int)
+        num_rej = (result < self.alpha).sum(axis=0).to_numpy().astype(int)
         lowess_res = lowess(theta, num_rej, frac=1 / 5)
 
         if num_rej.max() <= 10:
diff --git a/pydeseq2/grid_search.py b/pydeseq2/grid_search.py
index a172ce96..e46290fd 100644
--- a/pydeseq2/grid_search.py
+++ b/pydeseq2/grid_search.py
@@ -42,13 +42,13 @@ def vec_nb_nll(
             -logbinom
             + (counts[:, None] + alpha_neg1) * np.log(mu[:, None] + alpha_neg1)
             - (counts * np.log(mu))[:, None]
-        ).sum(0)
+        ).sum(axis=0)
     else:
         return n * alpha_neg1 * np.log(alpha) + (
             -logbinom
             + (counts[:, None] + alpha_neg1) * np.log(mu + alpha_neg1)
             - (counts[:, None] * np.log(mu))
-        ).sum(0)
+        ).sum(axis=0)
 
 
 def grid_fit_alpha(
diff --git a/pydeseq2/preprocessing.py b/pydeseq2/preprocessing.py
index 0c0a5a34..c20a283f 100644
--- a/pydeseq2/preprocessing.py
+++ b/pydeseq2/preprocessing.py
@@ -49,7 +49,7 @@ def deseq2_norm_fit(counts: pd.DataFrame | np.ndarray) -> tuple[np.ndarray, np.n
     # Compute gene-wise mean log counts
     with np.errstate(divide="ignore"):  # ignore division by zero warnings
         log_counts = np.log(counts)
-    logmeans = log_counts.mean(0)
+    logmeans = log_counts.mean(axis=0)
     # Filter out genes with -∞ log means
     filtered_genes = ~np.isinf(logmeans)
 
diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py
index 1e3bb23f..5fcb50e6 100644
--- a/pydeseq2/utils.py
+++ b/pydeseq2/utils.py
@@ -222,7 +222,7 @@ def nb_nll(
             - logbinom
             + (counts + alpha_neg1) * np.log(mu + alpha_neg1)
             - (counts * np.log(mu))
-        ).sum(0)
+        ).sum(axis=0)
     else:
         return (
             n * alpha_neg1 * np.log(alpha)
@@ -849,7 +849,7 @@ def fit_rough_dispersions(
     y_hat = np.maximum(y_hat, 1)
     alpha_rde = (
         ((normed_counts - y_hat) ** 2 - y_hat) / ((num_samples - num_vars) * y_hat**2)
-    ).sum(0)
+    ).sum(axis=0)
     return np.maximum(alpha_rde, 0)
 
 
@@ -877,7 +877,7 @@ def fit_moments_dispersions(
     # Exclude genes with all zeroes
     normed_counts = normed_counts[:, ~(normed_counts == 0).all(axis=0)]
     # mean inverse size factor
-    s_mean_inv = (1 / size_factors).mean()
+    s_mean_inv = (1 / size_factors).mean(axis=0)
     mu = normed_counts.mean(0)
     sigma = normed_counts.var(0, ddof=1)
     # ddof=1 is to use an unbiased estimator, as in R
@@ -951,7 +951,7 @@ def robust_method_of_moments_disp(
             np.ndarray, trimmed_variance(normed_counts)
         )  # Since normed_counts is always 2D, trimmed_variance returns ndarray
 
-    m = normed_counts.mean(0)
+    m = normed_counts.mean(axis=0)
     alpha = (v - m) / m**2
     # cannot use the typical min_disp = 1e-8 here or else all counts in the same
     # group as the outlier count will get an extreme Cook's distance
@@ -1202,7 +1202,7 @@ def nbinomFn(
 
     nll = (
         counts * xbeta - (counts + size) * np.logaddexp(xbeta + offset, np.log(size))
-    ).sum(0)
+    ).sum(axis=0)
 
     return prior - nll
 
diff --git a/pyproject.toml b/pyproject.toml
index 3828a74c..2cfe8ac7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ requires = [ "hatchling" ]
 
 [project]
 name = "pydeseq2"
-version = "0.5.3"
+version = "0.5.4"
 description = "A python implementation of DESeq2."
 readme = "README.md"
 license = { file = "LICENSE" }

From fe4bd0f5b7d09f6826edca50c6960f437cbbc2cd Mon Sep 17 00:00:00 2001
From: Malte Benedikt Kuehl <malte.kuehl@clin.au.dk>
Date: Thu, 22 Jan 2026 12:49:42 +0100
Subject: [PATCH 2/2] Fix old DataFrame intersphinx reference

---
 docs/source/conf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index a9fa9905..12ce74ee 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -230,6 +230,7 @@
 nitpick_ignore = [
     ("py:class", "pd.Series"),
     ("py:class", "pd.DataFrame"),
+    ("py:class", "pandas.core.frame.DataFrame"),
     ("py:class", "ndarray"),
     ("py:class", "numpy._typing._generic_alias.ScalarType"),
     ("py:class", "pydantic.main.BaseModel"),