diff --git a/.github/workflows/pr_validation.yml b/.github/workflows/pr_validation.yml
index 4c14dfcb..4c55eebc 100644
--- a/.github/workflows/pr_validation.yml
+++ b/.github/workflows/pr_validation.yml
@@ -17,8 +17,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - os: ubuntu-latest
-            python: "3.8"
           - os: ubuntu-latest
             python: "3.9"
           - os: ubuntu-latest
diff --git a/README.md b/README.md
index 9eadcbdf..896f24b6 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,6 @@ For reference, the code is being tested in a github workflow (CI) with python
 - pandas 1.4.3
 - scikit-learn 1.1.1
 - scipy 1.8.1
-- statsmodels 0.13.2
 ```
 
 Please don't hesitate to open an issue in case you encounter any issue due to possible deprecations.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 282f7230..d74ce929 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -7,8 +7,6 @@
 
 # -- Path setup --------------------------------------------------------------
 
-import warnings
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -17,12 +15,9 @@
 from pathlib import Path
 
 import git
-from statsmodels.tools.sm_exceptions import DomainWarning
 
 import pydeseq2
 
-# Ignore DomainWarning raised by statsmodels when fitting a Gamma GLM with identity link.
-warnings.simplefilter("ignore", DomainWarning)
 
 # -- Project information -----------------------------------------------------
 
diff --git a/docs/source/usage/requirements.rst b/docs/source/usage/requirements.rst
index e24e72dd..e091b284 100644
--- a/docs/source/usage/requirements.rst
+++ b/docs/source/usage/requirements.rst
@@ -8,6 +8,5 @@ For reference, the code was tested with python 3.8 and the following package ver
     - pandas==1.4.3
     - scikit-learn==1.1.1
     - scipy==1.8.1
-    - statsmodels==0.13.2
 
 Please don't hesitate to open an issue in case you encounter any problems due to possible deprecations.
\ No newline at end of file
diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 665ef5b9..dbe790a7 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -343,7 +343,7 @@ def vst_fit(
         if use_design:
             # Check that the dispersion trend curve was fitted. If not, fit it.
             # This will call previous functions in a cascade.
-            if "trend_coeffs" not in self.uns:
+            if "disp_function" not in self.uns:
                 self.fit_dispersion_trend()
         else:
             # Reduce the design matrix to an intercept and reconstruct at the end
@@ -477,7 +477,7 @@ def fit_size_factors(
             warnings.warn(
                 "Every gene contains at least one zero, "
                 "cannot compute log geometric means. Switching to iterative mode.",
-                RuntimeWarning,
+                UserWarning,
                 stacklevel=2,
             )
             self._fit_iterate_size_factors()
@@ -601,12 +601,35 @@ def fit_dispersion_trend(self) -> None:
         # Initialize coefficients
         old_coeffs = pd.Series([0.1, 0.1])
         coeffs = pd.Series([1.0, 1.0])
-
-        while (np.log(np.abs(coeffs / old_coeffs)) ** 2).sum() >= 1e-6:
+        while (coeffs > 0).all() and (
+            np.log(np.abs(coeffs / old_coeffs)) ** 2
+        ).sum() >= 1e-6:
             old_coeffs = coeffs
-            coeffs, predictions = self.inference.dispersion_trend_gamma_glm(
+            coeffs, predictions, converged = self.inference.dispersion_trend_gamma_glm(
                 covariates, targets
             )
+
+            if not converged:
+                warnings.warn(
+                    "The dispersion trend curve fitting did not converge. "
+                    "Switching to a mean-based dispersion trend.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                self.uns["mean_disp"] = trim_mean(
+                    self.varm["genewise_dispersions"][
+                        self.varm["genewise_dispersions"] > 10 * self.min_disp
+                    ],
+                    proportiontocut=0.001,
+                )
+
+                self.uns["disp_function_type"] = "mean"
+                self.varm["fitted_dispersions"] = np.full(
+                    self.n_vars, self.uns["mean_disp"]
+                )
+
+                break
+
             # Filter out genes that are too far away from the curve before refitting
             pred_ratios = (
                 self[:, covariates.index].varm["genewise_dispersions"] / predictions
@@ -620,19 +643,25 @@ def fit_dispersion_trend(self) -> None:
                 covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
                 inplace=True,
             )
+        self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"])
+
+        self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
+        self.uns["disp_function_type"] = "parametric"
+        self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.disp_function(
+            self.varm["_normed_means"][self.varm["non_zero"]]
+        )
 
         end = time.time()
 
         if not self.quiet:
             print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)
 
-        self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"])
-
-        self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
-        self.varm["fitted_dispersions"][self.varm["non_zero"]] = dispersion_trend(
-            self.varm["_normed_means"][self.varm["non_zero"]],
-            self.uns["trend_coeffs"],
-        )
+    def disp_function(self, x):
+        """Return the dispersion trend function at x."""
+        if self.uns["disp_function_type"] == "parametric":
+            return dispersion_trend(x, self.uns["trend_coeffs"])
+        elif self.uns["disp_function_type"] == "mean":
+            return self.uns["mean_disp"]
 
     def fit_dispersion_prior(self) -> None:
         """Fit dispersion variance priors and standard deviation of log-residuals.
@@ -1001,11 +1030,14 @@ def _refit_without_outliers(
 
         # Compute trend dispersions.
         # Note: the trend curve is not refitted.
-        sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"]
+        sub_dds.uns["disp_function_type"] = self.uns["disp_function_type"]
+        if sub_dds.uns["disp_function_type"] == "parametric":
+            sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"]
+        elif sub_dds.uns["disp_function_type"] == "mean":
+            sub_dds.uns["mean_disp"] = self.uns["mean_disp"]
         sub_dds.varm["_normed_means"] = sub_dds.layers["normed_counts"].mean(0)
-        sub_dds.varm["fitted_dispersions"] = dispersion_trend(
-            sub_dds.varm["_normed_means"],
-            sub_dds.uns["trend_coeffs"],
+        sub_dds.varm["fitted_dispersions"] = sub_dds.disp_function(
+            sub_dds.varm["_normed_means"]
         )
 
         # Estimate MAP dispersions.
@@ -1089,9 +1121,18 @@ def objective(p):
                 & self.varm["non_zero"]
             ]
 
-            mean_disp = trimmed_mean(
-                self[:, use_for_mean_genes].varm["genewise_dispersions"], trim=0.001
+            if len(use_for_mean_genes) == 0:
+                print(
+                    "No genes have a dispersion above 10 * min_disp in "
+                    "_fit_iterate_size_factors."
+                )
+                break
+
+            mean_disp = trim_mean(
+                self[:, use_for_mean_genes].varm["genewise_dispersions"],
+                proportiontocut=0.001,
             )
+
             self.varm["fitted_dispersions"] = np.ones(self.n_vars) * mean_disp
             self.fit_dispersion_prior()
             self.fit_MAP_dispersions()
diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py
index 29ebb65c..923b3b55 100644
--- a/pydeseq2/default_inference.py
+++ b/pydeseq2/default_inference.py
@@ -1,22 +1,17 @@
-import warnings
 from typing import Literal
 from typing import Optional
 from typing import Tuple
 
 import numpy as np
 import pandas as pd
-import statsmodels.api as sm  # type: ignore
 from joblib import Parallel  # type: ignore
 from joblib import delayed
 from joblib import parallel_backend
-from statsmodels.tools.sm_exceptions import DomainWarning  # type: ignore
+from scipy.optimize import minimize  # type: ignore
 
 from pydeseq2 import inference
 from pydeseq2 import utils
 
-# Ignore DomainWarning raised by statsmodels when fitting a Gamma GLM with identity link.
-warnings.simplefilter("ignore", DomainWarning)
-
 
 class DefaultInference(inference.Inference):
     """Default DESeq2-related inference methods, using scipy/sklearn/numpy.
@@ -206,18 +201,32 @@ def wald_test(  # noqa: D102
 
     def dispersion_trend_gamma_glm(  # noqa: D102
         self, covariates: pd.Series, targets: pd.Series
-    ) -> Tuple[np.ndarray, np.ndarray]:
-        covariates_w_intercept = sm.add_constant(covariates)
-        targets_fit = targets.values
+    ) -> Tuple[np.ndarray, np.ndarray, bool]:
+        covariates_w_intercept = covariates.to_frame()
+        covariates_w_intercept.insert(0, "intercept", 1)
         covariates_fit = covariates_w_intercept.values
-        glm_gamma = sm.GLM(
-            targets_fit,
-            covariates_fit,
-            family=sm.families.Gamma(link=sm.families.links.identity()),
+        targets_fit = targets.values
+
+        def loss(coeffs):
+            mu = covariates_fit @ coeffs
+            return np.nanmean(targets_fit / mu + np.log(mu), axis=0)
+
+        def grad(coeffs):
+            mu = covariates_fit @ coeffs
+            return -np.nanmean(
+                ((targets_fit / mu - 1)[:, None] * covariates_fit) / mu[:, None], axis=0
+            )
+
+        res = minimize(
+            loss,
+            x0=np.array([1.0, 1.0]),
+            jac=grad,
+            method="L-BFGS-B",
+            bounds=[(0, np.inf)],
         )
-        res = glm_gamma.fit()
-        coeffs = res.params
-        return (coeffs, covariates_fit @ coeffs)
+
+        coeffs = res.x
+        return coeffs, covariates_fit @ coeffs, res.success
 
     def lfc_shrink_nbinom_glm(  # noqa: D102
         self,
diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py
index 9693c318..a86a9844 100644
--- a/pydeseq2/ds.py
+++ b/pydeseq2/ds.py
@@ -7,14 +7,14 @@
 # import anndata as ad
 import numpy as np
 import pandas as pd
-import statsmodels.api as sm  # type: ignore
 from scipy.optimize import root_scalar  # type: ignore
 from scipy.stats import f  # type: ignore
-from statsmodels.stats.multitest import multipletests  # type: ignore
+from scipy.stats import false_discovery_control  # type: ignore
 
 from pydeseq2.dds import DeseqDataSet
 from pydeseq2.default_inference import DefaultInference
 from pydeseq2.inference import Inference
+from pydeseq2.utils import lowess
 from pydeseq2.utils import make_MA_plot
 from pydeseq2.utils import n_or_more_replicates
 
@@ -526,18 +526,15 @@ def _independent_filtering(self) -> None:
             use = (self.base_mean >= cutoff) & (~self.p_values.isna())
             U2 = self.p_values[use]
             if not U2.empty:
-                result.loc[use, i] = multipletests(
-                    U2, alpha=self.alpha, method="fdr_bh"
-                )[1]
-
-        num_rej = (result < self.alpha).sum(0)
-        lowess = sm.nonparametric.lowess(num_rej, theta, frac=1 / 5)
+                result.loc[use, i] = false_discovery_control(U2, method="bh")
+        num_rej = (result < self.alpha).sum(0).values
+        lowess_res = lowess(num_rej, theta, frac=1 / 5)
 
         if num_rej.max() <= 10:
             j = 0
         else:
-            residual = num_rej[num_rej > 0] - lowess[num_rej > 0, 1]
-            thresh = lowess[:, 1].max() - np.sqrt(np.mean(residual**2))
+            residual = num_rej[num_rej > 0] - lowess_res[num_rej > 0, 1]
+            thresh = lowess_res[:, 1].max() - np.sqrt(np.mean(residual**2))
 
             if np.any(num_rej > thresh):
                 j = np.where(num_rej > thresh)[0][0]
@@ -557,8 +554,8 @@ def _p_value_adjustment(self) -> None:
             self.run_wald_test()
 
         self.padj = pd.Series(np.nan, index=self.dds.var_names)
-        self.padj.loc[~self.p_values.isna()] = multipletests(
-            self.p_values.dropna(), alpha=self.alpha, method="fdr_bh"
+        self.padj.loc[~self.p_values.isna()] = false_discovery_control(
+            self.p_values.dropna(), method="bh"
         )[1]
 
     def _cooks_filtering(self) -> None:
diff --git a/pydeseq2/inference.py b/pydeseq2/inference.py
index e4e50a66..801a86c4 100644
--- a/pydeseq2/inference.py
+++ b/pydeseq2/inference.py
@@ -285,7 +285,7 @@ def fit_moments_dispersions(
     @abstractmethod
     def dispersion_trend_gamma_glm(
         self, covariates: pd.Series, targets: pd.Series
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> Tuple[np.ndarray, np.ndarray, bool]:
         """Fit a gamma glm on gene dispersions.
 
         The intercept should be concatenated in this method
@@ -304,6 +304,8 @@ def dispersion_trend_gamma_glm(
             Coefficients of the regression.
         predictions : ndarray
             Predictions of the regression.
+        converged : bool
+            Whether the optimization converged.
         """
 
     @abstractmethod
diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py
index 0f04e0f5..3e67ddae 100644
--- a/pydeseq2/utils.py
+++ b/pydeseq2/utils.py
@@ -1,5 +1,6 @@
 import multiprocessing
 import warnings
+from math import ceil
 from math import floor
 from pathlib import Path
 from typing import List
@@ -49,7 +50,8 @@ def load_example_data(
 
     debug : bool
         If true, subsample 10 samples and 100 genes at random.
-        (Note that the "synthetic" dataset is already 10 x 100.) (default: ``False``).
+        (Note that the "synthetic" dataset is already 10 features 100.)
+        (default: ``False``).
 
     debug_seed : int
         Seed for the debug mode. (default: ``42``).
@@ -731,7 +733,7 @@ def trimmed_mean(x, trim: float = 0.1, **kwargs) -> Union[float, np.ndarray]:
 
     Parameters
     ----------
-    x : ndarray
+    features : ndarray
         Data whose mean to compute.
 
     trim : float
@@ -815,7 +817,7 @@ def trimmed_variance(
 
     Parameters
     ----------
-    x : ndarray
+    features : ndarray
         Data whose trimmed variance to compute.
 
     trim : float
@@ -1368,7 +1370,7 @@ def mean_absolute_deviation(x: np.ndarray) -> float:
 
     Parameters
     ----------
-    x : ndarray
+    features : ndarray
         1D array whose MAD to compute.
 
     Returns
@@ -1399,13 +1401,13 @@ def make_scatter(
         List of ndarrays to plot.
 
     legend_labels : list
-        List of strings that correspond to plotted y values for legend.
+        List of strings that correspond to plotted targets values for legend.
 
     x_val : ndarray
         1D array to plot (example: ``dds.varm['_normed_means']``).
 
     log : bool
-        Whether or not to log scale x and y axes (``default=True``).
+        Whether or not to log scale features and targets axes (``default=True``).
 
     save_path : str or None
         The path where to save the plot. If left None, the plot won't be saved
@@ -1475,7 +1477,7 @@ def make_MA_plot(
         P-value threshold to subset scatterplot colors on.
 
     log : bool
-        Whether or not to log scale x and y axes (``default=True``).
+        Whether or not to log scale features and targets axes (``default=True``).
 
     save_path : str or None
         The path where to save the plot. If left None, the plot won't be saved
@@ -1520,3 +1522,73 @@ def make_MA_plot(
 
     if save_path is not None:
         plt.savefig(save_path, bbox_inches="tight")
+
+
+# Authors: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
+#
+# License: BSD (3-clause)
+
+# Adapted from https://gist.github.com/agramfort/850437
+
+
+def lowess(
+    features: np.ndarray, targets: np.ndarray, frac: float = 2.0 / 3.0, iter: int = 3
+):
+    """Run lowess smoothing: Robust locally weighted regression.
+
+    The lowess function fits a nonparametric regression curve to a scatterplot.
+    The arrays features and targets contain an equal number of elements; each pair
+    (features[i], targets[i]) defines a data point in the scatterplot. The function
+    returns the estimated (smooth) values of targets.
+    The smoothing span is given by frac. A larger value for frac will result in a
+    smoother curve. The number of robustifying iterations is given by iter. The
+    function will run faster with a smaller number of iterations.
+
+    Parameters
+    ----------
+    features : ndarray
+        A 1D array of data points.
+    targets : ndarray
+        A 1D array of target values (with the same shape as features).
+    frac : float
+        The fraction of the data used when estimating each y-value. (default: ``2/3``).
+    iter : int
+        The number of robustifying iterations. (default: ``3``).
+
+    Returns
+    -------
+    ndarray
+        Estimated (smooth) values of targets.
+    """
+    n = len(features)
+    r = int(ceil(frac * n))
+    h = np.maximum(
+        np.array([np.sort(np.abs(features - features[i]))[r] for i in range(n)]), 1e-12
+    )
+    w = np.clip(
+        np.abs(np.nan_to_num((features[:, None] - features[None, :]) / h)), 0.0, 1.0
+    )
+    w = (1 - w**3) ** 3
+    yest = np.zeros(n)
+    delta = np.ones(n)
+    for _ in range(iter):
+        for i in range(n):
+            weights = delta * w[:, i]
+            b = np.array(
+                [np.sum(weights * targets), np.sum(weights * targets * features)]
+            )
+            A = np.array(
+                [
+                    [np.sum(weights), np.sum(weights * features)],
+                    [np.sum(weights * features), np.sum(weights * features * features)],
+                ]
+            )
+            beta = np.linalg.lstsq(A, b)[0]
+            yest[i] = beta[0] + beta[1] * features[i]
+
+        residuals = targets - yest
+        s = np.median(np.abs(residuals))
+        delta = np.clip(residuals / (6.0 * s), -1, 1)
+        delta = (1 - delta**2) ** 2
+
+    return yest
diff --git a/pyproject.toml b/pyproject.toml
index 267a8c30..e30e39a8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,9 +47,7 @@ convention = "numpy"
 [tool.pytest.ini_options]
 filterwarnings = [
     "error",
-    "ignore::DeprecationWarning:patsy[.*]", # due to patsy being a dependency of statsmodels and
-    # calling pandas.core.dtypes.common.is_categorical_dtype.
-    "ignore::DeprecationWarning:statsmodels", # ignore Pyarrow deprecation warnings
-    "ignore::statsmodels.tools.sm_exceptions.DomainWarning", # due to fitting the dipersion curve with the identity link
+    '''ignore:\s*Pyarrow will become a required dependency of pandas:DeprecationWarning''',
+    # ignore Pyarrow deprecation warnings
     "ignore::FutureWarning", # Ignore AnnData FutureWarning about implicit conversion
 ]
diff --git a/setup.py b/setup.py
index 5bb3269c..ee560ea2 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
 setup(
     name="pydeseq2",
     version=about["__version__"],
-    python_requires=">=3.8.0",
+    python_requires=">=3.9.0",
     license="MIT",
     description="A python implementation of DESeq2.",
     long_description=readme,
@@ -31,8 +31,7 @@
         "numpy>=1.23.0",
         "pandas>=1.4.0",
         "scikit-learn>=1.1.0",
-        "scipy>=1.8.0",
-        "statsmodels",
+        "scipy>=1.11.0",
         "matplotlib>=3.6.2",  # not sure why sphinx_gallery does not work without it
     ],  # external packages as dependencies
     extras_require={
diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py
index 5a2b8a71..2746e7b3 100644
--- a/tests/test_edge_cases.py
+++ b/tests/test_edge_cases.py
@@ -468,7 +468,9 @@ def test_zero_inflated():
     counts_df.iloc[idx, :] = 0
 
     dds = DeseqDataSet(counts=counts_df, metadata=metadata)
-    with pytest.warns(RuntimeWarning):
+    with pytest.warns(UserWarning):
+        # Will warn that each gene contains a zero
+        # and that the parametric curve is not a good fit
         dds.deseq2()