From 7ab16b149e1e00123dd00d09e0b1bb345434cb2e Mon Sep 17 00:00:00 2001
From: Robert Vogel <12845765+robert-vogel@users.noreply.github.com>
Date: Fri, 7 Mar 2025 12:47:03 -0500
Subject: [PATCH 1/6] Added type hints to fit function and options for user
 specification of default parameters.  Model parameters are checked to ensure
 they meet the requirements of their respective definitions.

---
 .gitignore       |  1 +
 blnm/fit.py      | 75 ++++++++++++++++++++++++++++--------
 test/test_fit.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 159 insertions(+), 16 deletions(-)
 create mode 100644 test/test_fit.py

diff --git a/.gitignore b/.gitignore
index 651f63c..236a0eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
 *~
 *__pycache__/
 *egg-info/
+env/
diff --git a/blnm/fit.py b/blnm/fit.py
index a252004..2919bf9 100644
--- a/blnm/fit.py
+++ b/blnm/fit.py
@@ -109,16 +109,22 @@ def _m_step(zeroth_order,
 
     return coefs, means, variance
 
-# return (coefs, means, variance)
-
-def blnm(x_counts, 
-         n_counts,
-         k_mixtures, 
-         seed=None,
-         tolerance=1e-6, 
-         max_iter=1000,
-         disp = True,
-         integral_n_samples = INTEGRAL_N_SAMPLES):
+
+def init_pars():  # stub: takes no arguments and always raises
+    raise NotImplementedError
+
+
+def blnm(x_counts: np.ndarray, 
+         n_counts: np.ndarray,
+         k_mixtures: int, 
+         coefs: np.ndarray | None = None,
+         means: np.ndarray | None = None,
+         variance: float | None = None,
+         seed: int | np.random._generator.Generator = None,
+         tolerance:float = 1e-6, 
+         max_iter:int = 1000,
+         disp: bool = True,
+         integral_n_samples: int = INTEGRAL_N_SAMPLES) -> dict:
     """Fit mixture of BLN models.
 
     Args:
@@ -127,6 +133,17 @@ def blnm(x_counts,
         n_counts : ((N,) np.ndarray) alternative + reference allele 
             specific expression counts
         k_mixtures : (int) > 0 number of mixtures to fit
+        coefs: ((k_mixtures,) np.ndarray)
+            The weights for each BLN probability mass function in the
+            mixture.  Each coefficient must be greater than 0 and the
+            coefficients must sum to 1.  If None, pick coefficients randomly
+            subject to these constraints.
+        means: ((k_mixtures,) np.ndarray)
+            The mean parameter for each BLN probability mass function.
+            This can be any real number.  If None, pick means randomly.
+        variance: (float)
+            A real number greater than zero representing the variance parameter
+            of each BLN probability mass function.  If None, pick it randomly.
         seed : (any input to numpy random Generator object)
         tolerance : (float) criterion for convergence
         max_iter : (int) maximum number of interations
@@ -170,13 +187,39 @@ def blnm(x_counts,
     rng = np.random.default_rng(seed=seed)
 
     # initialize parameters
-    coefs = rng.uniform(low=0.1, high=0.9, size=k_mixtures)
-    coefs = coefs / np.sum(coefs)
-
-    p = rng.uniform(low=0.01, high=0.99, size=k_mixtures)
-    means = np.log(p / (1-p))
+    if coefs is None:
+        coefs = rng.uniform(low=0.1, high=0.9, size=k_mixtures)
+        coefs = coefs / np.sum(coefs)
+
+    # Verify coefs; check length first so a wrong size is reported as such.
+    if coefs.size != k_mixtures:
+        raise ValueError("The number of coefficients must be"
+                        " equal to the number of k_mixtures.")
+    elif (coefs < 0).any():
+        raise ValueError("All coefficients must be positive.")
+    elif (np.isnan(coefs)).any():
+        raise ValueError("Coefficients must not contain NaN.")
+    elif (s := np.sum(coefs)) < 0.9999 or s > 1.0001:
+        raise ValueError("The sum of coefficients must be 1.")
+
+    if means is None:
+        p = rng.uniform(low=0.01, high=0.99, size=k_mixtures)
+        means = np.log(p / (1-p))
+
+    # Verify means: any real value is allowed, but not NaN.
+    if means.size != k_mixtures:
+        raise ValueError("The number of means must be"
+                        " equal to the number of k_mixtures.")
+    elif (np.isnan(means)).any():
+        raise ValueError("Means must not contain NaN.")
+
+    if variance is None:
+        variance = rng.uniform(low=0.1, high=3)
+
+    # NaN compares False against 0, so it needs its own test.
+    if variance <= 0 or np.isnan(variance):
+        raise ValueError("Variance parameter must be a float greater than zero.")
 
-    variance = rng.uniform(low=0.1, high=3)
 
     # preallocate memory for arrays constructed in the E step
     # each array represents
diff --git a/test/test_fit.py b/test/test_fit.py
new file mode 100644
index 0000000..7a44624
--- /dev/null
+++ b/test/test_fit.py
@@ -0,0 +1,99 @@
+from unittest import TestCase, main
+
+import numpy as np
+from blnm import fit
+
+
+
+class TestInitPars(TestCase):
+    def test_not_implemented(self):
+        # Callable form of assertRaises; init_pars is called with no arguments.
+        self.assertRaises(NotImplementedError, fit.init_pars)
+
+
+class TestFitParCheck(TestCase):
+    """Check fit.blnm's validation of user-supplied coefs, means, variance."""
+    def setUp(self):
+        """Build valid inputs; each failing test corrupts one of them."""
+        rng = np.random.default_rng()
+
+        self.n = 300
+        self.k_mixtures = 4
+        self.coefs = np.array([0.1,0.4,0.3,0.2])
+        self.means = np.array([-2,-1,1,2], dtype=float)
+        self.variance = 1.1
+        self.xcount = rng.choice(250, size=self.n, replace=True)
+        self.ncount = np.full((self.n,), 250)
+
+    def test_correct_input(self):
+        fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                 coefs=self.coefs, means=self.means,
+                 variance=self.variance)
+
+    def test_check_coefs_negative_val(self):
+        self.coefs[2] = -self.coefs[2]
+
+        with self.assertRaises(ValueError):
+            fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                     coefs=self.coefs, means=self.means,
+                     variance=self.variance)
+
+    def test_check_coefs_wrong_number(self):
+        self.coefs = np.array([0.5, 0.5])  # valid sum, wrong length
+
+        with self.assertRaises(ValueError):
+            fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                     coefs=self.coefs, means=self.means,
+                     variance=self.variance)
+
+    def test_check_coefs_sum_not_one(self):
+        self.coefs[2] = self.coefs[2] * 1.1
+
+        with self.assertRaises(ValueError):
+            fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                     coefs=self.coefs, means=self.means,
+                     variance=self.variance)
+
+    def test_check_coefs_nan(self):
+        self.coefs[2] = np.nan
+
+        with self.assertRaises(ValueError):
+            fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                     coefs=self.coefs, means=self.means,
+                     variance=self.variance)
+
+    def test_check_means_wrong_number(self):
+        self.means = self.means[0:2]
+
+        with self.assertRaises(ValueError):
+            fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                     coefs=self.coefs, means=self.means,
+                     variance=self.variance)
+
+    def test_check_means_nan(self):
+        self.means[2] = np.nan
+
+        with self.assertRaises(ValueError):
+            fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                     coefs=self.coefs, means=self.means,
+                     variance=self.variance)
+
+    def test_check_variance(self):
+        self.variance = -1.2
+
+        with self.assertRaises(ValueError):
+            fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                     coefs=self.coefs, means=self.means,
+                     variance=self.variance)
+
+    def test_check_variance_nan(self):
+        self.variance = np.nan
+
+        with self.assertRaises(ValueError):
+            fit.blnm(self.xcount, self.ncount, self.k_mixtures,
+                     coefs=self.coefs, means=self.means,
+                     variance=self.variance)
+
+
+if __name__ == "__main__":
+    main()

From 050aec092f65327e8f5e33092d528cd8164592c6 Mon Sep 17 00:00:00 2001
From: Robert Vogel <12845765+robert-vogel@users.noreply.github.com>
Date: Fri, 4 Apr 2025 20:28:55 -0400
Subject: [PATCH 2/6] Update unittest.yml

I think my unit tests are failing because tests on Python 3.7 are not supported.  I changed the unit tests to run on 3.9 and above.
---
 .github/workflows/unittest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 2aa138e..cee11ef 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -24,7 +24,7 @@ jobs:
     strategy:
       matrix: 
         os: [ubuntu-latest, macos-11]
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     runs-on: ${{ matrix.os }}
 

From 115bf4783c07c808cf5525537688b2d9e4f5eecb Mon Sep 17 00:00:00 2001
From: Robert Vogel <12845765+robert-vogel@users.noreply.github.com>
Date: Fri, 4 Apr 2025 20:43:42 -0400
Subject: [PATCH 3/6] updated type hints

---
 blnm/fit.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/blnm/fit.py b/blnm/fit.py
index 2919bf9..199e14c 100644
--- a/blnm/fit.py
+++ b/blnm/fit.py
@@ -1,6 +1,8 @@
 """Fitting routines.
 """
 import numpy as np
+import numpy.typing as npt
+import typing
 
 from scipy.stats import binom
 from scipy import special as scisp
@@ -11,7 +13,7 @@
 INTEGRAL_N_SAMPLES = 1000
 
 
-def _data_log_likelihood(zeroth_order):
+def _data_log_likelihood(zeroth_order: npt.NDArray) -> float:
     """Compute the data log likelihood.
 
     Args:
@@ -22,13 +24,17 @@ def _data_log_likelihood(zeroth_order):
     """
     return np.sum(np.log(np.sum(zeroth_order, axis=0)))
 
-def _e_step(x_counts, n_counts, 
-            coefs, means, variance, k_mixtures,
-            zeroth_order,
-            first_order,
-            second_order,
-            integral_n_samples,
-            seed):
+def _e_step(x_counts: npt.NDArray,
+            n_counts: npt.NDArray, 
+            coefs: npt.NDArray,
+            means: npt.NDArray,
+            variance: float,
+            k_mixtures: int,
+            zeroth_order: npt.NDArray,
+            first_order: npt.NDArray,
+            second_order: npt.NDArray,
+            integral_n_samples: int,
+            seed: typing.Any) -> None:
     """E step of EM algorithm.
 
     Updates the conditional expectation for the zeroth, 
@@ -114,13 +120,13 @@ def init_pars():
     raise NotImplementedError
 
 
-def blnm(x_counts: np.ndarray, 
-         n_counts: np.ndarray,
+def blnm(x_counts: npt.NDArray, 
+         n_counts: npt.NDArray,
          k_mixtures: int, 
-         coefs: np.ndarray | None = None,
-         means: np.ndarray | None = None,
+         coefs: npt.NDArray | None = None,
+         means: npt.NDArray | None = None,
          variance: float | None = None,
-         seed: int | np.random._generator.Generator = None,
+         seed: None | int | np.random._generator.Generator = None,
          tolerance:float = 1e-6, 
          max_iter:int = 1000,
          disp: bool = True,

From 595bcf9977866a6fe1822c6e0c623652c69dbb78 Mon Sep 17 00:00:00 2001
From: Robert Vogel <12845765+robert-vogel@users.noreply.github.com>
Date: Fri, 4 Apr 2025 20:45:29 -0400
Subject: [PATCH 4/6] updated github action python versions.

---
 .github/workflows/unittest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index cee11ef..210bd56 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -24,7 +24,7 @@ jobs:
     strategy:
       matrix: 
         os: [ubuntu-latest, macos-11]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.11", "3.12"]
 
     runs-on: ${{ matrix.os }}
 

From 2fa3e65b4586296f397670ccb74ea829919bd761 Mon Sep 17 00:00:00 2001
From: Robert Vogel <12845765+robert-vogel@users.noreply.github.com>
Date: Fri, 4 Apr 2025 20:47:50 -0400
Subject: [PATCH 5/6] added python 3.10

---
 .github/workflows/unittest.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
index 210bd56..d5c8472 100644
--- a/.github/workflows/unittest.yml
+++ b/.github/workflows/unittest.yml
@@ -23,8 +23,8 @@ jobs:
   unit-tests:
     strategy:
       matrix: 
-        os: [ubuntu-latest, macos-11]
-        python-version: ["3.11", "3.12"]
+        os: [ubuntu-latest]
+        python-version: ["3.10", "3.11", "3.12"]
 
     runs-on: ${{ matrix.os }}
 

From e9ae10cd3136885a1fc7c7c6e16f3e5ddd3db24c Mon Sep 17 00:00:00 2001
From: Robert Vogel <12845765+robert-vogel@users.noreply.github.com>
Date: Fri, 4 Apr 2025 20:48:43 -0400
Subject: [PATCH 6/6] updated python version requirement

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index a8db752..13f5b48 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,7 +10,7 @@ author = Robert Vogel
 [options]
 packages = 
     blnm
-python_requires = >= 3.7
+python_requires = >= 3.10
 install_requires = 
     numpy >= 1.14.0
     scipy >= 1.0.0