Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ jobs:
unit-tests:
strategy:
matrix:
os: [ubuntu-latest, macos-11]
python-version: ["3.7", "3.8", "3.9", "3.10"]
os: [ubuntu-latest]
python-version: ["3.10", "3.11", "3.12"]

runs-on: ${{ matrix.os }}

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
*~
*__pycache__/
*egg-info/
env/
95 changes: 72 additions & 23 deletions blnm/fit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Fitting routines.
"""
import numpy as np
import numpy.typing as npt
import typing

from scipy.stats import binom
from scipy import special as scisp
Expand All @@ -11,7 +13,7 @@
INTEGRAL_N_SAMPLES = 1000


def _data_log_likelihood(zeroth_order):
def _data_log_likelihood(zeroth_order: npt.NDArray) -> float:
"""Compute the data log likelihood.

Args:
Expand All @@ -22,13 +24,17 @@ def _data_log_likelihood(zeroth_order):
"""
return np.sum(np.log(np.sum(zeroth_order, axis=0)))

def _e_step(x_counts, n_counts,
coefs, means, variance, k_mixtures,
zeroth_order,
first_order,
second_order,
integral_n_samples,
seed):
def _e_step(x_counts: npt.NDArray,
n_counts: npt.NDArray,
coefs: npt.NDArray,
means: npt.NDArray,
variance: float,
k_mixtures: int,
zeroth_order: npt.NDArray,
first_order: npt.NDArray,
second_order: npt.NDArray,
integral_n_samples: int,
seed: typing.Any) -> None:
"""E step of EM algorithm.

Updates the conditional expectation for the zeroth,
Expand Down Expand Up @@ -109,16 +115,22 @@ def _m_step(zeroth_order,

return coefs, means, variance

# return (coefs, means, variance)

def blnm(x_counts,
n_counts,
k_mixtures,
seed=None,
tolerance=1e-6,
max_iter=1000,
disp = True,
integral_n_samples = INTEGRAL_N_SAMPLES):
def init_pars():
raise NotImplementedError


def blnm(x_counts: npt.NDArray,
n_counts: npt.NDArray,
k_mixtures: int,
coefs: npt.NDArray | None = None,
means: npt.NDArray | None = None,
variance: float | None = None,
seed: None | int | np.random._generator.Generator = None,
tolerance:float = 1e-6,
max_iter:int = 1000,
disp: bool = True,
integral_n_samples: int = INTEGRAL_N_SAMPLES) -> dict:
"""Fit mixture of BLN models.

Args:
Expand All @@ -127,6 +139,17 @@ def blnm(x_counts,
n_counts : ((N,) np.ndarray) alternative + reference allele
specific expression counts
k_mixtures : (int) > 0 number of mixtures to fit
coefs: ((k_mixtures,) np.ndarray)
The weights for each BLN probability mass funciton in the
mixture. Each coefficient must be greater than 0 and the
sum of coefficients be 1. If None, pick coefficients randomly
subject to our contraints.
means: ((k_mixtures,) np.ndarray)
The mean parameter for each BLN probability mass function.
This can be any real number. If None, pick coefficients randomly.
variance: (float)
A real number greater than zero representing the variance parameter
of each BLN probability mass function.
seed : (any input to numpy random Generator object)
tolerance : (float) criterion for convergence
max_iter : (int) maximum number of interations
Expand Down Expand Up @@ -170,13 +193,39 @@ def blnm(x_counts,
rng = np.random.default_rng(seed=seed)

# initialize parameters
coefs = rng.uniform(low=0.1, high=0.9, size=k_mixtures)
coefs = coefs / np.sum(coefs)

p = rng.uniform(low=0.01, high=0.99, size=k_mixtures)
means = np.log(p / (1-p))
if coefs is None:
coefs = rng.uniform(low=0.1, high=0.9, size=k_mixtures)
coefs = coefs / np.sum(coefs)

# verify coefs
if (coefs < 0).any():
raise ValueError("All coefficients must be positive.")
elif (np.isnan(coefs)).any():
raise ValueError("All coefficients must be positive.")
elif (s := np.sum(coefs)) < 0.9999 or s > 1.0001:
raise ValueError("The sum of coefficients must be 1.")
elif coefs.size != k_mixtures:
raise ValueError("The number of coefficients must be"
" equal to the number of k_mixtures.")

if means is None:
p = rng.uniform(low=0.01, high=0.99, size=k_mixtures)
means = np.log(p / (1-p))

if means.size != k_mixtures:
raise ValueError("The number of means must be"
" equal to the number of k_mixtures.")
elif (np.isnan(means)).any():
raise ValueError("The number of means must be"
" equal to the number of k_mixtures.")


if variance is None:
variance = rng.uniform(low=0.1, high=3)

if variance <= 0 or np.isnan(variance):
raise ValueError("Variance parameter must be a float greater than zero.")

variance = rng.uniform(low=0.1, high=3)

# preallocate memory for arrays constructed in the E step
# each array represents
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ author = Robert Vogel
[options]
packages =
blnm
python_requires = >= 3.7
python_requires = >= 3.10
install_requires =
numpy >= 1.14.0
scipy >= 1.0.0
99 changes: 99 additions & 0 deletions test/test_fit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from unittest import TestCase, main

import numpy as np
from blnm import fit



class TestInitPars(TestCase):
def test_not_implemented(self):
with self.assertRaises(NotImplementedError):
fit.init_pars()


class TestFitParCheck(TestCase):
def setUp(self):
rng = np.random.default_rng()

self.n = 300
self.k_mixtures = 4
self.coefs = np.array([0.1,0.4,0.3,0.2])
self.means = np.array([-2,-1,1,2], dtype=float)
self.variance = 1.1
self.xcount = rng.choice(250, size=self.n, replace=True)
self.ncount = np.full((self.n,), 250)

def test_correct_input(self):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)

def test_check_coefs_negative_val(self):
self.coefs[2] = -self.coefs[2]

with self.assertRaises(ValueError):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)

def test_check_coefs_wrong_number(self):
self.coefs = self.coefs[0:2]

with self.assertRaises(ValueError):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)


def test_check_coefs_wrong_number(self):
self.coefs[2] = self.coefs[2] * 1.1

with self.assertRaises(ValueError):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)

def test_check_coefs_nan(self):
self.coefs[2] = np.nan

with self.assertRaises(ValueError):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)

def test_check_means_wrong_number(self):
self.means = self.means[0:2]

with self.assertRaises(ValueError):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)


def test_check_means_wrong_number(self):
self.means[2] = np.nan

with self.assertRaises(ValueError):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)

def test_check_variance(self):
self.variance = -1.2

with self.assertRaises(ValueError):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)

def test_check_nan(self):
self.variance = np.nan

with self.assertRaises(ValueError):
fit.blnm(self.xcount, self.ncount, self.k_mixtures,
coefs=self.coefs, means=self.means,
variance=self.variance)


if __name__ == "__main__":
main()