Source code for uncurl.run_se
# state estimation with poisson convex mixture model
from .state_estimation import poisson_estimate_state
from .nb_state_estimation import nb_estimate_state
from .zip_state_estimation import zip_estimate_state
from .nmf_wrapper import log_norm_nmf, norm_nmf
import numpy as np
from scipy import sparse
[docs]def run_state_estimation(data, clusters, dist='Poiss', reps=1, **kwargs):
"""
Runs state estimation for multiple initializations, returning the result with the highest log-likelihood. All the arguments are passed to the underlying state estimation functions (poisson_estimate_state, nb_estimate_state, zip_estimate_state).
Args:
data (array): genes x cells
clusters (int): number of mixture components
dist (str, optional): Distribution used in state estimation. Options: 'Poiss', 'NB', 'ZIP', 'LogNorm', 'Gaussian'. Default: 'Poiss'
reps (int, optional): number of times to run the state estimation, taking the result with the highest log-likelihood.
**kwargs: arguments to pass to the underlying state estimation function.
Returns:
M (array): genes x clusters - state means
W (array): clusters x cells - state mixing components for each cell
ll (float): final log-likelihood
"""
func = poisson_estimate_state
dist = dist.lower()
if dist=='poiss' or dist=='poisson':
pass
elif dist=='nb':
func = nb_estimate_state
elif dist=='zip':
func = zip_estimate_state
elif dist=='lognorm' or dist=='log-normal' or dist=='lognormal':
func = log_norm_nmf
elif dist=='gaussian' or dist=='norm' or dist=='normal':
func = norm_nmf
else:
print('dist should be one of Poiss, NB, ZIP, LogNorm, or Gaussian. Using Poiss.')
best_ll = np.inf
best_M = None
best_W = None
for i in range(reps):
results = func(data, clusters, **kwargs)
M = results[0]
W = results[1]
if dist=='NB':
ll = results[3]
else:
ll = results[2]
if ll < best_ll:
best_ll = ll
best_M = M
best_W = W
return best_M, best_W, best_ll