# Source code for uncurl.run_se

# state estimation with poisson convex mixture model

from .state_estimation import poisson_estimate_state
from .nb_state_estimation import nb_estimate_state
from .zip_state_estimation import zip_estimate_state
from .nmf_wrapper import log_norm_nmf, norm_nmf

import numpy as np
from scipy import sparse

def run_state_estimation(data, clusters, dist='Poiss', reps=1, **kwargs):
    """
    Runs state estimation for multiple initializations and returns the
    best run.

    All the arguments are passed to the underlying state estimation
    function (poisson_estimate_state, nb_estimate_state,
    zip_estimate_state, log_norm_nmf or norm_nmf).

    Args:
        data (array): genes x cells
        clusters (int): number of mixture components
        dist (str, optional): Distribution used in state estimation.
            Case-insensitive. Options: 'Poiss'/'Poisson', 'NB', 'ZIP',
            'LogNorm'/'Log-Normal'/'LogNormal', 'Gaussian'/'Norm'/'Normal'.
            Any other value prints a message and falls back to Poisson.
            Default: 'Poiss'
        reps (int, optional): number of times to run the state estimation.
            Default: 1
        **kwargs: arguments to pass to the underlying state estimation
            function.

    Returns:
        M (array): genes x clusters - state means of the selected run
        W (array): clusters x cells - state mixing components for each
            cell of the selected run
        ll (float): the likelihood/objective value reported by the
            selected run

        If ``reps`` is less than 1, or no run reports a value below
        ``+inf``, ``M`` and ``W`` are None and ``ll`` is ``np.inf``.

    NOTE(review): the run with the *smallest* reported value is kept.
    This presumably means the estimators report an objective to be
    minimized (e.g. a negative log-likelihood) - confirm against the
    estimator implementations.
    """
    # Accepted (lower-cased) names -> (estimator, index of the
    # likelihood/objective value inside the tuple the estimator returns).
    # The NB estimator carries its value at index 3 rather than 2
    # (presumably because it returns an extra fitted parameter before it).
    estimators = {
        'poiss': (poisson_estimate_state, 2),
        'poisson': (poisson_estimate_state, 2),
        'nb': (nb_estimate_state, 3),
        'zip': (zip_estimate_state, 2),
        'lognorm': (log_norm_nmf, 2),
        'log-normal': (log_norm_nmf, 2),
        'lognormal': (log_norm_nmf, 2),
        'gaussian': (norm_nmf, 2),
        'norm': (norm_nmf, 2),
        'normal': (norm_nmf, 2),
    }
    key = dist.lower()
    if key not in estimators:
        # Deliberate best-effort: unknown names fall back to Poisson.
        print('dist should be one of Poiss, NB, ZIP, LogNorm, or Gaussian. Using Poiss.')
    func, ll_index = estimators.get(key, (poisson_estimate_state, 2))

    best_ll = np.inf
    best_M = None
    best_W = None
    for _ in range(reps):
        results = func(data, clusters, **kwargs)
        # Read the value from the estimator-specific position. The original
        # compared the already lower-cased name against 'NB', so the NB
        # branch never ran and index 2 was used for every estimator.
        ll = results[ll_index]
        if ll < best_ll:
            best_ll = ll
            best_M = results[0]
            best_W = results[1]
    return best_M, best_W, best_ll