Source code for uncurl.simulation

# simulations... generating poisson data?

import numpy as np

[docs]def generate_poisson_data(centers, n_cells, cluster_probs=None): """ Generates poisson-distributed data, given a set of means for each cluster. Args: centers (array): genes x clusters matrix n_cells (int): number of output cells cluster_probs (array): prior probability for each cluster. Default: uniform. Returns: output - array with shape genes x n_cells labels - array of cluster labels """ genes, clusters = centers.shape output = np.zeros((genes, n_cells)) if cluster_probs is None: cluster_probs = np.ones(clusters)/clusters labels = [] for i in range(n_cells): c = np.random.choice(range(clusters), p=cluster_probs) labels.append(c) output[:,i] = np.random.poisson(centers[:,c]) return output, np.array(labels)
[docs]def generate_zip_data(M, L, n_cells, cluster_probs=None): """ Generates zero-inflated poisson-distributed data, given a set of means and zero probs for each cluster. Args: M (array): genes x clusters matrix L (array): genes x clusters matrix - zero-inflation parameters n_cells (int): number of output cells cluster_probs (array): prior probability for each cluster. Default: uniform. Returns: output - array with shape genes x n_cells labels - array of cluster labels """ genes, clusters = M.shape output = np.zeros((genes, n_cells)) if cluster_probs is None: cluster_probs = np.ones(clusters)/clusters zip_p = np.random.random((genes, n_cells)) labels = [] for i in range(n_cells): c = np.random.choice(range(clusters), p=cluster_probs) labels.append(c) output[:,i] = np.where(zip_p[:,i] < L[:,c], 0, np.random.poisson(M[:,c])) return output, np.array(labels)
[docs]def generate_state_data(means, weights): """ Generates data according to the Poisson Convex Mixture Model. Args: means (array): Cell types- genes x clusters weights (array): Cell cluster assignments- clusters x cells Returns: data matrix - genes x cells """ x_true = np.dot(means, weights) sample = np.random.poisson(x_true) return sample.astype(float)
[docs]def generate_zip_state_data(means, weights, z): """ Generates data according to the Zero-inflated Poisson Convex Mixture Model. Args: means (array): Cell types- genes x clusters weights (array): Cell cluster assignments- clusters x cells z (float): zero-inflation parameter Returns: data matrix - genes x cells """ x_true = np.dot(means, weights) sample = np.random.poisson(x_true) random = np.random.random(x_true.shape) x_true[random < z] = 0 return sample.astype(float)
[docs]def generate_nb_state_data(means, weights, R): """ Generates data according to the Negative Binomial Convex Mixture Model. Args: means (array): Cell types- genes x clusters weights (array): Cell cluster assignments- clusters x cells R (array): dispersion parameter - 1 x genes Returns: data matrix - genes x cells """ cells = weights.shape[1] # x_true = true means x_true = np.dot(means, weights) # convert means into P R_ = np.tile(R, (cells, 1)).T P_true = x_true/(R_ + x_true) sample = np.random.negative_binomial(np.tile(R, (cells, 1)).T, P_true) return sample.astype(float)
[docs]def generate_nb_states(n_states, n_cells, n_genes): """ Generates means and weights for the Negative Binomial Mixture Model. Weights are distributed Dirichlet(1,1,...), means are rand(0, 1). Returned values can be passed to generate_state_data(M, W). Args: n_states (int): number of states or clusters n_cells (int): number of cells n_genes (int): number of genes Returns: M - genes x clusters W - clusters x cells R - genes x 1 - randint(1, 100) """ W = np.random.dirichlet([1]*n_states, size=(n_cells,)) W = W.T M = np.random.random((n_genes, n_states))*100 R = np.random.randint(1, 100, n_genes) return M, W, R
[docs]def generate_poisson_states(n_states, n_cells, n_genes): """ Generates means and weights for the Poisson Convex Mixture Model. Weights are distributed Dirichlet(1,1,...), means are rand(0, 100). Returned values can be passed to generate_state_data(M, W). Args: n_states (int): number of states or clusters n_cells (int): number of cells n_genes (int): number of genes Returns: M - genes x clusters W - clusters x cells """ W = np.random.dirichlet([1]*n_states, size=(n_cells,)) W = W.T M = np.random.random((n_genes, n_states))*100 return M, W
[docs]def generate_poisson_lineage(n_states, n_cells_per_cluster, n_genes, means=300): """ Generates a lineage for each state- assumes that each state has a common ancestor. Returns: M - genes x clusters W - clusters x cells """ # means... M = np.random.random((n_genes, n_states))*means center = M.mean(1) W = np.zeros((n_states, n_cells_per_cluster*n_states)) # TODO # start at a center where all the clusters have equal probability, and for # each cluster, interpolate linearly towards the cluster. index = 0 means = np.array([1.0/n_states]*n_states) for c in range(n_states): for i in range(n_cells_per_cluster): w = np.copy(means) new_value = w[c] + i*(1.0 - 1.0/n_states)/n_cells_per_cluster w[:] = (1.0 - new_value)/(n_states - 1.0) w[c] = new_value W[:, index] = w index += 1 return M, W
[docs]def generate_nb_data(P, R, n_cells, assignments=None): """ Generates negative binomial data Args: P (array): genes x clusters R (array): genes x clusters n_cells (int): number of cells assignments (list): cluster assignment of each cell. Default: random uniform Returns: data array with shape genes x cells labels - array of cluster labels """ genes, clusters = P.shape output = np.zeros((genes, n_cells)) if assignments is None: cluster_probs = np.ones(clusters)/clusters labels = [] for i in range(n_cells): if assignments is None: c = np.random.choice(range(clusters), p=cluster_probs) else: c = assignments[i] labels.append(c) # because numpy's negative binomial, r is the number of successes output[:,i] = np.random.negative_binomial(R[:,c], 1.0-P[:,c]) return output, np.array(labels)