Source code for uncurl.scalable.state_estimation

"""
State estimation using SGD

(how to do it?)

TODO: be able to use sparse (CSC) matrices

Basically, we observe one (cell_id, gene_level) pair at a time, iterating
through the data point by point, updating the gradient based on that point.

"""

import random

import numpy as np
from scipy.sparse import issparse


[docs]def m_grad(m, X, w):
    """
    """
    pass

[docs]def w_grad(w, X, m):
    """
    """
    pass

[docs]def cost_grad(th, Xr, X, n):
    """
    translated from the matlab
    """
    xth = np.dot(X, th)
    cost = (1./n)*sum(xth - Xr*np.log(xth))
    temp = (Xr/xth)
    grad  = (1./n)*(1-temp)*X.T
    return cost, grad


[docs]def poisson_estimate_state(data, clusters, init_means=None, init_weights=None, max_iters=10, tol=1e-4, eta=1e-4,  disp=True):
    """
    Runs Poisson state estimation on a sparse data matrix...
    """
    # If data is a sparse (CSC) matrix: loop through points
    # otherwise, loop
    genes, cells = data.shape
    W = np.random.random((clusters, cells))
    M = np.random.random((genes, clusters))
    if issparse(data):
        points = zip(data.nonzero())
        for i in range(max_iters):
            random.shuffle(points)
            # 1. estimate W
            for p1, p2 in points:
                x = data[p1,p2]
                cost, grad = cost_grad(W[:,p2], x, M[p1,:], 1)
                W[:,p2] += grad*eta
            # 2. estimate M
            for p1, p2 in points:
                x = data[p1,p2]
                cost, grad = cost_grad(M[p1,:], x, W[:,p2], 1)
                M[p1,:] += grad*eta
        return M, W
    else:
        print('Warning: data is not sparse.')