Source code for uncurl.scalable.state_estimation
"""
State estimation using SGD
(how to do it?)
TODO: be able to use sparse (CSC) matrices
Basically, we observe one (cell_id, gene_level) pair at a time, iterating
through the data point by point, updating the gradient based on that point.
"""
import random
import numpy as np
from scipy.sparse import issparse
[docs]def m_grad(m, X, w):
"""
"""
pass
[docs]def w_grad(w, X, m):
"""
"""
pass
[docs]def cost_grad(th, Xr, X, n):
"""
translated from the matlab
"""
xth = np.dot(X, th)
cost = (1./n)*sum(xth - Xr*np.log(xth))
temp = (Xr/xth)
grad = (1./n)*(1-temp)*X.T
return cost, grad
[docs]def poisson_estimate_state(data, clusters, init_means=None, init_weights=None, max_iters=10, tol=1e-4, eta=1e-4, disp=True):
"""
Runs Poisson state estimation on a sparse data matrix...
"""
# If data is a sparse (CSC) matrix: loop through points
# otherwise, loop
genes, cells = data.shape
W = np.random.random((clusters, cells))
M = np.random.random((genes, clusters))
if issparse(data):
points = zip(data.nonzero())
for i in range(max_iters):
random.shuffle(points)
# 1. estimate W
for p1, p2 in points:
x = data[p1,p2]
cost, grad = cost_grad(W[:,p2], x, M[p1,:], 1)
W[:,p2] += grad*eta
# 2. estimate M
for p1, p2 in points:
x = data[p1,p2]
cost, grad = cost_grad(M[p1,:], x, W[:,p2], 1)
M[p1,:] += grad*eta
return M, W
else:
print('Warning: data is not sparse.')