Source code for uncurl.dimensionality_reduction

# dimensionality reduction

import numpy as np
from .pois_ll import poisson_dist

eps=1e-8
max_or_zero = np.vectorize(lambda x: max(0.0,x))

[docs]def diffusion_mds(means, weights, d, diffusion_rounds=10): """ Dimensionality reduction using MDS, while running diffusion on W. Args: means (array): genes x clusters weights (array): clusters x cells d (int): desired dimensionality Returns: W_reduced (array): array of shape (d, cells) """ for i in range(diffusion_rounds): weights = weights*weights weights = weights/weights.sum(0) X = dim_reduce(means, weights, d) if X.shape[0]==2: return X.dot(weights) else: return X.T.dot(weights)
[docs]def mds(means, weights, d): """ Dimensionality reduction using MDS. Args: means (array): genes x clusters weights (array): clusters x cells d (int): desired dimensionality Returns: W_reduced (array): array of shape (d, cells) """ X = dim_reduce(means, weights, d) if X.shape[0]==2: return X.dot(weights) else: return X.T.dot(weights)
[docs]def dim_reduce(means, weights, d): """ Dimensionality reduction using Poisson distances and MDS. Args: means (array): genes x clusters weights (array): clusters x cells d (int): desired dimensionality Returns: X, a clusters x d matrix representing the reduced dimensions of the cluster centers. """ return dim_reduce_data(means, d)
[docs]def dim_reduce_data(data, d): """ Does a MDS on the data directly, not on the means. Args: data (array): genes x cells d (int): desired dimensionality Returns: X, a cells x d matrix """ genes, cells = data.shape distances = np.zeros((cells, cells)) for i in range(cells): for j in range(cells): distances[i,j] = poisson_dist(data[:,i], data[:,j]) # do MDS on the distance matrix (procedure from Wikipedia) proximity = distances**2 J = np.eye(cells) - 1./cells B = -0.5*np.dot(J, np.dot(proximity, J)) # B should be symmetric, so we can use eigh e_val, e_vec = np.linalg.eigh(B) # Note: lam should be ordered to be the largest eigenvalues lam = np.diag(e_val[-d:])[::-1] #lam = max_or_zero(lam) E = e_vec[:,-d:][::-1] X = np.dot(E, lam**0.5) return X