"""
Evaluation metrics for alignment quality, imputation accuracy, and graph connectivity.
"""
import numpy as np
from sklearn.metrics import silhouette_score
from sklearn.neighbors import kneighbors_graph
from scipy.spatial import distance
import networkx as nx
def cell_type_matching_accuracy(m1_source_ct, m1_predict_ct, m2_source_ct, m2_predict_ct):
    """
    Calculate the cell-type prediction accuracy of cell-cell alignment.

    The accuracy is the number of cells (pooled over both modalities) whose
    predicted label equals the original label, divided by the total number
    of cells.

    Parameters
    ----------
    m1_source_ct : array-like
        The original cell type labels of modality 1.
    m1_predict_ct : array-like
        The predicted cell type labels of modality 1.
    m2_source_ct : array-like
        The original cell type labels of modality 2.
    m2_predict_ct : array-like
        The predicted cell type labels of modality 2.

    Returns
    -------
    float
        The overall cell-type prediction accuracy (rounded to 4 decimals).

    Raises
    ------
    ValueError
        If the source and predicted labels of a modality differ in shape,
        or if both modalities are empty.
    """
    # Convert to arrays so that ``==`` is element-wise. With plain lists the
    # comparison collapses to a single bool and cannot be summed.
    m1_source = np.asarray(m1_source_ct)
    m1_predict = np.asarray(m1_predict_ct)
    m2_source = np.asarray(m2_source_ct)
    m2_predict = np.asarray(m2_predict_ct)

    if m1_source.shape != m1_predict.shape:
        raise ValueError(
            "m1_source_ct and m1_predict_ct must have the same shape, "
            f"got {m1_source.shape} and {m1_predict.shape}"
        )
    if m2_source.shape != m2_predict.shape:
        raise ValueError(
            "m2_source_ct and m2_predict_ct must have the same shape, "
            f"got {m2_source.shape} and {m2_predict.shape}"
        )

    n_total = m1_source.size + m2_source.size
    if n_total == 0:
        raise ValueError("cannot compute accuracy without any labels")

    n_correct = (
        np.count_nonzero(m1_source == m1_predict)
        + np.count_nonzero(m2_source == m2_predict)
    )
    return round(n_correct / n_total, 4)
def average_sihouette_width(embedding, cell_type_label):
    """
    Compute the mean silhouette score of an embedding grouped by cell type.

    Parameters
    ----------
    embedding : np.ndarray of shape (n_samples, n_dims)
        The low-dimensional embedding on which the silhouette is evaluated.
    cell_type_label : array-like of shape (n_samples,)
        The cell type label of each embedded point.

    Returns
    -------
    float
        The silhouette score returned by ``silhouette_score``, rounded to
        4 decimals.
    """
    return round(silhouette_score(embedding, cell_type_label), 4)
def feature_imputation_accuracy_corr(m1_feature, m2_aligned_feature1):
    """
    Calculate the feature imputation accuracy of the aligned feature profile using Pearson correlation.

    For every sample (row) the Pearson correlation between the original and
    the imputed feature vector is computed, rounded to 4 decimals, and the
    rounded values are averaged over all samples.

    Parameters
    ----------
    m1_feature : array-like of shape (n_samples, n_features)
        The original modality-1 feature matrix.
    m2_aligned_feature1 : array-like of shape (n_samples, n_features)
        The imputed (aligned) modality-1 feature matrix obtained for modality 2.

    Returns
    -------
    float
        The average per-sample Pearson correlation (imputation accuracy).
        A row with zero variance in either matrix has an undefined
        correlation (NaN), which propagates to the returned mean.

    Raises
    ------
    ValueError
        If the two matrices differ in shape or are not 2-dimensional.
    """
    original = np.asarray(m1_feature, dtype=float)
    imputed = np.asarray(m2_aligned_feature1, dtype=float)

    # Explicit exceptions instead of ``assert`` so that the validation is
    # not stripped when Python runs with -O.
    if original.shape != imputed.shape:
        raise ValueError(
            "m1_feature and m2_aligned_feature1 must have the same shape, "
            f"got {original.shape} and {imputed.shape}"
        )
    if original.ndim != 2:
        raise ValueError(
            f"expected 2-dimensional feature matrices, got {original.ndim} dimension(s)"
        )

    # Row-wise Pearson correlation, computed for all samples at once.
    original_centered = original - original.mean(axis=1, keepdims=True)
    imputed_centered = imputed - imputed.mean(axis=1, keepdims=True)
    covariance = np.sum(original_centered * imputed_centered, axis=1)
    scale = np.sqrt(
        np.sum(original_centered ** 2, axis=1) * np.sum(imputed_centered ** 2, axis=1)
    )
    # Clip guards against floating-point overshoot just outside [-1, 1].
    corr_vec = np.clip(covariance / scale, -1.0, 1.0)

    # Per-sample rounding is kept so results match earlier outputs of this metric.
    return np.mean(np.round(corr_vec, 4))
def feature_imputation_rmse(m1_feature, m2_aligned_feature1):
    """
    Calculate the RMSE of the aligned feature profile.

    The error is pooled over every entry of the two matrices; it is not
    averaged per sample first.

    Parameters
    ----------
    m1_feature : array-like of shape (n_samples, n_features)
        The original modality-1 feature matrix.
    m2_aligned_feature1 : array-like of shape (n_samples, n_features)
        The imputed (aligned) modality-1 feature matrix obtained for modality 2.

    Returns
    -------
    float
        The root mean squared error between the original and imputed features.

    Raises
    ------
    ValueError
        If the two matrices differ in shape.
    """
    original = np.asarray(m1_feature, dtype=float)
    imputed = np.asarray(m2_aligned_feature1, dtype=float)

    # Explicit exception instead of ``assert`` so that the validation is
    # not stripped when Python runs with -O.
    if original.shape != imputed.shape:
        raise ValueError(
            "m1_feature and m2_aligned_feature1 must have the same shape, "
            f"got {original.shape} and {imputed.shape}"
        )

    return np.sqrt(np.mean(np.square(original - imputed)))
def uniFOSCTTM(m1_embedding, m2_embedding, true_matches_for_m2):
    """
    Compute the fraction of samples closer than the true match (uniFOSCTTM).

    For each modality-2 cell, the Euclidean distances to all modality-1
    cells are compared with the distance to its true match; the fraction of
    modality-1 cells that are strictly closer is averaged over all
    modality-2 cells listed in ``true_matches_for_m2``.

    Parameters
    ----------
    m1_embedding : np.ndarray of shape (n, d)
        Embedding of modality 1.
    m2_embedding : np.ndarray of shape (n, d)
        Embedding of modality 2.
    true_matches_for_m2 : array-like of length n
        Indices of the true matched cells in modality 1 for each cell in modality 2.

    Returns
    -------
    float
        The uniFOSCTTM score (rounded to 4 decimals).
    """
    # Rows index modality-2 cells, columns index modality-1 cells.
    pairwise = distance.cdist(m2_embedding, m1_embedding, metric='euclidean')
    n_candidates = pairwise.shape[1]

    fractions = np.zeros(len(true_matches_for_m2))
    for row, match in enumerate(true_matches_for_m2):
        match_distance = pairwise[row, match]
        # Strict inequality: ties with the true match are not counted as closer.
        n_closer = np.count_nonzero(pairwise[row, :] < match_distance)
        fractions[row] = n_closer / n_candidates

    return round(np.mean(fractions), 4)
def calculate_graph_connectivity(data, labels, k=15):
    """
    Calculate the Graph Connectivity for each cell type in the dataset.

    A k-nearest-neighbour graph is built on ``data``. For every cell type,
    the subgraph induced by the cells of that type is extracted and the size
    of its largest connected component is divided by the number of cells of
    that type. The returned score is the mean of these ratios.

    Parameters
    ----------
    data : np.ndarray of shape (n_samples, n_features)
        The dataset where rows are samples and columns are features.
    labels : array-like of shape (n_samples,)
        The cell type labels for each sample.
    k : int, default=15
        Number of nearest neighbors to consider for each cell.

    Returns
    -------
    float
        The graph connectivity score averaged across cell types.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of samples.
    """
    # Convert to an array so that ``labels == label`` is element-wise; with a
    # plain list it evaluates to a single ``False`` and selects no cells.
    labels = np.asarray(labels).ravel()

    knn_adjacency = kneighbors_graph(data, n_neighbors=k, mode='connectivity', include_self=False)
    if labels.shape[0] != knn_adjacency.shape[0]:
        raise ValueError(
            f"labels has {labels.shape[0]} entries but data has "
            f"{knn_adjacency.shape[0]} samples"
        )
    graph = nx.from_scipy_sparse_array(knn_adjacency)

    unique_labels = np.unique(labels)
    sum_lcc_ratio = 0
    for label in unique_labels:
        indices = np.flatnonzero(labels == label)
        # Only edges between cells of the same type survive in the subgraph.
        subgraph = graph.subgraph(indices)
        largest_cc_size = max(len(component) for component in nx.connected_components(subgraph))
        sum_lcc_ratio += largest_cc_size / len(indices)
    return sum_lcc_ratio / len(unique_labels)