Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unified and ensemble cell typing #90

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,10 @@ Imports:
future.apply,
ids,
RColorBrewer,
digest,
cowplot,
igraph,
reshape2,
callr,
future,
methods,
pbapply,
reshape2,
scales,
data.table,
ggplot2,
Expand Down
7 changes: 7 additions & 0 deletions R/HPCell.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.myDataEnv <- new.env(parent = emptyenv()) # not exported

.data_internal <- function(dataset) {
if (!exists(dataset, envir = .myDataEnv)) {
utils::data(list = c(dataset), envir = .myDataEnv)
}
}
134 changes: 134 additions & 0 deletions R/functions_consensus.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
ensemble_annotation <- function(celltype_matrix, method_weights = NULL, override_celltype = c(), celltype_tree = NULL) {
if (is.null(celltype_tree)) {
.data_internal(immune_graph)
}

stopifnot(is(celltype_tree, "igraph"))
stopifnot(igraph::is_directed(celltype_tree))
stopifnot(is.matrix(celltype_matrix) | is.data.frame(celltype_matrix))

node_names = igraph::V(celltype_tree)$name

# check override_celltype nodes are present
missing_nodes = setdiff(override_celltype, node_names)
if (!is.null(missing_nodes) & length(missing_nodes) > 0) {
missing_nodes = paste(missing_nodes, collapse = ", ")
stop(sprintf("the following nodes in 'override_celltype' not found in 'celltype_tree': %s", utils::capture.output(utils::str(missing_nodes))))
}

# check celltype_matrix
if (ncol(celltype_matrix) == 1) {
# no ensemble required
return(celltype_matrix)
} else {
celltype_matrix = as.matrix(celltype_matrix)
invalid_types = setdiff(celltype_matrix, c(node_names, NA))
if (length(invalid_types) > 0) {
warning(sprintf("the following cell types in 'celltype_matrix' are not in the graph and will be set to NA:\n"), utils::capture.output(utils::str(invalid_types)))
}
celltype_matrix[celltype_matrix %in% invalid_types] = NA
}

# check method_weights
if (is.null(method_weights)) {
method_weights = matrix(1, ncol = ncol(celltype_matrix), nrow = nrow(celltype_matrix))
} else if (is.vector(method_weights)) {
if (ncol(celltype_matrix) != length(method_weights)) {
stop("the number of columns in 'celltype_matrix' should match the length of 'method_weights'")
}
method_weights = matrix(rep(method_weights, each = nrow(celltype_matrix)), nrow = nrow(celltype_matrix))
} else if (is.matrix(method_weights) | is.data.frame(method_weights)) {
if (ncol(celltype_matrix) != ncol(method_weights)) {
stop("the number of columns in 'celltype_matrix' and 'method_weights' should be equal")
}
method_weights = as.matrix(method_weights)
}
method_weights = method_weights / rowSums(method_weights)

# create vote matrix
vote_matrix = Matrix::sparseMatrix(i = integer(0), j = integer(0), dims = c(nrow(celltype_matrix), length(node_names)), dimnames = list(rownames(celltype_matrix), node_names))
for (i in seq_len(ncol(celltype_matrix))) {
locmat = cbind(seq_len(nrow(celltype_matrix)), as.numeric(factor(celltype_matrix[, i], levels = node_names)))
missing = is.na(locmat[, 2])
vote_matrix[locmat[!missing, ]] = vote_matrix[locmat[!missing, ]] + method_weights[!missing, i]
}

# propagate vote to children
d = apply(!is.infinite(igraph::distances(celltype_tree, mode = "out")), 2, as.numeric)
d = as(d, "sparseMatrix")
vote_matrix_children = Matrix::tcrossprod(vote_matrix, Matrix::t(d))

# propagate vote to parent
d = igraph::distances(celltype_tree, mode = "in")
d = 1 / (2^d) - 0.1 # vote halved at each subsequent ancestor
diag(d)[igraph::degree(celltype_tree, mode = "in") > 0 & igraph::degree(celltype_tree, mode = "out") == 0] = 0
diag(d) = diag(d) * 0.9 # prevent leaf nodes from being selected when trying to identify upstream ancestor (works for any number in the interval (0.5, 1))
vote_matrix_parent = Matrix::tcrossprod(vote_matrix, Matrix::t(d))

# assess votes and identify common ancestors for ties
vote_matrix_children = apply(vote_matrix_children, 1, \(x) x[x > 0], simplify = FALSE)
vote_matrix_parent = apply(vote_matrix_parent, 1, \(x) x[x > 0], simplify = FALSE)
ensemble = mapply(\(children, parents) {
# override condition
override_node = intersect(override_celltype, names(children))
if (length(override_node) > 0) {
return(override_node[1])
}

# maximum votes
children = names(children)[children == max(children)]
if (length(children) == 1) {
return(children)
} else {
# lowest ancestor with the maximum votes
parents = names(parents)[parents == max(parents)]
if (length(parents) == 1) {
return(parents)
} else {
return(NA)
}
}
}, vote_matrix_children, vote_matrix_parent)

return(ensemble)
}

add_celltype_level <- function(.data, id_col, level = 0, celltype_tree = NULL) {
if (is.null(celltype_tree)) {
.data_internal(immune_graph)
}
stopifnot(is(celltype_tree, "igraph"))
stopifnot(igraph::is_directed(celltype_tree))

ig_diameter = igraph::diameter(celltype_tree)
if (level > ig_diameter) {
stop(sprintf("The specified level (%d) exceeds the depth of the celltype tree (%d)", level, ig_diameter))
}

# check column exists
id_col_str = rlang::as_string(rlang::ensym(id_col))
if (!id_col_str %in% colnames(.data)) {
stop(sprintf("Column '%s' not found in .data", rlang::as_string(rlang::ensym(id_col))))
}

# generate map
ct_map = igraph::ego(celltype_tree, mode = "in", order = ig_diameter) |>
sapply(\(x) {
x = rev(x$name)
x[min(length(x), level + 1)]
}) |>
setNames(igraph::V(celltype_tree)$name)

# retain types of the matching level only
d = igraph::distances(celltype_tree, mode = "in")
d[is.infinite(d)] = NA
ct_level = apply(d, 1, max, na.rm = TRUE)
is_child = igraph::degree(celltype_tree, mode = "out") == 0
ct_map[ct_level[ct_map] != level & !is_child] = NA_character_
map_df = data.frame(ctypes, ct_map[ctypes])
colnames(map_df) = c(id_col_str, sprintf("%s_L%d", id_col_str, level))

# join and return
.data |>
dplyr::left_join(map_df, copy = TRUE)
}
Binary file added data/celltype_unification_maps.rda
Binary file not shown.
Binary file added data/immune_graph.rda
Binary file not shown.
Binary file added data/nonimmune_cellxgene.rda
Binary file not shown.
30 changes: 30 additions & 0 deletions inst/extdata/immune_map_azimuth.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from,to,is_immune
NK,nk,TRUE
CD8 TEM,cd8 tem,TRUE
CD4 CTL,cytotoxic,TRUE
dnT,t,TRUE
CD8 Naive,cd8 naive,TRUE
CD4 Naive,cd4 naive,TRUE
CD4 TCM,cd4 tcm,TRUE
gdT,tgd,TRUE
CD8 TCM,cd8 tcm,TRUE
MAIT,mait,TRUE
CD4 TEM,cd4 tem,TRUE
ILC,ilc,TRUE
CD14 Mono,cd14 mono,TRUE
cDC1,cdc,TRUE
pDC,pdc,TRUE
cDC2,cdc,TRUE
B naive,b naive,TRUE
B intermediate,b memory,TRUE
B memory,b memory,TRUE
Eryth,erythrocyte,TRUE
CD16 Mono,cd16 mono,TRUE
HSPC,progenitor,TRUE
Treg,treg,TRUE
NK_CD56bright,nk,TRUE
Plasmablast,plasma,TRUE
NK Proliferating,nk,TRUE
ASDC,cdc,TRUE
CD8 Proliferating,cd8 tem,TRUE
CD4 Proliferating,cd4 tem,TRUE
45 changes: 45 additions & 0 deletions inst/extdata/immune_map_blueprint.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from,to,is_immune
Neutrophils,granulocyte,TRUE
Monocytes,monocytic,TRUE
MEP,progenitor,TRUE
CD4+ T-cells,t cd4,TRUE
Tregs,treg,TRUE
CD4+ Tcm,cd4 tcm,TRUE
CD4+ Tem,cd4 tem,TRUE
CD8+ Tcm,cd8 tcm,TRUE
CD8+ Tem,cd8 tem,TRUE
NK cells,nk,TRUE
naive B-cells,b naive,TRUE
Memory B-cells,b memory,TRUE
Class-switched memory B-cells,b memory,TRUE
HSC,progenitor,TRUE
MPP,progenitor,TRUE
CLP,progenitor,TRUE
GMP,progenitor,TRUE
Macrophages,macrophage,TRUE
CD8+ T-cells,t cd8,TRUE
CD8 T,t cd8,TRUE
Erythrocytes,erythrocyte,TRUE
Megakaryocytes,non immune,TRUE
CMP,progenitor,TRUE
Macrophages M1,macrophage,TRUE
Macrophages M2,macrophage,TRUE
Endothelial cells,non immune,TRUE
DC,cdc,TRUE
Eosinophils,granulocyte,TRUE
Plasma cells,plasma,TRUE
Chondrocytes,non immune,TRUE
Fibroblasts,non immune,TRUE
Smooth muscle,non immune,TRUE
Epithelial cells,non immune,TRUE
Melanocytes,non immune,TRUE
Skeletal muscle,non immune,TRUE
Keratinocytes,non immune,TRUE
mv Endothelial cells,non immune,TRUE
Myocytes,non immune,TRUE
Adipocytes,non immune,TRUE
Neurons,non immune,TRUE
Pericytes,non immune,TRUE
Preadipocytes,non immune,TRUE
Astrocytes,non immune,TRUE
Mesangial cells,non immune,TRUE
Loading
Loading