Skip to content

Commit

Permalink
version 1 of package
Browse files Browse the repository at this point in the history
  • Loading branch information
mwohlfender committed Aug 29, 2023
1 parent 0cb56f8 commit ad51e6d
Show file tree
Hide file tree
Showing 32 changed files with 3,110 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^estRodis\.Rproj$
^\.Rproj\.user$
512 changes: 512 additions & 0 deletions .Rhistory

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.Rproj.user
29 changes: 29 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Package: estRodis
Title: What the Package Does (One Line, Title Case)
Version: 0.0.0.9000
Authors@R:
person("First", "Last", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "YOUR-ORCID-ID"))
Description: What the package does (one paragraph).
License: MIT + file LICENCE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Biarch: true
Depends:
R (>= 3.4.0)
Imports:
dplyr,
methods,
Rcpp (>= 0.12.0),
RcppParallel (>= 5.0.1),
rstan (>= 2.18.1),
rstantools (>= 2.3.1)
LinkingTo:
BH (>= 1.66.0),
Rcpp (>= 0.12.0),
RcppEigen (>= 0.3.3.3.0),
RcppParallel (>= 5.0.1),
rstan (>= 2.18.1),
StanHeaders (>= 2.18.0)
SystemRequirements: GNU make
2 changes: 2 additions & 0 deletions LICENCE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2023
COPYRIGHT HOLDER: ISPM
14 changes: 14 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Generated by roxygen2: do not edit by hand

export(esRd_estimate_parameters)
export(esRd_simulate_cluster_sizes)
import(Rcpp)
import(methods)
importFrom(dplyr,filter)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(rstan,sampling)
importFrom(stats,rbinom)
importFrom(stats,rnbinom)
useDynLib(estRodis, .registration = TRUE)
68 changes: 68 additions & 0 deletions R/esRd_apply_mutations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@

#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom stats rbinom
esRd_apply_mutations <- function(nodes, edges, mutation_proba) {

# make sure that the columns of nodes and edges have the right names
names(nodes) <- c("node_key")
names(edges) <- c("from", "to")

# determine the number of nodes
number_nodes <- nrow(nodes)

# determine values or add placeholders for the following properties of the nodes:
# (a) whether a mutation occurs at the node,
# (b) the variant the node received from its direct ancestor and
# (c) the variant the node transmits to its direct offspring if a mutation occurs at the node
nodes_a <- nodes |> dplyr::mutate(mutation_occured = rbinom(n = number_nodes, size = 1, prob = mutation_proba),
variant_received = rep(x = 0, times = number_nodes),
variant_after_mutation = nodes$node_key)

# add placeholder for the variant transmitted via an edge
edges_a <- edges |> dplyr::mutate(variant_transmitted = rep(x = 0, times = nrow(edges)))

# if there is only one node, then there is not much to do
if (number_nodes >= 2) {

for (ii in 1:number_nodes) {

# get edges connecting nodes_a$node_key[ii] to its direct offspring
edges_to_direct_offspring <- edges |> dplyr::filter(from == ii)

# determine direct offspring of nodes_a$node_key[ii]
descendants_one <- unique(union(nodes$node_key[edges_to_direct_offspring$from], nodes$node_key[edges_to_direct_offspring$to]))
descendants_one <- descendants_one[descendants_one != ii]

# if nodes_a$node_key[ii] has no direct offspring, then there is nothing further to do
if (length(descendants_one) >= 1) {

for (jj in 1:length(descendants_one)) {

# depending on whether a mutation takes place at a node or not, a different variant is transmitted to its direct offspring
if (nodes_a$mutation_occured[ii]) {

nodes_a$variant_received[descendants_one[jj]] <- nodes_a$variant_after_mutation[ii]
edges_a$variant_transmitted[descendants_one[jj] - 1] <- nodes_a$variant_after_mutation[ii]

} else {

nodes_a$variant_received[descendants_one[jj]] <- nodes_a$variant_received[ii]
edges_a$variant_transmitted[descendants_one[jj] - 1] <- nodes_a$variant_received[ii]

}

}

}

}

}

# create output
tree_mutation <- list(nodes = nodes_a, edges = edges_a)

return(tree_mutation)

}
32 changes: 32 additions & 0 deletions R/esRd_case_detection.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr select
#' @importFrom stats rbinom
esRd_case_detection_indep_bernoulli <- function(nodes, edges, detection_proba) {

# make sure that the columns of nodes and edges have the right names
names(nodes) <- c("node_key", "mutation_occured", "variant_received", "variant_after_mutation")
names(edges) <- c("from", "to", "variant_transmitted")

# prepare the data frames that will be returned
nodes_a <- nodes |> dplyr::mutate(detection = rbinom(n = nrow(nodes), size = 1, prob = detection_proba))
edges_a <- edges |> dplyr::mutate(from_detected = rep(x = 0, times = nrow(edges)),
to_detected = rep(x = 0, times = nrow(edges)))

# determine whether start and target of the edges have been detected
if (nrow(edges_a) >= 1) {

edges_a <- edges_a |> dplyr::mutate(from_detected = unlist(lapply(X = edges$from,
FUN = function(x) if (nodes_a |> dplyr::filter(node_key == x) |> dplyr::select(detection) == 1) {1} else {0})),
to_detected = unlist(lapply(X = edges$to,
FUN = function(x) if (nodes_a |> dplyr::filter(node_key == x) |> dplyr::select(detection) == 1) {1} else {0})))

}

# create output
tree_detection <- list(nodes = nodes_a, edges = edges_a)

return(tree_detection)

}
115 changes: 115 additions & 0 deletions R/esRd_complete_variant_subtree.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@

#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr pull
#' @importFrom stats rbinom
#' @importFrom stats rnbinom
esRd_complete_variant_subtree <- function(tree_nodes, tree_leaves, tree_edges, variant, limit_size, R, k, mutation_proba, detection_proba) {

# create data frame to store information about the nodes of the variant subtree we consider
# we define it to be most likely large enough (if necessary it will be enlarged)
variant_subtree_nodes_expanded <- data.frame(matrix(data = 0, ncol = ncol(tree_nodes), nrow = max(2 * ceiling(limit_size / detection_proba), ceiling(limit_size / detection_proba) + 250)))
names(variant_subtree_nodes_expanded) <- names(tree_nodes)

# create data frame to store information about the edges of the variant subtree we consider
# we define it to be most likely large enough (if necessary it will be enlarged)
variant_subtree_edges_expanded <- data.frame(matrix(data = 0, ncol = ncol(tree_edges), nrow = max(2 * ceiling(limit_size / detection_proba), ceiling(limit_size / detection_proba) + 250)))
names(variant_subtree_edges_expanded) <- names(tree_edges)

# store information about the variant subtree we consider (the part of the variant subtree we consider that has already been created) ----
variant_subtree_nodes_0 <- tree_nodes |> dplyr::filter((mutation_occured == 1 & variant_after_mutation == variant) | (mutation_occured == 0 & variant_received == variant))
variant_subtree_edges_0 <- tree_edges |> dplyr::filter((from %in% (variant_subtree_nodes_0 |> dplyr::pull(node_key))) & (to %in% (variant_subtree_nodes_0 |> dplyr::pull(node_key))))
variant_subtree_edges_0 <- variant_subtree_edges_0 |> dplyr::mutate(from = unlist(lapply(X = from,
FUN = function(x) match(x = x, table = (variant_subtree_nodes_0 |> dplyr::pull(node_key))))),
to = unlist(lapply(X = to,
FUN = function(x) match(x = x, table = (variant_subtree_nodes_0 |> dplyr::pull(node_key))))))

variant_subtree_nodes_expanded[1:nrow(variant_subtree_nodes_0), ] <- variant_subtree_nodes_0

if (nrow(variant_subtree_edges_0) >= 1) {

variant_subtree_edges_expanded[1:nrow(variant_subtree_edges_0), ] <- variant_subtree_edges_0

}

# determine the nodes of the variant subtree we consider that are leaves
variant_subtree_free_leaves <- intersect(variant_subtree_nodes_0 |> dplyr::pull(node_key), tree_leaves |> dplyr::pull(node_key))

# extend variant subtree ----

# initialize the number of nodes, the number of free leaves and the number of edges of the variant subtree we consider
number_nodes <- nrow(variant_subtree_nodes_0)
number_free_leaves <- length(variant_subtree_free_leaves)
number_edges <- nrow(variant_subtree_edges_0)

# continue as long as there is a free leaf and the limit_size limit of the variant subtree is not reached
while ((number_free_leaves > 0) && (sum(variant_subtree_nodes_expanded$detection) <= limit_size)) {

# create new leaves (might be no new leaves)
number_new_free_leaves <- stats::rnbinom(n = 1, size = k, mu = R)

# add the new leaves to the set of nodes and to the set of free leaves and connect the new leaves to the graph
if (number_new_free_leaves >= 1) {

# write new leaves into a list
new_leaves <- seq(from = max(variant_subtree_nodes_expanded$node_key) + 1, to = max(variant_subtree_nodes_expanded$node_key) + number_new_free_leaves, by = 1)

# information about the newly generated nodes
variant_subtree_nodes_expanded_temp <- data.frame(node_key = new_leaves,
mutation_occured = rbinom(n = number_new_free_leaves, size = 1, prob = mutation_proba),
variant_received = rep(x = variant, times = number_new_free_leaves),
variant_after_mutation = new_leaves,
detection = rbinom(n = number_new_free_leaves, size = 1, prob = detection_proba))

# remove nodes at which a mutation occurred
variant_subtree_nodes_expanded_temp <- variant_subtree_nodes_expanded_temp |> dplyr::filter(mutation_occured == 0)

# update number of new free leaves (only those nodes at which no mutation occurred are considered)
number_new_free_leaves <- nrow(variant_subtree_nodes_expanded_temp)

if (number_new_free_leaves >= 1) {

# update new_leaves
new_leaves <- variant_subtree_nodes_expanded_temp |> dplyr::pull(node_key)

# add variant_subtree_nodes_expanded_temp to variant_subtree_nodes_expanded
variant_subtree_nodes_expanded[(number_nodes + 1):(number_nodes + number_new_free_leaves), ] <- variant_subtree_nodes_expanded_temp

# add new free leaves to the set of nodes of the variant subtree and to the set of free leaves of the variant subtree
variant_subtree_free_leaves[(number_free_leaves + 1):(number_free_leaves + number_new_free_leaves)] <- new_leaves

# add edges connecting the new free leaves to the first free leaf of the tree
variant_subtree_edges_expanded$from[(number_edges + 1):(number_edges + number_new_free_leaves)] <- rep(x = match(x = variant_subtree_free_leaves[1], table = variant_subtree_nodes_expanded$node_key), times = number_new_free_leaves)
variant_subtree_edges_expanded$to[(number_edges + 1):(number_edges + number_new_free_leaves)] <- (number_edges + 1):(number_edges + number_new_free_leaves) + 1
variant_subtree_edges_expanded$variant_transmitted[(number_edges + 1):(number_edges + number_new_free_leaves)] <- rep(x = variant, times = number_new_free_leaves)
variant_subtree_edges_expanded$from_detected[(number_edges + 1):(number_edges + number_new_free_leaves)] <- unlist(lapply(X = variant_subtree_edges_expanded$from[(number_edges + 1):(number_edges + number_new_free_leaves)],
FUN = function(x) if (variant_subtree_nodes_expanded$detection[x] == 1) {1} else {0}))
variant_subtree_edges_expanded$to_detected[(number_edges + 1):(number_edges + number_new_free_leaves)] <- unlist(lapply(X = variant_subtree_edges_expanded$to[(number_edges + 1):(number_edges + number_new_free_leaves)],
FUN = function(x) if (variant_subtree_nodes_expanded$detection[x] == 1) {1} else {0}))

}

# update the number of nodes, the number of free leaves and the number of edges of the variant subtree
number_nodes <- number_nodes + number_new_free_leaves
number_free_leaves <- number_free_leaves + number_new_free_leaves
number_edges <- number_edges + number_new_free_leaves

}

# delete the free leaf we are considering from the set of free leaves
variant_subtree_free_leaves <- variant_subtree_free_leaves[-1]

# update the number of free leaves of the tree
number_free_leaves <- number_free_leaves - 1

}

# define output
completed_variant_subtree <- list(nodes = variant_subtree_nodes_expanded |> dplyr::filter(node_key != 0),
edges = variant_subtree_edges_expanded |> dplyr::filter(from != 0 & to != 0))

return(completed_variant_subtree)

}


Loading

0 comments on commit ad51e6d

Please sign in to comment.