Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added asCategorical() #211

Merged
merged 4 commits into from
Dec 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# AlphaSimR 1.6.1.9990

*added `asCategorical` to convert a normal (Gaussian) trait to an ordered categorical (threshold) trait

# AlphaSimR 1.6.1

*fixed bug in `mergePops` and `[` (subset) methods - they were failing for populations that had a misc slot with a matrix - we now check if a misc slot element is a matrix and rbind them for `mergePops` and subset rows for `[` (assuming the first dimension represents individuals)
Expand Down
185 changes: 185 additions & 0 deletions R/phenotypes.R
Original file line number Diff line number Diff line change
Expand Up @@ -261,4 +261,189 @@ setPheno = function(pop, h2=NULL, H2=NULL, varE=NULL, corE=NULL,
return(pop)
}

#' @title Convert a normal (Gaussian) trait to an ordered categorical (threshold)
#' trait
#' @param x matrix, values for one or more traits (if not a matrix,
#' we cast to a matrix)
#' @param p NULL, numeric or list, when \code{NULL} the \code{threshold} argument
#' takes precedence; when numeric, provide a vector of probabilities of
#' categories to convert continuous values into categories for a single trait
#' (if probabilities do not sum to 1, another category is added and a warning
#' is raised); when list, provide a list of numeric probabilities - list node
#' with \code{NULL} will skip conversion for a specific trait (see examples);
#' internally \code{p} is converted to \code{threshold} hence input
#' \code{threshold} is overwritten
#' @param mean numeric, assumed mean(s) of the normal (Gaussian) trait(s);
#' used only when \code{p} is given
#' @param var numeric, assumed variance(s) of the normal (Gaussian) trait(s);
#' used only when \code{p} is given
#' @param threshold NULL, numeric or list, when numeric, provide a vector of
#' threshold values to convert continuous values into categories for a single trait
#' (the thresholds specify left-closed and right-opened intervals [t1, t2),
#' which can be changed with \code{include.lowest} and \code{right};
#' ensure you add \code{-Inf} and \code{Inf} or min and max to cover the whole
#' range of values; otherwise you will get \code{NA} values);
#' when list, provide a list of numeric thresholds - list node with \code{NULL}
#' will skip conversion for a specific trait (see examples)
#' @param include.lowest logical, see \code{\link{cut}}
#' @param right logical, see \code{\link{cut}}
#' @details If input trait is normal (Gaussian) then this function generates a
#' categorical trait according to the ordered probit model.
#' @return matrix of values with some traits recorded as ordered categories
#' in the form of 1:nC with nC being the number of categories.
#' @examples
#' founderPop = quickHaplo(nInd=10, nChr=1, segSites=10)
#' SP = SimParam$new(founderPop)
#' \dontshow{SP$nThreads = 1L}
#' trtMean = c(0, 0)
#' trtVarG = c(1, 2)
#' SP$addTraitA(nQtlPerChr = 10, mean = trtMean, var = trtVarG,
#' corA = matrix(data = c(1.0, 0.6,
#' 0.6, 1.0), ncol = 2))
#' pop = newPop(founderPop)
#' trtVarE = c(1, 1)
#' trtVarP = trtVarG + trtVarE
#' pop = setPheno(pop, varE = trtVarE)
#' pheno(pop)
#'
#' #Convert a single input trait
#' asCategorical(x = pheno(pop)[, 1])
#'
#' #Demonstrate threshold argument (in units of pheno SD)
#' asCategorical(x = pheno(pop)[, 1], threshold = c(-1, 0, 1) * sqrt(trtVarP[1]))
#' asCategorical(x = pheno(pop)[, 1], threshold = c(-Inf, -1, 0, 1, Inf) * sqrt(trtVarP[1]))
#' asCategorical(x = pheno(pop)[, 1], threshold = c(-Inf, 0, Inf))
#'
#' #Demonstrate p argument
#' asCategorical(x = pheno(pop)[, 1], p = 0.5, var = trtVarP[1])
#' asCategorical(x = pheno(pop)[, 1], p = c(0.5, 0.5), var = trtVarP[1])
#' asCategorical(x = pheno(pop)[, 1], p = c(0.25, 0.5, 0.25), var = trtVarP[1])
#'
#' #Convert multiple input traits (via threshold or p argument)
#' try(asCategorical(x = pheno(pop)))
#' asCategorical(x = pheno(pop),
#' threshold = list(c(-Inf, 0, Inf),
#' NULL))
#' try(asCategorical(x = pheno(pop), p = c(0.5, 0.5)))
#' asCategorical(x = pheno(pop),
#' p = list(c(0.5, 0.5),
#' NULL),
#' mean = trtMean, var = trtVarP)
#'
#' asCategorical(x = pheno(pop),
#' threshold = list(c(-Inf, 0, Inf),
#' c(-Inf, -2, -1, 0, 1, 2, Inf) * sqrt(trtVarP[2])))
#' q = c(-2, -1, 0, 1, 2)
#' p = pnorm(q)
#' p = c(p[1], p[2]-p[1], p[3]-p[2], p[4]-p[3], p[5]-p[4], 1-p[5])
#' asCategorical(x = pheno(pop),
#' p = list(c(0.5, 0.5),
#' p),
#' mean = trtMean, var = trtVarP)
#' @export
asCategorical = function(x, p = NULL, mean = 0, var = 1,
threshold = c(-Inf, 0, Inf),
include.lowest = TRUE, right = FALSE) {
if (!is.matrix(x)) {
x = as.matrix(x)
}
nTraits = ncol(x)
if (!is.null(p)) {
if (is.numeric(p)) {
if (nTraits > 1) {
stop("When x contains more than one column, you must supply a list of probabilities! See examples.")
}
p = list(p)
}
if (length(p) != nTraits) {
stop("You must supply probabilities for all traits in x !")
}
if (length(mean) != nTraits) {
stop("You must supply means for all traits in x !")
}
if (length(var) != nTraits) {
stop("You must supply variances for all traits in x !")
}
threshold = p
for (trt in 1:nTraits) {
if (!is.null(p[[trt]])) {
pSum = sum(p[[trt]])
if (!(pSum == 1)) { # TODO: how can we do floating point aware comparison?
warning("Probabilities do not sum to 1 - creating one more category!")
p[[trt]] = c(p[[trt]], 1 - pSum)
}
tmp = qnorm(p = cumsum(p[[trt]]), mean = mean[trt], sd = sqrt(var[trt]))
if (!(-Inf %in% tmp)) {
tmp = c(-Inf, tmp)
}
if (!(Inf %in% tmp)) {
tmp = c(tmp, Inf)
}
threshold[[trt]] = tmp
}
}
}
if (is.numeric(threshold)) {
if (nTraits > 1) {
stop("When x contains more than one column, you must supply a list of thresholds! See examples.")
}
threshold = list(threshold)
}
if (length(threshold) != nTraits) {
stop("You must supply thresholds for all traits in x !")
}
for (trt in 1:nTraits) {
if (!is.null(threshold[[trt]])) {
x[, trt] = as.numeric(cut(x = x[, trt], breaks = threshold[[trt]],
include.lowest = include.lowest, right = right))
}
}
return(x)
}

#' @title Convert a normal (Gaussian) trait to a count (Poisson) trait
#' @param x matrix, values for one or more traits (if not a matrix,
#' we cast to a matrix)
#' @param TODO numeric or list, when numeric, provide a vector of TODO
#' @return matrix of values with some traits recoded as counts
#' @details If input trait is normal (Gaussian) then this function generates a
#' count trait according to the Poisson generalised linear model.
#' @examples
#' founderPop = quickHaplo(nInd=20, nChr=1, segSites=10)
#' SP = SimParam$new(founderPop)
#' \dontshow{SP$nThreads = 1L}
#' SP$addTraitA(nQtlPerChr = 10, mean = c(0, 0), var = c(1, 2),
#' corA = matrix(data = c(1.0, 0.6,
#' 0.6, 1.0), ncol = 2))
#' pop = newPop(founderPop)
#' pop = setPheno(pop, varE = c(1, 1))
#' pheno(pop)
#' #Convert a single input trait
#' asCount(x = pheno(pop)[, 2])
#' asCount(x = pheno(pop)[, 2], TODO = c(-1, 0, 1))
#' asCount(x = pheno(pop)[, 2], TODO = c(-Inf, -1, 0, 1, Inf))
#' #Convert multiple input traits
#' try(asCount(x = pheno(pop)))
#' asCount(x = pheno(pop),
#' TODO = list(NULL,
#' ???))
#' TODO export
# asCount = function(x, TODO = 10) {
# if (!is.matrix(x)) {
# x = as.matrix(x)
# }
# nTraits = ncol(x)
# if (is.numeric(TODO)) {
# if (nTraits > 1) {
# stop("When x contains more than one column, you must supply a list of TODO! See examples.")
# }
# TODO = list(TODO)
# }
# for (trt in 1:nTraits) {
# if (!is.null(TODO[[trt]])) {
# TODO: need to think what to do with an intercept and lambda
# x[, trt] = round(exp(x[, trt]))
# }
# }
# return(x)
# }
44 changes: 44 additions & 0 deletions tests/testthat/test-phenotypes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
context("phenotypes")

test_that("asCategorical_converts_correctly",{
cont = matrix(data = 0, nrow = 7, ncol = 3)
cont[, 1] = c(-3, -2, -1, 0, 1, 2, 3)
cont[, 2] = c(-3, -2, -1, 0, 1, 2, 3)
cont[, 3] = c(-3, -2, -1, 0, 1, 2, 3)

expect_equal(asCategorical(x = cont[, 1]),
matrix(c(1, 1, 1, 2, 2, 2, 2)))
expect_equal(asCategorical(x = cont[, 1], threshold = c(-1, 0, 1)),
matrix(c(NA, NA, 1, 2, 2, NA, NA)))
expect_equal(asCategorical(x = cont[, 1], threshold = c(-Inf, -1, 0, 1, Inf)),
matrix(c(1, 1, 2, 3, 4, 4, 4)))

expect_warning(asCategorical(x = cont[, 1], p = 0.5))
expect_equal(suppressWarnings(asCategorical(x = cont[, 1], p = 0.5)),
asCategorical(x = cont[, 1], p = c(0.5, 0.5)))

trtMean = apply(X = cont, MARGIN = 2, FUN = mean)
trtVar = apply(X = cont, MARGIN = 2, FUN = var)
expect_equal(asCategorical(x = cont[, 1], p = c(0.5, 0.5), var = trtVar[1]),
matrix(c(1, 1, 1, 2, 2, 2, 2)))
expect_equal(asCategorical(x = cont[, 1], p = c(2/7, 1/7, 1/7, 3/7), var = trtVar[1]),
matrix(c(1, 1, 2, 3, 4, 4, 4)))

expect_error(asCategorical(x = cont))
cont2 = asCategorical(x = cont,
threshold = list(NULL,
c(-Inf, 0, Inf),
NULL))
cont2Exp = cont
cont2Exp[, 2] = c(1, 1, 1, 2, 2, 2, 2)
expect_equal(cont2, cont2Exp)

expect_error(asCategorical(x = cont, p = c(0.5, 0.5)))
pList = list(NULL, c(0.5, 0.5), NULL)
expect_error(asCategorical(x = cont, p = pList))
expect_error(asCategorical(x = cont, p = pList, mean = trtMean))
cont2 = asCategorical(x = cont, p = pList, mean = trtMean, var=trtVar)
cont2Exp = cont
cont2Exp[, 2] = c(1, 1, 1, 2, 2, 2, 2)
expect_equal(cont2, cont2Exp)
})