Merge pull request #211 from gregorgorjanc/devel

Added asCategorical()
gaynorr · Dec 25, 2024 · f030da2 · f030da2
2 parents fd654bd + 555e3b6
commit f030da2
Show file tree

Hide file tree

Showing 3 changed files with 233 additions and 0 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# AlphaSimR 1.6.1.9990
+
+*added `asCategorical` to convert a normal (Gaussian) trait to an ordered categorical (threshold) trait
+
 # AlphaSimR 1.6.1
 
 *fixed bug in `mergePops` and `[` (subset) methods - they were failing for populations that had a misc slot with a matrix - we now check if a misc slot element is a matrix and rbind them for `mergePops` and subset rows for `[` (assuming the first dimension represents individuals)

diff --git a/R/phenotypes.R b/R/phenotypes.R
@@ -261,4 +261,189 @@ setPheno = function(pop, h2=NULL, H2=NULL, varE=NULL, corE=NULL,
   return(pop)
 }
 
+#' @title Convert a normal (Gaussian) trait to an ordered categorical (threshold)
+#'   trait
+#' @param x matrix, values for one or more traits (if not a matrix,
+#'   we cast to a matrix)
+#' @param p NULL, numeric or list, when \code{NULL} the \code{threshold} argument
+#'   takes precedence; when numeric, provide a vector of probabilities of
+#'   categories to convert continuous values into categories for a single trait
+#'   (if probabilities do not sum to 1, another category is added and a warning
+#'   is raised); when list, provide a list of numeric probabilities - list node
+#'   with \code{NULL} will skip conversion for a specific trait (see examples);
+#'   internally \code{p} is converted to \code{threshold} hence input
+#'   \code{threshold} is overwritten
+#' @param mean numeric, assumed mean(s) of the normal (Gaussian) trait(s);
+#'   used only when \code{p} is given
+#' @param var numeric, assumed variance(s) of the normal (Gaussian) trait(s);
+#'   used only when \code{p} is given
+#' @param threshold NULL, numeric or list, when numeric, provide a vector of
+#'   threshold values to convert continuous values into categories for a single trait
+#'   (the thresholds specify left-closed and right-opened intervals [t1, t2),
+#'   which can be changed with \code{include.lowest} and \code{right};
+#'   ensure you add \code{-Inf} and \code{Inf} or min and max to cover the whole
+#'   range of values; otherwise you will get \code{NA} values);
+#'   when list, provide a list of numeric thresholds - list node with \code{NULL}
+#'   will skip conversion for a specific trait (see examples)
+#' @param include.lowest logical, see \code{\link{cut}}
+#' @param right logical, see \code{\link{cut}}
+#' @details If input trait is normal (Gaussian) then this function generates a
+#'   categorical trait according to the ordered probit model.
+#' @return matrix of values with some traits recorded as ordered categories
+#'  in the form of 1:nC with nC being the number of categories.
+#' @examples
+#' founderPop = quickHaplo(nInd=10, nChr=1, segSites=10)
+#' SP = SimParam$new(founderPop)
+#' \dontshow{SP$nThreads = 1L}
+#' trtMean = c(0, 0)
+#' trtVarG = c(1, 2)
+#' SP$addTraitA(nQtlPerChr = 10, mean = trtMean, var = trtVarG,
+#'              corA = matrix(data = c(1.0, 0.6,
+#'                                     0.6, 1.0), ncol = 2))
+#' pop = newPop(founderPop)
+#' trtVarE = c(1, 1)
+#' trtVarP = trtVarG + trtVarE
+#' pop = setPheno(pop, varE = trtVarE)
+#' pheno(pop)
+#'
+#' #Convert a single input trait
+#' asCategorical(x = pheno(pop)[, 1])
+#'
+#' #Demonstrate threshold argument (in units of pheno SD)
+#' asCategorical(x = pheno(pop)[, 1], threshold = c(-1, 0, 1) * sqrt(trtVarP[1]))
+#' asCategorical(x = pheno(pop)[, 1], threshold = c(-Inf, -1, 0, 1, Inf) * sqrt(trtVarP[1]))
+#' asCategorical(x = pheno(pop)[, 1], threshold = c(-Inf, 0, Inf))
+#'
+#' #Demonstrate p argument
+#' asCategorical(x = pheno(pop)[, 1], p = 0.5, var = trtVarP[1])
+#' asCategorical(x = pheno(pop)[, 1], p = c(0.5, 0.5), var = trtVarP[1])
+#' asCategorical(x = pheno(pop)[, 1], p = c(0.25, 0.5, 0.25), var = trtVarP[1])
+#'
+#' #Convert multiple input traits (via threshold or p argument)
+#' try(asCategorical(x = pheno(pop)))
+#' asCategorical(x = pheno(pop),
+#'               threshold = list(c(-Inf, 0, Inf),
+#'                                NULL))
+#' try(asCategorical(x = pheno(pop), p = c(0.5, 0.5)))
+#' asCategorical(x = pheno(pop),
+#'               p = list(c(0.5, 0.5),
+#'                        NULL),
+#'               mean = trtMean, var = trtVarP)
+#'
+#' asCategorical(x = pheno(pop),
+#'               threshold = list(c(-Inf, 0, Inf),
+#'                                c(-Inf, -2, -1, 0, 1, 2, Inf) * sqrt(trtVarP[2])))
+#' q = c(-2, -1, 0, 1, 2)
+#' p = pnorm(q)
+#' p = c(p[1], p[2]-p[1], p[3]-p[2], p[4]-p[3], p[5]-p[4], 1-p[5])
+#' asCategorical(x = pheno(pop),
+#'               p = list(c(0.5, 0.5),
+#'                        p),
+#'               mean = trtMean, var = trtVarP)
+#' @export
+asCategorical = function(x, p = NULL, mean = 0, var = 1,
+                         threshold = c(-Inf, 0, Inf),
+                         include.lowest = TRUE, right = FALSE) {
+  if (!is.matrix(x)) {
+    x = as.matrix(x)
+  }
+  nTraits = ncol(x)
+  if (!is.null(p)) {
+    if (is.numeric(p)) {
+      if (nTraits > 1) {
+        stop("When x contains more than one column, you must supply a list of probabilities! See examples.")
+      }
+      p = list(p)
+    }
+    if (length(p) != nTraits) {
+      stop("You must supply probabilities for all traits in x !")
+    }
+    if (length(mean) != nTraits) {
+      stop("You must supply means for all traits in x !")
+    }
+    if (length(var) != nTraits) {
+      stop("You must supply variances for all traits in x !")
+    }
+    threshold = p
+    for (trt in 1:nTraits) {
+      if (!is.null(p[[trt]])) {
+        pSum = sum(p[[trt]])
+        if (!(pSum == 1)) { # TODO: how can we do floating point aware comparison?
+          warning("Probabilities do not sum to 1 - creating one more category!")
+          p[[trt]] = c(p[[trt]], 1 - pSum)
+        }
+        tmp = qnorm(p = cumsum(p[[trt]]), mean = mean[trt], sd = sqrt(var[trt]))
+        if (!(-Inf %in% tmp)) {
+          tmp = c(-Inf, tmp)
+        }
+        if (!(Inf %in% tmp)) {
+          tmp = c(tmp, Inf)
+        }
+        threshold[[trt]] = tmp
+      }
+    }
+  }
+  if (is.numeric(threshold)) {
+    if (nTraits > 1) {
+      stop("When x contains more than one column, you must supply a list of thresholds! See examples.")
+    }
+    threshold = list(threshold)
+  }
+  if (length(threshold) != nTraits) {
+    stop("You must supply thresholds for all traits in x !")
+  }
+  for (trt in 1:nTraits) {
+    if (!is.null(threshold[[trt]])) {
+      x[, trt] = as.numeric(cut(x = x[, trt], breaks = threshold[[trt]],
+                                include.lowest = include.lowest, right = right))
+    }
+  }
+  return(x)
+}
 
+#' @title Convert a normal (Gaussian) trait to a count (Poisson) trait
+#' @param x matrix, values for one or more traits (if not a matrix,
+#'   we cast to a matrix)
+#' @param TODO numeric or list, when numeric, provide a vector of TODO
+#' @return matrix of values with some traits recoded as counts
+#' @details If input trait is normal (Gaussian) then this function generates a
+#'   count trait according to the Poisson generalised linear model.
+#' @examples
+#' founderPop = quickHaplo(nInd=20, nChr=1, segSites=10)
+#' SP = SimParam$new(founderPop)
+#' \dontshow{SP$nThreads = 1L}
+#' SP$addTraitA(nQtlPerChr = 10, mean = c(0, 0), var = c(1, 2),
+#'              corA = matrix(data = c(1.0, 0.6,
+#'                                     0.6, 1.0), ncol = 2))
+#' pop = newPop(founderPop)
+#' pop = setPheno(pop, varE = c(1, 1))
+#' pheno(pop)
+#' #Convert a single input trait
+#' asCount(x = pheno(pop)[, 2])
+#' asCount(x = pheno(pop)[, 2], TODO = c(-1, 0, 1))
+#' asCount(x = pheno(pop)[, 2], TODO = c(-Inf, -1, 0, 1, Inf))
+#' #Convert multiple input traits
+#' try(asCount(x = pheno(pop)))
+#' asCount(x = pheno(pop),
+#'           TODO = list(NULL,
+#'                       ???))
+#' TODO export
+# asCount = function(x, TODO = 10) {
+#   if (!is.matrix(x)) {
+#     x = as.matrix(x)
+#   }
+#   nTraits = ncol(x)
+#   if (is.numeric(TODO)) {
+#     if (nTraits > 1) {
+#       stop("When x contains more than one column, you must supply a list of TODO! See examples.")
+#     }
+#     TODO = list(TODO)
+#   }
+#   for (trt in 1:nTraits) {
+#     if (!is.null(TODO[[trt]])) {
+# TODO: need to think what to do with an intercept and lambda
+#       x[, trt] = round(exp(x[, trt]))
+#     }
+#   }
+#   return(x)
+# }
diff --git a/tests/testthat/test-phenotypes.R b/tests/testthat/test-phenotypes.R
@@ -0,0 +1,44 @@
+context("phenotypes")
+
+test_that("asCategorical_converts_correctly",{
+  cont = matrix(data = 0, nrow = 7, ncol = 3)
+  cont[, 1] = c(-3, -2, -1, 0, 1, 2, 3)
+  cont[, 2] = c(-3, -2, -1, 0, 1, 2, 3)
+  cont[, 3] = c(-3, -2, -1, 0, 1, 2, 3)
+
+  expect_equal(asCategorical(x = cont[, 1]),
+               matrix(c(1, 1, 1, 2, 2, 2, 2)))
+  expect_equal(asCategorical(x = cont[, 1], threshold = c(-1, 0, 1)),
+               matrix(c(NA, NA, 1, 2, 2, NA, NA)))
+  expect_equal(asCategorical(x = cont[, 1], threshold = c(-Inf, -1, 0, 1, Inf)),
+               matrix(c(1, 1, 2, 3, 4, 4, 4)))
+
+  expect_warning(asCategorical(x = cont[, 1], p = 0.5))
+  expect_equal(suppressWarnings(asCategorical(x = cont[, 1], p = 0.5)),
+               asCategorical(x = cont[, 1], p = c(0.5, 0.5)))
+
+  trtMean = apply(X = cont, MARGIN = 2, FUN = mean)
+  trtVar = apply(X = cont, MARGIN = 2, FUN = var)
+  expect_equal(asCategorical(x = cont[, 1], p = c(0.5, 0.5), var = trtVar[1]),
+               matrix(c(1, 1, 1, 2, 2, 2, 2)))
+  expect_equal(asCategorical(x = cont[, 1], p = c(2/7, 1/7, 1/7, 3/7), var = trtVar[1]),
+               matrix(c(1, 1, 2, 3, 4, 4, 4)))
+
+  expect_error(asCategorical(x = cont))
+  cont2 = asCategorical(x = cont,
+                        threshold = list(NULL,
+                                         c(-Inf, 0, Inf),
+                                         NULL))
+  cont2Exp = cont
+  cont2Exp[, 2] = c(1, 1, 1, 2, 2, 2, 2)
+  expect_equal(cont2, cont2Exp)
+
+  expect_error(asCategorical(x = cont, p = c(0.5, 0.5)))
+  pList = list(NULL, c(0.5, 0.5), NULL)
+  expect_error(asCategorical(x = cont, p = pList))
+  expect_error(asCategorical(x = cont, p = pList, mean = trtMean))
+  cont2 = asCategorical(x = cont, p = pList, mean = trtMean, var=trtVar)
+  cont2Exp = cont
+  cont2Exp[, 2] = c(1, 1, 1, 2, 2, 2, 2)
+  expect_equal(cont2, cont2Exp)
+})