gaynorr · gregorgorjanc · Dec 7, 2024 · Dec 12, 2024 · Dec 22, 2024 · Dec 22, 2024
diff --git a/NAMESPACE b/NAMESPACE
@@ -51,6 +51,7 @@ export(makeDH)
 export(meanEBV)
 export(meanG)
 export(meanP)
+export(mendelianSampling)
 export(mergeGenome)
 export(mergePops)
 export(mutate)
@@ -59,6 +60,7 @@ export(newEmptyPop)
 export(newMapPop)
 export(newMultiPop)
 export(newPop)
+export(parentAverage)
 export(pedigreeCross)
 export(pheno)
 export(popVar)
@@ -138,6 +140,7 @@ importFrom(methods,classLabel)
 importFrom(methods,is)
 importFrom(methods,new)
 importFrom(methods,show)
+importFrom(methods,slot)
 importFrom(methods,validObject)
 importFrom(stats,aggregate)
 importFrom(stats,coef)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# AlphaSimR 1.6.2
+
+*added `parentAverage` and `mendelianSampling` functions
+
+*added `asCategorical` to convert a normal (Gaussian) trait to an ordered categorical (threshold) trait
+
 # AlphaSimR 1.6.1
 
 *fixed bug in `mergePops` and `[` (subset) methods - they were failing for populations that had a misc slot with a matrix - we now check if a misc slot element is a matrix and rbind them for `mergePops` and subset rows for `[` (assuming the first dimension represents individuals)

diff --git a/R/AlphaSimR.R b/R/AlphaSimR.R
@@ -1,6 +1,6 @@
 #' @useDynLib AlphaSimR, .registration = TRUE
 #' @import Rcpp
-#' @importFrom methods new validObject is .hasSlot 
+#' @importFrom methods new validObject is .hasSlot slot
 #' @importFrom methods show classLabel
 #' @importFrom stats aggregate rnorm qnorm var
 #' @importFrom stats coef dnorm lm pnorm qgamma na.omit
@@ -10,21 +10,21 @@
 #' @importFrom Rdpack reprompt
 
 #' @description
-#' The successor to the 'AlphaSim' software for breeding program 
-#' simulation [Faux et al. (2016) <doi:10.3835/plantgenome2016.02.0013>]. 
-#' Used for stochastic simulations of breeding programs to the level of DNA 
-#' sequence for every individual. Contained is a wide range of functions for 
-#' modeling common tasks in a breeding program, such as selection and crossing. 
-#' These functions allow for constructing simulations of highly complex plant and 
-#' animal breeding programs via scripting in the R software environment. Such 
-#' simulations can be used to evaluate overall breeding program performance and 
-#' conduct research into breeding program design, such as implementation of 
-#' genomic selection. Included is the 'Markovian Coalescent Simulator' ('MaCS') 
-#' for fast simulation of biallelic sequences according to a population 
+#' The successor to the 'AlphaSim' software for breeding program
+#' simulation [Faux et al. (2016) <doi:10.3835/plantgenome2016.02.0013>].
+#' Used for stochastic simulations of breeding programs to the level of DNA
+#' sequence for every individual. Contained is a wide range of functions for
+#' modeling common tasks in a breeding program, such as selection and crossing.
+#' These functions allow for constructing simulations of highly complex plant and
+#' animal breeding programs via scripting in the R software environment. Such
+#' simulations can be used to evaluate overall breeding program performance and
+#' conduct research into breeding program design, such as implementation of
+#' genomic selection. Included is the 'Markovian Coalescent Simulator' ('MaCS')
+#' for fast simulation of biallelic sequences according to a population
 #' demographic history [Chen et al. (2009) <doi:10.1101/gr.083634.108>].
-#' 
-#' Please see the introductory vignette for instructions for using this package. 
-#' The vignette can be viewed using the following command: 
+#'
+#' Please see the introductory vignette for instructions for using this package.
+#' The vignette can be viewed using the following command:
 #' \code{vignette("intro",package="AlphaSimR")}
 #' @keywords internal
-"_PACKAGE"
+"_PACKAGE"
diff --git a/R/phenotypes.R b/R/phenotypes.R
@@ -261,4 +261,189 @@ setPheno = function(pop, h2=NULL, H2=NULL, varE=NULL, corE=NULL,
   return(pop)
 }
 
+#' @title Convert a normal (Gaussian) trait to an ordered categorical (threshold)
+#'   trait
+#' @param x matrix, values for one or more traits (if not a matrix,
+#'   we cast to a matrix)
+#' @param p NULL, numeric or list, when \code{NULL} the \code{threshold} argument
+#'   takes precedence; when numeric, provide a vector of probabilities of
+#'   categories to convert continuous values into categories for a single trait
+#'   (if probabilities do not sum to 1, another category is added and a warning
+#'   is raised); when list, provide a list of numeric probabilities - list node
+#'   with \code{NULL} will skip conversion for a specific trait (see examples);
+#'   internally \code{p} is converted to \code{threshold} hence input
+#'   \code{threshold} is overwritten
+#' @param mean numeric, assumed mean(s) of the normal (Gaussian) trait(s);
+#'   used only when \code{p} is given
+#' @param var numeric, assumed variance(s) of the normal (Gaussian) trait(s);
+#'   used only when \code{p} is given
+#' @param threshold NULL, numeric or list, when numeric, provide a vector of
+#'   threshold values to convert continuous values into categories for a single trait
+#'   (the thresholds specify left-closed and right-opened intervals [t1, t2),
+#'   which can be changed with \code{include.lowest} and \code{right};
+#'   ensure you add \code{-Inf} and \code{Inf} or min and max to cover the whole
+#'   range of values; otherwise you will get \code{NA} values);
+#'   when list, provide a list of numeric thresholds - list node with \code{NULL}
+#'   will skip conversion for a specific trait (see examples)
+#' @param include.lowest logical, see \code{\link{cut}}
+#' @param right logical, see \code{\link{cut}}
+#' @details If input trait is normal (Gaussian) then this function generates a
+#'   categorical trait according to the ordered probit model.
+#' @return matrix of values with some traits recorded as ordered categories
+#'  in the form of 1:nC with nC being the number of categories.
+#' @examples
+#' founderPop = quickHaplo(nInd=10, nChr=1, segSites=10)
+#' SP = SimParam$new(founderPop)
+#' \dontshow{SP$nThreads = 1L}
+#' trtMean = c(0, 0)
+#' trtVarG = c(1, 2)
+#' SP$addTraitA(nQtlPerChr = 10, mean = trtMean, var = trtVarG,
+#'              corA = matrix(data = c(1.0, 0.6,
+#'                                     0.6, 1.0), ncol = 2))
+#' pop = newPop(founderPop)
+#' trtVarE = c(1, 1)
+#' trtVarP = trtVarG + trtVarE
+#' pop = setPheno(pop, varE = trtVarE)
+#' pheno(pop)
+#'
+#' #Convert a single input trait
+#' asCategorical(x = pheno(pop)[, 1])
+#'
+#' #Demonstrate threshold argument (in units of pheno SD)
+#' asCategorical(x = pheno(pop)[, 1], threshold = c(-1, 0, 1) * sqrt(trtVarP[1]))
+#' asCategorical(x = pheno(pop)[, 1], threshold = c(-Inf, -1, 0, 1, Inf) * sqrt(trtVarP[1]))
+#' asCategorical(x = pheno(pop)[, 1], threshold = c(-Inf, 0, Inf))
+#'
+#' #Demonstrate p argument
+#' asCategorical(x = pheno(pop)[, 1], p = 0.5, var = trtVarP[1])
+#' asCategorical(x = pheno(pop)[, 1], p = c(0.5, 0.5), var = trtVarP[1])
+#' asCategorical(x = pheno(pop)[, 1], p = c(0.25, 0.5, 0.25), var = trtVarP[1])
+#'
+#' #Convert multiple input traits (via threshold or p argument)
+#' try(asCategorical(x = pheno(pop)))
+#' asCategorical(x = pheno(pop),
+#'               threshold = list(c(-Inf, 0, Inf),
+#'                                NULL))
+#' try(asCategorical(x = pheno(pop), p = c(0.5, 0.5)))
+#' asCategorical(x = pheno(pop),
+#'               p = list(c(0.5, 0.5),
+#'                        NULL),
+#'               mean = trtMean, var = trtVarP)
+#'
+#' asCategorical(x = pheno(pop),
+#'               threshold = list(c(-Inf, 0, Inf),
+#'                                c(-Inf, -2, -1, 0, 1, 2, Inf) * sqrt(trtVarP[2])))
+#' q = c(-2, -1, 0, 1, 2)
+#' p = pnorm(q)
+#' p = c(p[1], p[2]-p[1], p[3]-p[2], p[4]-p[3], p[5]-p[4], 1-p[5])
+#' asCategorical(x = pheno(pop),
+#'               p = list(c(0.5, 0.5),
+#'                        p),
+#'               mean = trtMean, var = trtVarP)
+#' @export
+asCategorical = function(x, p = NULL, mean = 0, var = 1,
+                         threshold = c(-Inf, 0, Inf),
+                         include.lowest = TRUE, right = FALSE) {
+  if (!is.matrix(x)) {
+    x = as.matrix(x)
+  }
+  nTraits = ncol(x)
+  if (!is.null(p)) {
+    if (is.numeric(p)) {
+      if (nTraits > 1) {
+        stop("When x contains more than one column, you must supply a list of probabilities! See examples.")
+      }
+      p = list(p)
+    }
+    if (length(p) != nTraits) {
+      stop("You must supply probabilities for all traits in x !")
+    }
+    if (length(mean) != nTraits) {
+      stop("You must supply means for all traits in x !")
+    }
+    if (length(var) != nTraits) {
+      stop("You must supply variances for all traits in x !")
+    }
+    threshold = p
+    for (trt in 1:nTraits) {
+      if (!is.null(p[[trt]])) {
+        pSum = sum(p[[trt]])
+        if (!(pSum == 1)) { # TODO: how can we do floating point aware comparison?
+          warning("Probabilities do not sum to 1 - creating one more category!")
+          p[[trt]] = c(p[[trt]], 1 - pSum)
+        }
+        tmp = qnorm(p = cumsum(p[[trt]]), mean = mean[trt], sd = sqrt(var[trt]))
+        if (!(-Inf %in% tmp)) {
+          tmp = c(-Inf, tmp)
+        }
+        if (!(Inf %in% tmp)) {
+          tmp = c(tmp, Inf)
+        }
+        threshold[[trt]] = tmp
+      }
+    }
+  }
+  if (is.numeric(threshold)) {
+    if (nTraits > 1) {
+      stop("When x contains more than one column, you must supply a list of thresholds! See examples.")
+    }
+    threshold = list(threshold)
+  }
+  if (length(threshold) != nTraits) {
+    stop("You must supply thresholds for all traits in x !")
+  }
+  for (trt in 1:nTraits) {
+    if (!is.null(threshold[[trt]])) {
+      x[, trt] = as.numeric(cut(x = x[, trt], breaks = threshold[[trt]],
+                                include.lowest = include.lowest, right = right))
+    }
+  }
+  return(x)
+}
 
+#' @title Convert a normal (Gaussian) trait to a count (Poisson) trait
+#' @param x matrix, values for one or more traits (if not a matrix,
+#'   we cast to a matrix)
+#' @param TODO numeric or list, when numeric, provide a vector of TODO
+#' @return matrix of values with some traits recoded as counts
+#' @details If input trait is normal (Gaussian) then this function generates a
+#'   count trait according to the Poisson generalised linear model.
+#' @examples
+#' founderPop = quickHaplo(nInd=20, nChr=1, segSites=10)
+#' SP = SimParam$new(founderPop)
+#' \dontshow{SP$nThreads = 1L}
+#' SP$addTraitA(nQtlPerChr = 10, mean = c(0, 0), var = c(1, 2),
+#'              corA = matrix(data = c(1.0, 0.6,
+#'                                     0.6, 1.0), ncol = 2))
+#' pop = newPop(founderPop)
+#' pop = setPheno(pop, varE = c(1, 1))
+#' pheno(pop)
+#' #Convert a single input trait
+#' asCount(x = pheno(pop)[, 2])
+#' asCount(x = pheno(pop)[, 2], TODO = c(-1, 0, 1))
+#' asCount(x = pheno(pop)[, 2], TODO = c(-Inf, -1, 0, 1, Inf))
+#' #Convert multiple input traits
+#' try(asCount(x = pheno(pop)))
+#' asCount(x = pheno(pop),
+#'           TODO = list(NULL,
+#'                       ???))
+#' TODO export
+# asCount = function(x, TODO = 10) {
+#   if (!is.matrix(x)) {
+#     x = as.matrix(x)
+#   }
+#   nTraits = ncol(x)
+#   if (is.numeric(TODO)) {
+#     if (nTraits > 1) {
+#       stop("When x contains more than one column, you must supply a list of TODO! See examples.")
+#     }
+#     TODO = list(TODO)
+#   }
+#   for (trt in 1:nTraits) {
+#     if (!is.null(TODO[[trt]])) {
+# TODO: need to think what to do with an intercept and lambda
+#       x[, trt] = round(exp(x[, trt]))
+#     }
+#   }
+#   return(x)
+# }
diff --git a/R/popSummary.R b/R/popSummary.R
@@ -223,7 +223,7 @@ genParam = function(pop,simParam=NULL){
   if(is.null(simParam)){
     simParam = get("SP",envir=.GlobalEnv)
   }
-  
+
   nInd = nInd(pop)
   nTraits = simParam$nTraits
   traitNames = simParam$traitNames
@@ -680,6 +680,104 @@ ebv = function(pop){
   pop@ebv
 }
 
+#' @title Calculate parent average
+#'
+#' @param pop \code{\link{Pop-class}} with individuals whose parent average
+#'   will be calculated
+#' @param mothers \code{\link{Pop-class}} with mothers of individuals in \code{pop}
+#' @param fathers \code{\link{Pop-class}} with fathers of individuals in \code{pop}
+#' @param use character, calculate using \code{"\link{gv}"}, \code{"\link{bv}"},
+#'   \code{"\link{ebv}"}, or \code{"\link{pheno}"}
+#' @param simParam \code{\link{SimParam}} object
+#'
+#' @return a matrix of parent averages with dimensions nInd by nTraits
+#'
+#' @examples
+#' #Create founder haplotypes
+#' founderPop = quickHaplo(nInd=10, nChr=1, segSites=10)
+#'
+#' #Set simulation parameters
+#' SP = SimParam$new(founderPop)
+#' SP$addTraitAD(10, meanDD=0.5)
+#' SP$setVarE(h2=0.5)
+#' \dontshow{SP$nThreads = 1L}
+#'
+#' #Create population
+#' pop = newPop(founderPop, simParam=SP)
+#' pop2 = randCross(pop, nCrosses=10, nProgeny=2)
+#' parentAverage(pop2, mothers = pop, fathers = pop)
+#'
+#' @export
+# TODO: Should we call this `parentAverage()`, `pa()`, or something else?
+# TODO: Should we pass in `mothers` and `fathers` as arguments or `parents`?
+#       Using `parents` is convenient for monoecious species (we pass in just one pop).
+#       Using `mothers` and `fathers` is convenient for dioecious species (we would not have to combine pops).
+parentAverage = function(pop, mothers, fathers, use = "gv", simParam = NULL) {
+  if (is.null(simParam)) {
+    simParam = get("SP", envir = .GlobalEnv)
+  }
+  matchMothers = match(x = pop@mother, table = mothers@id)
+  matchFathers = match(x = pop@father, table = fathers@id)
+  if (use %in% c("gv", "ebv", "pheno")) {
+    tmp = 0.5 * (slot(object = mothers, name = use)[matchMothers, , drop = FALSE] +
+                 slot(object = fathers, name = use)[matchFathers, , drop = FALSE])
+  } else if (use == "bv") {
+    tmp = 0.5 * (bv(mothers, simParam = simParam)[matchMothers, , drop = FALSE] +
+                 bv(fathers, simParam = simParam)[matchFathers, , drop = FALSE])
+  } else {
+    stop("use must be one of 'gv', 'bv', 'ebv', or 'pheno'!")
+  }
+  return(tmp)
+}
+
+#' @title Calculate Mendelian sampling
+#'
+#' @param pop \code{\link{Pop-class}} with individuals whose parent average
+#'   will be calculated
+#' @param mothers \code{\link{Pop-class}} with mothers of individuals in \code{pop}
+#' @param fathers \code{\link{Pop-class}} with fathers of individuals in \code{pop}
+#' @param use character, calculate using \code{"\link{gv}"}, \code{"\link{bv}"},
+#'   \code{"\link{ebv}"}, or \code{"\link{pheno}"}
+#' @param simParam \code{\link{SimParam}} object
+#'
+#' @return a matrix of Mendelian samplings with dimensions nInd by nTraits
+#'
+#' @examples
+#' #Create founder haplotypes
+#' founderPop = quickHaplo(nInd=10, nChr=1, segSites=10)
+#'
+#' #Set simulation parameters
+#' SP = SimParam$new(founderPop)
+#' SP$addTraitAD(10, meanDD=0.5)
+#' SP$setVarE(h2=0.5)
+#' \dontshow{SP$nThreads = 1L}
+#'
+#' #Create population
+#' pop = newPop(founderPop, simParam=SP)
+#' pop2 = randCross(pop, nCrosses=10, nProgeny=2)
+#' mendelianSampling(pop2, mothers = pop, fathers = pop)
+#'
+#' @export
+# TODO: Should we call this `mendelianSampling()`, `ms()`, or something else?
+# TODO: Should we pass in `mothers` and `fathers` as arguments or `parents`?
+#       Using `parents` is convenient for monoecious species (we pass in just one pop).
+#       Using `mothers` and `fathers` is convenient for dioecious species (we would not have to combine pops).
+mendelianSampling = function(pop, mothers, fathers, use = "gv", simParam = NULL) {
+  if (is.null(simParam)) {
+    simParam = get("SP", envir = .GlobalEnv)
+  }
+  pa = parentAverage(pop = pop, mothers = mothers, fathers = fathers, use = use,
+                     simParam = simParam)
+  if (use %in% c("gv", "ebv", "pheno")) {
+    tmp = slot(object = pop, name = use) - pa
+  } else if (use == "bv") {
+    tmp = bv(pop, simParam = simParam) - pa
+  } else {
+    stop("use must be one of 'gv', 'bv', 'ebv', or 'pheno'!")
+  }
+  return(tmp)
+}
+
 #' @title Number of individuals
 #'
 #' @description A wrapper for accessing the nInd slot