From 884a5cec69d90afa7879699eeb99cdf12d65d53b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Flutre?= Date: Fri, 24 Nov 2017 09:49:00 +0000 Subject: [PATCH] add support for weights in GEMMA --- DESCRIPTION | 2 +- R/GEMMA.R | 45 +++++++++++++++++++++++++++++++++++------- R/quantgen.R | 3 ++- man/gemma.Rd | 8 +++++--- man/gemmaUlmmPerChr.Rd | 6 ++++-- 5 files changed, 50 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 57fb7a1..ae16c55 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rutilstimflutre Title: Timothee Flutre's personal R code -Version: 0.151.1 +Version: 0.152.0 Authors@R: c( person("Timothee", "Flutre", email="timothee.flutre@inra.fr", role=c("aut", "ctb", "cre")), person("Peter", "Carbonetto", role="aut", comment="function from varbvs example"), diff --git a/R/GEMMA.R b/R/GEMMA.R index 046d419..e983e61 100644 --- a/R/GEMMA.R +++ b/R/GEMMA.R @@ -61,6 +61,7 @@ genoDoses2bimbam <- function(X=NULL, tX=NULL, alleles, file=NULL, format="mean") ##' @param maf SNPs with minor allele frequency strictly below this threshold will be discarded ##' @param K.c kinship matrix; if NULL, will be estimated using X via \code{\link{estimGenRel}} with \code{relationships="additive"} and \code{method="zhou"} ##' @param W matrix of covariates with genotypes in rows (names as row names), a first column of 1 and a second column of covariates values +##' @param weights vector of positive weights with genotype names ##' @param out.dir directory in which the output files will be saved ##' @param task.id identifier of the task (used in temporary and output file names) ##' @param verbose verbosity level (0/1) @@ -141,11 +142,13 @@ genoDoses2bimbam <- function(X=NULL, tX=NULL, alleles, file=NULL, format="mean") ##' called.nulls=pvadj.DD$pv.bh > 0.05)) ##' } ##' @export -gemma <- function(model="ulmm", y, X, snp.coords, alleles=NULL, maf=0.01, K.c=NULL, - W, out.dir=getwd(), task.id="gemma", verbose=1, clean="none", +gemma <- function(model="ulmm", y, X, snp.coords, alleles=NULL, + maf=0.01, K.c=NULL, W, weights=NULL, + out.dir=getwd(), task.id="gemma", verbose=1, clean="none", seed=1859, burnin=1000, nb.iters=7000, thin=10){ - stopifnot(file.exists(Sys.which("gemma")), - model %in% c("ulmm", "bslmm")) + stopifnot(file.exists(Sys.which("gemma"))) + stopifnot(system("gemma > /dev/null") == 0) + stopifnot(model %in% c("ulmm", "bslmm")) if(is.matrix(y)){ stopifnot(ncol(y) == 1, ! is.null(rownames(y))) @@ -174,6 +177,13 @@ gemma <- function(model="ulmm", y, X, snp.coords, alleles=NULL, maf=0.01, K.c=NU ! is.null(rownames(alleles)), ncol(alleles) == 2, all(colnames(X) %in% rownames(alleles))) + if(! is.null(weights)) + stopifnot(is.vector(weights), + is.numeric(weights), + ! is.null(names(weights)), + length(weights) == length(y), + all(names(weights) == names(y)), + all(weights[! is.na(weights)] >= 0)) output <- list() @@ -241,6 +251,13 @@ gemma <- function(model="ulmm", y, X, snp.coords, alleles=NULL, maf=0.01, K.c=NU utils::write.table(x=y, file=gzfile(tmp.files["phenos"]), quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE) + if(! is.null(weights)){ + tmp.files <- c(tmp.files, + weights=paste0(out.dir, "/weights_", task.id, ".txt.gz")) + utils::write.table(x=weights, + file=gzfile(tmp.files["weights"]), + quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE) + } ## prepare cmd-line and execute it cmd <- paste0("cd ", out.dir, @@ -262,6 +279,9 @@ gemma <- function(model="ulmm", y, X, snp.coords, alleles=NULL, maf=0.01, K.c=NU " -rpace ", thin, " -seed ", seed) } + if(! is.null(weights)) + cmd <- paste0(cmd, + " -widv ", tmp.files["weights"]) if(verbose <= 0){ tmp.files <- c(tmp.files, stdoe=paste0(out.dir, "/stdouterr_gemma_", task.id, ".txt")) @@ -329,6 +349,7 @@ gemma <- function(model="ulmm", y, X, snp.coords, alleles=NULL, maf=0.01, K.c=NU ##' @param maf SNPs with minor allele frequency strictly below this threshold will be discarded ##' @param chr.ids set of chromosome identifiers to analyze (optional, all by default) ##' @param W matrix of covariates with genotypes in rows (names as row names), a first column of 1 and a second column of covariates values +##' @param weights vector of positive weights with genotype names ##' @param out.dir directory in which the data files will be found ##' @param task.id identifier of the task (used in output file names) ##' @param clean remove files: none, some (temporary only), all (temporary and results) @@ -337,10 +358,12 @@ gemma <- function(model="ulmm", y, X, snp.coords, alleles=NULL, maf=0.01, K.c=NU ##' @author Timothee Flutre [aut,cre], Dalel Ahmed [ctb] ##' @seealso \code{link{gemma}}, \code{\link{plotHistPval}}, \code{\link{qqplotPval}} ##' @export -gemmaUlmmPerChr <- function(y, X, snp.coords, alleles=NULL, maf=0.01, - chr.ids=NULL, W, out.dir, task.id="gemma", clean="none", +gemmaUlmmPerChr <- function(y, X, snp.coords, alleles=NULL, + maf=0.01, chr.ids=NULL, W, weights=NULL, + out.dir, task.id="gemma", clean="none", verbose=1){ stopifnot(file.exists(Sys.which("gemma"))) + stopifnot(system("gemma > /dev/null") == 0) if(is.matrix(y)){ stopifnot(ncol(y) == 1, ! is.null(rownames(y))) @@ -369,6 +392,13 @@ gemmaUlmmPerChr <- function(y, X, snp.coords, alleles=NULL, maf=0.01, ! is.null(rownames(alleles)), ncol(alleles) == 2, all(colnames(X) %in% rownames(alleles))) + if(! is.null(weights)) + stopifnot(is.vector(weights), + is.numeric(weights), + ! is.null(names(weights)), + length(weights) == length(y), + all(names(weights) == names(y)), + all(weights[! is.na(weights)] >= 0)) out <- list() @@ -418,7 +448,8 @@ gemmaUlmmPerChr <- function(y, X, snp.coords, alleles=NULL, maf=0.01, X=X[, subset.snp.ids], snp.coords=snp.coords[subset.snp.ids,], alleles=alleles[subset.snp.ids,], - maf=maf, K.c=K.c, W=W, out.dir=out.dir, + maf=maf, K.c=K.c, W=W, weights=weights, + out.dir=out.dir, task.id=paste0(task.id, "-", chr.id), verbose=verbose-1, clean=clean) out[[chr.id]] <- out.chr.id$tests diff --git a/R/quantgen.R b/R/quantgen.R index f52975f..365f549 100644 --- a/R/quantgen.R +++ b/R/quantgen.R @@ -8080,7 +8080,8 @@ boxplotCandidateQtl <- function(y, X, snp, simplify.imputed=TRUE, counts <- table(x) if(verbose > 0){ - msg <- paste0("genotypic classes:") + msg <- paste0("marker '", snp, "'", + "\ngenotypic classes:") for(i in seq_along(counts)) msg <- paste0(msg, " ", names(counts)[i], "=", counts[i]) msg <- paste0(msg, " (total=", sum(counts), ")") diff --git a/man/gemma.Rd b/man/gemma.Rd index 3096675..2d86aca 100644 --- a/man/gemma.Rd +++ b/man/gemma.Rd @@ -5,9 +5,9 @@ \title{GEMMA} \usage{ gemma(model = "ulmm", y, X, snp.coords, alleles = NULL, maf = 0.01, - K.c = NULL, W, out.dir = getwd(), task.id = "gemma", verbose = 1, - clean = "none", seed = 1859, burnin = 1000, nb.iters = 7000, - thin = 10) + K.c = NULL, W, weights = NULL, out.dir = getwd(), task.id = "gemma", + verbose = 1, clean = "none", seed = 1859, burnin = 1000, + nb.iters = 7000, thin = 10) } \arguments{ \item{model}{name of the model to fit (ulmm/bslmm)} @@ -26,6 +26,8 @@ gemma(model = "ulmm", y, X, snp.coords, alleles = NULL, maf = 0.01, \item{W}{matrix of covariates with genotypes in rows (names as row names), a first column of 1 and a second column of covariates values} +\item{weights}{vector of positive weights with genotype names} + \item{out.dir}{directory in which the output files will be saved} \item{task.id}{identifier of the task (used in temporary and output file names)} diff --git a/man/gemmaUlmmPerChr.Rd b/man/gemmaUlmmPerChr.Rd index 8100169..af12a92 100644 --- a/man/gemmaUlmmPerChr.Rd +++ b/man/gemmaUlmmPerChr.Rd @@ -5,8 +5,8 @@ \title{GEMMA uLMM per chromosome} \usage{ gemmaUlmmPerChr(y, X, snp.coords, alleles = NULL, maf = 0.01, - chr.ids = NULL, W, out.dir, task.id = "gemma", clean = "none", - verbose = 1) + chr.ids = NULL, W, weights = NULL, out.dir, task.id = "gemma", + clean = "none", verbose = 1) } \arguments{ \item{y}{vector of phenotypes with genotype names} @@ -23,6 +23,8 @@ gemmaUlmmPerChr(y, X, snp.coords, alleles = NULL, maf = 0.01, \item{W}{matrix of covariates with genotypes in rows (names as row names), a first column of 1 and a second column of covariates values} +\item{weights}{vector of positive weights with genotype names} + \item{out.dir}{directory in which the data files will be found} \item{task.id}{identifier of the task (used in output file names)}