Skip to content

Commit

Permalink
add pruneSnpsLd() + minor fix in writeSegregJoinMap()
Browse files Browse the repository at this point in the history
  • Loading branch information
Timothée Flutre committed Nov 9, 2017
1 parent ea02519 commit 99ad914
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 9 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rutilstimflutre
Title: Timothee Flutre's personal R code
Version: 0.150.0
Version: 0.151.0
Authors@R: c(
person("Timothee", "Flutre", email="[email protected]", role=c("aut", "ctb", "cre")),
person("Peter", "Carbonetto", role="aut", comment="function from varbvs example"),
Expand Down Expand Up @@ -59,6 +59,7 @@ Suggests:
rstan,
S4Vectors,
scrm,
SNPRelate,
sp,
sparseMVN,
stats,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ export(plotVcfPercNa)
export(plotWithScale)
export(precMatAR1)
export(prettyPrintSummary)
export(pruneSnpsLd)
export(pve2beta)
export(qqplotPval)
export(qtlrelPerChr)
Expand Down
87 changes: 83 additions & 4 deletions R/quantgen.R
Original file line number Diff line number Diff line change
Expand Up @@ -1283,11 +1283,11 @@ readSegregJoinMap <- function(file, na.string="--", verbose=1){
##' Write genotype data in the \href{https://www.kyazma.nl/index.php/JoinMap/}{JoinMap} format into a "loc" file used by this software.
##' @param pop.name name of the population
##' @param pop.type type of the population
##' @param locus vector of locus names (should be shorter or equal to 20 characters, otherwise a warning will be issued)
##' @param segregs vector of segregation types
##' @param locus vector of locus names; should be shorter or equal to 20 characters, otherwise a warning will be issued; should be in the same order as the rows of the "genos" argument
##' @param segregs vector of segregation types; should be in the same order as the rows of the "genos" argument
##' @param genos data frame containing genotypes encoded in the JoinMap format, similar to the output from \code{\link{genoClasses2JoinMap}}
##' @param phases vector of phase types (optional)
##' @param classifs vector of classification types (optional)
##' @param phases vector of phase types (optional; should be in the same order as the rows of the "genos" argument)
##' @param classifs vector of classification types (optional; should be in the same order as the rows of the "genos" argument)
##' @param file name of the file in which the data will be saved (will be compressed if ends with ".gz" and "gzip" is available in the PATH)
##' @param save.ind.names if TRUE, individual names will be saved at the end of the file
##' @param na.string character to replace NA's in "genos"
Expand Down Expand Up @@ -1372,6 +1372,7 @@ writeSegregJoinMap <- function(pop.name, pop.type="CP",
tmp <- cbind(tmp, phases)
if(! is.null(classifs))
tmp <- cbind(tmp, classifs)
rownames(tmp) <- NULL
genos[is.na(genos)] <- na.string
tmp <- cbind(tmp, genos)
suppressWarnings(utils::write.table(x=tmp, file=con, append=TRUE,
Expand Down Expand Up @@ -4924,6 +4925,7 @@ distConsecutiveSnps <- function(snp.coords, only.chr=NULL, nb.cores=1){
##' @param only.chr identifier of a given chromosome
##' @return vector of SNP identifiers
##' @author Timothee Flutre
##' @seealso \code{\link{pruneSnpsLd}}
##' @export
thinSnps <- function(method, threshold, snp.coords, only.chr=NULL){
requireNamespaces("GenomicRanges")
Expand Down Expand Up @@ -4961,6 +4963,83 @@ thinSnps <- function(method, threshold, snp.coords, only.chr=NULL){
return(out.snp.ids)
}

##' Prune SNPs based on LD
##'
##' Prune SNPs based on their pairwise linkage disequilibriums via sliding windows using the "snpgdsLDpruning" function from the "SNPRelate" package..
##' @param X matrix of bi-allelic SNP genotypes encoded, for each SNP, in number of copies of its second allele, i.e. as allele doses in {0,1,2}, with genotypes in rows and SNPs in columns; the "second" allele is arbitrary, it can correspond to the minor (least frequent) or the major (most frequent) allele; will be transformed into a "gds" object (see next argument)
##' @param snp.coords data frame which row names are SNP identifiers, the first column should contain chromosomes as integers, and the second column should contain positions; compulsory if the X argument is specified
##' @param gds object of class "SNPGDSFileClass" from the SNPRelate package
##' @param ld.threshold the LD threshold below which SNPs are discarded; will be passed to "snpgdsLDpruning"
##' @param remove.monosnp will be passed to "snpgdsLDpruning"
##' @param maf will be passed to "snpgdsLDpruning"
##' @param missing.rate will be passed to "snpgdsLDpruning"
##' @param method will be passed to "snpgdsLDpruning"
##' @param slide.max.bp will be passed to "snpgdsLDpruning"
##' @param slide.max.n will be passed to "snpgdsLDpruning"
##' @param seed seed for the pseudo-random number generator
##' @param nb.cores will be passed to "snpgdsLDpruning"
##' @param verbose verbosity level (0/1)
##' @return vector of SNP identifiers
##' @author Timothee Flutre
##' @seealso \code{\link{thinSnps}}, \code{\link[SNPRelate]{snpgdsLDpruning}}
##' @export
pruneSnpsLd <- function(X=NULL, snp.coords=NULL, gds=NULL,
ld.threshold=0.2,
remove.monosnp=TRUE, maf=NaN, missing.rate=NaN,
method=c("composite", "r", "dprime", "corr"),
slide.max.bp=500000, slide.max.n=NA,
seed=NULL, nb.cores=1, verbose=1){
requireNamespace("SNPRelate")
stopifnot(xor(is.null(X), is.null(gds)))
if(is.null(gds)){
stopIfNotValidGenosDose(X=X, check.noNA=FALSE, check.notImputed=TRUE)
stopifnot(! is.null(snp.coords),
is.data.frame(snp.coords),
! is.null(rownames(snp.coords)),
all(colnames(X) %in% rownames(snp.coords)),
ncol(snp.coords) >= 2)
}

gds.file <- NULL
if(is.null(gds)){
gds.file <- tempfile()
snp.coords <- snp.coords[colnames(X),] # re-order the rows
SNPRelate::snpgdsCreateGeno(gds.fn=gds.file,
genmat=X,
sample.id=rownames(X),
snp.id=colnames(X),
snp.chromosome=snp.coords[,1],
snp.position=snp.coords[,2],
snpfirstdim=FALSE)
gds <- SNPRelate::snpgdsOpen(gds.file)
}

if(verbose > 1)
SNPRelate::snpgdsSummary(gds)

if(! is.null(seed))
set.seed(seed)
out.snp.ids <-
SNPRelate::snpgdsLDpruning(gdsobj=gds,
remove.monosnp=remove.monosnp,
maf=maf,
missing.rate=missing.rate,
method=method,
slide.max.bp=slide.max.bp,
slide.max.n=slide.max.n,
ld.threshold=ld.threshold,
num.thread=nb.cores,
verbose=ifelse(verbose > 0, TRUE, FALSE))
out.snp.ids <- unlist(out.snp.ids)

if(! is.null(gds.file)){
SNPRelate::snpgdsClose(gds)
file.remove(gds.file)
}

return(out.snp.ids)
}

##' Genotype imputation
##'
##' Impute missing SNP genotypes with the mean of the non-missing.
Expand Down
50 changes: 50 additions & 0 deletions man/pruneSnpsLd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions man/thinSnps.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/writeSegregJoinMap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 99ad914

Please sign in to comment.