Skip to content

Commit

Permalink
Address #1018
Browse files Browse the repository at this point in the history
  • Loading branch information
PoisonAlien committed May 5, 2024
1 parent 1fe2fd3 commit af128e4
Show file tree
Hide file tree
Showing 11 changed files with 240 additions and 196 deletions.
1 change: 0 additions & 1 deletion R/ClinicalEnrichment.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ clinicalEnrichment = function(maf, clinicalFeature = NULL, annotationDat = NULL,
}
}


colnames(cd)[2] = 'cf'
cd$cf = as.character(cd$cf)
cf.tbl = table(cd$cf)
Expand Down
240 changes: 120 additions & 120 deletions R/estimate_binconf.R
Original file line number Diff line number Diff line change
@@ -1,120 +1,120 @@
#X = number of successes
#n = number of trials
#alpha = pvalues threshold
#Details: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Agresti-Coull_interval

estimate_binconf = function(X, n, alpha = 0.05){

z = 1-(alpha/2)
zsqrd = z^2

nt = n + zsqrd
#pr = X/n
pr = (1/nt) * (X + (zsqrd/2))

pr_ci_up = pr + z*(sqrt((pr/nt) * (1-pr)))
pr_ci_low = pr - z*(sqrt((pr/nt) * (1-pr)))

data.table::data.table(ci_up = pr_ci_up, ci_low = pr_ci_low)
}


#From https://raw.githubusercontent.com/harrelfe/Hmisc/master/R/binconf.s
binconf <- function(x, n, alpha = 0.05,
method = c("wilson","exact","asymptotic","all"),
include.x = FALSE, include.n = FALSE,
return.df = FALSE){
## ..modifications for printing and the addition of a
## method argument and the asymptotic interval
## and to accept vector arguments were
## made by Brad Biggerstaff on 10 June 1999

method <- match.arg(method)
bc <- function(x, n, alpha, method)
{
nu1 <- 2 * (n - x + 1)
nu2 <- 2 * x
ll <- if(x > 0)
x/(x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1))
else
0

nu1p <- nu2 + 2
nu2p <- nu1 - 2
pp <- if(x < n)
qf(1 - alpha/2, nu1p, nu2p)
else
1

ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp)
zcrit <- - qnorm(alpha/2)
z2 <- zcrit * zcrit
p <- x/n
cl <- (p + z2/2/n + c(-1, 1) * zcrit *
sqrt((p * (1 - p) + z2/4/n)/n))/(1 + z2/n)

if(x == 1)
cl[1] <- - log(1 - alpha)/n

if(x == (n - 1))
cl[2] <- 1 + log(1 - alpha)/n

asymp.lcl <- x/n - qnorm(1 - alpha/2) *
sqrt(((x/n) * (1 - x/n))/n)

asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n)
)/n)
res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl))
res <- cbind(rep(x/n, 3), res)

##dimnames(res) <- list(c("Exact", "Wilson", "Asymptotic"), c(
## "Point Estimate", "Lower", "Upper"))
switch(method,
wilson = res[2, ],
exact = res[1, ],
asymptotic = res[3, ],
all = res,
res)
}

if((length(x) != length(n)) & length(x) == 1)
x <- rep(x, length(n))
if((length(x) != length(n)) & length(n) == 1)
n <- rep(n, length(x))
if((length(x) > 1 | length(n) > 1) & method == "all") {
method <- "wilson"
warning("method=all will not work with vectors...setting method to wilson")
}
if(method == "all" & length(x) == 1 & length(n) == 1) {
mat <- bc(x, n, alpha, method)
dimnames(mat) <- list(c("Exact", "Wilson", "Asymptotic"),
c("PointEst", "Lower", "Upper"))
if(include.n)
mat <- cbind(N = n, mat)

if(include.x)
mat <- cbind(X = x, mat)

if(return.df)
mat <- as.data.frame(mat)

return(mat)
}

mat <- matrix(ncol = 3, nrow = length(x))
for(i in 1:length(x))
mat[i, ] <- bc(x[i], n[i], alpha = alpha, method = method)

dimnames(mat) <- list(rep("", dim(mat)[1]),
c("PointEst", "Lower", "Upper"))
if(include.n)
mat <- cbind(N = n, mat)

if(include.x)
mat <- cbind(X = x, mat)

if(return.df)
mat <- as.data.frame(mat, row.names=NULL)

mat
}
#X = number of successes
#n = number of trials
#alpha = pvalues threshold
#Details: https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Agresti-Coull_interval

estimate_binconf = function(X, n, alpha = 0.05){

z = 1-(alpha/2)
zsqrd = z^2

nt = n + zsqrd
#pr = X/n
pr = (1/nt) * (X + (zsqrd/2))

pr_ci_up = pr + z*(sqrt((pr/nt) * (1-pr)))
pr_ci_low = pr - z*(sqrt((pr/nt) * (1-pr)))

data.table::data.table(ci_up = pr_ci_up, ci_low = pr_ci_low)
}


#From https://raw.githubusercontent.com/harrelfe/Hmisc/master/R/binconf.s
binconf <- function(x, n, alpha = 0.05,
method = c("wilson","exact","asymptotic","all"),
include.x = FALSE, include.n = FALSE,
return.df = FALSE){
## ..modifications for printing and the addition of a
## method argument and the asymptotic interval
## and to accept vector arguments were
## made by Brad Biggerstaff on 10 June 1999

method <- match.arg(method)
bc <- function(x, n, alpha, method)
{
nu1 <- 2 * (n - x + 1)
nu2 <- 2 * x
ll <- if(x > 0)
x/(x + qf(1 - alpha/2, nu1, nu2) * (n - x + 1))
else
0

nu1p <- nu2 + 2
nu2p <- nu1 - 2
pp <- if(x < n)
qf(1 - alpha/2, nu1p, nu2p)
else
1

ul <- ((x + 1) * pp)/(n - x + (x + 1) * pp)
zcrit <- - qnorm(alpha/2)
z2 <- zcrit * zcrit
p <- x/n
cl <- (p + z2/2/n + c(-1, 1) * zcrit *
sqrt((p * (1 - p) + z2/4/n)/n))/(1 + z2/n)

if(x == 1)
cl[1] <- - log(1 - alpha)/n

if(x == (n - 1))
cl[2] <- 1 + log(1 - alpha)/n

asymp.lcl <- x/n - qnorm(1 - alpha/2) *
sqrt(((x/n) * (1 - x/n))/n)

asymp.ucl <- x/n + qnorm(1 - alpha/2) * sqrt(((x/n) * (1 - x/n)
)/n)
res <- rbind(c(ll, ul), cl, c(asymp.lcl, asymp.ucl))
res <- cbind(rep(x/n, 3), res)

##dimnames(res) <- list(c("Exact", "Wilson", "Asymptotic"), c(
## "Point Estimate", "Lower", "Upper"))
switch(method,
wilson = res[2, ],
exact = res[1, ],
asymptotic = res[3, ],
all = res,
res)
}

if((length(x) != length(n)) & length(x) == 1)
x <- rep(x, length(n))
if((length(x) != length(n)) & length(n) == 1)
n <- rep(n, length(x))
if((length(x) > 1 | length(n) > 1) & method == "all") {
method <- "wilson"
warning("method=all will not work with vectors...setting method to wilson")
}
if(method == "all" & length(x) == 1 & length(n) == 1) {
mat <- bc(x, n, alpha, method)
dimnames(mat) <- list(c("Exact", "Wilson", "Asymptotic"),
c("PointEst", "Lower", "Upper"))
if(include.n)
mat <- cbind(N = n, mat)

if(include.x)
mat <- cbind(X = x, mat)

if(return.df)
mat <- as.data.frame(mat)

return(mat)
}

mat <- matrix(ncol = 3, nrow = length(x))
for(i in 1:length(x))
mat[i, ] <- bc(x[i], n[i], alpha = alpha, method = method)

dimnames(mat) <- list(rep("", dim(mat)[1]),
c("PointEst", "Lower", "Upper"))
if(include.n)
mat <- cbind(N = n, mat)

if(include.x)
mat <- cbind(X = x, mat)

if(return.df)
mat <- as.data.frame(mat, row.names=NULL)

mat
}
16 changes: 9 additions & 7 deletions R/plotClinicalEnrichment.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
#' @param geneFontSize cex for gene font size. Default 0.8
#' @param legendFontSize cex for legend font size. Default 0.8
#' @param showTitle Default TRUE
#' @param ylims Default c(-1, 1)
#' @return returns nothing.
#' @export
#' @seealso \code{\link{clinicalEnrichment}} \code{\link{signatureEnrichment}}
#'
plotEnrichmentResults = function(enrich_res, pVal = 0.05, ORthr = 1, featureLvls = NULL, cols = NULL, annoFontSize = 0.8, geneFontSize = 0.8, legendFontSize = 0.8, showTitle = TRUE){
plotEnrichmentResults = function(enrich_res, pVal = 0.05, ORthr = 1, featureLvls = NULL, cols = NULL,
annoFontSize = 0.8, geneFontSize = 0.8, legendFontSize = 0.8, showTitle = TRUE, ylims = c(-1, 1)){

res = enrich_res$groupwise_comparision

Expand Down Expand Up @@ -87,12 +89,12 @@ plotEnrichmentResults = function(enrich_res, pVal = 0.05, ORthr = 1, featureLvls

#return(plot.dat)

par(bty="n", mgp = c(0.5,0.5,0), las=1, tcl=-.25, font.main=4, xpd=TRUE, mar = c(4,3,3.5,1)) #
par(bty="n", mgp = c(0.5,0.5,0), las=1, tcl=-.25, font.main=4, xpd=TRUE, mar = c(6,3,3.5,1)) #
#plot(c(0, nrow(plot.dat)+15),c(0,0),xlim=c(0.5,33),las=2, ylim=c(-1,1),xlab="", xaxt="n", type="l")
b = barplot(height = plot.dat$g1_muts_fract, ylim = c(-1.25, yl_max), axes = FALSE, border = 0.1, col = bar.cols)
b = barplot(height = plot.dat$g1_muts_fract, ylim = ylims, axes = FALSE, border = 0.1, col = bar.cols)

#text(b, plot.dat$g1_muts_fract+0.03 , plot.dat$g1_title ,cex = annoFontSize, las = 2, srt=90, adj=0, xpd=TRUE, font = 1)
axis(side = 2, at = seq(-1, 1, 0.25), labels = c(rev(seq(0, 1, 0.25)), seq(0, 1, 0.25)[2:5]),
axis(side = 2, at = pretty(ylims), labels = abs(pretty(ylims)),
lwd = 1.2, font.axis = 2, cex = 1.5, font = 1)

for(i in 1:nrow(conf_int_g1)){
Expand All @@ -110,7 +112,7 @@ plotEnrichmentResults = function(enrich_res, pVal = 0.05, ORthr = 1, featureLvls
}
text(b, -conf_int_g2$Upper-0.03 , plot.dat$g2_title ,cex = annoFontSize, las = 2, srt=90, adj=1, xpd=TRUE, font = 1)

text(b, -0.75 , plot.dat$Hugo_Symbol ,cex = geneFontSize, las = 2, srt = 90, adj = 1, xpd = TRUE, font = 3)
text(b, min(ylims) , plot.dat$Hugo_Symbol ,cex = geneFontSize, las = 2, srt = 45, adj = 1, xpd = TRUE, font = 3)

# b = as.data.frame(b)
# b$Group = plot.dat$Group1
Expand All @@ -126,9 +128,9 @@ plotEnrichmentResults = function(enrich_res, pVal = 0.05, ORthr = 1, featureLvls
}else{
n_col = (length(legend.cols) %/% 4)+1
}
legend(x = 0, y = -1.1, pt.lwd = 2, ncol = n_col,
add_legend("bottomleft", pt.lwd = 2, ncol = n_col,
legend = c(names(legend.cols), "Rest"), fill = c(legend.cols, "gray70"),
bty = "n", cex = legendFontSize, border=NA, xpd = TRUE, text.font = 3)
bty = "n", cex = legendFontSize, border = NA, xpd = TRUE, text.font = 3)
if(showTitle){
title(main = enrich_res$clinicalFeature, adj = 0, cex.main = 1, outer = FALSE, font.main = 1)
}
Expand Down
35 changes: 27 additions & 8 deletions R/subsetMaf.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#' @param dropLevels Default TRUE.
#' @param restrictTo restrict subset operations to these. Can be 'all', 'cnv', or 'mutations'. Default 'all'. If 'cnv' or 'mutations', subset operations will only be applied on copy-number or mutation data respectively, while retaining other parts as is.
#' @param keepNA Keep NAs while sub-setting for ranges. Default `FALSE` - removes rows with missing loci prior to overlapping. Set to TRUE to keep them as is.
#' @param verbose Default TRUE
#' @return subset table or an object of class \code{\link{MAF-class}}
#' @seealso \code{\link{getFields}}
#' @examples
Expand All @@ -33,7 +34,9 @@
#'
#' @export

subsetMaf = function(maf, tsb = NULL, genes = NULL, query = NULL, clinQuery = NULL, ranges = NULL, keepNA = FALSE, mult = "first", fields = NULL, mafObj = TRUE, includeSyn = TRUE, isTCGA = FALSE, dropLevels = TRUE, restrictTo = 'all'){
subsetMaf = function(maf, tsb = NULL, genes = NULL, query = NULL, clinQuery = NULL, ranges = NULL,
keepNA = FALSE, mult = "first", fields = NULL, mafObj = TRUE, includeSyn = TRUE, isTCGA = FALSE,
dropLevels = TRUE, restrictTo = 'all', verbose = TRUE){

if(all(c(is.null(tsb), is.null(genes), is.null(query), is.null(ranges), is.null(clinQuery)))){
stop("Please provide sample names or genes or a query or ranges to subset by.")
Expand All @@ -52,17 +55,25 @@ subsetMaf = function(maf, tsb = NULL, genes = NULL, query = NULL, clinQuery = NU
if(!is.null(tsb)){
warning("sample names provided to tsb argument will be over written by clinical query", immediate. = TRUE)
}
message("-subsetting by clinical data..")

if(verbose){
message("-subsetting by clinical data..")
}

maf.anno = maf.anno[eval(parse(text = clinQuery))]

tsb = unique(as.character(maf.anno[,Tumor_Sample_Barcode]))
if(length(tsb) > 0){
message(paste0('--', length(tsb)), " samples meet the clinical query")
if(verbose){
message(paste0('--', length(tsb)), " samples meet the clinical query")
}
}else{
if(all(c(is.null(query), is.null(genes)))){
stop("--None of the samples meet the clinical query", call. = FALSE)
}else{
message("--None of the samples meet the clinical query")
if(verbose){
message("--None of the samples meet the clinical query")
}
maf.anno <- data.table::copy(x = maf@clinical.data)
}
tsb = NULL
Expand Down Expand Up @@ -113,7 +124,9 @@ subsetMaf = function(maf, tsb = NULL, genes = NULL, query = NULL, clinQuery = NU
default.fields = unique(c(default.fields, fields))

if(length(default.fields[!default.fields %in% colnames(maf.dat)]) > 0){
message("Missing fields. Ignoring them.. ")
if(verbose){
message("Missing fields. Ignoring them.. ")
}
print(default.fields[!default.fields %in% colnames(maf.dat)])
default.fields = default.fields[default.fields %in% colnames(maf.dat)]
}
Expand Down Expand Up @@ -160,17 +173,23 @@ subsetMaf = function(maf, tsb = NULL, genes = NULL, query = NULL, clinQuery = NU

maf.dat = data.table::foverlaps(x = maf.dat, y = ranges, type = "within", nomatch = NULL, verbose = FALSE, mult = mult)
maf.silent = data.table::foverlaps(x = maf.silent, y = ranges, type = "within", nomatch = NULL, verbose = FALSE, mult = mult)
message(paste0(nrow(maf.dat)+nrow(maf.silent), " variants within provided ranges"))
if(verbose){
message(paste0(nrow(maf.dat)+nrow(maf.silent), " variants within provided ranges"))
}

if(keepNA){
maf.dat = data.table::rbindlist(l = list(maf.dat, na_pos), use.names = TRUE, fill = TRUE)
maf.silent = data.table::rbindlist(l = list(maf.silent, na_pos_silent), use.names = TRUE, fill = TRUE)
if(nrow(na_pos)+nrow(na_pos_silent) > 0){
warning("Added back ", nrow(na_pos)+nrow(na_pos_silent), " rows with no loci info.")
if(verbose){
warning("Added back ", nrow(na_pos)+nrow(na_pos_silent), " rows with no loci info.")
}
}
}else{
if(nrow(na_pos)+nrow(na_pos_silent) > 0){
warning("Removed ", nrow(na_pos)+nrow(na_pos_silent), " rows with no loci info.")
if(verbose){
warning("Removed ", nrow(na_pos)+nrow(na_pos_silent), " rows with no loci info.")
}
}
}
}
Expand Down
Loading

0 comments on commit af128e4

Please sign in to comment.