From c3db71d65763b59d55ee28dcbafd1fc7b775db99 Mon Sep 17 00:00:00 2001 From: Anand Mayakonda Date: Wed, 17 Jan 2024 13:07:18 +0100 Subject: [PATCH] address #997 --- DESCRIPTION | 4 ++-- R/oncoplot.R | 41 ++++++++++++++++++++++++++++------------- R/summarizeMaf.R | 19 ++++++++++--------- inst/NEWS.md | 12 +++++++++++- 4 files changed, 51 insertions(+), 25 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 76e6150..95ef0c3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: maftools Title: Summarize, Analyze and Visualize MAF Files -Version: 2.17.0 +Version: 2.18.1 Date: 2021-04-30 Authors@R: person(given = "Anand", @@ -57,5 +57,5 @@ Encoding: UTF-8 LazyData: TRUE NeedsCompilation: no Packaged: 2016-04-08 02:06:05 UTC; anand -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 SystemRequirements: GNU make diff --git a/R/oncoplot.R b/R/oncoplot.R index 927cd3b..0f850be 100644 --- a/R/oncoplot.R +++ b/R/oncoplot.R @@ -68,7 +68,7 @@ #' @param cohortSize Number of sequenced samples in the cohort. Default all samples from Cohort. You can manually specify the cohort size. Default \code{NULL} #' @param colors named vector of colors for each Variant_Classification. #' @param cBioPortal Adds annotations similar to cBioPortals MutationMapper and collapse Variants into Truncating and rest. -#' @param bgCol Background grid color for wild-type (not-mutated) samples. Default gray - "#CCCCCC" +#' @param bgCol Background grid color for wild-type (not-mutated) samples. Default "#ecf0f1" #' @param borderCol border grid color (not-mutated) samples. Default 'white'. #' @param annoBorderCol border grid color for annotations. Default NA. #' @param numericAnnoCol color palette used for numeric annotations. Default 'YlOrBr' from RColorBrewer @@ -85,7 +85,10 @@ #' @param showTitle Default TRUE #' @param titleText Custom title. Default `NULL` #' @param showPct Default TRUE. Shows percent altered to the right side of the plot. -#' @return None. 
+#' @returns Invisibly returns a list with components +#' 1. `oncomatrix` A matrix used for drawing the oncoplot. Values are numeric coded for each variant classification +#' 2. `vc_legend` A mapping of variant classification to numeric values in the oncomatrix +#' 3. `vc_color` Color coding used for each variant classification #' @examples #' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools") #' laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools') @@ -116,7 +119,7 @@ oncoplot = oncoplot = function(maf, top = 20, minMut = NULL, genes = NULL, alter sortByMutation = FALSE, keepGeneOrder = FALSE, GeneOrderSort = TRUE, sampleOrder = NULL, additionalFeature = NULL, additionalFeaturePch = 20, additionalFeatureCol = "gray70", additionalFeatureCex = 0.9, genesToIgnore = NULL, removeNonMutated = FALSE, fill = TRUE, cohortSize = NULL, - colors = NULL, cBioPortal = FALSE, bgCol = "#CCCCCC", borderCol = 'white', annoBorderCol = NA, numericAnnoCol = NULL, + colors = NULL, cBioPortal = FALSE, bgCol = "#ecf0f1", borderCol = 'white', annoBorderCol = NA, numericAnnoCol = NULL, drawBox = FALSE, fontSize = 0.8, SampleNamefontSize = 1, titleFontSize = 1.5, legendFontSize = 1.2, annotationFontSize = 1.2, sepwd_genes = 0.5, sepwd_samples = 0.25, writeMatrix = FALSE, colbar_pathway = FALSE, showTitle = TRUE, titleText = NULL, showPct = TRUE){ @@ -411,16 +414,26 @@ oncoplot = oncoplot = function(maf, top = 20, minMut = NULL, genes = NULL, alter x }) numMat = do.call(rbind, nm) - #Add pathway information to the character matrix mat_origin_path = rownames(numMat)[!rownames(numMat) %in% rownames(mat_origin)] + mat_origin_path = numMat[mat_origin_path,, drop = FALSE] mat_origin_path[mat_origin_path == 0] = "" mat_origin_path[mat_origin_path == "99"] = "pathway" mat_origin_path = mat_origin_path[,colnames(mat_origin), drop = FALSE] if(collapsePathway){ + #print(mat_origin_path) mat_origin = mat_origin_path numMat = 
numMat[rownames(mat_origin),, drop = FALSE] + row_ord = names(sort(apply(numMat, 1, function(x) length(x[x == 0])), decreasing = FALSE)) + numMat = numMat[row_ord,, drop = FALSE] + tnumMat = t(numMat) #transposematrix + numMat = t(tnumMat[do.call(order, c(as.list(as.data.frame(tnumMat)), decreasing = TRUE)), ]) #sort + + if(sortByAnnotation){ + numMat = sortByAnnotation(numMat = numMat, maf = maf, anno = annotation, annoOrder = annotationOrder, group = groupAnnotationBySize, isNumeric = FALSE) + } + #return(numMat) }else{ mat_origin = rbind(mat_origin, mat_origin_path) } @@ -1035,7 +1048,7 @@ oncoplot = oncoplot = function(maf, top = 20, minMut = NULL, genes = NULL, alter } #Add grids - abline(h = (0:(ncol(nm))) + 0.5, col = annoBorderCol, lwd = sepwd_genes) + #abline(h = (0:(ncol(nm))) + 0.5, col = annoBorderCol, lwd = sepwd_genes) abline(v = (0:(nrow(nm))) + 0.5, col = annoBorderCol, lwd = sepwd_samples) mtext(text = colnames(annotation), side = 4, font = 1, line = 0.4, cex = fontSize, las = 2, at = 1:ncol(annotation)) @@ -1214,13 +1227,14 @@ oncoplot = oncoplot = function(maf, top = 20, minMut = NULL, genes = NULL, alter } } - if(removeNonMutated){ - #mutSamples = length(unique(unlist(genesToBarcodes(maf = maf, genes = rownames(mat), justNames = TRUE)))) - altStat = paste0("Altered in ", ncol(numMat), " (", round(ncol(numMat)/totSamps, digits = 4)*100, "%) of ", totSamps, " samples.") - }else{ - mutSamples = length(unique(unlist(genesToBarcodes(maf = maf, genes = rownames(numMat), justNames = TRUE, verbose = FALSE)))) - altStat = paste0("Altered in ", mutSamples, " (", round(mutSamples/totSamps, digits = 4)*100, "%) of ", totSamps, " samples.") - } + n_mut_samps = length(which(colSums(numMat) != 0)) + altStat = paste0("Altered in ", n_mut_samps, " (", round(n_mut_samps/totSamps, digits = 4)*100, "%) of ", totSamps, " samples.") + # if(removeNonMutated){ + # #mutSamples = length(unique(unlist(genesToBarcodes(maf = maf, genes = rownames(mat), justNames = TRUE)))) 
+ # }else{ + # mutSamples = length(unique(unlist(genesToBarcodes(maf = maf, genes = rownames(numMat), justNames = TRUE, verbose = FALSE)))) + # altStat = paste0("Altered in ", mutSamples, " (", round(mutSamples/totSamps, digits = 4)*100, "%) of ", totSamps, " samples.") + # } if(showTitle){ if(is.null(titleText)){ @@ -1229,5 +1243,6 @@ oncoplot = oncoplot = function(maf, top = 20, minMut = NULL, genes = NULL, alter title(main = titleText, outer = TRUE, line = -1, cex.main = titleFontSize) } } - return(invisible(rownames(nm))) + + return(invisible(list(oncomatrix = numMat, vc_legend = vc_codes, vc_color = leg_classes))) } diff --git a/R/summarizeMaf.R b/R/summarizeMaf.R index f58ab42..8b7de26 100644 --- a/R/summarizeMaf.R +++ b/R/summarizeMaf.R @@ -8,7 +8,7 @@ summarizeMaf = function(maf, anno = NULL, chatty = TRUE){ if('NCBI_Build' %in% colnames(maf)){ NCBI_Build = unique(maf[!Variant_Type %in% 'CNV', NCBI_Build]) NCBI_Build = NCBI_Build[!is.na(NCBI_Build)] - if (length(NCBI_Build)==0) NCBI_Build=NA + if (length(NCBI_Build)==0) NCBI_Build=NA if(chatty){ if(length(NCBI_Build) > 1){ @@ -191,14 +191,15 @@ summarizeMaf = function(maf, anno = NULL, chatty = TRUE){ #clean up annotation data colnames(sample.anno) = gsub(pattern = ' ', replacement = '_', x = colnames(sample.anno), fixed = TRUE) #replace spaces in column names for annotation data - if(nrow(sample.anno) == 1){ - temp_colnames = colnames(sample.anno) - sample.anno = as.data.frame(apply(sample.anno, 2, function(y) trimws(y))) #remove trailing whitespaces - sample.anno = data.frame(t(unlist(sample.anno, use.names = FALSE))) - colnames(sample.anno) = temp_colnames - }else{ - sample.anno = as.data.frame(apply(sample.anno, 2, function(y) trimws(y))) #remove trailing whitespaces - } + #Do not remove whitespaces - which coerces columns to character. 
Issue: #997 + # if(nrow(sample.anno) == 1){ + # temp_colnames = colnames(sample.anno) + # sample.anno = as.data.frame(apply(sample.anno, 2, function(y) trimws(y))) #remove trailing whitespaces + # sample.anno = data.frame(t(unlist(sample.anno, use.names = FALSE))) + # colnames(sample.anno) = temp_colnames + # }else{ + # sample.anno = as.data.frame(apply(sample.anno, 2, function(y) trimws(y))) #remove trailing whitespaces + # } sample.anno[sample.anno == ""] = NA #Replace blanks with NA #sample.anno = as.data.frame(apply(sample.anno, 2, function(y) gsub(pattern = " ", replacement = "_", x = y))) #replace spaces with _ diff --git a/inst/NEWS.md b/inst/NEWS.md index 5896f6f..05bd70e 100644 --- a/inst/NEWS.md +++ b/inst/NEWS.md @@ -1,6 +1,16 @@ -# CHANGES IN VERSION 2.17.0 +# CHANGES IN VERSION 2.18.1 (GitHub master branch) +## BUG FIXES +- MAJOR: `read.maf` previously coerced all clinical data columns to character. Column types are now preserved as auto-detected when the data is read. Issue: [997](https://github.com/PoisonAlien/maftools/issues/997) + +## ENHANCEMENTS +- Better sorting of oncoplot with `collapsePathway` +- Changed default background for oncoplot from `gray` to `#ecf0f1` + +# CHANGES IN VERSION 2.18.0 +(Bioconductor release branch) + ## NEW FUNCTIONS - `pathways` and `plotPathways`for summarizing & visualizing pathways Issue: [956](https://github.com/PoisonAlien/maftools/issues/956) - `coGisticChromPlot` for plotting two GISTIC objects side-by-side. PR by [biosunsci](https://github.com/biosunsci) [954](https://github.com/PoisonAlien/maftools/pull/954)