diff --git a/DESCRIPTION b/DESCRIPTION index 47d47d75..c9585ad1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: PhyloProfile -Version: 1.19.8 -Date: 2024-08-26 +Version: 1.19.9 +Date: 2024-08-29 Title: PhyloProfile Authors@R: c( person("Vinh", "Tran", role = c("aut", "cre"), email = "tran@bio.uni-frankfurt.de", comment=c(ORCID="0000-0001-6772-7595")), diff --git a/R/createDomainPlot.R b/R/createDomainPlot.R index 3759219a..3b789904 100644 --- a/R/createDomainPlot.R +++ b/R/createDomainPlot.R @@ -709,7 +709,6 @@ joinPlotMergeLegends <- function( length(levels(as.factor(df2$feature))) ) ) - # create a temp plot that contains all features mergedDf <- rbind(df1, df2) colorScheme <- structure( diff --git a/R/createTaxonomyMatrix.R b/R/createTaxonomyMatrix.R index a239faec..64cf963f 100644 --- a/R/createTaxonomyMatrix.R +++ b/R/createTaxonomyMatrix.R @@ -123,6 +123,7 @@ getTaxonomyInfo <- function(inputTaxa = NULL, currentNCBIinfo = NULL) { refEntry <- currentNCBIinfo[currentNCBIinfo$ncbiID == refID, ] lastID <- refEntry$parentID inputTaxaInfo <- refEntry + if(length(lastID) == 0) stop(paste("ERROR with", refID)) while (lastID != 1) { if (lastID %in% names(tmp)) { inputTaxaInfo <- rbindlist( diff --git a/R/umapClustering.R b/R/umapClustering.R index 2649f755..b82b16cb 100644 --- a/R/umapClustering.R +++ b/R/umapClustering.R @@ -182,9 +182,10 @@ umapClustering <- function( #' Reduce the number of labels for UMAP plot based on the gene/taxon frequency #' @export -#' @usage groupLabelUmapData(data4umap = NULL, freqCutoff = 0) +#' @usage groupLabelUmapData(data4umap = NULL, freqCutoff = c(0,200)) #' @param data4umap data for UMAP clustering (output from prepareUmapData) -#' @param freqCutoff gene/taxon frequency cutoff +#' @param freqCutoff gene/taxon frequency cutoff range. Any labels that are +#' outside of this range will be assigned as [Other] #' @return A dataframe similar to input data4umap, but with modified Label #' column, where less frequent labels are grouped together as "Other" #' @author Vinh Tran tran@bio.uni-frankfurt.de @@ -195,26 +196,26 @@ umapClustering <- function( #' ) #' longDf <- createLongMatrix(rawInput) #' data4umap <- prepareUmapData(longDf, "phylum") -#' groupLabelUmapData(data4umap, freqCutoff = 3) +#' groupLabelUmapData(data4umap, freqCutoff = c(3,5)) -groupLabelUmapData <- function(data4umap = NULL, freqCutoff = 0) { +groupLabelUmapData <- function(data4umap = NULL, freqCutoff = c(0,200)) { if (is.null(data4umap)) stop("Input data cannot be NULL!") if (length(data4umap) == 0) stop("Input data cannot be EMPTY!") - # minFreq <- tail(sort(unique(data4umap$n)), labelNr)[1] - # keepLabel <- unique(data4umap$Label[data4umap$n >= minFreq]) - # data4umap$Label[!(data4umap$Label %in% keepLabel)] <- "[Other]" - data4umap$Label[data4umap$n < freqCutoff] <- "[Other]" + data4umap$Label[ + data4umap$n < freqCutoff[1] | data4umap$n > freqCutoff[2] + ] <- "[Other]" return(data4umap) } #' Create UMAP cluster plot #' @export -#' @usage createUmapPlotData(umapData = NULL, data4umap = NULL, freqCutoff = 0, -#' excludeTaxa = "None") +#' @usage createUmapPlotData(umapData = NULL, data4umap = NULL, +#' freqCutoff = c(0,200), excludeTaxa = "None") #' @param umapData data contains UMAP cluster (output from umapClustering()) #' @param data4umap data for UMAP clustering (output from prepareUmapData()) -#' @param freqCutoff gene/taxon frequency cutoff +#' @param freqCutoff gene/taxon frequency cutoff range. Any labels that are +#' outside of this range will be assigned as [Other] #' @param excludeTaxa hide taxa from plot. Default: "None" #' @importFrom utils tail #' @return A plot as ggplot object @@ -230,7 +231,8 @@ groupLabelUmapData <- function(data4umap = NULL, freqCutoff = 0) { #' createUmapPlotData(umapData, data4umap) createUmapPlotData <- function( - umapData = NULL, data4umap = NULL, freqCutoff = 0, excludeTaxa = "None" + umapData = NULL, data4umap = NULL, freqCutoff = c(0, 200), + excludeTaxa = "None" ) { if (is.null(umapData) | is.null(data4umap)) stop("Input data cannot be NULL!") @@ -241,8 +243,7 @@ createUmapPlotData <- function( data4umap$X <- umapData$layout[,1] data4umap$Y <- umapData$layout[,2] # join less freq items into "other" - # data4umap <- groupLabelUmapData(data4umap, freqCutoff) - data4umap$Label[data4umap$n < freqCutoff] <- "[Other]" + data4umap <- groupLabelUmapData(data4umap, freqCutoff) # exclude taxa if (length(excludeTaxa) > 0) { data4umap$X[data4umap$Label %in% excludeTaxa] <- NA @@ -254,7 +255,7 @@ createUmapPlotData <- function( #' Create UMAP cluster plot #' @export -#' @usage plotUmap(plotDf = NULL, legendPos = "right", colorPalette = "Set2", +#' @usage plotUmap(plotDf = NULL, legendPos = "bottom", colorPalette = "Set2", #' transparent = 0, textSize = 12, font = "Arial", highlightTaxa = NULL) #' @param plotDf data for UMAP plot #' @param legendPos position of legend. Default: "right" @@ -278,7 +279,7 @@ createUmapPlotData <- function( #' plotUmap(plotDf, font = "sans") plotUmap <- function( - plotDf = NULL, legendPos = "right", colorPalette = "Set2", + plotDf = NULL, legendPos = "bottom", colorPalette = "Set2", transparent = 0, textSize = 12, font = "Arial", highlightTaxa = NULL ) { if (is.null(plotDf)) stop("Input data cannot be NULL!") @@ -300,6 +301,8 @@ plotUmap <- function( ), .Names = levels(as.factor(plotDf$Label)) ) } + # adapt plot height based on number of labels + # generate plot plot <- ggplot(plotDf, aes(x = X, y = Y, color = Label)) + geom_point(aes(size = Freq), alpha = 1 - transparent) + @@ -309,13 +312,13 @@ plotUmap <- function( # change legend title if ("ncbiID" %in% colnames(plotDf)) { plot <- plot + guides( - color = guide_legend(override.aes = list(alpha = 1)), - size = guide_legend(title = "Number of genes") + color = guide_legend(override.aes = list(alpha = 1), ncols = 5), + size = guide_legend(title = "Number of genes", ncols = 2) ) - } else + } else plot <- plot + guides( - color = guide_legend(override.aes = list(alpha = 1)), - size = guide_legend(title = "Number of taxa") + color = guide_legend(override.aes = list(alpha = 1), ncols = 5), + size = guide_legend(title = "Number of taxa", ncols = 1) ) plot <- plot + theme( legend.position = legendPos, diff --git a/inst/PhyloProfile/server.R b/inst/PhyloProfile/server.R index 913482f1..bd3b1992 100644 --- a/inst/PhyloProfile/server.R +++ b/inst/PhyloProfile/server.R @@ -3122,15 +3122,47 @@ shinyServer(function(input, output, session) { shinyjs::disable("umapDataType") } }) + + # * data for UMAP clustering ----------------------------------------------- + umapData <- reactive({ + req(getMainInput()) + req(u$doUmapPlot) + if(is.null(getMainInput())) stop("Input data is NULL!") + withProgress( + message = "Preparing data for clustering...", value = 0.5, { + umapData <- prepareUmapData( + getMainInput(), input$umapRank, input$umapClusteringType, + getTaxDBpath(), input$umapFilterVar, input$umapCutoff, + input$umapGroupLabelsBy + ) + return(umapData) + } + ) + }) # * read custom labels ----------------------------------------------------- + values <- reactiveValues( + uploadLabelState = NULL + ) + output$umapCustomLabel.ui <- renderUI({ if (input$umapClusteringType == "taxa") - fileInput("umapCustomLabel", "Add customize labels") + fileInput("umapCustomLabel", "Add customized labels") + }) + + observeEvent(input$input$umapCustomLabel, { + values$uploadLabelState <- 'uploaded' + }) + + observeEvent(input$umapResetLables, { + values$uploadLabelState <- NULL + updateTextInput(session, "umapGroupHigherRank", value = "") }) getCustomLabels <- reactive({ req(getMainInput()) + if (is.null(values$uploadLabelState)) + return(data.frame(ncbiID = c(), label = c())) filein <- input$umapCustomLabel if (!is.null(filein)) { customLabels <- read.table( @@ -3142,39 +3174,80 @@ shinyServer(function(input, output, session) { return(customLabels[customLabels$ncbiID %in% mainInput$ncbiID,]) } else return(data.frame(ncbiID = c(), label = c())) }) - - # * data for UMAP clustering ----------------------------------------------- - umapData <- reactive({ - req(getMainInput()) - req(u$doUmapPlot) - if(is.null(getMainInput())) stop("Input data is NULL!") - withProgress( - message = "Preparing data for clustering...", value = 0.5, { - umapData <- prepareUmapData( - getMainInput(), input$umapRank, input$umapClusteringType, - getTaxDBpath(), input$umapFilterVar, input$umapCutoff, - input$umapGroupLabelsBy - ) - if (input$umapClusteringType == "taxa") { - customLabels <- getCustomLabels() - if(nrow(customLabels) > 0) { - umapData$Label[ - umapData$ncbiID %in% customLabels$ncbiID - ] <- customLabels$Label + + # * apply user-defined labels ---------------------------------------------- + renameLabelsUmap <- reactive({ + req(umapData()) + input$umapApplyChangeLables + umapData <- umapData() + isolate({ + if (input$umapClusteringType == "taxa") { + # group labels into higher rank + higherRankTaxa <- unlist(strsplit(input$umapGroupHigherRank, ";")) + higherRankTaxa <- trimws(higherRankTaxa) + if (length(higherRankTaxa) > 0) { + taxMatrix <- getTaxonomyMatrix(getTaxDBpath()) + nameList <- getNameList(getTaxDBpath()) + selDf <- data.frame( + selRank = nameList$rank[nameList$fullName %in% higherRankTaxa], + selID = nameList$ncbiID[nameList$fullName %in% higherRankTaxa], + Label = nameList$fullName[nameList$fullName %in% higherRankTaxa] + ) + selDf <- selDf[complete.cases(selDf),] + if (nrow(selDf) > 0) { + selTaxList <- lapply( + seq(nrow(selDf)), function (x) { + selRank <- selDf$selRank[x] + selID <- selDf$selID[x] + if (!(selRank %in% mainTaxonomyRank())) + selRank <- paste0("norank_", selID) + selRank <- quo(!! sym(selRank)) + selTaxDf <- taxMatrix %>% + filter((!!selRank) %in% selID) %>% + select(abbrName, !!selRank) + colnames(selTaxDf) <- c("ncbiID", "supertaxonID") + selTaxDf$sel_label <- selDf$Label[selDf$selID == selID] + return(selTaxDf) + } + ) + joinedSelTaxDf <- do.call(rbind, selTaxList) + joinedSelTaxDf <- joinedSelTaxDf %>% group_by(ncbiID) %>% + filter(supertaxonID == min(supertaxonID)) + umapData <- left_join(umapData, joinedSelTaxDf, by = "ncbiID") %>% + mutate(Label = ifelse(!is.na(sel_label), sel_label, Label)) %>% + select(-c(supertaxonID, sel_label)) } } - return(umapData) + # apply custom labels (if provided) + customLabels <- getCustomLabels() + if(nrow(customLabels) > 0) { + umapData$Label[ + umapData$ncbiID %in% customLabels$ncbiID + ] <- customLabels$Label + } } - ) + }) + return(umapData) + }) + + output$umapGroupHigherRank.warning <- renderUI({ + req(input$umapGroupHigherRank) + if (length(input$umapGroupHigherRank) > 0) { + list( + em(paste("Click `Change labels` to apply. If you don't see your", + "specified labels, please check for typos!")), + br(),br() + ) + } }) # * UMAP clustered data ---------------------------------------------------- umapCluster <- reactive({ - req(umapData()) + req(renameLabelsUmap()) withProgress( message = "Performing UMAP clustering...", value = 0.5, { umapData.umap <- umapClustering( - umapData(), input$umapClusteringType, input$umapDataType + renameLabelsUmap(), input$umapClusteringType, input$umapDataType ) return(umapData.umap) } @@ -3183,8 +3256,8 @@ shinyServer(function(input, output, session) { # * generate list of UMAP labels ------------------------------------------- output$umapTaxa.ui <- renderUI({ - req(umapData()) - df <- groupLabelUmapData(umapData(), input$umapLabelNr) + req(renameLabelsUmap()) + df <- groupLabelUmapData(renameLabelsUmap(), input$umapLabelNr) list( selectInput( "excludeUmapTaxa", "Choose labels to hide", multiple = TRUE, @@ -3205,7 +3278,8 @@ shinyServer(function(input, output, session) { selectFreq <- tail(freqList, 5)[1] updateSliderInput( session, "umapLabelNr", "Freq cutoff", min = freqList[1], - max = tail(freqList, 1), step = 1, value = selectFreq + max = tail(freqList, 1), step = 1, + value = c(selectFreq, tail(freqList, 1)) ) }) @@ -3216,9 +3290,9 @@ shinyServer(function(input, output, session) { req(getMainInput()) if(is.null(getMainInput())) stop("Input data is NULL!") req(umapCluster()) - req(umapData()) + req(renameLabelsUmap()) plotDf <- createUmapPlotData( - umapCluster(), umapData(), freqCutoff = input$umapLabelNr, + umapCluster(), renameLabelsUmap(), freqCutoff = input$umapLabelNr, excludeTaxa = input$excludeUmapTaxa ) return(plotDf) @@ -3260,7 +3334,7 @@ shinyServer(function(input, output, session) { ) ) }) - + # When a double-click happens, check if there's a brush on the plot # If so, zoom to the brush bounds; if not, reset the zoom. observeEvent(input$umapdblClick, { @@ -3302,7 +3376,7 @@ shinyServer(function(input, output, session) { c("umapData.RData") }, content = function(fileName) { - data4umap <- umapData() + data4umap <- renameLabelsUmap() umapClusteredData <- umapCluster() umapPlotData <- umapPlotData() save(data4umap, umapClusteredData, umapPlotData, file = fileName) diff --git a/inst/PhyloProfile/ui.R b/inst/PhyloProfile/ui.R index 998170d7..5ddf496f 100644 --- a/inst/PhyloProfile/ui.R +++ b/inst/PhyloProfile/ui.R @@ -862,20 +862,23 @@ shinyUI( # UMAP CLUSTERING TAB ============================================== tabPanel( - "UMAP clustering", + "UMAP", # * Top panel for plot configuration --------------------------- wellPanel( fluidRow( column( 2, radioButtons( - "umapClusteringType", "Cluster:", + "umapClusteringType", "UMAP", c("Taxa" = "taxa", "Genes" = "genes"), inline = TRUE ), radioButtons( - "umapDataType", "Data type:", - c("Binary" = "binary", "Non-binary" = "nonbinary"), + "umapDataType", "using", + c( + "Presence/Absence" = "binary", + "Numeric score" = "nonbinary" + ), inline = TRUE ) ), @@ -884,7 +887,7 @@ shinyUI( column( 4, createPlotSize( - "umapPlot.width", "Plot width", 700 + "umapPlot.width", "Plot width", 900 ) ), column( @@ -906,22 +909,20 @@ shinyUI( 6, radioButtons( "umapGroupLabelsBy", - "Group labels by the freq of", + "Summarize as [Other] by", choices = c("taxa", "genes"), inline = TRUE - ) + ), + em(paste("If frequency smaller or higher than", + "Freq cutoff, labels will be grouped", + "as [Other]")) ), column( 6, sliderInput( - "umapLabelNr", "Freq cutoff", min = 3, - max = 99, step = 1, value = 5, width = 200 - ), - shinyBS::bsPopover( - "umapLabelNr", "", - paste("Only the most frequent labels", - "will be shown"), - "bottom" + "umapLabelNr", "Freq cutoff", min = 0, + max = 99, step = 1, value = c(5,99), + width = 200 ) ) ), @@ -942,7 +943,31 @@ shinyUI( choices = getTaxonomyRanks(), selected = "phylum" ), + hr(), + textInput( + "umapGroupHigherRank", + "Group labels into higher rank", + value = "", + placeholder = paste( + "Type taxon names in higher rank, separated by", + "semicolon (e.g.: Fungi;Metazoa)" + ) + ), + uiOutput("umapGroupHigherRank.warning"), uiOutput("umapCustomLabel.ui"), + shinyBS::bsButton( + "umapApplyChangeLables", "Change labels", + style = "success", icon = icon("play") + ), + shinyBS::bsButton( + "umapResetLables", "Reset labels", + style = "default", icon = icon("rotate-left") + ), + shinyBS::bsPopover( + "umapResetLables", "", + paste("Click `Change labels` after reset!"), + "bottom" + ), hr(), uiOutput("umapTaxa.ui"), selectInput( @@ -976,35 +1001,37 @@ shinyUI( "bottom" ), checkboxInput("addGeneUmap", em("Selected genes")), - uiOutput("addUmapCustomProfileCheck.ui"), - hr(), - shinyBS::bsButton( - "plotUmap", "PLOT", type = "action", - style = "danger", size = "large", disabled = FALSE - ) + uiOutput("addUmapCustomProfileCheck.ui") ), # * Main panel for plot and tables ------------------------- mainPanel( column( 6, em( - "Brush and double-click to zoom in/out", + "Brush to select and double-click to zoom in/out", style = "color:darkblue" ) ), uiOutput("umapPlot.ui"), br(), column( - 5, + 7, column( - 6, + 4, + shinyBS::bsButton( + "plotUmap", "PLOT UMAP", type = "action", + style = "danger", disabled = FALSE + ) + ), + column( + 4, downloadButton( "umapDownloadPlot", "Download plot", class = "butDL" ) ), column( - 6, + 4, downloadButton( "umapDownloadData", "Download UMAP data", class = "butDL" diff --git a/man/createUmapPlotData.Rd b/man/createUmapPlotData.Rd index ee3187b7..1b0661d4 100644 --- a/man/createUmapPlotData.Rd +++ b/man/createUmapPlotData.Rd @@ -4,15 +4,16 @@ \alias{createUmapPlotData} \title{Create UMAP cluster plot} \usage{ -createUmapPlotData(umapData = NULL, data4umap = NULL, freqCutoff = 0, - excludeTaxa = "None") +createUmapPlotData(umapData = NULL, data4umap = NULL, + freqCutoff = c(0,200), excludeTaxa = "None") } \arguments{ \item{umapData}{data contains UMAP cluster (output from umapClustering())} \item{data4umap}{data for UMAP clustering (output from prepareUmapData())} -\item{freqCutoff}{gene/taxon frequency cutoff} +\item{freqCutoff}{gene/taxon frequency cutoff range. Any labels that are +outside of this range will be assigned as [Other]} \item{excludeTaxa}{hide taxa from plot. Default: "None"} } diff --git a/man/groupLabelUmapData.Rd b/man/groupLabelUmapData.Rd index 91761d62..14f501b8 100644 --- a/man/groupLabelUmapData.Rd +++ b/man/groupLabelUmapData.Rd @@ -4,12 +4,13 @@ \alias{groupLabelUmapData} \title{Reduce the number of labels for UMAP plot based on the gene/taxon frequency} \usage{ -groupLabelUmapData(data4umap = NULL, freqCutoff = 0) +groupLabelUmapData(data4umap = NULL, freqCutoff = c(0,200)) } \arguments{ \item{data4umap}{data for UMAP clustering (output from prepareUmapData)} -\item{freqCutoff}{gene/taxon frequency cutoff} +\item{freqCutoff}{gene/taxon frequency cutoff range. Any labels that are +outside of this range will be assigned as [Other]} } \value{ A dataframe similar to input data4umap, but with modified Label @@ -24,7 +25,7 @@ rawInput <- system.file( ) longDf <- createLongMatrix(rawInput) data4umap <- prepareUmapData(longDf, "phylum") -groupLabelUmapData(data4umap, freqCutoff = 3) +groupLabelUmapData(data4umap, freqCutoff = c(3,5)) } \seealso{ \code{\link{prepareUmapData}} diff --git a/man/plotUmap.Rd b/man/plotUmap.Rd index 89367560..21828989 100644 --- a/man/plotUmap.Rd +++ b/man/plotUmap.Rd @@ -4,7 +4,7 @@ \alias{plotUmap} \title{Create UMAP cluster plot} \usage{ -plotUmap(plotDf = NULL, legendPos = "right", colorPalette = "Set2", +plotUmap(plotDf = NULL, legendPos = "bottom", colorPalette = "Set2", transparent = 0, textSize = 12, font = "Arial", highlightTaxa = NULL) } \arguments{