diff --git a/DESCRIPTION b/DESCRIPTION index e603c83..f8a0772 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,8 @@ Version: 0.99.0 License: GPL-3 Description: Analysis toolkit for intra-species diversity from metagenomics. Authors@R: person("Noriaki", "Sato", email = "nori@hgc.jp", role = c("cre", "aut")) -Imports: GetoptLong, BiocFileCache, RCurl, ggplot2, ggraph, igraph, vegan, methods, data.table, ggtree, phangorn, RColorBrewer, circlize, ComplexHeatmap, ggkegg, ape, dplyr, exactRankTests, ggblend, ggh4x,scales, tidygraph, ggplotify +Depends: ggplot2, ggstar, ggraph, igraph +Imports: GetoptLong, BiocFileCache, RCurl, igraph, vegan, methods, data.table, phangorn, RColorBrewer, ggtree, circlize, ComplexHeatmap, ggkegg, ape, dplyr, exactRankTests, ggblend, ggh4x, scales, tidygraph, ggplotify, ggtreeExtra, ggnewscale, scico Suggests: simplifyEnrichment, stringr, tidyr, Boruta, knitr, ggrepel RoxygenNote: 7.2.3 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 561d46d..2738531 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ export(plotTree) export(returnGenes) export(setAnnotation) export(setGroup) +export(setMetadata) export(setTree) export(strainClusterHeatmap) export(summariseAbundance) @@ -68,11 +69,16 @@ importFrom(circlize,circos.text) importFrom(circlize,circos.track) importFrom(data.table,fread) importFrom(ggkegg,pathway) +importFrom(ggnewscale,new_scale_fill) importFrom(ggplotify,as.ggplot) +importFrom(ggstar,geom_star) +importFrom(ggtreeExtra,geom_fruit) importFrom(grDevices,colorRampPalette) importFrom(phangorn,NJ) importFrom(phangorn,dist.ml) importFrom(phangorn,read.phyDat) +importFrom(scico,scale_fill_scico) +importFrom(scico,scale_fill_scico_d) importFrom(stats,as.formula) importFrom(stats,dist) importFrom(stats,prcomp) diff --git a/R/InStrainFuncs.R b/R/InStrainFuncs.R index f75e2ea..d4cf999 100644 --- a/R/InStrainFuncs.R +++ b/R/InStrainFuncs.R @@ -32,6 +32,7 @@ genomeHeatmap <- function(stana, species, column="conANI", cl=NULL, heatmapArgs= heatmapArgs[["matrix"]] <- m heatmapArgs[["name"]] <- column heatmapArgs[["column_split"]] <- gr[colnames(m)] + heatmapArgs[["rect_gp"]] <- gpar(col = "white", lwd = 2) do.call(Heatmap, heatmapArgs) } @@ -71,5 +72,7 @@ strainClusterHeatmap <- function(stana, species, cl=NULL, heatmapArgs=list()) { heatmapArgs[["matrix"]] <- edge_mat heatmapArgs[["name"]] <- "cluster_present" heatmapArgs[["column_split"]] <- gr[colnames(edge_mat)] + heatmapArgs[["rect_gp"]] <- gpar(col = "white", lwd = 2) + do.call(Heatmap, heatmapArgs) } \ No newline at end of file diff --git a/R/plotTree.R b/R/plotTree.R index 6c7005b..648033f 100644 --- a/R/plotTree.R +++ b/R/plotTree.R @@ -1,7 +1,8 @@ #' plotTree #' #' infer the tree and plot -#' use dist.ml and NJ for inference +#' The function uses dist.ml and NJ for inference +#' if metadata is available, use them to plot metadata around the tree. #' #' @param stana stana object #' @param species species to plot @@ -11,26 +12,128 @@ #' @param tree_args passed to dist function in phangorn #' @param dist_method dist method in phangorn, default to dist.ml #' @param branch.length branch length, default to "none", cladogram +#' @param meta default to NULL, column name in `meta` slot +#' @param point_size point size +#' @param layout layout to be used in ggtree +#' @importFrom ggtreeExtra geom_fruit +#' @importFrom ggnewscale new_scale_fill +#' @importFrom ggstar geom_star +#' @importFrom scico scale_fill_scico_d scale_fill_scico #' #' @export plotTree <- function(stana, species=NULL, cl=NULL, - dist_method="dist.ml", - tree_args=list(), branch.length="none") { + dist_method="dist.ml", meta=NULL, layout="circular", + tree_args=list(), branch.length="none", point_size=2) { if (is.null(cl)) {cl <- stana@cl} + if (!is.null(meta)) { + meta <- checkMeta(stana, meta) + } if (is.null(species)) {species <- names(stana@fastaList)} for (sp in species) { tre <- stana@fastaList[[sp]] + + ## Infer tree tree_args[["x"]] <- tre dm <- do.call(dist_method, tree_args) tre <- NJ(dm) stana@treeList[[sp]] <- tre - tre <- groupOTU(tre, cl) - tp <- ggtree(tre, - layout='circular',branch.length = branch.length) + # Return cladogram by default - geom_tippoint(size=3, aes(color=.data$group)) + - ggtitle(sp)+ - scale_color_manual(values=stana@colors) - stana@treePlotList[[sp]] <- tp + + ## Plot tree + if (!is.null(meta)) { + ## Add NA row to unknown label in tree + ## not in metadata + for (i in tre$tip.label) { + if (!i %in% row.names(meta)) { + cur <- row.names(meta) + meta <- rbind(meta, rep(NA, ncol(meta))) + row.names(meta) <- c(cur, i) + } + } + + all_cols <- colnames(meta) + show_meta <- all_cols[all_cols!="id"] + + for (tmp_show_cv in show_meta) { + tmp_class <- class(meta[[tmp_show_cv]]) + change_cv <- meta[[tmp_show_cv]] + change_cv[change_cv==""] <- NA + change_cv[change_cv=="-"] <- NA + change_cv[is.infinite(change_cv)] <- NA + + if (tmp_class=="numeric") { + meta[[tmp_show_cv]] <- as.numeric(change_cv) + } else { + meta[[tmp_show_cv]] <- as.factor(change_cv) + } + } + + meta <- meta[tre$tip.label,] + + + if (layout=="rectangular") { + qqcat("Force setting layout to 'circular'\n") + flyt <- "circular" + } else { + flyt <- layout + } + + ## Select random palette from scico + scp <- sample(scico::scico_palette_names(), + length(colnames(meta)), replace=FALSE) + names(scp) <- colnames(meta) + + ## Select random shape from ggstar + starsh <- sample(1:30, length(colnames(meta)), replace=FALSE) + names(starsh) <- colnames(meta) + + p <- ggtree(tre, layout=flyt, branch.length=branch.length) + for (tmp_show_cv in show_meta) { + if (is.numeric(meta[[tmp_show_cv]])) { + ## If is numeric + p <- p + geom_fruit( + data = meta, + geom = geom_col, + mapping = aes(y=id, fill=.data[[tmp_show_cv]], + x=.data[[tmp_show_cv]]), + ) + + scale_fill_scico(palette = scp[tmp_show_cv]) + + new_scale_fill() + } else { + + ## If is discrete + p <- p + geom_fruit( + data = meta, + geom = geom_star, + size = point_size, + mapping = aes(y=id, x=0.5, fill=.data[[tmp_show_cv]]), + starshape = starsh[tmp_show_cv] + ) + + scale_fill_scico_d(palette = scp[tmp_show_cv], begin=0, end=0.5) + + new_scale_fill() + } + } + stana@treePlotList[[sp]] <- p + } else { + tre <- groupOTU(tre, cl) + tp <- ggtree(tre, + layout=layout,branch.length = branch.length) + # Return cladogram by default + geom_tippoint(size=point_size, aes(color=.data$group)) + + ggtitle(sp)+ + scale_color_manual(values=stana@colors) + stana@treePlotList[[sp]] <- tp + } } stana +} + + +#' @noRd +checkMeta <- function(stana, meta) { + meta_col <- stana@meta |> colnames() + int_cols <- intersect(meta_col, meta) + int_len <- int_cols |> length() + if (length(int_len)==0) { + stop("No column name is available in the `meta` slot") + } + return(stana@meta[, c("id", int_cols)]) } \ No newline at end of file diff --git a/R/setMetadata.R b/R/setMetadata.R index d96bccb..6db723c 100644 --- a/R/setMetadata.R +++ b/R/setMetadata.R @@ -3,10 +3,10 @@ #' @param stana stana #' @param meta metadata (data.frame) #' row.name as ID of the trees -#' +#' @export #' @return stana object #' -setMetadata(stana, meta) { +setMetadata <- function(stana, meta) { meta[["id"]] <- row.names(meta) stana@meta <- meta return(stana) diff --git a/README.Rmd b/README.Rmd index 7bea3cd..2f6a269 100644 --- a/README.Rmd +++ b/README.Rmd @@ -26,7 +26,9 @@ knitr::opts_chunk$set( [![R-CMD-check](https://github.com/noriakis/stana/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/noriakis/stana/actions/workflows/R-CMD-check.yaml) [![CodeFactor](https://www.codefactor.io/repository/github/noriakis/stana/badge)](https://www.codefactor.io/repository/github/noriakis/stana) -Metagenotyping analysis in R. Import and analyse, plot output of the software like [MIDAS](https://github.com/snayfach/MIDAS), [MIDAS2](https://github.com/czbiohub/MIDAS2), [metaSNV v1, metaSNV v2](https://github.com/metasnv-tool/metaSNV) and [inStrain](https://github.com/MrOlm/inStrain). Primarly developed for `MIDAS` and `MIDAS2`. The documentation is available using `pkgdown` at [https://noriakis.github.io/software/stana_pkgdown](https://noriakis.github.io/software/stana_pkgdown). The detailed usage is available [here](https://noriakis.github.io/software/stana), using `bookdown`. +Metagenotyping analysis in R. Import and analyse, plot output of the software like [MIDAS](https://github.com/snayfach/MIDAS), [MIDAS2](https://github.com/czbiohub/MIDAS2), [metaSNV v1, metaSNV v2](https://github.com/metasnv-tool/metaSNV) and [inStrain](https://github.com/MrOlm/inStrain). Primarly developed for `MIDAS` and `MIDAS2`. + +The documentation is available using `pkgdown` at [https://noriakis.github.io/software/stana_pkgdown](https://noriakis.github.io/software/stana_pkgdown). The detailed usage is available at [https://noriakis.github.io/software/stana](https://noriakis.github.io/software/stana), using `bookdown`. ## Installation @@ -42,17 +44,34 @@ devtools::install_github("noriakis/stana") ## Examples -```{r message=FALSE, warning=FALSE, fig.width=3, fig.height=3} +```{r message=FALSE, warning=FALSE, fig.width=6, fig.height=6} ## Using example data library(stana) -load("inst/extdata/sysdata.rda") +load(system.file("extdata", "sysdata.rda", package = "stana")) + stana getID(stana) + +## Make example metadata +samples <- getSlot(stana, "snps")[[1]] |> colnames() +metadata <- data.frame( + row.names=samples, + treatment=sample(1:3, length(samples), replace=TRUE), + marker=runif(length(samples)) +) + + +## Set metadata +stana <- setMetadata(stana, metadata) + +## Call consensus sequence +## Infer and plot tree based on metadata stana <- stana |> consensusSeq(argList=list(site_prev=0.95)) |> - plotTree() + plotTree(meta=c("treatment","marker")) getFasta(stana)[[1]] getTree(stana)[[1]] +getSlot(stana, "treePlotList")[[1]] ``` ## Interactive inspection diff --git a/README.md b/README.md index 78aa1e6..a6ca56f 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,11 @@ software like [MIDAS](https://github.com/snayfach/MIDAS), [MIDAS2](https://github.com/czbiohub/MIDAS2), [metaSNV v1, metaSNV v2](https://github.com/metasnv-tool/metaSNV) and [inStrain](https://github.com/MrOlm/inStrain). Primarly developed for -`MIDAS` and `MIDAS2`. The documentation is available using `pkgdown` at +`MIDAS` and `MIDAS2`. + +The documentation is available using `pkgdown` at . The detailed usage -is available [here](https://noriakis.github.io/software/stana), using +is available at , using `bookdown`. ## Installation @@ -36,7 +38,8 @@ devtools::install_github("noriakis/stana") ``` r ## Using example data library(stana) -load("inst/extdata/sysdata.rda") +load(system.file("extdata", "sysdata.rda", package = "stana")) + stana #> Type: MIDAS2 #> Directory: midas2_sample_merge_uhgg @@ -49,9 +52,24 @@ stana #> 7.2 Mb getID(stana) #> [1] "100003" + +## Make example metadata +samples <- getSlot(stana, "snps")[[1]] |> colnames() +metadata <- data.frame( + row.names=samples, + treatment=sample(1:3, length(samples), replace=TRUE), + marker=runif(length(samples)) +) + + +## Set metadata +stana <- setMetadata(stana, metadata) + +## Call consensus sequence +## Infer and plot tree based on metadata stana <- stana |> consensusSeq(argList=list(site_prev=0.95)) |> - plotTree() + plotTree(meta=c("treatment","marker")) #> Beginning calling for 100003 #> Site number: 5019 #> Profiled samples: 11 @@ -67,8 +85,11 @@ getTree(stana)[[1]] #> ERR1711593, ERR1711594, ERR1711596, ERR1711598, ERR1711603, ERR1711605, ... #> #> Unrooted; includes branch lengths. +getSlot(stana, "treePlotList")[[1]] ``` + + ## Interactive inspection The users can inspect metagenotyping results interactively using Shiny diff --git a/inst/extdata/sysdata.rda b/inst/extdata/sysdata.rda index 2f69252..3d5cb27 100644 Binary files a/inst/extdata/sysdata.rda and b/inst/extdata/sysdata.rda differ diff --git a/man/figures/README-unnamed-chunk-2-1.png b/man/figures/README-unnamed-chunk-2-1.png new file mode 100644 index 0000000..ef125f9 Binary files /dev/null and b/man/figures/README-unnamed-chunk-2-1.png differ diff --git a/man/plotTree.Rd b/man/plotTree.Rd index ed1f0e4..9c8837e 100644 --- a/man/plotTree.Rd +++ b/man/plotTree.Rd @@ -9,8 +9,11 @@ plotTree( species = NULL, cl = NULL, dist_method = "dist.ml", + meta = NULL, + layout = "circular", tree_args = list(), - branch.length = "none" + branch.length = "none", + point_size = 2 ) } \arguments{ @@ -23,13 +26,20 @@ If NULL, first species in fasta list is assigned} \item{dist_method}{dist method in phangorn, default to dist.ml} +\item{meta}{default to NULL, column name in `meta` slot} + +\item{layout}{layout to be used in ggtree} + \item{tree_args}{passed to dist function in phangorn} \item{branch.length}{branch length, default to "none", cladogram} +\item{point_size}{point size} + \item{model}{dist.ml model} } \description{ infer the tree and plot -use dist.ml and NJ for inference +The function uses dist.ml and NJ for inference +if metadata is available, use them to plot metadata around the tree. } diff --git a/man/setMetadata.Rd b/man/setMetadata.Rd new file mode 100644 index 0000000..eba6403 --- /dev/null +++ b/man/setMetadata.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setMetadata.R +\name{setMetadata} +\alias{setMetadata} +\title{setMetadata} +\usage{ +setMetadata(stana, meta) +} +\arguments{ +\item{stana}{stana} + +\item{meta}{metadata (data.frame) +row.name as ID of the trees} +} +\value{ +stana object +} +\description{ +setMetadata +}