From 8b7ab92cc2a2d92bd1b0b7dd43594bae9c886346 Mon Sep 17 00:00:00 2001 From: jorainer Date: Thu, 11 Jan 2024 08:32:19 +0100 Subject: [PATCH] feat: add filterSpectra function (issue #41) - Add a `filterSpectra` method that allows to filter the `Spectra` within an `MsExperiment` updating eventually present relationships (links) between samples and spectra. --- DESCRIPTION | 2 +- NAMESPACE | 4 ++ NEWS.md | 6 ++ R/MsExperiment-functions.R | 24 +++++++ R/MsExperiment.R | 66 +++++++++++++++++++- man/MsExperiment.Rd | 37 ++++++++++- tests/testthat/test_MsExperiment-functions.R | 16 +++++ tests/testthat/test_MsExperiment.R | 38 +++++++++++ vignettes/MsExperiment.Rmd | 33 +++++++++- 9 files changed, 222 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d437494..00c229f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: MsExperiment Title: Infrastructure for Mass Spectrometry Experiments -Version: 1.5.2 +Version: 1.5.3 Description: Infrastructure to store and manage all aspects related to a complete proteomics or metabolomics mass spectrometry (MS) experiment. The MsExperiment package provides light-weight and diff --git a/NAMESPACE b/NAMESPACE index bf38c0f..94d2763 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(MsExperimentFiles) export(dbWriteSampleData) export(existMsExperimentFiles) export(experimentFiles) +export(filterSpectra) export(linkSampleData) export(otherData) export(qdata) @@ -57,4 +58,7 @@ importMethodsFrom(S4Vectors,"metadata<-") importMethodsFrom(S4Vectors,mcols) importMethodsFrom(S4Vectors,metadata) importMethodsFrom(Spectra,Spectra) +importMethodsFrom(Spectra,peaksVariables) +importMethodsFrom(Spectra,selectSpectraVariables) +importMethodsFrom(Spectra,spectraVariables) importMethodsFrom(methods,show) diff --git a/NEWS.md b/NEWS.md index 36b0b92..2a787d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # MsExperiment 1.5 +## MsExperiment 1.5.3 + +- Add `filterSpectra` method to allow filtering of `Spectra` within an + `MsExperiment` while keeping eventually present relationships between samples + and spectra consistent. + ## MsExperiment 1.5.2 - Add support to read/write sample data from/to a *MsBackendSql* database diff --git a/R/MsExperiment-functions.R b/R/MsExperiment-functions.R index 99a9611..fe25d17 100644 --- a/R/MsExperiment-functions.R +++ b/R/MsExperiment-functions.R @@ -404,3 +404,27 @@ readMsExperiment <- function(spectraFiles = character(), spectra(x) <- Spectra(spectraFiles, ...) linkSampleData(x, with = "sampleData.spectraOrigin = spectra.dataOrigin") } + +#' @title Consolidate links between samples and spectra after filtering +#' +#' @description +#' +#' If @spectra got filtered eventually present *links* between them and samples +#' will no longer be valid and need to be updated/fixed. This function +#' consolidates these links using a spectra variable `"._SPECTRA_IDX"` in +#' `@spectra` that needs to represent/contain the index of the spectra +#' **before** filtering. +#' +#' @param x `MsExperiment` +#' +#' @author Johannes Rainer +#' @noRd +.update_sample_data_links_spectra <- function(x) { + sdl <- .sample_data_links(x, "spectra")[[1L]] + idx <- match(sdl[, 2L], x@spectra$._SPECTRA_IDX) + keep <- !is.na(idx) + sdl <- sdl[keep, , drop = FALSE] + sdl[, 2L] <- idx[keep] + x@sampleDataLinks[["spectra"]] <- sdl + x +} diff --git a/R/MsExperiment.R b/R/MsExperiment.R index b2a7b2b..1f959dc 100644 --- a/R/MsExperiment.R +++ b/R/MsExperiment.R @@ -161,6 +161,16 @@ #' arbitrary order is supported. #' See the vignette for details and examples. #' +#' - `filterSpectra`: subsets the `Spectra` within an `MsExperiment` using a +#' provided filter function (parameter `filter`). Parameters for the filter +#' function can be passed with parameter `...`. Any of the filter functions +#' of a [Spectra()] object can be passed with parameter `filter`. Eventually +#' present relationships between samples and spectra (*links*, see also +#' `linkSampleData`) are updated. Filtering affects only the spectra data +#' of the object, none of the other slots and data (e.g. `sampleData`) are +#' modified. +#' The function returns an `MsExperiment` with the filtered `Spectra` object. +#' #' @return See help of the individual functions. #' #' @param spectra [Spectra()] object with the MS spectra data of the @@ -171,6 +181,11 @@ #' @param experimentFiles [MsExperimentFiles()] defining (external) files #' to data or annotation. #' +#' @param filter for `filterSpectra`: any filter function supported by +#' [Spectra()] to filter the spectra object (such as `filterRt` or +#' `filterMsLevel`). Parameters for the filter function can be passed +#' through `...`. +#' #' @param i for `[`: an `integer`, `character` or `logical` referring to the #' indices or names (rowname of `sampleData`) of the samples to subset. #' @@ -205,7 +220,8 @@ #' #' @param x an `MsExperiment`. #' -#' @param ... optional additional parameters. +#' @param ... optional additional parameters. For `filterSpectra`: parameters +#' to be passed to the filter function (parameter `filter`). #' #' @name MsExperiment #' @@ -290,6 +306,20 @@ #' ## result in an MsExperiment with duplicated entries in "annotations" #' experimentFiles(mse)[["annotations"]] #' experimentFiles(mse[1:2])[["annotations"]] +#' +#' ## Spectra within an MsExperiment can be filtered/subset with the +#' ## `filterSpectra` function and any of the filter functions supported +#' ## by `Spectra` objects. Below we restrict the spectra data to spectra +#' ## with a retention time between 200 and 210 seconds. +#' res <- filterSpectra(mse, filterRt, rt = c(200, 210)) +#' res +#' +#' ## The object contains now much less spectra. The retention times for these +#' rtime(spectra(res)) +#' +#' ## Relationship between samples and spectra was preserved by the filtering +#' a <- res[1L] +#' spectra(a) NULL #' @name MsExperiment-class @@ -546,3 +576,37 @@ setMethod("[", "MsExperiment", function(x, i, j, ..., drop = FALSE) { } .extractSamples(x, i, newx = x) }) + +#' @rdname MsExperiment +#' +#' @export +setGeneric("filterSpectra", def = function(object, filter, ...) + standardGeneric("filterSpectra")) + +#' @rdname MsExperiment +#' +#' @importMethodsFrom Spectra selectSpectraVariables +#' +#' @importMethodsFrom Spectra spectraVariables +#' +#' @importMethodsFrom Spectra peaksVariables +setMethod( + "filterSpectra", c("MsExperiment", "function"), + function(object, filter, ...) { + ls <- length(spectra(object)) + if (!ls) + return(object) + have_links <- length(.sample_data_links(object, "spectra")) > 0 + if (have_links) + object@spectra$._SPECTRA_IDX <- seq_len(ls) + object@spectra <- filter(object@spectra, ...) + if (have_links) { + if (ls != length(spectra(object))) + object <- .update_sample_data_links_spectra(object) + svs <- unique(c(spectraVariables(spectra(object)), + peaksVariables(spectra(object)))) + object@spectra <- selectSpectraVariables( + object@spectra, svs[svs != "._SPECTRA_IDX"]) + } + object + }) diff --git a/man/MsExperiment.Rd b/man/MsExperiment.Rd index 6c6db27..3909e62 100644 --- a/man/MsExperiment.Rd +++ b/man/MsExperiment.Rd @@ -18,6 +18,8 @@ \alias{otherData<-} \alias{linkSampleData} \alias{[,MsExperiment,ANY,ANY,ANY-method} +\alias{filterSpectra} +\alias{filterSpectra,MsExperiment,function-method} \title{Managing Mass Spectrometry Experiments} \usage{ experimentFiles(object) @@ -61,6 +63,10 @@ linkSampleData( ) \S4method{[}{MsExperiment,ANY,ANY,ANY}(x, i, j, ..., drop = FALSE) + +filterSpectra(object, filter, ...) + +\S4method{filterSpectra}{MsExperiment,`function`}(object, filter, ...) } \arguments{ \item{object}{An instance of class \code{MsExperiment}.} @@ -106,9 +112,15 @@ indices or names (rowname of \code{sampleData}) of the samples to subset.} \item{j}{for \code{[}: not supported.} -\item{...}{optional additional parameters.} +\item{...}{optional additional parameters. For \code{filterSpectra}: parameters +to be passed to the filter function (parameter \code{filter}).} \item{drop}{for \code{[}: ignored.} + +\item{filter}{for \code{filterSpectra}: any filter function supported by +\code{\link[=Spectra]{Spectra()}} to filter the spectra object (such as \code{filterRt} or +\code{filterMsLevel}). Parameters for the filter function can be passed +through \code{...}.} } \value{ See help of the individual functions. @@ -286,6 +298,15 @@ not affect data consistency (see examples below for more information). Not linked data (slots) will be returned as they are. Subsetting in arbitrary order is supported. See the vignette for details and examples. +\item \code{filterSpectra}: subsets the \code{Spectra} within an \code{MsExperiment} using a +provided filter function (parameter \code{filter}). Parameters for the filter +function can be passed with parameter \code{...}. Any of the filter functions +of a \code{\link[=Spectra]{Spectra()}} object can be passed with parameter \code{filter}. Eventually +present relationships between samples and spectra (\emph{links}, see also +\code{linkSampleData}) are updated. Filtering affects only the spectra data +of the object, none of the other slots and data (e.g. \code{sampleData}) are +modified. +The function returns an \code{MsExperiment} with the filtered \code{Spectra} object. } } @@ -364,6 +385,20 @@ experimentFiles(mse[2])[["annotations"]] ## result in an MsExperiment with duplicated entries in "annotations" experimentFiles(mse)[["annotations"]] experimentFiles(mse[1:2])[["annotations"]] + +## Spectra within an MsExperiment can be filtered/subset with the +## `filterSpectra` function and any of the filter functions supported +## by `Spectra` objects. Below we restrict the spectra data to spectra +## with a retention time between 200 and 210 seconds. +res <- filterSpectra(mse, filterRt, rt = c(200, 210)) +res + +## The object contains now much less spectra. The retention times for these +rtime(spectra(res)) + +## Relationship between samples and spectra was preserved by the filtering +a <- res[1L] +spectra(a) } \author{ Laurent Gatto, Johannes Rainer diff --git a/tests/testthat/test_MsExperiment-functions.R b/tests/testthat/test_MsExperiment-functions.R index 444b2ee..39a0f07 100644 --- a/tests/testthat/test_MsExperiment-functions.R +++ b/tests/testthat/test_MsExperiment-functions.R @@ -287,3 +287,19 @@ test_that("readMsExperiment works", { expect_true(nrow(sampleData(a)) == 2) expect_equal(sampleData(a)$other_ann, c("a", "b")) }) + +test_that(".update_sample_data_links_spectra works", { + a <- readMsExperiment(fls) + tmp <- a + tmp@spectra$._SPECTRA_IDX <- seq_along(tmp@spectra) + tmp@spectra <- tmp@spectra[c(5, 14, 1000, 2, 200)] + res <- .update_sample_data_links_spectra(tmp) + expect_equal(res@sampleDataLinks[["spectra"]][, 1L], c(1L, 1L, 1L, 1L, 2L)) + expect_equal(res@sampleDataLinks[["spectra"]][, 2L], c(4L, 1L, 2L, 5L, 3L)) + expect_equal(res@spectra$scanIndex, + a@spectra$scanIndex[c(5, 14, 1000, 2, 200)]) + expect_equal(res@sampleData, tmp@sampleData) + + expect_true(length(spectra(res[1L])) == 4) + expect_true(length(spectra(res[2L])) == 1) +}) diff --git a/tests/testthat/test_MsExperiment.R b/tests/testthat/test_MsExperiment.R index 87d6e11..4419cf9 100644 --- a/tests/testthat/test_MsExperiment.R +++ b/tests/testthat/test_MsExperiment.R @@ -224,3 +224,41 @@ test_that("otherData<-,otherData,MsExperiment works", { otherData(m)[["NUM"]] <- NULL expect_identical(length(otherData(m)), 0L) }) + +test_that("filterSpectra,MsExperiment,function works", { + ## empty object + res <- filterSpectra(MsExperiment(), filterMsLevel, 2L) + expect_s4_class(res, "MsExperiment") + expect_true(length(res) == 0L) + + ## an object without links. + res <- filterSpectra(mse, filterMsLevel, 2L) + expect_s4_class(res, "MsExperiment") + expect_equal(sampleData(res), sampleData(mse)) + expect_true(length(spectra(res)) == 0L) + expect_equal(res@sampleDataLinks, mse@sampleDataLinks) + + ## an object with links between samples and spectra + tmp <- readMsExperiment(fls) + res <- filterSpectra(tmp, filterMsLevel, 2L) + expect_s4_class(res, "MsExperiment") + expect_equal(sampleData(res), sampleData(tmp)) + expect_true(length(spectra(res)) == 0L) + expect_equal(names(res@sampleDataLinks), names(tmp@sampleDataLinks)) + expect_true(nrow(res@sampleDataLinks[["spectra"]]) == 0L) + + ## Just reducing. + res <- filterSpectra(tmp, filterRt, c(200, 210)) + expect_s4_class(res, "MsExperiment") + expect_equal(sampleData(res), sampleData(tmp)) + expect_true(all(rtime(spectra(res)) >= 200 & rtime(spectra(res)) <= 210)) + ## check that sample/spectra links are valid + a <- spectra(res[1L]) + ref <- filterRt(spectra(tmp[1L]), c(200, 210)) + expect_equal(a$scanIndex, ref$scanIndex) + expect_equal(a$dataOrigin, ref$dataOrigin) + a <- spectra(res[2L]) + ref <- filterRt(spectra(tmp[2L]), c(200, 210)) + expect_equal(a$scanIndex, ref$scanIndex) + expect_equal(a$dataOrigin, ref$dataOrigin) +}) diff --git a/vignettes/MsExperiment.Rmd b/vignettes/MsExperiment.Rmd index bc01061..18b5972 100644 --- a/vignettes/MsExperiment.Rmd +++ b/vignettes/MsExperiment.Rmd @@ -281,7 +281,7 @@ plotSpectra(sp[1000]) For some experiments and data analyses an explicit link between data, data files and respective samples is required. Such links enable an easy (and error-free) -subsetting or re-ordering of a whole experiment by sample and would also +subset or re-ordering of a whole experiment by sample and would also simplify coloring and labeling of the data depending on the sample or of its variables or conditions. @@ -521,6 +521,37 @@ samples to elements does however not affect data consistency. A sample will always be linked to the correct value/element. +# Subset and filter `MsExperiment` + +As already shown above, `MsExperiment` objects can be subset with `[` which will +subset the data by sample. Depending on whether relationships (links) between +samples and any other data within the object are present also these are +correctly subset. In addition to this general subset operation, it is possible +to individually filter the spectra data within an `MsExperiment` using the +`filterSpectra` function. This function takes any filter function supported by +`Spectra` with parameter `filter`. Parameters for this filter function can be +passed through `...`. As an example we filter below the spectra data of our +`MsExperiment` keeping only spectra with an retention time between 200 and 210 +seconds. + +```{r} +#' Filter the Spectra using the `filterRt` function providing also the +#' parameters for this function. +res <- filterSpectra(lmse, filterRt, rt = c(200, 210)) +res +``` + +The resulting `MsExperiment` contains now much fewer spectra. `filterSpectra` +did only filter the spectra data, but not any of the other data slots. It did +however update and consolidate the relationships between samples and spectra +(if present) after filtering: + +```{r} +#' Extract spectra of the second sample after filtering +spectra(res[2L]) +``` + + # Using `MsExperiment` with `MsBackendSql` The `r Biocpkg("MsBackendSql")` provides functionality to store mass