references.bib

@article{wishart_2007,
title = {{HMDB}: the human metabolome database.},
author = {Wishart, David S and Tzur, Dan and Knox, Craig and Eisner, Roman and Guo, An Chi and Young, Nelson and Cheng, Dean and Jewell, Kevin and Arndt, David and Sawhney, Summit and Fung, Chris and Nikolai, Lisa and Lewis, Mike and Coutouly, Marie-Aude and Forsythe, Ian and Tang, Peter and Shrivastava, Savita and Jeroncic, Kevin and Stothard, Paul and Amegbey, Godwin and Block, David and Hau, David D and Wagner, James and Miniaci, Jessica and Clements, Melisa and Gebremedhin, Mulu and Guo, Natalie and Zhang, Ying and Duggan, Gavin E and Macinnis, Glen D and Weljie, Alim M and Dowlatabadi, Reza and Bamforth, Fiona and Clive, Derrick and Greiner, Russ and Li, Liang and Marrie, Tom and Sykes, Brian D and Vogel, Hans J and Querengesser, Lori},
pages = {D521-6},
url = {http://dx.doi.org/10.1093/nar/gkl923},
year = {2007},
month = {jan},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {35},
number = {Database issue},
issn = {1362-4962},
doi = {10.1093/nar/gkl923},
pmid = {17202168},
pmcid = {PMC1899095},
f1000-projects = {shared citations},
abstract = {The Human Metabolome Database ({HMDB}) is currently the most complete and comprehensive curated collection of human metabolite and human metabolism data in the world. It contains records for more than 2180 endogenous metabolites with information gathered from thousands of books, journal articles and electronic databases. In addition to its comprehensive literature-derived data, the {HMDB} also contains an extensive collection of experimental metabolite concentration data compiled from hundreds of mass spectra ({MS}) and Nuclear Magnetic resonance ({NMR}) metabolomic analyses performed on urine, blood and cerebrospinal fluid samples. This is further supplemented with thousands of {NMR} and {MS} spectra collected on purified, reference metabolites. Each metabolite entry in the {HMDB} contains an average of 90 separate data fields including a comprehensive compound description, names and synonyms, structural information, physico-chemical data, reference {NMR} and {MS} spectra, biofluid concentrations, disease associations, pathway information, enzyme data, gene sequence data, {SNP} and mutation data as well as extensive links to images, references and other public databases. Extensive searching, relational querying and data browsing tools are also provided. The {HMDB} is designed to address the broad needs of biochemists, clinical chemists, physicians, medical geneticists, nutritionists and members of the metabolomics community. The {HMDB} is available at: www.hmdb.ca.}
}
@article{goecks_2010,
title = {Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences.},
author = {Goecks, Jeremy and Nekrutenko, Anton and Taylor, James and Galaxy Team},
pages = {R86},
url = {http://dx.doi.org/10.1186/gb-2010-11-8-r86},
year = {2010},
month = {aug},
day = {25},
urldate = {2018-01-13},
journal = {Genome Biology},
volume = {11},
number = {8},
doi = {10.1186/gb-2010-11-8-r86},
pmid = {20738864},
pmcid = {PMC2945788},
f1000-projects = {shared citations},
abstract = {Increased reliance on computational approaches in the life sciences has revealed grave concerns about how accessible and reproducible computation-reliant results truly are. Galaxy http://usegalaxy.org, an open web-based platform for genomic research, addresses these problems. Galaxy automatically tracks and manages data provenance and provides support for capturing the context and intent of computational methods. Galaxy Pages are interactive, web-based documents that provide users with a medium to communicate a complete computational analysis.}
}
@article{gentleman_2004,
title = {Bioconductor: open software development for computational biology and bioinformatics.},
author = {Gentleman, Robert C and Carey, Vincent J and Bates, Douglas M and Bolstad, Ben and Dettling, Marcel and Dudoit, Sandrine and Ellis, Byron and Gautier, Laurent and Ge, Yongchao and Gentry, Jeff and Hornik, Kurt and Hothorn, Torsten and Huber, Wolfgang and Iacus, Stefano and Irizarry, Rafael and Leisch, Friedrich and Li, Cheng and Maechler, Martin and Rossini, Anthony J and Sawitzki, Gunther and Smith, Colin and Smyth, Gordon and Tierney, Luke and Yang, Jean Y H and Zhang, Jianhua},
pages = {R80},
url = {http://dx.doi.org/10.1186/gb-2004-5-10-r80},
year = {2004},
month = {sep},
day = {15},
urldate = {2017-02-20},
journal = {Genome Biology},
volume = {5},
number = {10},
doi = {10.1186/gb-2004-5-10-r80},
pmid = {15461798},
pmcid = {PMC545600},
f1000-projects = {shared citations},
abstract = {The Bioconductor project is an initiative for the collaborative creation of extensible software for computational biology and bioinformatics. The goals of the project include: fostering collaborative development and widespread use of innovative software, reducing barriers to entry into interdisciplinary scientific research, and promoting the achievement of remote reproducibility of research results. We describe details of our aims and methods, identify current challenges, compare Bioconductor to other open bioinformatics projects, and provide working examples.}
}
@article{sansone_2012,
title = {Toward interoperable bioscience data.},
author = {Sansone, Susanna-Assunta and Rocca-Serra, Philippe and Field, Dawn and Maguire, Eamonn and Taylor, Chris and Hofmann, Oliver and Fang, Hong and Neumann, Steffen and Tong, Weida and Amaral-Zettler, Linda and Begley, Kimberly and Booth, Tim and Bougueleret, Lydie and Burns, Gully and Chapman, Brad and Clark, Tim and Coleman, Lee-Ann and Copeland, Jay and Das, Sudeshna and de Daruvar, Antoine and de Matos, Paula and Dix, Ian and Edmunds, Scott and Evelo, Chris T and Forster, Mark J and Gaudet, Pascale and Gilbert, Jack and Goble, Carole and Griffin, Julian L and Jacob, Daniel and Kleinjans, Jos and Harland, Lee and Haug, Kenneth and Hermjakob, Henning and Ho Sui, Shannan J and Laederach, Alain and Liang, Shaoguang and Marshall, Stephen and {McGrath}, Annette and Merrill, Emily and Reilly, Dorothy and Roux, Magali and Shamu, Caroline E and Shang, Catherine A and Steinbeck, Christoph and Trefethen, Anne and Williams-Jones, Bryn and Wolstencroft, Katherine and Xenarios, Ioannis and Hide, Winston},
pages = {121-126},
url = {http://dx.doi.org/10.1038/ng.1054},
year = {2012},
month = {feb},
day = {1},
urldate = {2018-03-14},
journal = {Nature Genetics},
volume = {44},
number = {2},
doi = {10.1038/ng.1054},
pmid = {22281772},
pmcid = {PMC3428019},
f1000-projects = {shared citations},
abstract = {To make full use of research data, the bioscience community needs to adopt technologies and reward mechanisms that support interoperability and promote the growth of an open 'data commoning' culture. Here we describe the prerequisites for data commoning and present an established and growing ecosystem of solutions using the shared 'Investigation-Study-Assay' framework to support that vision.}
}
@article{oboyle_2011,
title = {Open Babel: An open chemical toolbox.},
author = {O'Boyle, Noel M and Banck, Michael and James, Craig A and Morley, Chris and Vandermeersch, Tim and Hutchison, Geoffrey R},
pages = {33},
url = {http://dx.doi.org/10.1186/1758-2946-3-33},
year = {2011},
month = {oct},
day = {7},
urldate = {2018-01-29},
journal = {Journal of cheminformatics},
volume = {3},
doi = {10.1186/1758-2946-3-33},
pmid = {21982300},
pmcid = {PMC3198950},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: A frequent problem in computational modeling is the interconversion of chemical structures between different formats. While standard interchange formats exist (for example, Chemical Markup Language) and de facto standards have arisen (for example, {SMILES} format), the need to interconvert formats is a continuing problem due to the multitude of different application areas for chemistry data, differences in the data stored by different formats ({0D} versus {3D}, for example), and competition between software along with a lack of vendor-neutral formats. {RESULTS}: We discuss, for the first time, Open Babel, an open-source chemical toolbox that speaks the many languages of chemical data. Open Babel version 2.3 interconverts over 110 formats. The need to represent such a wide variety of chemical and molecular data requires a library that implements a wide range of cheminformatics algorithms, from partial charge assignment and aromaticity detection, to bond order perception and canonicalization. We detail the implementation of Open Babel, describe key advances in the 2.3 release, and outline a variety of uses both in terms of software products and scientific research, including applications far beyond simple format interconversion. {CONCLUSIONS}: Open Babel presents a solution to the proliferation of multiple chemical file formats. In addition, it provides a variety of useful utilities from conformer searching and {2D} depiction, to filtering, batch conversion, and substructure and similarity searching. For developers, it can be used as a programming library to handle chemical data in areas such as organic chemistry, drug design, materials science, and computational chemistry. It is freely available under an open-source license from http://openbabel.org.}
}
@article{huber_2015,
title = {Orchestrating high-throughput genomic analysis with Bioconductor.},
author = {Huber, Wolfgang and Carey, Vincent J and Gentleman, Robert and Anders, Simon and Carlson, Marc and Carvalho, Benilton S and Bravo, Hector Corrada and Davis, Sean and Gatto, Laurent and Girke, Thomas and Gottardo, Raphael and Hahne, Florian and Hansen, Kasper D and Irizarry, Rafael A and Lawrence, Michael and Love, Michael I and {MacDonald}, James and Obenchain, Valerie and Oleś, Andrzej K and Pagès, Hervé and Reyes, Alejandro and Shannon, Paul and Smyth, Gordon K and Tenenbaum, Dan and Waldron, Levi and Morgan, Martin},
pages = {115-121},
url = {http://dx.doi.org/10.1038/nmeth.3252},
year = {2015},
month = {feb},
urldate = {2017-02-20},
journal = {Nature Methods},
volume = {12},
number = {2},
doi = {10.1038/nmeth.3252},
pmid = {25633503},
pmcid = {PMC4509590},
f1000-projects = {shared citations},
abstract = {Bioconductor is an open-source, open-development software project for the analysis and comprehension of high-throughput data in genomics and molecular biology. The project aims to enable interdisciplinary research, collaboration and rapid development of scientific software. Based on the statistical programming language R, Bioconductor comprises 934 interoperable packages contributed by a large, diverse community of scientists. Packages cover a range of bioinformatic and statistical applications. They undergo formal initial review and continuous automated testing. We present an overview for prospective users and contributors.}
}
@article{fukushima_2009,
title = {Integrated omics approaches in plant systems biology.},
author = {Fukushima, Atsushi and Kusano, Miyako and Redestig, Henning and Arita, Masanori and Saito, Kazuki},
pages = {532-538},
url = {http://dx.doi.org/10.1016/j.cbpa.2009.09.022},
year = {2009},
month = {dec},
urldate = {2019-07-01},
journal = {Current Opinion in Chemical Biology},
volume = {13},
number = {5-6},
doi = {10.1016/j.cbpa.2009.09.022},
pmid = {19837627},
f1000-projects = {shared citations},
abstract = {Understanding cellular metabolic systems is vital not only for determining the function of enzymatic genes, but also for elucidating the coordination among various metabolic pathways. In this context, high-throughput experiments can provide us with essential, albeit only partial information. Integration of metabolite profiling with other multiple 'omics' data (e.g. transcript profiling), is required to reconstruct complex networks that characterize the phenotypes in the cell. Here, we review recent approaches to integrate multiple omics data in higher plants. We especially focus on metabolomics data management, normalization, meta-omics data analysis, and an integrative approach with other omics data. Further prospects for using metabolomics and the key points to be addressed are discussed. This could be a valuable strategy for a systems-level understanding of plant systems.}
}
@article{yu_2014,
title = {Improving peak detection in high-resolution {LC}/{MS} metabolomics data using preexisting knowledge and machine learning approach.},
author = {Yu, Tianwei and Jones, Dean P},
pages = {2941-2948},
url = {http://dx.doi.org/10.1093/bioinformatics/btu430},
year = {2014},
month = {oct},
day = {15},
urldate = {2018-01-15},
journal = {Bioinformatics},
volume = {30},
number = {20},
doi = {10.1093/bioinformatics/btu430},
pmid = {25005748},
pmcid = {PMC4184266},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Peak detection is a key step in the preprocessing of untargeted metabolomics data generated from high-resolution liquid chromatography-mass spectrometry ({LC}/{MS}). The common practice is to use filters with predetermined parameters to select peaks in the {LC}/{MS} profile. This rigid approach can cause suboptimal performance when the choice of peak model and parameters do not suit the data characteristics. {RESULTS}: Here we present a method that learns directly from various data features of the extracted ion chromatograms ({EICs}) to differentiate between true peak regions from noise regions in the {LC}/{MS} profile. It utilizes the knowledge of known metabolites, as well as robust machine learning approaches. Unlike currently available methods, this new approach does not assume a parametric peak shape model and allows maximum flexibility. We demonstrate the superiority of the new approach using real data. Because matching to known metabolites entails uncertainties and cannot be considered a gold standard, we also developed a probabilistic receiver-operating characteristic ({pROC}) approach that can incorporate uncertainties. {AVAILABILITY} {AND} {IMPLEMENTATION}: The new peak detection approach is implemented as part of the {apLCMS} package available at http://web1.sph.emory.edu/{apLCMS}/ {CONTACT}: tyu8@emory.edu {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author 2014. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{strimmer_2008,
title = {fdrtool: a versatile R package for estimating local and tail area-based false discovery rates.},
author = {Strimmer, Korbinian},
pages = {1461-1462},
url = {http://dx.doi.org/10.1093/bioinformatics/btn209},
year = {2008},
month = {jun},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {24},
number = {12},
doi = {10.1093/bioinformatics/btn209},
pmid = {18441000},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: False discovery rate ({FDR}) methodologies are essential in the study of high-dimensional genomic and proteomic data. The R package 'fdrtool' facilitates such analyses by offering a comprehensive set of procedures for {FDR} estimation. Its distinctive features include: (i) many different types of test statistics are allowed as input data, such as P-values, z-scores, correlations and t-scores; (ii) simultaneously, both local {FDR} and tail area-based {FDR} values are estimated for all test statistics and (iii) empirical null models are fit where possible, thereby taking account of potential over- or underdispersion of the theoretical null. In addition, 'fdrtool' provides readily interpretable graphical output, and can be applied to very large scale (in the order of millions of hypotheses) multiple testing problems. Consequently, 'fdrtool' implements a flexible {FDR} estimation scheme that is unified across different test statistics and variants of {FDR}. {AVAILABILITY}: The program is freely available from the Comprehensive R Archive Network (http://cran.r-project.org/) under the terms of the {GNU} General Public License (version 3 or later). {CONTACT}: strimmer@uni-leipzig.de.}
}
@article{saeys_2007,
title = {A review of feature selection techniques in bioinformatics.},
author = {Saeys, Yvan and Inza, Iñaki and Larrañaga, Pedro},
pages = {2507-2517},
url = {http://dx.doi.org/10.1093/bioinformatics/btm344},
year = {2007},
month = {oct},
day = {1},
urldate = {2016-09-21},
journal = {Bioinformatics},
volume = {23},
number = {19},
doi = {10.1093/bioinformatics/btm344},
pmid = {17720704},
f1000-projects = {shared citations},
abstract = {Feature selection techniques have become an apparent need in many bioinformatics applications. In addition to the large pool of techniques that have already been developed in the machine learning and data mining fields, specific applications in bioinformatics have led to a wealth of newly proposed techniques. In this article, we make the interested reader aware of the possibilities of feature selection, providing a basic taxonomy of feature selection techniques, and discussing their use, variety and potential in a number of both common as well as upcoming bioinformatics applications.}
}
@article{kessner_2008,
title = {{ProteoWizard}: open source software for rapid proteomics tools development.},
author = {Kessner, Darren and Chambers, Matt and Burke, Robert and Agus, David and Mallick, Parag},
pages = {2534-2536},
url = {http://dx.doi.org/10.1093/bioinformatics/btn323},
year = {2008},
month = {nov},
day = {1},
urldate = {2019-07-17},
journal = {Bioinformatics},
volume = {24},
number = {21},
doi = {10.1093/bioinformatics/btn323},
pmid = {18606607},
pmcid = {PMC2732273},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: The {ProteoWizard} software project provides a modular and extensible set of open-source, cross-platform tools and libraries. The tools perform proteomics data analyses; the libraries enable rapid tool creation by providing a robust, pluggable development framework that simplifies and unifies data file access, and performs standard proteomics and {LCMS} dataset computations. The library contains readers and writers of the {mzML} data format, which has been written using modern C++ techniques and design principles and supports a variety of platforms with native compilers. The software has been specifically released under the Apache v2 license to ensure it can be used in both academic and commercial projects. In addition to the library, we also introduce a rapidly growing set of companion tools whose implementation helps to illustrate the simplicity of developing applications on top of the {ProteoWizard} library. {AVAILABILITY}: Cross-platform software that compiles using native compilers (i.e. {GCC} on Linux, {MSVC} on Windows and {XCode} on {OSX}) is available for download free of charge, at http://proteowizard.sourceforge.net. This website also provides code examples, and documentation. It is our hope the {ProteoWizard} project will become a standard platform for proteomics development; consequently, code use, contribution and further development are strongly encouraged.}
}
@article{watrous_2012,
title = {Mass spectral molecular networking of living microbial colonies.},
author = {Watrous, Jeramie and Roach, Patrick and Alexandrov, Theodore and Heath, Brandi S and Yang, Jane Y and Kersten, Roland D and van der Voort, Menno and Pogliano, Kit and Gross, Harald and Raaijmakers, Jos M and Moore, Bradley S and Laskin, Julia and Bandeira, Nuno and Dorrestein, Pieter C},
pages = {E1743-52},
url = {http://dx.doi.org/10.1073/pnas.1203689109},
year = {2012},
month = {jun},
day = {26},
urldate = {2019-05-03},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {109},
number = {26},
doi = {10.1073/pnas.1203689109},
pmid = {22586093},
pmcid = {PMC3387089},
f1000-projects = {shared citations},
abstract = {Integrating the governing chemistry with the genomics and phenotypes of microbial colonies has been a "holy grail" in microbiology. This work describes a highly sensitive, broadly applicable, and cost-effective approach that allows metabolic profiling of live microbial colonies directly from a Petri dish without any sample preparation. Nanospray desorption electrospray ionization mass spectrometry ({MS}), combined with alignment of {MS} data and molecular networking, enabled monitoring of metabolite production from live microbial colonies from diverse bacterial genera, including Bacillus subtilis, Streptomyces coelicolor, Mycobacterium smegmatis, and Pseudomonas aeruginosa. This work demonstrates that, by using these tools to visualize small molecular changes within bacterial interactions, insights can be gained into bacterial developmental processes as a result of the improved organization of {MS}/{MS} data. To validate this experimental platform, metabolic profiling was performed on Pseudomonas sp. {SH}-C52, which protects sugar beet plants from infections by specific soil-borne fungi [R. Mendes et al. (2011) Science 332:1097-1100]. The antifungal effect of strain {SH}-C52 was attributed to thanamycin, a predicted lipopeptide encoded by a nonribosomal peptide synthetase gene cluster. Our technology, in combination with our recently developed peptidogenomics strategy, enabled the detection and partial characterization of thanamycin and showed that it is a monochlorinated lipopeptide that belongs to the syringomycin family of antifungal agents. In conclusion, the platform presented here provides a significant advancement in our ability to understand the spatiotemporal dynamics of metabolite production in live microbial colonies and communities.}
}
@article{langfelder_2008,
title = {{WGCNA}: an R package for weighted correlation network analysis.},
author = {Langfelder, Peter and Horvath, Steve},
pages = {559},
url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-559},
year = {2008},
month = {dec},
day = {29},
urldate = {2016-06-10},
journal = {{BMC} Bioinformatics},
volume = {9},
issn = {1471-2105},
doi = {10.1186/1471-2105-9-559},
pmid = {19114008},
pmcid = {PMC2631488},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Correlation networks are increasingly being used in bioinformatics applications. For example, weighted gene co-expression network analysis is a systems biology method for describing the correlation patterns among genes across microarray samples. Weighted correlation network analysis ({WGCNA}) can be used for finding clusters (modules) of highly correlated genes, for summarizing such clusters using the module eigengene or an intramodular hub gene, for relating modules to one another and to external sample traits (using eigengene network methodology), and for calculating module membership measures. Correlation networks facilitate network based gene screening methods that can be used to identify candidate biomarkers or therapeutic targets. These methods have been successfully applied in various biological contexts, e.g. cancer, mouse genetics, yeast genetics, and analysis of brain imaging data. While parts of the correlation network methodology have been described in separate publications, there is a need to provide a user-friendly, comprehensive, and consistent software implementation and an accompanying tutorial. {RESULTS}: The {WGCNA} R software package is a comprehensive collection of R functions for performing various aspects of weighted correlation network analysis. The package includes functions for network construction, module detection, gene selection, calculations of topological properties, data simulation, visualization, and interfacing with external software. Along with the R package we also present R software tutorials. While the methods development was motivated by gene expression data, the underlying data mining approach can be applied to a variety of different settings. {CONCLUSION}: The {WGCNA} package provides R functions for weighted correlation network analysis, e.g. co-expression network analysis of gene expression data. The R package along with its source code and additional material are freely available at http://www.genetics.ucla.edu/labs/horvath/{CoexpressionNetwork}/Rpackages/{WGCNA}.}
}
@article{roccaserra_2010,
title = {{ISA} software suite: supporting standards-compliant experimental annotation and enabling curation at the community level.},
author = {Rocca-Serra, Philippe and Brandizi, Marco and Maguire, Eamonn and Sklyar, Nataliya and Taylor, Chris and Begley, Kimberly and Field, Dawn and Harris, Stephen and Hide, Winston and Hofmann, Oliver and Neumann, Steffen and Sterk, Peter and Tong, Weida and Sansone, Susanna-Assunta},
pages = {2354-2356},
url = {http://dx.doi.org/10.1093/bioinformatics/btq415},
year = {2010},
month = {sep},
day = {15},
urldate = {2019-05-13},
journal = {Bioinformatics},
volume = {26},
number = {18},
issn = {1367-4811},
doi = {10.1093/bioinformatics/btq415},
pmid = {20679334},
pmcid = {PMC2935443},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: The first open source software suite for experimentalists and curators that (i) assists in the annotation and local management of experimental metadata from high-throughput studies employing one or a combination of omics and other technologies; (ii) empowers users to uptake community-defined checklists and ontologies; and (iii) facilitates submission to international public repositories. {AVAILABILITY} {AND} {IMPLEMENTATION}: Software, documentation, case studies and implementations at http://www.isa-tools.org.}
}
@article{tarca_2009,
title = {A novel signaling pathway impact analysis.},
author = {Tarca, Adi Laurentiu and Draghici, Sorin and Khatri, Purvesh and Hassan, Sonia S and Mittal, Pooja and Kim, Jung-Sun and Kim, Chong Jai and Kusanovic, Juan Pedro and Romero, Roberto},
pages = {75-82},
url = {http://dx.doi.org/10.1093/bioinformatics/btn577},
year = {2009},
month = {jan},
day = {1},
urldate = {2019-05-08},
journal = {Bioinformatics},
volume = {25},
number = {1},
doi = {10.1093/bioinformatics/btn577},
pmid = {18990722},
pmcid = {PMC2732297},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Gene expression class comparison studies may identify hundreds or thousands of genes as differentially expressed ({DE}) between sample groups. Gaining biological insight from the result of such experiments can be approached, for instance, by identifying the signaling pathways impacted by the observed changes. Most of the existing pathway analysis methods focus on either the number of {DE} genes observed in a given pathway (enrichment analysis methods), or on the correlation between the pathway genes and the class of the samples (functional class scoring methods). Both approaches treat the pathways as simple sets of genes, disregarding the complex gene interactions that these pathways are built to describe. {RESULTS}: We describe a novel signaling pathway impact analysis ({SPIA}) that combines the evidence obtained from the classical enrichment analysis with a novel type of evidence, which measures the actual perturbation on a given pathway under a given condition. A bootstrap procedure is used to assess the significance of the observed total pathway perturbation. Using simulations we show that the evidence derived from perturbations is independent of the pathway enrichment evidence. This allows us to calculate a global pathway significance P-value, which combines the enrichment and perturbation P-values. We illustrate the capabilities of the novel method on four real datasets. The results obtained on these data show that {SPIA} has better specificity and more sensitivity than several widely used pathway analysis methods. {AVAILABILITY}: {SPIA} was implemented as an R package available at http://vortex.cs.wayne.edu/ontoexpress/}
}
@article{kanehisa_2000,
title = {{KEGG}: kyoto encyclopedia of genes and genomes.},
author = {Kanehisa, M and Goto, S},
pages = {27-30},
url = {http://dx.doi.org/10.1093/nar/28.1.27},
year = {2000},
month = {jan},
day = {1},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {28},
number = {1},
doi = {10.1093/nar/28.1.27},
pmid = {10592173},
pmcid = {PMC102409},
f1000-projects = {shared citations},
abstract = {{KEGG} (Kyoto Encyclopedia of Genes and Genomes) is a knowledge base for systematic analysis of gene functions, linking genomic information with higher order functional information. The genomic information is stored in the {GENES} database, which is a collection of gene catalogs for all the completely sequenced genomes and some partial genomes with up-to-date annotation of gene functions. The higher order functional information is stored in the {PATHWAY} database, which contains graphical representations of cellular processes, such as metabolism, membrane transport, signal transduction and cell cycle. The {PATHWAY} database is supplemented by a set of ortholog group tables for the information about conserved subpathways (pathway motifs), which are often encoded by positionally coupled genes on the chromosome and which are especially useful in predicting gene functions. A third database in {KEGG} is {LIGAND} for the information about chemical compounds, enzyme molecules and enzymatic reactions. {KEGG} provides Java graphics tools for browsing genome maps, comparing two genome maps and manipulating expression maps, as well as computational tools for sequence comparison, graph comparison and path computation. The {KEGG} databases are daily updated and made freely available (http://www. genome.ad.jp/kegg/).}
}
@article{xia_2015,
title = {{MetaboAnalyst} 3.0--making metabolomics more meaningful.},
author = {Xia, Jianguo and Sinelnikov, Igor V and Han, Beomsoo and Wishart, David S},
pages = {W251-7},
url = {http://dx.doi.org/10.1093/nar/gkv380},
year = {2015},
month = {jul},
day = {1},
urldate = {2018-01-13},
journal = {Nucleic Acids Research},
volume = {43},
number = {W1},
doi = {10.1093/nar/gkv380},
pmid = {25897128},
pmcid = {PMC4489235},
f1000-projects = {shared citations},
abstract = {{MetaboAnalyst} (www.metaboanalyst.ca) is a web server designed to permit comprehensive metabolomic data analysis, visualization and interpretation. It supports a wide range of complex statistical calculations and high quality graphical rendering functions that require significant computational resources. First introduced in 2009, {MetaboAnalyst} has experienced more than a {50X} growth in user traffic (\textgreater50 000 jobs processed each month). In order to keep up with the rapidly increasing computational demands and a growing number of requests to support translational and systems biology applications, we performed a substantial rewrite and major feature upgrade of the server. The result is {MetaboAnalyst} 3.0. By completely re-implementing the {MetaboAnalyst} suite using the latest web framework technologies, we have been able substantially improve its performance, capacity and user interactivity. Three new modules have also been added including: (i) a module for biomarker analysis based on the calculation of receiver operating characteristic curves; (ii) a module for sample size estimation and power analysis for improved planning of metabolomics studies and (iii) a module to support integrative pathway analysis for both genes and metabolites. In addition, popular features found in existing modules have been significantly enhanced by upgrading the graphical output, expanding the compound libraries and by adding support for more diverse organisms. \copyright The Author(s) 2015. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{grapov_2015,
title = {{MetaMapR}: pathway independent metabolomic network analysis incorporating unknowns.},
author = {Grapov, Dmitry and Wanichthanarak, Kwanjeera and Fiehn, Oliver},
pages = {2757-2760},
url = {http://dx.doi.org/10.1093/bioinformatics/btv194},
year = {2015},
month = {aug},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {31},
number = {16},
doi = {10.1093/bioinformatics/btv194},
pmid = {25847005},
pmcid = {PMC4528626},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: Metabolic network mapping is a widely used approach for integration of metabolomic experimental results with biological domain knowledge. However, current approaches can be limited by biochemical domain or pathway knowledge which results in sparse disconnected graphs for real world metabolomic experiments. {MetaMapR} integrates enzymatic transformations with metabolite structural similarity, mass spectral similarity and empirical associations to generate richly connected metabolic networks. This open source, web-based or desktop software, written in the R programming language, leverages {KEGG} and {PubChem} databases to derive associations between metabolites even in cases where biochemical domain or molecular annotations are unknown. Network calculation is enhanced through an interface to the Chemical Translation System, which allows metabolite identifier translation between \textgreater200 common biochemical databases. Analysis results are presented as interactive visualizations or can be exported as high-quality graphics and numerical tables which can be imported into common network analysis and visualization tools. {AVAILABILITY} {AND} {IMPLEMENTATION}: Freely available at http://dgrapov.github.io/{MetaMapR}/. Requires R and a modern web browser. Installation instructions, tutorials and application examples are available at http://dgrapov.github.io/{MetaMapR}/. {CONTACT}: ofiehn@ucdavis.edu. \copyright The Author 2015. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{buuren_2011,
title = {mice : Multivariate Imputation by Chained Equations {inR}},
author = {Buuren, Stef van and Groothuis-Oudshoorn, Karin},
url = {http://www.jstatsoft.org/v45/i03/},
year = {2011},
urldate = {2019-07-30},
journal = {Journal of statistical software},
volume = {45},
number = {3},
issn = {1548-7660},
doi = {10.18637/jss.v045.i03},
f1000-projects = {shared citations}
}
@article{cuadrosinostroza_2009,
title = {{TargetSearch}--a Bioconductor package for the efficient preprocessing of {GC}-{MS} metabolite profiling data.},
author = {Cuadros-Inostroza, Alvaro and Caldana, Camila and Redestig, Henning and Kusano, Miyako and Lisec, Jan and Peña-Cortés, Hugo and Willmitzer, Lothar and Hannah, Matthew A},
pages = {428},
url = {http://dx.doi.org/10.1186/1471-2105-10-428},
year = {2009},
month = {dec},
day = {16},
urldate = {2018-01-15},
journal = {{BMC} Bioinformatics},
volume = {10},
doi = {10.1186/1471-2105-10-428},
pmid = {20015393},
pmcid = {PMC3087348},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Metabolite profiling, the simultaneous quantification of multiple metabolites in an experiment, is becoming increasingly popular, particularly with the rise of systems-level biology. The workhorse in this field is gas-chromatography hyphenated with mass spectrometry ({GC}-{MS}). The high-throughput of this technology coupled with a demand for large experiments has led to data pre-processing, i.e. the quantification of metabolites across samples, becoming a major bottleneck. Existing software has several limitations, including restricted maximum sample size, systematic errors and low flexibility. However, the biggest limitation is that the resulting data usually require extensive hand-curation, which is subjective and can typically take several days to weeks. {RESULTS}: We introduce the {TargetSearch} package, an open source tool which is a flexible and accurate method for pre-processing even very large numbers of {GC}-{MS} samples within hours. We developed a novel strategy to iteratively correct and update retention time indices for searching and identifying metabolites. The package is written in the R programming language with computationally intensive functions written in C for speed and performance. The package includes a graphical user interface to allow easy use by those unfamiliar with R. {CONCLUSIONS}: {TargetSearch} allows fast and accurate data pre-processing for {GC}-{MS} experiments and overcomes the sample number limitations and manual curation requirements of existing software. We validate our method by carrying out an analysis against both a set of known chemical standard mixtures and of a biological experiment. In addition we demonstrate its capabilities and speed by comparing it with other {GC}-{MS} pre-processing tools. We believe this package will greatly ease current bottlenecks and facilitate the analysis of metabolic profiling data.}
}
@article{delivera_2012,
title = {Normalizing and integrating metabolomics data.},
author = {De Livera, Alysha M and Dias, Daniel A and De Souza, David and Rupasinghe, Thusitha and Pyke, James and Tull, Dedreia and Roessner, Ute and {McConville}, Malcolm and Speed, Terence P},
pages = {10768-10776},
url = {http://dx.doi.org/10.1021/ac302748b},
year = {2012},
month = {dec},
day = {18},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {84},
number = {24},
doi = {10.1021/ac302748b},
pmid = {23150939},
f1000-projects = {shared citations},
abstract = {Metabolomics research often requires the use of multiple analytical platforms, batches of samples, and laboratories, any of which can introduce a component of unwanted variation. In addition, every experiment is subject to within-platform and other experimental variation, which often includes unwanted biological variation. Such variation must be removed in order to focus on the biological information of interest. We present a broadly applicable method for the removal of unwanted variation arising from various sources for the identification of differentially abundant metabolites and, hence, for the systematic integration of data on the same quantities from different sources. We illustrate the versatility and the performance of the approach in four applications, and we show that it has several advantages over the existing normalization methods.}
}
@article{delivera_2015,
title = {Statistical methods for handling unwanted variation in metabolomics data.},
author = {De Livera, Alysha M and Sysi-Aho, Marko and Jacob, Laurent and Gagnon-Bartsch, Johann A and Castillo, Sandra and Simpson, Julie A and Speed, Terence P},
pages = {3606-3615},
url = {http://dx.doi.org/10.1021/ac502439y},
year = {2015},
month = {apr},
day = {7},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {87},
number = {7},
doi = {10.1021/ac502439y},
pmid = {25692814},
pmcid = {PMC4544854},
f1000-projects = {shared citations},
abstract = {Metabolomics experiments are inevitably subject to a component of unwanted variation, due to factors such as batch effects, long runs of samples, and confounding biological variation. Although the removal of this unwanted variation is a vital step in the analysis of metabolomics data, it is considered a gray area in which there is a recognized need to develop a better understanding of the procedures and statistical methods required to achieve statistically relevant optimal biological outcomes. In this paper, we discuss the causes of unwanted variation in metabolomics experiments, review commonly used metabolomics approaches for handling this unwanted variation, and present a statistical approach for the removal of unwanted variation to obtain normalized metabolomics data. The advantages and performance of the approach relative to several widely used metabolomics normalization approaches are illustrated through two metabolomics studies, and recommendations are provided for choosing and assessing the most suitable normalization method for a given metabolomics experiment. Software for the approach is made freely available.}
}
@article{gu_2013,
title = {{CePa}: an R package for finding significant pathways weighted by multiple network centralities.},
author = {Gu, Zuguang and Wang, Jin},
pages = {658-660},
url = {http://dx.doi.org/10.1093/bioinformatics/btt008},
year = {2013},
month = {mar},
day = {1},
urldate = {2019-05-08},
journal = {Bioinformatics},
volume = {29},
number = {5},
doi = {10.1093/bioinformatics/btt008},
pmid = {23314125},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: {CePa} is an R package aiming to find significant pathways through network topology information. The package has several advantages compared with current pathway enrichment tools. First, pathway node instead of single gene is taken as the basic unit when analysing networks to meet the fact that genes must be constructed into complexes to hold normal functions. Second, multiple network centralities are applied simultaneously to measure importance of nodes from different aspects to make a full view on the biological system. {CePa} extends standard pathway enrichment methods, which include both over-representation analysis procedure and gene-set analysis procedure. {CePa} has been evaluated with high performance on real-world data, and it can provide more information directly related to current biological problems. {AVAILABILITY}: {CePa} is available at the Comprehensive R Archive Network ({CRAN}): http://cran.r-project.org/web/packages/{CePa}/}
}
@article{hughes_2014,
title = {{MSPrep}--summarization, normalization and diagnostics for processing of mass spectrometry-based metabolomic data.},
author = {Hughes, Grant and Cruickshank-Quinn, Charmion and Reisdorph, Richard and Lutz, Sharon and Petrache, Irina and Reisdorph, Nichole and Bowler, Russell and Kechris, Katerina},
pages = {133-134},
url = {http://dx.doi.org/10.1093/bioinformatics/btt589},
year = {2014},
month = {jan},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {1},
doi = {10.1093/bioinformatics/btt589},
pmid = {24174567},
pmcid = {PMC3866554},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Although R packages exist for the pre-processing of metabolomic data, they currently do not incorporate additional analysis steps of summarization, filtering and normalization of aligned data. We developed the {MSPrep} R package to complement other packages by providing these additional steps, implementing a selection of popular normalization algorithms and generating diagnostics to help guide investigators in their analyses. {AVAILABILITY}: http://www.sourceforge.net/projects/msprep}
}
@article{jauhiainen_2014,
title = {Normalization of metabolomics data with applications to correlation maps.},
author = {Jauhiainen, Alexandra and Madhu, Basetti and Narita, Masako and Narita, Masashi and Griffiths, John and Tavaré, Simon},
pages = {2155-2161},
url = {http://dx.doi.org/10.1093/bioinformatics/btu175},
year = {2014},
month = {aug},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {15},
doi = {10.1093/bioinformatics/btu175},
pmid = {24711654},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: In metabolomics, the goal is to identify and measure the concentrations of different metabolites (small molecules) in a cell or a biological system. The metabolites form an important layer in the complex metabolic network, and the interactions between different metabolites are often of interest. It is crucial to perform proper normalization of metabolomics data, but current methods may not be applicable when estimating interactions in the form of correlations between metabolites. We propose a normalization approach based on a mixed model, with simultaneous estimation of a correlation matrix. We also investigate how the common use of a calibration standard in nuclear magnetic resonance ({NMR}) experiments affects the estimation of correlations. {RESULTS}: We show with both real and simulated data that our proposed normalization method is robust and has good performance when discovering true correlations between metabolites. The standardization of {NMR} data is shown in simulation studies to affect our ability to discover true correlations to a small extent. However, comparing standardized and non-standardized real data does not result in any large differences in correlation estimates. {AVAILABILITY} {AND} {IMPLEMENTATION}: Source code is freely available at https://sourceforge.net/projects/metabnorm/ {CONTACT}: alexandra.jauhiainen@ki.se {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author 2014. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{kramer_2013,
title = {{rBiopaxParser}--an R package to parse, modify and visualize {BioPAX} data.},
author = {Kramer, Frank and Bayerlová, Michaela and Klemm, Florian and Bleckmann, Annalen and Beissbarth, Tim},
pages = {520-522},
url = {http://dx.doi.org/10.1093/bioinformatics/bts710},
year = {2013},
month = {feb},
day = {15},
urldate = {2019-05-08},
journal = {Bioinformatics},
volume = {29},
number = {4},
doi = {10.1093/bioinformatics/bts710},
pmid = {23274212},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Biological pathway data, stored in structured databases, is a useful source of knowledge for a wide range of bioinformatics algorithms and tools. The Biological Pathway Exchange ({BioPAX}) language has been established as a standard to store and annotate pathway information. However, use of these data within statistical analyses can be tedious. On the other hand, the statistical computing environment R has become the standard for bioinformatics analysis of large-scale genomics data. With this package, we hope to enable R users to work with {BioPAX} data and make use of the always increasing amount of biological pathway knowledge within data analysis methods. {RESULTS}: {rBiopaxParser} is a software package that provides a comprehensive set of functions for parsing, viewing and modifying {BioPAX} pathway data within R. These functions enable the user to access and modify specific parts of the {BioPAX} model. Furthermore, it allows to generate and layout regulatory graphs of controlling interactions and to visualize {BioPAX} pathways. {AVAILABILITY}: {rBiopaxParser} is an open-source R package and has been submitted to Bioconductor.}
}
@article{nyamundanda_2013,
title = {{MetSizeR}: selecting the optimal sample size for metabolomic studies using an analysis based approach.},
author = {Nyamundanda, Gift and Gormley, Isobel Claire and Fan, Yue and Gallagher, William M and Brennan, Lorraine},
pages = {338},
url = {http://dx.doi.org/10.1186/1471-2105-14-338},
year = {2013},
month = {nov},
day = {21},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {14},
doi = {10.1186/1471-2105-14-338},
pmid = {24261687},
pmcid = {PMC4222287},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Determining sample sizes for metabolomic experiments is important but due to the complexity of these experiments, there are currently no standard methods for sample size estimation in metabolomics. Since pilot studies are rarely done in metabolomics, currently existing sample size estimation approaches which rely on pilot data can not be applied. {RESULTS}: In this article, an analysis based approach called {MetSizeR} is developed to estimate sample size for metabolomic experiments even when experimental pilot data are not available. The key motivation for {MetSizeR} is that it considers the type of analysis the researcher intends to use for data analysis when estimating sample size. {MetSizeR} uses information about the data analysis technique and prior expert knowledge of the metabolomic experiment to simulate pilot data from a statistical model. Permutation based techniques are then applied to the simulated pilot data to estimate the required sample size. {CONCLUSIONS}: The {MetSizeR} methodology, and a publicly available software package which implements the approach, are illustrated through real metabolomic applications. Sample size estimates, informed by the intended statistical analysis technique, and the associated uncertainty are provided.}
}
@article{redestig_2009,
title = {Compensation for systematic cross-contribution improves normalization of mass spectrometry based metabolomics data.},
author = {Redestig, Henning and Fukushima, Atsushi and Stenlund, Hans and Moritz, Thomas and Arita, Masanori and Saito, Kazuki and Kusano, Miyako},
pages = {7974-7980},
url = {http://dx.doi.org/10.1021/ac901143w},
year = {2009},
month = {oct},
day = {1},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {81},
number = {19},
doi = {10.1021/ac901143w},
pmid = {19743813},
f1000-projects = {shared citations},
abstract = {Most mass spectrometry based metabolomics studies are semiquantitative and depend on efficient normalization techniques to suppress systematic error. A common approach is to include isotope-labeled internal standards ({ISs}) and then express the estimated metabolite abundances relative to the {IS}. Because of problems such as insufficient chromatographic resolution, however, the analytes may directly influence estimates of the {IS}, a phenomenon known as cross-contribution ({CC}). Normalization using {ISs} that suffer from {CC} effects will cause significant loss of information if the interfering analytes are associated with the studied factors. We present a novel normalization algorithm, which compensates for systematic {CC} effects that can be traced back to a linear association with the experimental design. The proposed method was found to be superior at purifying the signal of interest compared to current normalization methods when applied to two biological data sets and a multicomponent dilution mixture. Our method is applicable to data from randomized and designed experiments that use {ISs} to monitor the systematic error.}
}
@article{stacklies_2007,
title = {{pcaMethods}--a bioconductor package providing {PCA} methods for incomplete data.},
author = {Stacklies, Wolfram and Redestig, Henning and Scholz, Matthias and Walther, Dirk and Selbig, Joachim},
pages = {1164-1167},
url = {http://dx.doi.org/10.1093/bioinformatics/btm069},
year = {2007},
month = {may},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {23},
number = {9},
doi = {10.1093/bioinformatics/btm069},
pmid = {17344241},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: {pcaMethods} is a Bioconductor compliant library for computing principal component analysis ({PCA}) on incomplete data sets. The results can be analyzed directly or used to estimate missing values to enable the use of missing value sensitive statistical methods. The package was mainly developed with microarray and metabolite data sets in mind, but can be applied to any other incomplete data set as well. {AVAILABILITY}: http://www.bioconductor.org}
}
@article{meng_2014,
title = {A multivariate approach to the integration of multi-omics datasets.},
author = {Meng, Chen and Kuster, Bernhard and Culhane, Aedín C and Gholami, Amin Moghaddas},
pages = {162},
url = {http://dx.doi.org/10.1186/1471-2105-15-162},
year = {2014},
month = {may},
day = {29},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {15},
doi = {10.1186/1471-2105-15-162},
pmid = {24884486},
pmcid = {PMC4053266},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: To leverage the potential of multi-omics studies, exploratory data analysis methods that provide systematic integration and comparison of multiple layers of omics information are required. We describe multiple co-inertia analysis ({MCIA}), an exploratory data analysis method that identifies co-relationships between multiple high dimensional datasets. Based on a covariance optimization criterion, {MCIA} simultaneously projects several datasets into the same dimensional space, transforming diverse sets of features onto the same scale, to extract the most variant from each dataset and facilitate biological interpretation and pathway analysis. {RESULTS}: We demonstrate integration of multiple layers of information using {MCIA}, applied to two typical "omics" research scenarios. The integration of transcriptome and proteome profiles of cells in the {NCI}-60 cancer cell line panel revealed distinct, complementary features, which together increased the coverage and power of pathway analysis. Our analysis highlighted the importance of the leukemia extravasation signaling pathway in leukemia that was not highly ranked in the analysis of any individual dataset. Secondly, we compared transcriptome profiles of high grade serous ovarian tumors that were obtained, on two different microarray platforms and next generation {RNA}-sequencing, to identify the most informative platform and extract robust biomarkers of molecular subtypes. We discovered that the variance of {RNA}-sequencing data processed using {RPKM} had greater variance than that with {MapSplice} and {RSEM}. We provided novel markers highly associated to tumor molecular subtype combined from four data platforms. {MCIA} is implemented and available in the R/Bioconductor "omicade4" package. {CONCLUSION}: We believe {MCIA} is an attractive method for data integration and visualization of several datasets of multi-omics features observed on the same set of individuals. The method is not dependent on feature annotation, and thus it can extract important features even when there are not present across all datasets. {MCIA} provides simple graphical representations for the identification of relationships between large datasets.}
}
@article{yamamoto_2014,
title = {Statistical hypothesis testing of factor loading in principal component analysis and its application to metabolite set enrichment analysis.},
author = {Yamamoto, Hiroyuki and Fujimori, Tamaki and Sato, Hajime and Ishikawa, Gen and Kami, Kenjiro and Ohashi, Yoshiaki},
pages = {51},
url = {http://dx.doi.org/10.1186/1471-2105-15-51},
year = {2014},
month = {feb},
day = {21},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {15},
doi = {10.1186/1471-2105-15-51},
pmid = {24555693},
pmcid = {PMC4015128},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Principal component analysis ({PCA}) has been widely used to visualize high-dimensional metabolomic data in a two- or three-dimensional subspace. In metabolomics, some metabolites (e.g., the top 10 metabolites) have been subjectively selected when using factor loading in {PCA}, and biological inferences are made for these metabolites. However, this approach may lead to biased biological inferences because these metabolites are not objectively selected with statistical criteria. {RESULTS}: We propose a statistical procedure that selects metabolites with statistical hypothesis testing of the factor loading in {PCA} and makes biological inferences about these significant metabolites with a metabolite set enrichment analysis ({MSEA}). This procedure depends on the fact that the eigenvector in {PCA} for autoscaled data is proportional to the correlation coefficient between the {PC} score and each metabolite level. We applied this approach to two sets of metabolomic data from mouse liver samples: 136 of 282 metabolites in the first case study and 66 of 275 metabolites in the second case study were statistically significant. This result suggests that to set the number of metabolites before the analysis is inappropriate because the number of significant metabolites differs in each study when factor loading is used in {PCA}. Moreover, when an {MSEA} of these significant metabolites was performed, significant metabolic pathways were detected, which were acceptable in terms of previous biological knowledge. {CONCLUSIONS}: It is essential to select metabolites statistically to make unbiased biological inferences from metabolomic data when using factor loading in {PCA}. We propose a statistical procedure to select metabolites with statistical hypothesis testing of the factor loading in {PCA}, and to draw biological inferences about these significant metabolites with {MSEA}. We have developed an R package "mseapca" to facilitate this approach. The "mseapca" package is publicly available at the {CRAN} website.}
}
@article{silva_2014,
title = {{ProbMetab}: an R package for Bayesian probabilistic annotation of {LC}-{MS}-based metabolomics.},
author = {Silva, Ricardo R and Jourdan, Fabien and Salvanha, Diego M and Letisse, Fabien and Jamin, Emilien L and Guidetti-Gonzalez, Simone and Labate, Carlos A and Vêncio, Ricardo Z N},
pages = {1336-1337},
url = {http://dx.doi.org/10.1093/bioinformatics/btu019},
year = {2014},
month = {may},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {9},
doi = {10.1093/bioinformatics/btu019},
pmid = {24443383},
pmcid = {PMC3998140},
f1000-projects = {shared citations},
abstract = {We present {ProbMetab}, an R package that promotes substantial improvement in automatic probabilistic liquid chromatography-mass spectrometry-based metabolome annotation. The inference engine core is based on a Bayesian model implemented to (i) allow diverse source of experimental data and metadata to be systematically incorporated into the model with alternative ways to calculate the likelihood function and (ii) allow sensitive selection of biologically meaningful biochemical reaction databases as Dirichlet-categorical prior distribution. Additionally, to ensure result interpretation by system biologists, we display the annotation in a network where observed mass peaks are connected if their candidate metabolites are substrate/product of known biochemical reactions. This graph can be overlaid with other graph-based analysis, such as partial correlation networks, in a visualization scheme exported to Cytoscape, with web and stand-alone versions.}
}
@article{luo_2013,
title = {Pathview: an R/Bioconductor package for pathway-based data integration and visualization.},
author = {Luo, Weijun and Brouwer, Cory},
pages = {1830-1831},
url = {http://dx.doi.org/10.1093/bioinformatics/btt285},
year = {2013},
month = {jul},
day = {15},
urldate = {2017-04-19},
journal = {Bioinformatics},
volume = {29},
number = {14},
doi = {10.1093/bioinformatics/btt285},
pmid = {23740750},
pmcid = {PMC3702256},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: Pathview is a novel tool set for pathway-based data integration and visualization. It maps and renders user data on relevant pathway graphs. Users only need to supply their data and specify the target pathway. Pathview automatically downloads the pathway graph data, parses the data file, maps and integrates user data onto the pathway and renders pathway graphs with the mapped data. Although built as a stand-alone program, Pathview may seamlessly integrate with pathway and functional analysis tools for large-scale and fully automated analysis pipelines. {AVAILABILITY}: The package is freely available under the {GPLv3} license through Bioconductor and R-Forge. It is available at http://bioconductor.org/packages/release/bioc/html/pathview.html and at http://Pathview.r-forge.r-project.org/. {CONTACT}: luo\_weijun@yahoo.com {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online.}
}
@article{tsugawa_2015,
title = {{MS}-{DIAL}: data-independent {MS}/{MS} deconvolution for comprehensive metabolome analysis.},
author = {Tsugawa, Hiroshi and Cajka, Tomas and Kind, Tobias and Ma, Yan and Higgins, Brendan and Ikeda, Kazutaka and Kanazawa, Mitsuhiro and {VanderGheynst}, Jean and Fiehn, Oliver and Arita, Masanori},
pages = {523-526},
url = {http://dx.doi.org/10.1038/nmeth.3393},
year = {2015},
month = {jun},
urldate = {2019-08-10},
journal = {Nature Methods},
volume = {12},
number = {6},
doi = {10.1038/nmeth.3393},
pmid = {25938372},
pmcid = {PMC4449330},
f1000-projects = {shared citations},
abstract = {Data-independent acquisition ({DIA}) in liquid chromatography ({LC}) coupled to tandem mass spectrometry ({MS}/{MS}) provides comprehensive untargeted acquisition of molecular data. We provide an open-source software pipeline, which we call {MS}-{DIAL}, for {DIA}-based identification and quantification of small molecules by mass spectral deconvolution. For a reversed-phase {LC}-{MS}/{MS} analysis of nine algal strains, {MS}-{DIAL} using an enriched {LipidBlast} library identified 1,023 lipid compounds, highlighting the chemotaxonomic relationships between the algal strains.}
}
@book{james_2013,
title = {An Introduction to Statistical Learning},
author = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert},
series = {Springer Texts in Statistics},
publisher = {Springer New York},
url = {http://link.springer.com/10.1007/978-1-4614-7138-7},
year = {2013},
urldate = {2018-05-22},
volume = {103},
isbn = {978-1-4614-7137-0},
issn = {1431-{875X}},
doi = {10.1007/978-1-4614-7138-7},
address = {New York, {NY}},
f1000-projects = {shared citations}
}
@article{wishart_2013,
title = {{HMDB} 3.0--The Human Metabolome Database in 2013.},
author = {Wishart, David S and Jewison, Timothy and Guo, An Chi and Wilson, Michael and Knox, Craig and Liu, Yifeng and Djoumbou, Yannick and Mandal, Rupasri and Aziat, Farid and Dong, Edison and Bouatra, Souhaila and Sinelnikov, Igor and Arndt, David and Xia, Jianguo and Liu, Philip and Yallou, Faizath and Bjorndahl, Trent and Perez-Pineiro, Rolando and Eisner, Roman and Allen, Felicity and Neveu, Vanessa and Greiner, Russ and Scalbert, Augustin},
pages = {D801-7},
url = {http://dx.doi.org/10.1093/nar/gks1065},
year = {2013},
month = {jan},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {41},
number = {Database issue},
doi = {10.1093/nar/gks1065},
pmid = {23161693},
pmcid = {PMC3531200},
f1000-projects = {shared citations},
abstract = {The Human Metabolome Database ({HMDB}) (www.hmdb.ca) is a resource dedicated to providing scientists with the most current and comprehensive coverage of the human metabolome. Since its first release in 2007, the {HMDB} has been used to facilitate research for nearly 1000 published studies in metabolomics, clinical biochemistry and systems biology. The most recent release of {HMDB} (version 3.0) has been significantly expanded and enhanced over the 2009 release (version 2.0). In particular, the number of annotated metabolite entries has grown from 6500 to more than 40,000 (a 600\% increase). This enormous expansion is a result of the inclusion of both 'detected' metabolites (those with measured concentrations or experimental confirmation of their existence) and 'expected' metabolites (those for which biochemical pathways are known or human intake/exposure is frequent but the compound has yet to be detected in the body). The latest release also has greatly increased the number of metabolites with biofluid or tissue concentration data, the number of compounds with reference spectra and the number of data fields per entry. In addition to this expansion in data quantity, new database visualization tools and new data content have been added or enhanced. These include better spectral viewing tools, more powerful chemical substructure searches, an improved chemical taxonomy and better, more interactive pathway maps. This article describes these enhancements to the {HMDB}, which was previously featured in the 2009 {NAR} Database Issue. (Note to referees, {HMDB} 3.0 will go live on 18 September 2012.).}
}
@article{ernest_2012,
title = {{MetabR}: an R script for linear model analysis of quantitative metabolomic data.},
author = {Ernest, Ben and Gooding, Jessica R and Campagna, Shawn R and Saxton, Arnold M and Voy, Brynn H},
pages = {596},
url = {http://dx.doi.org/10.1186/1756-0500-5-596},
year = {2012},
month = {oct},
day = {30},
urldate = {2018-01-13},
journal = {{BMC} Research Notes},
volume = {5},
doi = {10.1186/1756-0500-5-596},
pmid = {23111096},
pmcid = {PMC3532230},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Metabolomics is an emerging high-throughput approach to systems biology, but data analysis tools are lacking compared to other systems level disciplines such as transcriptomics and proteomics. Metabolomic data analysis requires a normalization step to remove systematic effects of confounding variables on metabolite measurements. Current tools may not correctly normalize every metabolite when the relationships between each metabolite quantity and fixed-effect confounding variables are different, or for the effects of random-effect confounding variables. Linear mixed models, an established methodology in the microarray literature, offer a standardized and flexible approach for removing the effects of fixed- and random-effect confounding variables from metabolomic data. {FINDINGS}: Here we present a simple menu-driven program, "{MetabR}", designed to aid researchers with no programming background in statistical analysis of metabolomic data. Written in the open-source statistical programming language R, {MetabR} implements linear mixed models to normalize metabolomic data and analysis of variance ({ANOVA}) to test treatment differences. {MetabR} exports normalized data, checks statistical model assumptions, identifies differentially abundant metabolites, and produces output files to help with data interpretation. Example data are provided to illustrate normalization for common confounding variables and to demonstrate the utility of the {MetabR} program. {CONCLUSIONS}: We developed {MetabR} as a simple and user-friendly tool for implementing linear mixed model-based normalization and statistical analysis of targeted metabolomic data, which helps to fill a lack of available data analysis tools in this field. The program, user guide, example data, and any future news or updates related to the program may be found at http://metabr.r-forge.r-project.org/.}
}
@article{lcao_2009,
title = {{integrOmics}: an R package to unravel relationships between two omics datasets.},
author = {Lê Cao, Kim-Anh and González, Ignacio and Déjean, Sébastien},
pages = {2855-2856},
url = {http://dx.doi.org/10.1093/bioinformatics/btp515},
year = {2009},
month = {nov},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {25},
number = {21},
doi = {10.1093/bioinformatics/btp515},
pmid = {19706745},
pmcid = {PMC2781751},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: With the availability of many 'omics' data, such as transcriptomics, proteomics or metabolomics, the integrative or joint analysis of multiple datasets from different technology platforms is becoming crucial to unravel the relationships between different biological functional levels. However, the development of such an analysis is a major computational and technical challenge as most approaches suffer from high data dimensionality. New methodologies need to be developed and validated. {RESULTS}: {integrOmics} efficiently performs integrative analyses of two types of 'omics' variables that are measured on the same samples. It includes a regularized version of canonical correlation analysis to enlighten correlations between two datasets, and a sparse version of partial least squares ({PLS}) regression that includes simultaneous variable selection in both datasets. The usefulness of both approaches has been demonstrated previously and successfully applied in various integrative studies. {AVAILABILITY}: {integrOmics} is freely available from http://{CRAN}.R-project.org/ or from the web site companion (http://math.univ-toulouse.fr/biostat) that provides full documentation and tutorials. {CONTACT}: k.lecao@uq.edu.au {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online.}
}
@article{luna_2016,
title = {{PaxtoolsR}: pathway analysis in R using Pathway Commons.},
author = {Luna, Augustin and Babur, Özgün and Aksoy, Bülent Arman and Demir, Emek and Sander, Chris},
pages = {1262-1264},
url = {http://dx.doi.org/10.1093/bioinformatics/btv733},
year = {2016},
month = {apr},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {32},
number = {8},
doi = {10.1093/bioinformatics/btv733},
pmid = {26685306},
pmcid = {PMC4824129},
f1000-projects = {shared citations},
abstract = {{PURPOSE}: {PaxtoolsR} package enables access to pathway data represented in the {BioPAX} format and made available through the Pathway Commons webservice for users of the R language to aid in advanced pathway analyses. Features include the extraction, merging and validation of pathway data represented in the {BioPAX} format. This package also provides novel pathway datasets and advanced querying features for R users through the Pathway Commons webservice allowing users to query, extract and retrieve data and integrate these data with local {BioPAX} datasets. {AVAILABILITY} {AND} {IMPLEMENTATION}: The {PaxtoolsR} package is compatible with versions of R 3.1.1 (and higher) on Windows, Mac {OS} X and Linux using Bioconductor 3.0 and is available through the Bioconductor R package repository along with source code and a tutorial vignette describing common tasks, such as data visualization and gene set enrichment analysis. Source code and documentation are at http://www.bioconductor.org/packages/paxtoolsr This plugin is free, open-source and licensed under the {LGPL}-3. {CONTACT}: paxtools@cbio.mskcc.org or lunaa@cbio.mskcc.org. \copyright The Author 2015. Published by Oxford University Press.}
}
@article{enot_2008,
title = {Preprocessing, classification modeling and feature selection using flow injection electrospray mass spectrometry metabolite fingerprint data.},
author = {Enot, David P and Lin, Wanchang and Beckmann, Manfred and Parker, David and Overy, David P and Draper, John},
pages = {446-470},
url = {http://dx.doi.org/10.1038/nprot.2007.511},
year = {2008},
urldate = {2019-09-06},
journal = {Nature Protocols},
volume = {3},
number = {3},
doi = {10.1038/nprot.2007.511},
pmid = {18323816},
f1000-projects = {shared citations},
abstract = {Metabolome analysis by flow injection electrospray mass spectrometry ({FIE}-{MS}) fingerprinting generates measurements relating to large numbers of m/z signals. Such data sets often exhibit high variance with a paucity of replicates, thus providing a challenge for data mining. We describe data preprocessing and modeling methods that have proved reliable in projects involving samples from a range of organisms. The protocols interact with software resources specifically for metabolomics provided in a Web-accessible data analysis package {FIEmspro} (http://users.aber.ac.uk/jhd) written in the R environment and requiring a moderate knowledge of R command-line usage. Specific emphasis is placed on describing the outcome of modeling experiments using {FIE}-{MS} data that require further preprocessing to improve quality. The salient features of both poor and robust (i.e., highly generalizable) multivariate models are outlined together with advice on validating classifiers and avoiding false discovery when seeking explanatory variables.}
}
@article{schaefer_2009,
title = {{PID}: the pathway interaction database.},
author = {Schaefer, Carl F and Anthony, Kira and Krupa, Shiva and Buchoff, Jeffrey and Day, Matthew and Hannay, Timo and Buetow, Kenneth H},
pages = {D674-9},
url = {http://dx.doi.org/10.1093/nar/gkn653},
year = {2009},
month = {jan},
urldate = {2019-05-03},
journal = {Nucleic Acids Research},
volume = {37},
number = {Database issue},
doi = {10.1093/nar/gkn653},
pmid = {18832364},
pmcid = {PMC2686461},
f1000-projects = {shared citations},
abstract = {The Pathway Interaction Database ({PID}, http://pid.nci.nih.gov) is a freely available collection of curated and peer-reviewed pathways composed of human molecular signaling and regulatory events and key cellular processes. Created in a collaboration between the {US} National Cancer Institute and Nature Publishing Group, the database serves as a research tool for the cancer research community and others interested in cellular pathways, such as neuroscientists, developmental biologists and immunologists. {PID} offers a range of search features to facilitate pathway exploration. Users can browse the predefined set of pathways or create interaction network maps centered on a single molecule or cellular process of interest. In addition, the batch query tool allows users to upload long list(s) of molecules, such as those derived from microarray experiments, and either overlay these molecules onto predefined pathways or visualize the complete molecular connectivity map. Users can also download molecule lists, citation lists and complete database content in extensible markup language ({XML}) and Biological Pathways Exchange ({BioPAX}) Level 2 format. The database is updated with new pathway content every month and supplemented by specially commissioned articles on the practical uses of other relevant online tools.}
}
@article{wachter_2015,
title = {{pwOmics}: an R package for pathway-based integration of time-series omics data using public database knowledge.},
author = {Wachter, Astrid and Bei\ssbarth, Tim},
pages = {3072-3074},
url = {http://dx.doi.org/10.1093/bioinformatics/btv323},
year = {2015},
month = {sep},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {31},
number = {18},
doi = {10.1093/bioinformatics/btv323},
pmid = {26002883},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: Characterization of biological processes is progressively enabled with the increased generation of omics data on different signaling levels. Here we present a straightforward approach for the integrative analysis of data from different high-throughput technologies based on pathway and interaction models from public databases. {pwOmics} performs pathway-based level-specific data comparison of coupled human proteomic and genomic/transcriptomic datasets based on their log fold changes. Separate downstream and upstream analyses results on the functional levels of pathways, transcription factors and genes/transcripts are performed in the cross-platform consensus analysis. These provide a basis for the combined interpretation of regulatory effects over time. Via network reconstruction and inference methods (Steiner tree, dynamic Bayesian network inference) consensus graphical networks can be generated for further analyses and visualization. {AVAILABILITY} {AND} {IMPLEMENTATION}: The R package {pwOmics} is freely available on Bioconductor (http://www.bioconductor.org/). {CONTACT}: astrid.wachter@med.uni-goettingen.de. \copyright The Author 2015. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{saghatelian_2004,
title = {Assignment of endogenous substrates to enzymes by global metabolite profiling.},
author = {Saghatelian, Alan and Trauger, Sunia A and Want, Elizabeth J and Hawkins, Edward G and Siuzdak, Gary and Cravatt, Benjamin F},
pages = {14332-14339},
url = {http://dx.doi.org/10.1021/bi0480335},
year = {2004},
month = {nov},
day = {16},
urldate = {2018-01-13},
journal = {Biochemistry},
volume = {43},
number = {45},
issn = {0006-2960},
doi = {10.1021/bi0480335},
pmid = {15533037},
f1000-projects = {shared citations},
abstract = {Enzymes regulate biological processes through the conversion of specific substrates to products. Therefore, of fundamental interest for every enzyme is the elucidation of its natural substrates. Here, we describe a general strategy for identifying endogenous substrates of enzymes by untargeted liquid chromatography-mass spectrometry ({LC}-{MS}) analysis of tissue metabolomes from wild-type and enzyme-inactivated organisms. We use this method to discover several brain lipids regulated by the mammalian enzyme fatty acid amide hydrolase ({FAAH}) in vivo, including known signaling molecules (e.g., the endogenous cannabinoid anandamide) and a novel family of nervous system-enriched natural products, the taurine-conjugated fatty acids. Remarkably, the relative hydrolytic activity that {FAAH} exhibited for lipid metabolites in vitro was not predictive of the identity of specific {FAAH} substrates in vivo. Thus, global metabolite profiling establishes unanticipated connections between the proteome and metabolome that enable assignment of an enzyme's unique biochemical functions in vivo.}
}
@article{lim_2010,
title = {{T3DB}: a comprehensively annotated database of common toxins and their targets.},
author = {Lim, Emilia and Pon, Allison and Djoumbou, Yannick and Knox, Craig and Shrivastava, Savita and Guo, An Chi and Neveu, Vanessa and Wishart, David S},
pages = {D781-6},
url = {http://dx.doi.org/10.1093/nar/gkp934},
year = {2010},
month = {jan},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {38},
number = {Database issue},
doi = {10.1093/nar/gkp934},
pmid = {19897546},
pmcid = {PMC2808899},
f1000-projects = {shared citations},
abstract = {In an effort to capture meaningful biological, chemical and mechanistic information about clinically relevant, commonly encountered or important toxins, we have developed the Toxin and Toxin-Target Database ({T3DB}). The {T3DB} is a unique bioinformatics resource that compiles comprehensive information about common or ubiquitous toxins and their toxin-targets into a single electronic repository. The database currently contains over 2900 small molecule and peptide toxins, 1300 toxin-targets and more than 33,000 toxin-target associations. Each {T3DB} record ({ToxCard}) contains over 80 data fields providing detailed information on chemical properties and descriptors, toxicity values, protein and gene sequences (for both targets and toxins), molecular and cellular interaction data, toxicological data, mechanistic information and references. This information has been manually extracted and manually verified from numerous sources, including other electronic databases, government documents, textbooks and scientific journals. A key focus of the {T3DB} is on providing 'depth' over 'breadth' with detailed descriptions, mechanisms of action, and information on toxins and toxin-targets. {T3DB} is fully searchable and supports extensive text, sequence, chemical structure and relational query searches, similar to those found in the Human Metabolome Database ({HMDB}) and {DrugBank}. Potential applications of the {T3DB} include clinical metabolomics, toxin target prediction, toxicity prediction and toxicology education. The {T3DB} is available online at http://www.t3db.org.}
}
@article{smith_2006,
title = {{XCMS}: processing mass spectrometry data for metabolite profiling using nonlinear peak alignment, matching, and identification.},
author = {Smith, Colin A and Want, Elizabeth J and O'Maille, Grace and Abagyan, Ruben and Siuzdak, Gary},
pages = {779-787},
url = {http://dx.doi.org/10.1021/ac051437y},
year = {2006},
month = {feb},
day = {1},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {78},
number = {3},
doi = {10.1021/ac051437y},
pmid = {16448051},
f1000-projects = {shared citations},
abstract = {Metabolite profiling in biomarker discovery, enzyme substrate assignment, drug activity/specificity determination, and basic metabolic research requires new data preprocessing approaches to correlate specific metabolites to their biological origin. Here we introduce an {LC}/{MS}-based data analysis approach, {XCMS}, which incorporates novel nonlinear retention time alignment, matched filtration, peak detection, and peak matching. Without using internal standards, the method dynamically identifies hundreds of endogenous metabolites for use as standards, calculating a nonlinear retention time correction profile for each sample. Following retention time correction, the relative metabolite ion intensities are directly compared to identify changes in specific endogenous metabolites, such as potential biomarkers. The software is demonstrated using data sets from a previously reported enzyme knockout study and a large-scale study of plasma samples. {XCMS} is freely available under an open-source license at http://metlin.scripps.edu/download/.}
}
@article{scheltema_2011,
title = {{PeakML}/{mzMatch}: a file format, Java library, R library, and tool-chain for mass spectrometry data analysis.},
author = {Scheltema, Richard A and Jankevics, Andris and Jansen, Ritsert C and Swertz, Morris A and Breitling, Rainer},
pages = {2786-2793},
url = {http://dx.doi.org/10.1021/ac2000994},
year = {2011},
month = {apr},
day = {1},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {83},
number = {7},
doi = {10.1021/ac2000994},
pmid = {21401061},
f1000-projects = {shared citations},
abstract = {The recent proliferation of high-resolution mass spectrometers has generated a wealth of new data analysis methods. However, flexible integration of these methods into configurations best suited to the research question is hampered by heterogeneous file formats and monolithic software development. The {mzXML}, {mzData}, and {mzML} file formats have enabled uniform access to unprocessed raw data. In this paper we present our efforts to produce an equally simple and powerful format, {PeakML}, to uniformly exchange processed intermediary and result data. To demonstrate the versatility of {PeakML}, we have developed an open source Java toolkit for processing, filtering, and annotating mass spectra in a customizable pipeline ({mzMatch}), as well as a user-friendly data visualization environment ({PeakML} Viewer). The {PeakML} format in particular enables the flexible exchange of processed data between software created by different groups or companies, as we illustrate by providing a {PeakML}-based integration of the widely used {XCMS} package with {mzMatch} data processing tools. As an added advantage, downstream analysis can benefit from direct access to the full mass trace information underlying summarized mass spectrometry results, providing the user with the means to rapidly verify results. The {PeakML}/{mzMatch} software is freely available at http://mzmatch.sourceforge.net, with documentation, tutorials, and a community forum.}
}
@article{chambers_2012,
title = {A cross-platform toolkit for mass spectrometry and proteomics.},
author = {Chambers, Matthew C and Maclean, Brendan and Burke, Robert and Amodei, Dario and Ruderman, Daniel L and Neumann, Steffen and Gatto, Laurent and Fischer, Bernd and Pratt, Brian and Egertson, Jarrett and Hoff, Katherine and Kessner, Darren and Tasman, Natalie and Shulman, Nicholas and Frewen, Barbara and Baker, Tahmina A and Brusniak, Mi-Youn and Paulse, Christopher and Creasy, David and Flashner, Lisa and Kani, Kian and Moulding, Chris and Seymour, Sean L and Nuwaysir, Lydia M and Lefebvre, Brent and Kuhlmann, Frank and Roark, Joe and Rainer, Paape and Detlev, Suckau and Hemenway, Tina and Huhmer, Andreas and Langridge, James and Connolly, Brian and Chadick, Trey and Holly, Krisztina and Eckels, Josh and Deutsch, Eric W and Moritz, Robert L and Katz, Jonathan E and Agus, David B and {MacCoss}, Michael and Tabb, David L and Mallick, Parag},
pages = {918-920},
url = {http://www.nature.com/doifinder/10.1038/nbt.2377},
year = {2012},
month = {oct},
urldate = {2018-01-13},
journal = {Nature Biotechnology},
volume = {30},
number = {10},
issn = {1087-0156},
doi = {10.1038/nbt.2377},
pmid = {23051804},
pmcid = {PMC3471674},
f1000-projects = {shared citations}
}
@article{kuhl_2012,
title = {{CAMERA}: an integrated strategy for compound spectra extraction and annotation of liquid chromatography/mass spectrometry data sets.},
author = {Kuhl, Carsten and Tautenhahn, Ralf and Böttcher, Christoph and Larson, Tony R and Neumann, Steffen},
pages = {283-289},
url = {http://dx.doi.org/10.1021/ac202450g},
year = {2012},
month = {jan},
day = {3},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {84},
number = {1},
doi = {10.1021/ac202450g},
pmid = {22111785},
pmcid = {PMC3658281},
f1000-projects = {shared citations},
abstract = {Liquid chromatography coupled to mass spectrometry is routinely used for metabolomics experiments. In contrast to the fairly routine and automated data acquisition steps, subsequent compound annotation and identification require extensive manual analysis and thus form a major bottleneck in data interpretation. Here we present {CAMERA}, a Bioconductor package integrating algorithms to extract compound spectra, annotate isotope and adduct peaks, and propose the accurate compound mass even in highly complex data. To evaluate the algorithms, we compared the annotation of {CAMERA} against a manually defined annotation for a mixture of known compounds spiked into a complex matrix at different concentrations. {CAMERA} successfully extracted accurate masses for 89.7\% and 90.3\% of the annotatable compounds in positive and negative ion modes, respectively. Furthermore, we present a novel annotation approach that combines spectral information of data acquired in opposite ion modes to further improve the annotation rate. We demonstrate the utility of {CAMERA} in two different, easily adoptable plant metabolomics experiments, where the application of {CAMERA} drastically reduced the amount of manual analysis. \copyright 2011 American Chemical Society}
}
@article{wilkinson_2016,
title = {The {FAIR} Guiding Principles for scientific data management and stewardship.},
author = {Wilkinson, Mark D and Dumontier, Michel and Aalbersberg, I Jsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and Blomberg, Niklas and Boiten, Jan-Willem and da Silva Santos, Luiz Bonino and Bourne, Philip E and Bouwman, Jildau and Brookes, Anthony J and Clark, Tim and Crosas, Mercè and Dillo, Ingrid and Dumon, Olivier and Edmunds, Scott and Evelo, Chris T and Finkers, Richard and Gonzalez-Beltran, Alejandra and Gray, Alasdair J G and Groth, Paul and Goble, Carole and Grethe, Jeffrey S and Heringa, Jaap and 't Hoen, Peter A C and Hooft, Rob and Kuhn, Tobias and Kok, Ruben and Kok, Joost and Lusher, Scott J and Martone, Maryann E and Mons, Albert and Packer, Abel L and Persson, Bengt and Rocca-Serra, Philippe and Roos, Marco and van Schaik, Rene and Sansone, Susanna-Assunta and Schultes, Erik and Sengstag, Thierry and Slater, Ted and Strawn, George and Swertz, Morris A and Thompson, Mark and van der Lei, Johan and van Mulligen, Erik and Velterop, Jan and Waagmeester, Andra and Wittenburg, Peter and Wolstencroft, Katherine and Zhao, Jun and Mons, Barend},
pages = {160018},
url = {http://www.nature.com/articles/sdata201618},
year = {2016},
month = {mar},
day = {15},
urldate = {2018-07-13},
journal = {Scientific data},
volume = {3},
issn = {2052-4463},
doi = {10.1038/sdata.2016.18},
pmid = {26978244},
pmcid = {PMC4792175},
f1000-projects = {shared citations},
abstract = {There is an urgent need to improve the infrastructure supporting the reuse of scholarly data. A diverse set of stakeholders-representing academia, industry, funding agencies, and scholarly publishers-have come together to design and jointly endorse a concise and measureable set of principles that we refer to as the {FAIR} Data Principles. The intent is that these may act as a guideline for those wishing to enhance the reusability of their data holdings. Distinct from peer initiatives that focus on the human scholar, the {FAIR} Principles put specific emphasis on enhancing the ability of machines to automatically find and use the data, in addition to supporting its reuse by individuals. This Comment is the first formal publication of the {FAIR} Principles, and includes the rationale behind them, and some exemplar implementations in the community.}
}
@article{kim_2016,
title = {{PubChem} Substance and Compound databases.},
author = {Kim, Sunghwan and Thiessen, Paul A and Bolton, Evan E and Chen, Jie and Fu, Gang and Gindulyte, Asta and Han, Lianyi and He, Jane and He, Siqian and Shoemaker, Benjamin A and Wang, Jiyao and Yu, Bo and Zhang, Jian and Bryant, Stephen H},
pages = {D1202-13},
url = {http://dx.doi.org/10.1093/nar/gkv951},
year = {2016},
month = {jan},
day = {4},
urldate = {2018-01-29},
journal = {Nucleic Acids Research},
volume = {44},
number = {D1},
doi = {10.1093/nar/gkv951},
pmid = {26400175},
pmcid = {PMC4702940},
f1000-projects = {shared citations},
abstract = {{PubChem} (https://pubchem.ncbi.nlm.nih.gov) is a public repository for information on chemical substances and their biological activities, launched in 2004 as a component of the Molecular Libraries Roadmap Initiatives of the {US} National Institutes of Health ({NIH}). For the past 11 years, {PubChem} has grown to a sizable system, serving as a chemical information resource for the scientific research community. {PubChem} consists of three inter-linked databases, Substance, Compound and {BioAssay}. The Substance database contains chemical information deposited by individual data contributors to {PubChem}, and the Compound database stores unique chemical structures extracted from the Substance database. Biological activity data of chemical substances tested in assay experiments are contained in the {BioAssay} database. This paper provides an overview of the {PubChem} Substance and Compound databases, including data sources and contents, data organization, data submission using {PubChem} Upload, chemical structure standardization, web-based interfaces for textual and non-textual searches, and programmatic access. It also gives a brief description of {PubChem3D}, a resource derived from theoretical three-dimensional structures of compounds in {PubChem}, as well as {PubChemRDF}, Resource Description Framework ({RDF})-formatted {PubChem} data for data sharing, analysis and integration with information contained in other databases. Published by Oxford University Press on behalf of Nucleic Acids Research 2015. This work is written by (a) {US} Government employee(s) and is in the public domain in the {US}.}
}
@article{collberg_2016,
title = {Repeatability in computer systems research},
author = {Collberg, Christian and Proebsting, Todd A.},
pages = {62-69},
url = {http://dl.acm.org/citation.cfm?doid=2897191.2812803},
year = {2016},
month = {feb},
day = {25},
urldate = {2019-05-13},
journal = {Communications of the {ACM}},
volume = {59},
number = {3},
issn = {00010782},
doi = {10.1145/2812803},
f1000-projects = {shared citations},
abstract = {To encourage repeatable research, fund repeatability engineering and reward commitments to sharing research artifacts.}
}
@article{zhang_2009,
title = {{KEGGgraph}: a graph approach to {KEGG} {PATHWAY} in R and bioconductor.},
author = {Zhang, Jitao David and Wiemann, Stefan},
pages = {1470-1471},
url = {http://dx.doi.org/10.1093/bioinformatics/btp167},
year = {2009},
month = {jun},
day = {1},
urldate = {2019-05-09},
journal = {Bioinformatics},
volume = {25},
number = {11},
doi = {10.1093/bioinformatics/btp167},
pmid = {19307239},
pmcid = {PMC2682514},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: {KEGG} {PATHWAY} is a service of Kyoto Encyclopedia of Genes and Genomes ({KEGG}), constructing manually curated pathway maps that represent current knowledge on biological networks in graph models. While valuable graph tools have been implemented in R/Bioconductor, to our knowledge there is currently no software package to parse and analyze {KEGG} pathways with graph theory. {RESULTS}: We introduce the software package {KEGGgraph} in R and Bioconductor, an interface between {KEGG} pathways and graph models as well as a collection of tools for these graphs. Superior to existing approaches, {KEGGgraph} captures the pathway topology and allows further analysis or dissection of pathway graphs. We demonstrate the use of the package by the case study of analyzing human pancreatic cancer pathway. {AVAILABILITY}: {KEGGgraph} is freely available at the Bioconductor web site (http://www.bioconductor.org). {KGML} files can be downloaded from {KEGG} {FTP} site (ftp://ftp.genome.jp/pub/kegg/xml).}
}
@article{lewis_2009,
title = {{rNMR}: open source software for identifying and quantifying metabolites in {NMR} spectra.},
author = {Lewis, Ian A and Schommer, Seth C and Markley, John L},
pages = {S123-6},
url = {http://dx.doi.org/10.1002/mrc.2526},
year = {2009},
month = {dec},
urldate = {2018-01-15},
journal = {Magnetic Resonance in Chemistry},
volume = {47 Suppl 1},
doi = {10.1002/mrc.2526},
pmid = {19821464},
pmcid = {PMC2798074},
f1000-projects = {shared citations},
abstract = {Despite the extensive use of nuclear magnetic resonance ({NMR}) for metabolomics, no publicly available tools have been designed for identifying and quantifying metabolites across multiple spectra. We introduce here a new open source software tool, {rNMR}, which provides a simple graphics-based method for visualizing, identifying, and quantifying metabolites across multiple one- or two-dimensional {NMR} spectra. {rNMR} differs from existing software tools for {NMR} spectroscopy in that analyses are based on regions of interest ({ROIs}) rather than peak lists. {ROIs} contain all of the underlying {NMR} data within user-defined chemical shift ranges. {ROIs} can be inspected visually, and they support robust quantification of {NMR} signals. {ROI}-based analyses support simultaneous views of metabolite signals from up to hundreds of spectra, and {ROI} boundaries can be adjusted dynamically to ensure that signals corresponding to assigned atoms are analyzed consistently throughout the dataset. We describe how {rNMR} greatly reduces the time required for robust bioanalytical analysis of complex {NMR} data. An {rNMR} analysis yields a compact and transparent way of archiving the results from a metabolomics study so that it can be examined and evaluated by others. The {rNMR} website at http://rnmr.nmrfam.wisc.edu offers downloadable versions of {rNMR} for Windows, Macintosh, and Linux platforms along with extensive help documentation, instructional videos, and sample data.}
}
@article{wishart_2009,
title = {{HMDB}: a knowledgebase for the human metabolome.},
author = {Wishart, David S and Knox, Craig and Guo, An Chi and Eisner, Roman and Young, Nelson and Gautam, Bijaya and Hau, David D and Psychogios, Nick and Dong, Edison and Bouatra, Souhaila and Mandal, Rupasri and Sinelnikov, Igor and Xia, Jianguo and Jia, Leslie and Cruz, Joseph A and Lim, Emilia and Sobsey, Constance A and Shrivastava, Savita and Huang, Paul and Liu, Philip and Fang, Lydia and Peng, Jun and Fradette, Ryan and Cheng, Dean and Tzur, Dan and Clements, Melisa and Lewis, Avalyn and De Souza, Andrea and Zuniga, Azaret and Dawe, Margot and Xiong, Yeping and Clive, Derrick and Greiner, Russ and Nazyrova, Alsu and Shaykhutdinov, Rustem and Li, Liang and Vogel, Hans J and Forsythe, Ian},
pages = {D603-10},
url = {http://dx.doi.org/10.1093/nar/gkn810},
year = {2009},
month = {jan},
urldate = {2019-06-11},
journal = {Nucleic Acids Research},
volume = {37},
number = {Database issue},
doi = {10.1093/nar/gkn810},
pmid = {18953024},
pmcid = {PMC2686599},
f1000-projects = {shared citations},
abstract = {The Human Metabolome Database ({HMDB}, http://www.hmdb.ca) is a richly annotated resource that is designed to address the broad needs of biochemists, clinical chemists, physicians, medical geneticists, nutritionists and members of the metabolomics community. Since its first release in 2007, the {HMDB} has been used to facilitate the research for nearly 100 published studies in metabolomics, clinical biochemistry and systems biology. The most recent release of {HMDB} (version 2.0) has been significantly expanded and enhanced over the previous release (version 1.0). In particular, the number of fully annotated metabolite entries has grown from 2180 to more than 6800 (a 300\% increase), while the number of metabolites with biofluid or tissue concentration data has grown by a factor of five (from 883 to 4413). Similarly, the number of purified compounds with reference to {NMR}, {LC}-{MS} and {GC}-{MS} spectra has more than doubled (from 380 to more than 790 compounds). In addition to this significant expansion in database size, many new database searching tools and new data content has been added or enhanced. These include better algorithms for spectral searching and matching, more powerful chemical substructure searches, faster text searching software, as well as dedicated pathway searching tools and customized, clickable metabolic maps. Changes to the user-interface have also been implemented to accommodate future expansion and to make database navigation much easier. These improvements should make the {HMDB} much more useful to a much wider community of users.}
}
@article{sales_2012,
title = {graphite - a Bioconductor package to convert pathway topology to gene network.},
author = {Sales, Gabriele and Calura, Enrica and Cavalieri, Duccio and Romualdi, Chiara},
pages = {20},
url = {http://dx.doi.org/10.1186/1471-2105-13-20},
year = {2012},
month = {jan},
day = {31},
urldate = {2019-05-09},
journal = {{BMC} Bioinformatics},
volume = {13},
doi = {10.1186/1471-2105-13-20},
pmid = {22292714},
pmcid = {PMC3296647},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Gene set analysis is moving towards considering pathway topology as a crucial feature. Pathway elements are complex entities such as protein complexes, gene family members and chemical compounds. The conversion of pathway topology to a gene/protein networks (where nodes are a simple element like a gene/protein) is a critical and challenging task that enables topology-based gene set analyses.Unfortunately, currently available R/Bioconductor packages provide pathway networks only from single databases. They do not propagate signals through chemical compounds and do not differentiate between complexes and gene families. {RESULTS}: Here we present graphite, a Bioconductor package addressing these issues. Pathway information from four different databases is interpreted following specific biologically-driven rules that allow the reconstruction of gene-gene networks taking into account protein complexes, gene families and sensibly removing chemical compounds from the final graphs. The resulting networks represent a uniform resource for pathway analyses. Indeed, graphite provides easy access to three recently proposed topological methods. The graphite package is available as part of the Bioconductor software suite. {CONCLUSIONS}: graphite is an innovative package able to gather and make easily available the contents of the four major pathway databases. In the field of topological analysis graphite acts as a provider of biological information by reducing the pathway complexity considering the biological meaning of the pathway elements.}
}
@article{cao_2008,
title = {{ChemmineR}: a compound mining framework for R.},
author = {Cao, Yiqun and Charisi, Anna and Cheng, Li-Chang and Jiang, Tao and Girke, Thomas},
pages = {1733-1734},
url = {http://dx.doi.org/10.1093/bioinformatics/btn307},
year = {2008},
month = {aug},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {24},
number = {15},
doi = {10.1093/bioinformatics/btn307},
pmid = {18596077},
pmcid = {PMC2638865},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Software applications for structural similarity searching and clustering of small molecules play an important role in drug discovery and chemical genomics. Here, we present the first open-source compound mining framework for the popular statistical programming environment R. The integration with a powerful statistical environment maximizes the flexibility, expandability and programmability of the provided analysis functions. {RESULTS}: We discuss the algorithms and compound mining utilities provided by the R package {ChemmineR}. It contains functions for structural similarity searching, clustering of compound libraries with a wide spectrum of classification algorithms and various utilities for managing complex compound data. It also offers a wide range of visualization functions for compound clusters and chemical structures. The package is well integrated with the online {ChemMine} environment and allows bidirectional communications between the two services. {AVAILABILITY}: {ChemmineR} is freely available as an R package from the {ChemMine} project site: http://bioweb.ucr.edu/{ChemMineV2}/chemminer}
}
@article{heller_2015,
title = {Inchi, the {IUPAC} international chemical identifier.},
author = {Heller, Stephen R and {McNaught}, Alan and Pletnev, Igor and Stein, Stephen and Tchekhovskoi, Dmitrii},
pages = {23},
url = {http://dx.doi.org/10.1186/s13321-015-0068-4},
year = {2015},
month = {may},
day = {30},
urldate = {2018-01-29},
journal = {Journal of cheminformatics},
volume = {7},
doi = {10.1186/s13321-015-0068-4},
pmid = {26136848},
pmcid = {PMC4486400},
f1000-projects = {shared citations},
abstract = {This paper documents the design, layout and algorithms of the {IUPAC} International Chemical Identifier, {InChI}.}
}
@article{jacob_2016,
title = {Correcting gene expression data when neither the unwanted variation nor the factor of interest are observed.},
author = {Jacob, Laurent and Gagnon-Bartsch, Johann A and Speed, Terence P},
pages = {16-28},
url = {http://dx.doi.org/10.1093/biostatistics/kxv026},
year = {2016},
month = {jan},
urldate = {2017-05-28},
journal = {Biostatistics},
volume = {17},
number = {1},
doi = {10.1093/biostatistics/kxv026},
pmid = {26286812},
pmcid = {PMC4679071},
f1000-projects = {shared citations},
abstract = {When dealing with large scale gene expression studies, observations are commonly contaminated by sources of unwanted variation such as platforms or batches. Not taking this unwanted variation into account when analyzing the data can lead to spurious associations and to missing important signals. When the analysis is unsupervised, e.g. when the goal is to cluster the samples or to build a corrected version of the dataset--as opposed to the study of an observed factor of interest--taking unwanted variation into account can become a difficult task. The factors driving unwanted variation may be correlated with the unobserved factor of interest, so that correcting for the former can remove the latter if not done carefully. We show how negative control genes and replicate samples can be used to estimate unwanted variation in gene expression, and discuss how this information can be used to correct the expression data. The proposed methods are then evaluated on synthetic data and three gene expression datasets. They generally manage to remove unwanted variation without losing the signal of interest and compare favorably to state-of-the-art corrections. All proposed methods are implemented in the bioconductor package {RUVnormalize}. \copyright The Author 2015. Published by Oxford University Press.}
}
@article{fahy_2007,
title = {{LIPID} {MAPS} online tools for lipid research.},
author = {Fahy, Eoin and Sud, Manish and Cotter, Dawn and Subramaniam, Shankar},
pages = {W606-12},
url = {http://dx.doi.org/10.1093/nar/gkm324},
year = {2007},
month = {jul},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {35},
number = {Web Server issue},
doi = {10.1093/nar/gkm324},
pmid = {17584797},
pmcid = {PMC1933166},
f1000-projects = {shared citations},
abstract = {The {LIPID} {MAPS} consortium has developed a number of online tools for performing tasks such as drawing lipid structures and predicting possible structures from mass spectrometry ({MS}) data. A simple online interface has been developed to enable an end-user to rapidly generate a variety of lipid chemical structures, along with corresponding systematic names and ontological information. The structure-drawing tools are available for six categories of lipids: (i) fatty acyls, (ii) glycerolipids, (iii) glycerophospholipids, (iv) cardiolipins, (v) sphingolipids and (vi) sterols. Within each category, the structure-drawing tools support the specification of various parameters such as chain lengths at a specific sn position, head groups, double bond positions and stereochemistry to generate a specific lipid structure. The structure-drawing tools have also been integrated with a second set of online tools which predict possible lipid structures from precursor-ion and product-ion {MS} experimental data. The {MS} prediction tools are available for three categories of lipids: (i) mono/di/triacylglycerols, (ii) glycerophospholipids and (iii) cardiolipins. The {LIPID} {MAPS} online tools are publicly available at www.lipidmaps.org/tools/.}
}
@article{davidson_2016,
title = {Galaxy-M: a Galaxy workflow for processing and analyzing direct infusion and liquid chromatography mass spectrometry-based metabolomics data.},
author = {Davidson, Robert L and Weber, Ralf J M and Liu, Haoyu and Sharma-Oates, Archana and Viant, Mark R},
pages = {10},
url = {http://dx.doi.org/10.1186/s13742-016-0115-8},
year = {2016},
month = {feb},
day = {23},
urldate = {2019-05-09},
journal = {GigaScience},
volume = {5},
doi = {10.1186/s13742-016-0115-8},
pmid = {26913198},
pmcid = {PMC4765054},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Metabolomics is increasingly recognized as an invaluable tool in the biological, medical and environmental sciences yet lags behind the methodological maturity of other omics fields. To achieve its full potential, including the integration of multiple omics modalities, the accessibility, standardization and reproducibility of computational metabolomics tools must be improved significantly. {RESULTS}: Here we present our end-to-end mass spectrometry metabolomics workflow in the widely used platform, Galaxy. Named Galaxy-M, our workflow has been developed for both direct infusion mass spectrometry ({DIMS}) and liquid chromatography mass spectrometry ({LC}-{MS}) metabolomics. The range of tools presented spans from processing of raw data, e.g. peak picking and alignment, through data cleansing, e.g. missing value imputation, to preparation for statistical analysis, e.g. normalization and scaling, and principal components analysis ({PCA}) with associated statistical evaluation. We demonstrate the ease of using these Galaxy workflows via the analysis of {DIMS} and {LC}-{MS} datasets, and provide {PCA} scores and associated statistics to help other users to ensure that they can accurately repeat the processing and analysis of these two datasets. Galaxy and data are all provided pre-installed in a virtual machine ({VM}) that can be downloaded from the {GigaDB} repository. Additionally, source code, executables and installation instructions are available from {GitHub}. {CONCLUSIONS}: The Galaxy platform has enabled us to produce an easily accessible and reproducible computational metabolomics workflow. More tools could be added by the community to expand its functionality. We recommend that Galaxy-M workflow files are included within the supplementary information of publications, enabling metabolomics studies to achieve greater reproducibility.}
}
@article{shahaf_2016,
title = {The {WEIZMA\SS} spectral library for high-confidence metabolite identification.},
author = {Shahaf, Nir and Rogachev, Ilana and Heinig, Uwe and Meir, Sagit and Malitsky, Sergey and Battat, Maor and Wyner, Hilary and Zheng, Shuning and Wehrens, Ron and Aharoni, Asaph},
pages = {12423},
url = {http://dx.doi.org/10.1038/ncomms12423},
year = {2016},
month = {aug},
day = {30},
urldate = {2018-01-13},
journal = {Nature Communications},
volume = {7},
doi = {10.1038/ncomms12423},
pmid = {27571918},
pmcid = {PMC5013563},
f1000-projects = {shared citations},
abstract = {Annotation of metabolites is an essential, yet problematic, aspect of mass spectrometry ({MS})-based metabolomics assays. The current repertoire of definitive annotations of metabolite spectra in public {MS} databases is limited and suffers from lack of chemical and taxonomic diversity. Furthermore, the heterogeneity of the data prevents the development of universally applicable metabolite annotation tools. Here we present a combined experimental and computational platform to advance this key issue in metabolomics. {WEIZMA\SS} is a unique reference metabolite spectral library developed from high-resolution {MS} data acquired from a structurally diverse set of 3,540 plant metabolites. We also present {MatchWeiz}, a multi-module strategy using a probabilistic approach to match library and experimental data. This strategy allows efficient and high-confidence identification of dozens of metabolites in model and exotic plants, including metabolites not previously reported in plants or found in few plant species to date.}
}
@article{horai_2010,
title = {{MassBank}: a public repository for sharing mass spectral data for life sciences.},
author = {Horai, Hisayuki and Arita, Masanori and Kanaya, Shigehiko and Nihei, Yoshito and Ikeda, Tasuku and Suwa, Kazuhiro and Ojima, Yuya and Tanaka, Kenichi and Tanaka, Satoshi and Aoshima, Ken and Oda, Yoshiya and Kakazu, Yuji and Kusano, Miyako and Tohge, Takayuki and Matsuda, Fumio and Sawada, Yuji and Hirai, Masami Yokota and Nakanishi, Hiroki and Ikeda, Kazutaka and Akimoto, Naoshige and Maoka, Takashi and Takahashi, Hiroki and Ara, Takeshi and Sakurai, Nozomu and Suzuki, Hideyuki and Shibata, Daisuke and Neumann, Steffen and Iida, Takashi and Tanaka, Ken and Funatsu, Kimito and Matsuura, Fumito and Soga, Tomoyoshi and Taguchi, Ryo and Saito, Kazuki and Nishioka, Takaaki},
pages = {703-714},
url = {http://dx.doi.org/10.1002/jms.1777},
year = {2010},
month = {jul},
urldate = {2019-08-01},
journal = {Journal of Mass Spectrometry},
volume = {45},
number = {7},
doi = {10.1002/jms.1777},
pmid = {20623627},
f1000-projects = {shared citations},
abstract = {{MassBank} is the first public repository of mass spectra of small chemical compounds for life sciences (\textless3000 Da). The database contains 605 electron-ionization mass spectrometry ({EI}-{MS}), 137 fast atom bombardment {MS} and 9276 electrospray ionization ({ESI})-{MS}(n) data of 2337 authentic compounds of metabolites, 11 545 {EI}-{MS} and 834 other-{MS} data of 10,286 volatile natural and synthetic compounds, and 3045 {ESI}-{MS}(2) data of 679 synthetic drugs contributed by 16 research groups (January 2010). {ESI}-{MS}(2) data were analyzed under nonstandardized, independent experimental conditions. {MassBank} is a distributed database. Each research group provides data from its own {MassBank} data servers distributed on the Internet. {MassBank} users can access either all of the {MassBank} data or a subset of the data by specifying one or more experimental conditions. In a spectral search to retrieve mass spectra similar to a query mass spectrum, the similarity score is calculated by a weighted cosine correlation in which weighting exponents on peak intensity and the mass-to-charge ratio are optimized to the {ESI}-{MS}(2) data. {MassBank} also provides a merged spectrum for each compound prepared by merging the analyzed {ESI}-{MS}(2) data on an identical compound under different collision-induced dissociation conditions. Data merging has significantly improved the precision of the identification of a chemical compound by 21-23\% at a similarity score of 0.6. Thus, {MassBank} is useful for the identification of chemical compounds and the publication of experimental data. 2010 John Wiley \& Sons, Ltd.}
}
@article{alonso_2011,
title = {{AStream}: an R package for annotating {LC}/{MS} metabolomic data.},
author = {Alonso, Arnald and Julià, Antonio and Beltran, Antoni and Vinaixa, Maria and Díaz, Marta and Ibañez, Lourdes and Correig, Xavier and Marsal, Sara},
pages = {1339-1340},
url = {http://dx.doi.org/10.1093/bioinformatics/btr138},
year = {2011},
month = {may},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {27},
number = {9},
doi = {10.1093/bioinformatics/btr138},
pmid = {21414990},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: {AStream}, an R-statistical software package for the curation and identification of feature peaks extracted from liquid chromatography mass spectrometry ({LC}/{MS}) metabolomics data, is described. {AStream} detects isotopic, fragment and adduct patterns by identifying feature pairs that fulfill expected relational patterns. Data reduction by {AStream} allows compounds to be identified reliably and subsequently linked to metabolite databases. {AStream} provides researchers with a fast, reliable tool for summarizing metabolomic data, notably reducing curation time and increasing consistency of results. {AVAILABILITY}: The {AStream} R package and a study example can be freely accessed at http://www.urr.cat/{AStream}/{AStream}.html.}
}
@article{tautenhahn_2008,
title = {Highly sensitive feature detection for high resolution {LC}/{MS}.},
author = {Tautenhahn, Ralf and Böttcher, Christoph and Neumann, Steffen},
pages = {504},
url = {http://dx.doi.org/10.1186/1471-2105-9-504},
year = {2008},
month = {nov},
day = {28},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {9},
doi = {10.1186/1471-2105-9-504},
pmid = {19040729},
pmcid = {PMC2639432},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Liquid chromatography coupled to mass spectrometry ({LC}/{MS}) is an important analytical technology for e.g. metabolomics experiments. Determining the boundaries, centres and intensities of the two-dimensional signals in the {LC}/{MS} raw data is called feature detection. For the subsequent analysis of complex samples such as plant extracts, which may contain hundreds of compounds, corresponding to thousands of features -- a reliable feature detection is mandatory. {RESULTS}: We developed a new feature detection algorithm {centWave} for high-resolution {LC}/{MS} data sets, which collects regions of interest (partial mass traces) in the raw-data, and applies continuous wavelet transformation and optionally Gauss-fitting in the chromatographic domain. We evaluated our feature detection algorithm on dilution series and mixtures of seed and leaf extracts, and estimated recall, precision and F-score of seed and leaf specific features in two experiments of different complexity. {CONCLUSION}: The new feature detection algorithm meets the requirements of current metabolomics experiments. {centWave} can detect close-by and partially overlapping features and has the highest overall recall and precision values compared to the other algorithms, {matchedFilter} (the original algorithm of {XCMS}) and the {centroidPicker} from {MZmine}. The {centWave} algorithm was integrated into the Bioconductor R-package {XCMS} and is available from (http://www.bioconductor.org/).}
}
@article{mahieu_2014,
title = {Credentialing features: a platform to benchmark and optimize untargeted metabolomic methods.},
author = {Mahieu, Nathaniel Guy and Huang, Xiaojing and Chen, Ying-Jr and Patti, Gary J},
pages = {9583-9589},
url = {http://dx.doi.org/10.1021/ac503092d},
year = {2014},
month = {oct},
day = {7},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {86},
number = {19},
doi = {10.1021/ac503092d},
pmid = {25160088},
pmcid = {PMC4188275},
f1000-projects = {shared citations},
abstract = {The aim of untargeted metabolomics is to profile as many metabolites as possible, yet a major challenge is comparing experimental method performance on the basis of metabolome coverage. To date, most published approaches have compared experimental methods by counting the total number of features detected. Due to artifactual interference, however, this number is highly variable and therefore is a poor metric for comparing metabolomic methods. Here we introduce an alternative approach to benchmarking metabolome coverage which relies on mixed Escherichia coli extracts from cells cultured in regular and (13)C-enriched media. After mass spectrometry-based metabolomic analysis of these extracts, we "credential" features arising from E. coli metabolites on the basis of isotope spacing and intensity. This credentialing platform enables us to accurately compare the number of nonartifactual features yielded by different experimental approaches. We highlight the value of our platform by reoptimizing a published untargeted metabolomic method for {XCMS} data processing. Compared to the published parameters, the new {XCMS} parameters decrease the total number of features by 15\% (a reduction in noise features) while increasing the number of true metabolites detected and grouped by 20\%. Our credentialing platform relies on easily generated E. coli samples and a simple software algorithm that is freely available on our laboratory Web site (http://pattilab.wustl.edu/software/credential/). We have validated the credentialing platform with reversed-phase and hydrophilic interaction liquid chromatography as well as Agilent, Thermo Scientific, {AB} {SCIEX}, and {LECO} mass spectrometers. Thus, the credentialing platform can readily be applied by any laboratory to optimize their untargeted metabolomic pipeline for metabolite extraction, chromatographic separation, mass spectrometric detection, and bioinformatic processing.}
}
@article{nikolskiy_2013,
title = {An untargeted metabolomic workflow to improve structural characterization of metabolites.},
author = {Nikolskiy, Igor and Mahieu, Nathaniel G and Chen, Ying-Jr and Tautenhahn, Ralf and Patti, Gary J},
pages = {7713-7719},
url = {http://dx.doi.org/10.1021/ac400751j},
year = {2013},
month = {aug},
day = {20},
urldate = {2018-01-16},
journal = {Analytical Chemistry},
volume = {85},
number = {16},
doi = {10.1021/ac400751j},
pmid = {23829391},
pmcid = {PMC3983953},
f1000-projects = {shared citations},
abstract = {Mass spectrometry-based metabolomics relies on {MS}(2) data for structural characterization of metabolites. To obtain the high-quality {MS}(2) data necessary to support metabolite identifications, ions of interest must be purely isolated for fragmentation. Here, we show that metabolomic {MS}(2) data are frequently characterized by contaminating ions that prevent structural identification. Although using narrow-isolation windows can minimize contaminating {MS}(2) fragments, even narrow windows are not always selective enough, and they can complicate data analysis by removing isotopic patterns from {MS}(2) spectra. Moreover, narrow windows can significantly reduce sensitivity. In this work, we introduce a novel, two-part approach for performing metabolomic identifications that addresses these issues. First, we collect {MS}(2) scans with less stringent isolation settings to obtain improved sensitivity at the expense of specificity. Then, by evaluating {MS}(2) fragment intensities as a function of retention time and precursor mass targeted for {MS}(2) analysis, we obtain deconvolved {MS}(2) spectra that are consistent with pure standards and can therefore be used for metabolite identification. The value of our approach is highlighted with metabolic extracts from brain, liver, astrocytes, as well as nerve tissue, and performance is evaluated by using pure metabolite standards in combination with simulations based on raw {MS}(2) data from the {METLIN} metabolite database. A R package implementing the algorithms used in our workflow is available on our laboratory website ( http://pattilab.wustl.edu/decoms2.php ).}
}
@article{wehrens_2015,
title = {Fast parametric time warping of peak lists.},
author = {Wehrens, Ron and Bloemberg, Tom G and Eilers, Paul H C},
pages = {3063-3065},
url = {http://dx.doi.org/10.1093/bioinformatics/btv299},
year = {2015},
month = {sep},
day = {15},
urldate = {2018-01-16},
journal = {Bioinformatics},
volume = {31},
number = {18},
doi = {10.1093/bioinformatics/btv299},
pmid = {25971741},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: Alignment of peaks across samples is a difficult but unavoidable step in the data analysis for all analytical techniques containing a separation step like chromatography. Important application examples are the fields of metabolomics and proteomics. Parametric time warping ({PTW}) has already shown to be very useful in these fields because of the highly restricted form of the warping functions, avoiding overfitting. Here, we describe a new formulation of {PTW}, working on peak-picked features rather than on complete profiles. Not only does this allow for a much more smooth integration in existing pipelines, it also speeds up the (already among the fastest) algorithm by orders of magnitude. Using two publicly available datasets we show the potential of the new approach. The first set is a {LC}-{DAD} dataset of grape samples, and the second an {LC}-{MS} dataset of apple extracts. {AVAILABILITY} {AND} {IMPLEMENTATION}: Parametric time warping of peak lists is implemented in the ptw package, version 1.9.1 and onwards, available from Github (https://github.com/rwehrens/ptw) and {CRAN} (http://cran.r-project.org). The package also contains a vignette, providing more theoretical details and scripts to reproduce the results below. {CONTACT}: ron.wehrens@wur.nl. \copyright The Author 2015. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{mahieu_2016,
title = {Warpgroup: increased precision of metabolomic data processing by consensus integration bound analysis.},
author = {Mahieu, Nathaniel G and Spalding, Jonathan L and Patti, Gary J},
pages = {268-275},
url = {http://dx.doi.org/10.1093/bioinformatics/btv564},
year = {2016},
month = {jan},
day = {15},
urldate = {2018-01-29},
journal = {Bioinformatics},
volume = {32},
number = {2},
doi = {10.1093/bioinformatics/btv564},
pmid = {26424859},
pmcid = {PMC5013975},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Current informatic techniques for processing raw chromatography/mass spectrometry data break down under several common, non-ideal conditions. Importantly, hydrophilic liquid interaction chromatography (a key separation technology for metabolomics) produces data which are especially challenging to process. We identify three critical points of failure in current informatic workflows: compound specific drift, integration region variance, and naive missing value imputation. We implement the Warpgroup algorithm to address these challenges. {RESULTS}: Warpgroup adds peak subregion detection, consensus integration bound detection, and intelligent missing value imputation steps to the conventional informatic workflow. When compared with the conventional workflow, Warpgroup made major improvements to the processed data. The coefficient of variation for peaks detected in replicate injections of a complex Escherichia Coli extract were halved (a reduction of 19\%). Integration regions across samples were much more robust. Additionally, many signals lost by the conventional workflow were 'rescued' by the Warpgroup refinement, thereby resulting in greater analyte coverage in the processed data. {AVAILABILITY} {AND}: I: {MPLEMENTATION}: Warpgroup is an open source R package available on {GitHub} at github.com/nathaniel-mahieu/warpgroup. The package includes example data and {XCMS} compatibility wrappers for ease of use. {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. {CONTACT}: nathaniel.mahieu@wustl.edu or gjpattij@wustl.edu. \copyright The Author 2015. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{mahieu_2016a,
title = {A roadmap for the {XCMS} family of software solutions in metabolomics.},
author = {Mahieu, Nathaniel G and Genenbacher, Jessica Lloyd and Patti, Gary J},
pages = {87-93},
url = {http://dx.doi.org/10.1016/j.cbpa.2015.11.009},
year = {2016},
month = {feb},
urldate = {2018-01-13},
journal = {Current Opinion in Chemical Biology},
volume = {30},
doi = {10.1016/j.cbpa.2015.11.009},
pmid = {26673825},
pmcid = {PMC4831061},
f1000-projects = {shared citations},
abstract = {Global profiling of metabolites in biological samples by liquid chromatography/mass spectrometry results in datasets too large to evaluate manually. Fortunately, a variety of software programs are now available to automate the data analysis. Selection of the appropriate processing solution is dependent upon experimental design. Most metabolomic studies a decade ago had a relatively simple experimental design in which the intensities of compounds were compared between only two sample groups. More recently, however, increasingly sophisticated applications have been pursued. Examples include comparing compound intensities between multiple sample groups and unbiasedly tracking the fate of specific isotopic labels. The latter types of applications have necessitated the development of new software programs, which have introduced additional functionalities that facilitate data analysis. The objective of this review is to provide an overview of the freely available bioinformatic solutions that are either based upon or are compatible with the algorithms in {XCMS}, which we broadly refer to here as the '{XCMS} family' of software. These include {CAMERA}, credentialing, Warpgroup, {metaXCMS}, X(13){CMS}, and {XCMS} Online. Together, these informatic technologies can accommodate most cutting-edge metabolomic applications and offer unique advantages when compared to the original {XCMS} program. Copyright \copyright 2015 Elsevier Ltd. All rights reserved.}
}
@article{huang_2014,
title = {{X13CMS}: global tracking of isotopic labels in untargeted metabolomics.},
author = {Huang, Xiaojing and Chen, Ying-Jr and Cho, Kevin and Nikolskiy, Igor and Crawford, Peter A and Patti, Gary J},
pages = {1632-1639},
url = {http://dx.doi.org/10.1021/ac403384n},
year = {2014},
month = {feb},
day = {4},
urldate = {2018-01-15},
journal = {Analytical Chemistry},
volume = {86},
number = {3},
doi = {10.1021/ac403384n},
pmid = {24397582},
pmcid = {PMC3982964},
f1000-projects = {shared citations},
abstract = {Studies of isotopically labeled compounds have been fundamental to understanding metabolic pathways and fluxes. They have traditionally, however, been used in conjunction with targeted analyses that identify and quantify a limited number of labeled downstream metabolites. Here we describe an alternative workflow that leverages recent advances in untargeted metabolomic technologies to track the fates of isotopically labeled metabolites in a global, unbiased manner. This untargeted approach can be applied to discover novel biochemical pathways and characterize changes in the fates of labeled metabolites as a function of altered biological conditions such as disease. To facilitate the data analysis, we introduce X(13){CMS}, an extension of the widely used mass spectrometry-based metabolomic software package {XCMS}. X(13){CMS} uses the {XCMS} platform to detect metabolite peaks and perform retention-time alignment in liquid chromatography/mass spectrometry ({LC}/{MS}) data. With the use of the {XCMS} output, the program then identifies isotopologue groups that correspond to isotopically labeled compounds. The retrieval of these groups is done without any a priori knowledge besides the following input parameters: (i) the mass difference between the unlabeled and labeled isotopes, (ii) the mass accuracy of the instrument used in the analysis, and (iii) the estimated retention-time reproducibility of the chromatographic method. Despite its name, X(13){CMS} can be used to track any isotopic label. Additionally, it detects differential labeling patterns in biological samples collected from parallel control and experimental conditions. We validated the ability of X(13){CMS} to accurately retrieve labeled metabolites from complex biological matrices both with targeted {LC}/{MS}/{MS} analysis of a subset of the hits identified by the program and with labeled standards spiked into cell extracts. We demonstrate the full functionality of X(13){CMS} with an analysis of cultured rat astrocytes treated with uniformly labeled (U-)(13)C-glucose during lipopolysaccharide ({LPS}) challenge. Our results show that out of 223 isotopologue groups enriched from U-(13)C-glucose, 95 have statistically significant differential labeling patterns in astrocytes challenged with {LPS} compared to unchallenged control cells. Only two of these groups overlap with the 32 differentially regulated peaks identified by {XCMS}, indicating that X(13){CMS} uncovers different and complementary information from untargeted metabolomic studies. Like {XCMS}, X(13){CMS} is implemented in R. It is available from our laboratory website at http://pattilab.wustl.edu/x13cms.php .}
}
@article{fernndezalbert_2014,
title = {An R package to analyse {LC}/{MS} metabolomic data: {MAIT} (Metabolite Automatic Identification Toolkit).},
author = {Fernández-Albert, Francesc and Llorach, Rafael and Andrés-Lacueva, Cristina and Perera, Alexandre},
pages = {1937-1939},
url = {http://dx.doi.org/10.1093/bioinformatics/btu136},
year = {2014},
month = {jul},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {13},
doi = {10.1093/bioinformatics/btu136},
pmid = {24642061},
pmcid = {PMC4071204},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: Current tools for liquid chromatography and mass spectrometry for metabolomic data cover a limited number of processing steps, whereas online tools are hard to use in a programmable fashion. This article introduces the Metabolite Automatic Identification Toolkit ({MAIT}) package, which makes it possible for users to perform metabolomic end-to-end liquid chromatography and mass spectrometry data analysis. {MAIT} is focused on improving the peak annotation stage and provides essential tools to validate statistical analysis results. {MAIT} generates output files with the statistical results, peak annotation and metabolite identification. {AVAILABILITY} {AND} {IMPLEMENTATION}: http://b2slab.upc.edu/software-and-downloads/metabolite-automatic-identification-toolkit/. \copyright The Author 2014. Published by Oxford University Press.}
}
@article{cai_2015,
title = {An integrated targeted metabolomic platform for high-throughput metabolite profiling and automated data processing},
author = {Cai, Yuping and Weng, Kai and Guo, Yuan and Peng, Jie and Zhu, Zheng-Jiang},
pages = {1575-1586},
url = {http://link.springer.com/10.1007/s11306-015-0809-4},
year = {2015},
month = {dec},
urldate = {2018-01-17},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {11},
number = {6},
issn = {1573-3882},
doi = {10.1007/s11306-015-0809-4},
f1000-projects = {shared citations},
abstract = {Multiple reaction monitoring ({MRM})-based targeted metabolomics can simultaneously analyze up to hundreds of metabolites with high-throughput, good reproducibility, and wide dynamic range. However, when hundreds or thousands of {MRM} transitions are measured with tens to hundreds of biological samples, the complexity of {MRM} dataset acquired is no longer amenable to manual evaluation, and presents a challenge for targeted metabolomics. Here, we developed an R package, namely {MRMAnalyzer}, to process large set of {MRM}-based targeted metabolomics data automatically without any manual intervention. To demonstrate our {MRMAnalyzer} program, we first developed a targeted metabolomic method that simultaneously analyzes 182 metabolites in one 15-min {LC} run, and demonstrated the data processing procedures using {MRMAnalyzer}. The data processing steps include \textquotedblleftpseudo\textquotedblright accurate m/z transformation, peak detection and alignment, metabolite identification, quality control check and statistical analysis. Finally, a targeted metabolomic assay was designed and integrated with {MRMAnalyzer} to profile the metabolic changes in Escherichia coli subjected to the protein expression. The generated {MRM} dataset consisting of more than 8000 {MRM} transitions were readily processed using {MRMAnalyzer} within 20 min without any manual intervention. Fourty seven out of 140 detected metabolites, enriched in six metabolic pathways, were found significantly affected in E. coli metabolome. In summary, a targeted metabolomic platform is developed for high-throughput metabolite profiling and automated data processing, and the {MRMAnalyzer} program is a high efficient informatics tool for large scale targeted metabolomics.}
}
@article{wohlgemuth_2010,
title = {The Chemical Translation Service--a web-based tool to improve standardization of metabolomic reports.},
author = {Wohlgemuth, Gert and Haldiya, Pradeep Kumar and Willighagen, Egon and Kind, Tobias and Fiehn, Oliver},
pages = {2647-2648},
url = {http://dx.doi.org/10.1093/bioinformatics/btq476},
year = {2010},
month = {oct},
day = {15},
urldate = {2018-01-29},
journal = {Bioinformatics},
volume = {26},
number = {20},
doi = {10.1093/bioinformatics/btq476},
pmid = {20829444},
pmcid = {PMC2951090},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: Metabolomic publications and databases use different database identifiers or even trivial names which disable queries across databases or between studies. The best way to annotate metabolites is by chemical structures, encoded by the International Chemical Identifier code ({InChI}) or {InChIKey}. We have implemented a web-based Chemical Translation Service that performs batch conversions of the most common compound identifiers, including {CAS}, {CHEBI}, compound formulas, Human Metabolome Database {HMDB}, {InChI}, {InChIKey}, {IUPAC} name, {KEGG}, {LipidMaps}, {PubChem} {CID}+{SID}, {SMILES} and chemical synonym names. Batch conversion downloads of 1410 {CIDs} are performed in 2.5 min. Structures are automatically displayed. {IMPLEMENTATION}: The software was implemented in Groovy and {JAVA}, the web frontend was implemented in {GRAILS} and the database used was {PostgreSQL}. {AVAILABILITY}: The source code and an online web interface are freely available. Chemical Translation Service ({CTS}): http://cts.fiehnlab.ucdavis.edu {CONTACT}: ofiehn@ucdavis.edu}
}
@article{gavai_2015,
title = {Using bioconductor package {BiGGR} for metabolic flux estimation based on gene expression changes in brain.},
author = {Gavai, Anand K and Supandi, Farahaniza and Hettling, Hannes and Murrell, Paul and Leunissen, Jack A M and van Beek, Johannes H G M},
pages = {e0119016},
url = {http://dx.doi.org/10.1371/journal.pone.0119016},
year = {2015},
month = {mar},
day = {25},
urldate = {2019-06-18},
journal = {Plos One},
volume = {10},
number = {3},
doi = {10.1371/journal.pone.0119016},
pmid = {25806817},
pmcid = {PMC4373785},
f1000-projects = {shared citations},
abstract = {Predicting the distribution of metabolic fluxes in biochemical networks is of major interest in systems biology. Several databases provide metabolic reconstructions for different organisms. Software to analyze flux distributions exists, among others for the proprietary {MATLAB} environment. Given the large user community for the R computing environment, a simple implementation of flux analysis in R appears desirable and will facilitate easy interaction with computational tools to handle gene expression data. We extended the R software package {BiGGR}, an implementation of metabolic flux analysis in R. {BiGGR} makes use of public metabolic reconstruction databases, and contains the {BiGG} database and the reconstruction of human metabolism Recon2 as Systems Biology Markup Language ({SBML}) objects. Models can be assembled by querying the databases for pathways, genes or reactions of interest. Fluxes can then be estimated by maximization or minimization of an objective function using linear inverse modeling algorithms. Furthermore, {BiGGR} provides functionality to quantify the uncertainty in flux estimates by sampling the constrained multidimensional flux space. As a result, ensembles of possible flux configurations are constructed that agree with measured data within precision limits. {BiGGR} also features automatic visualization of selected parts of metabolic networks using hypergraphs, with hyperedge widths proportional to estimated flux values. {BiGGR} supports import and export of models encoded in {SBML} and is therefore interoperable with different modeling and analysis tools. As an application example, we calculated the flux distribution in healthy human brain using a model of central carbon metabolism. We introduce a new algorithm termed Least-squares with equalities and inequalities Flux Balance Analysis (Lsei-{FBA}) to predict flux changes from gene expression changes, for instance during disease. Our estimates of brain metabolic flux pattern with Lsei-{FBA} for Alzheimer's disease agree with independent measurements of cerebral metabolism in patients. This second version of {BiGGR} is available from Bioconductor.}
}
@article{hao_2012,
title = {{BATMAN}--an R package for the automated quantification of metabolites from nuclear magnetic resonance spectra using a Bayesian model.},
author = {Hao, Jie and Astle, William and De Iorio, Maria and Ebbels, Timothy M D},
pages = {2088-2090},
url = {http://dx.doi.org/10.1093/bioinformatics/bts308},
year = {2012},
month = {aug},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {28},
number = {15},
doi = {10.1093/bioinformatics/bts308},
pmid = {22635605},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Nuclear Magnetic Resonance ({NMR}) spectra are widely used in metabolomics to obtain metabolite profiles in complex biological mixtures. Common methods used to assign and estimate concentrations of metabolites involve either an expert manual peak fitting or extra pre-processing steps, such as peak alignment and binning. Peak fitting is very time consuming and is subject to human error. Conversely, alignment and binning can introduce artefacts and limit immediate biological interpretation of models. {RESULTS}: We present the Bayesian automated metabolite analyser for {NMR} spectra ({BATMAN}), an R package that deconvolutes peaks from one-dimensional {NMR} spectra, automatically assigns them to specific metabolites from a target list and obtains concentration estimates. The Bayesian model incorporates information on characteristic peak patterns of metabolites and is able to account for shifts in the position of peaks commonly seen in {NMR} spectra of biological samples. It applies a Markov chain Monte Carlo algorithm to sample from a joint posterior distribution of the model parameters and obtains concentration estimates with reduced error compared with conventional numerical integration and comparable to manual deconvolution by experienced spectroscopists. {AVAILABILITY} {AND} {IMPLEMENTATION}: http://www1.imperial.ac.uk/medicine/people/t.ebbels/ {CONTACT}: t.ebbels@imperial.ac.uk.}
}
@article{vu_2011,
title = {An integrated workflow for robust alignment and simplified quantitative analysis of {NMR} spectrometry data.},
author = {Vu, Trung N and Valkenborg, Dirk and Smets, Koen and Verwaest, Kim A and Dommisse, Roger and Lemière, Filip and Verschoren, Alain and Goethals, Bart and Laukens, Kris},
pages = {405},
url = {http://dx.doi.org/10.1186/1471-2105-12-405},
year = {2011},
month = {oct},
day = {20},
urldate = {2018-01-15},
journal = {{BMC} Bioinformatics},
volume = {12},
doi = {10.1186/1471-2105-12-405},
pmid = {22014236},
pmcid = {PMC3217056},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Nuclear magnetic resonance spectroscopy ({NMR}) is a powerful technique to reveal and compare quantitative metabolic profiles of biological tissues. However, chemical and physical sample variations make the analysis of the data challenging, and typically require the application of a number of preprocessing steps prior to data interpretation. For example, noise reduction, normalization, baseline correction, peak picking, spectrum alignment and statistical analysis are indispensable components in any {NMR} analysis pipeline. {RESULTS}: We introduce a novel suite of informatics tools for the quantitative analysis of {NMR} metabolomic profile data. The core of the processing cascade is a novel peak alignment algorithm, called hierarchical Cluster-based Peak Alignment ({CluPA}). The algorithm aligns a target spectrum to the reference spectrum in a top-down fashion by building a hierarchical cluster tree from peak lists of reference and target spectra and then dividing the spectra into smaller segments based on the most distant clusters of the tree. To reduce the computational time to estimate the spectral misalignment, the method makes use of Fast Fourier Transformation ({FFT}) cross-correlation. Since the method returns a high-quality alignment, we can propose a simple methodology to study the variability of the {NMR} spectra. For each aligned {NMR} data point the ratio of the between-group and within-group sum of squares ({BW}-ratio) is calculated to quantify the difference in variability between and within predefined groups of {NMR} spectra. This differential analysis is related to the calculation of the F-statistic or a one-way {ANOVA}, but without distributional assumptions. Statistical inference based on the {BW}-ratio is achieved by bootstrapping the null distribution from the experimental data. {CONCLUSIONS}: The workflow performance was evaluated using a previously published dataset. Correlation maps, spectral and grey scale plots show clear improvements in comparison to other methods, and the down-to-earth quantitative analysis works well for the {CluPA}-aligned spectra. The whole workflow is embedded into a modular and statistically sound framework that is implemented as an R package called "speaq" ("spectrum alignment and quantitation"), which is freely available from http://code.google.com/p/speaq/.}
}
@article{haug_2013,
title = {{MetaboLights}--an open-access general-purpose repository for metabolomics studies and associated meta-data.},
author = {Haug, Kenneth and Salek, Reza M and Conesa, Pablo and Hastings, Janna and de Matos, Paula and Rijnbeek, Mark and Mahendraker, Tejasvi and Williams, Mark and Neumann, Steffen and Rocca-Serra, Philippe and Maguire, Eamonn and González-Beltrán, Alejandra and Sansone, Susanna-Assunta and Griffin, Julian L and Steinbeck, Christoph},
pages = {D781-6},
url = {http://dx.doi.org/10.1093/nar/gks1004},
year = {2013},
month = {jan},
urldate = {2018-01-13},
journal = {Nucleic Acids Research},
volume = {41},
number = {Database issue},
doi = {10.1093/nar/gks1004},
pmid = {23109552},
pmcid = {PMC3531110},
f1000-projects = {shared citations},
abstract = {{MetaboLights} (http://www.ebi.ac.uk/metabolights) is the first general-purpose, open-access repository for metabolomics studies, their raw experimental data and associated metadata, maintained by one of the major open-access data providers in molecular biology. Metabolomic profiling is an important tool for research into biological functioning and into the systemic perturbations caused by diseases, diet and the environment. The effectiveness of such methods depends on the availability of public open data across a broad range of experimental methods and conditions. The {MetaboLights} repository, powered by the open source {ISA} framework, is cross-species and cross-technique. It will cover metabolite structures and their reference spectra as well as their biological roles, locations, concentrations and raw data from metabolic experiments. Studies automatically receive a stable unique accession number that can be used as a publication reference (e.g. {MTBLS1}). At present, the repository includes 15 submitted studies, encompassing 93 protocols for 714 assays, and span over 8 different species including human, Caenorhabditis elegans, Mus musculus and Arabidopsis thaliana. Eight hundred twenty-seven of the metabolites identified in these studies have been mapped to {ChEBI}. These studies cover a variety of techniques, including {NMR} spectroscopy and mass spectrometry.}
}
@article{fuhrer_2011,
title = {High-throughput, accurate mass metabolome profiling of cellular extracts by flow injection-time-of-flight mass spectrometry.},
author = {Fuhrer, Tobias and Heer, Dominik and Begemann, Boris and Zamboni, Nicola},
pages = {7074-7080},
url = {http://dx.doi.org/10.1021/ac201267k},
year = {2011},
month = {sep},
day = {15},
urldate = {2016-11-02},
journal = {Analytical Chemistry},
volume = {83},
number = {18},
doi = {10.1021/ac201267k},
pmid = {21830798},
f1000-projects = {shared citations},
abstract = {Direct injection of samples on high-resolving mass spectrometers is an effective way to maximize analytical throughput and yet allow analyte discrimination in complex samples by mass-to-charge ratio. We present a platform of flow injection electrospray-time-of-flight mass spectrometry to profile small molecules in \textgreater1400 biological extracts per day at native mass resolution. We comprehensively benchmark the performance with more than 5000 injections of chemically defined standards and Escherichia coli cellular extracts obtained from miniscale cultivations. For at least 90\% of tested compounds, we attain a linear response over 3 decades of concentration, interday coefficient of variation of \textless20\%, and a mass accuracy of \textless0.001 amu. In polar Escherichia coli fractions, we reproducibly detected \textgreater1500 distinct ions in each mode. The accurate mass and correlation analysis enabled one to assign with good confidence 400-800 ions to electrospray derivatives of metabolites listed in the genome-wide reconstruction of Escherichia coli metabolism. Hence, we attain a coverage of about one-quarter of the total number of compounds listed in the reconstruction. Finally, we present an exemplary screen with Escherichia coli deletion mutants to show the exquisite capacity of the platform to identify lesions in primary metabolism at high throughputs.}
}
@article{wang_2016,
title = {Sharing and community curation of mass spectrometry data with Global Natural Products Social Molecular Networking.},
author = {Wang, Mingxun and Carver, Jeremy J and Phelan, Vanessa V and Sanchez, Laura M and Garg, Neha and Peng, Yao and Nguyen, Don Duy and Watrous, Jeramie and Kapono, Clifford A and Luzzatto-Knaan, Tal and Porto, Carla and Bouslimani, Amina and Melnik, Alexey V and Meehan, Michael J and Liu, Wei-Ting and Crüsemann, Max and Boudreau, Paul D and Esquenazi, Eduardo and Sandoval-Calderón, Mario and Kersten, Roland D and Pace, Laura A and Quinn, Robert A and Duncan, Katherine R and Hsu, Cheng-Chih and Floros, Dimitrios J and Gavilan, Ronnie G and Kleigrewe, Karin and Northen, Trent and Dutton, Rachel J and Parrot, Delphine and Carlson, Erin E and Aigle, Bertrand and Michelsen, Charlotte F and Jelsbak, Lars and Sohlenkamp, Christian and Pevzner, Pavel and Edlund, Anna and {McLean}, Jeffrey and Piel, Jörn and Murphy, Brian T and Gerwick, Lena and Liaw, Chih-Chuang and Yang, Yu-Liang and Humpf, Hans-Ulrich and Maansson, Maria and Keyzers, Robert A and Sims, Amy C and Johnson, Andrew R and Sidebottom, Ashley M and Sedio, Brian E and Klitgaard, Andreas and Larson, Charles B and P, Cristopher A Boya and Torres-Mendoza, Daniel and Gonzalez, David J and Silva, Denise B and Marques, Lucas M and Demarque, Daniel P and Pociute, Egle and O'Neill, Ellis C and Briand, Enora and Helfrich, Eric J N and Granatosky, Eve A and Glukhov, Evgenia and Ryffel, Florian and Houson, Hailey and Mohimani, Hosein and Kharbush, Jenan J and Zeng, Yi and Vorholt, Julia A and Kurita, Kenji L and Charusanti, Pep and {McPhail}, Kerry L and Nielsen, Kristian Fog and Vuong, Lisa and Elfeki, Maryam and Traxler, Matthew F and Engene, Niclas and Koyama, Nobuhiro and Vining, Oliver B and Baric, Ralph and Silva, Ricardo R and Mascuch, Samantha J and Tomasi, Sophie and Jenkins, Stefan and Macherla, Venkat and Hoffman, Thomas and Agarwal, Vinayak and Williams, Philip G and Dai, Jingqui and Neupane, Ram and Gurr, Joshua and Rodríguez, Andrés M C and Lamsa, Anne and Zhang, Chen and Dorrestein, Kathleen and Duggan, Brendan M and Almaliti, Jehad and Allard, Pierre-Marie and Phapale, Prasad and Nothias, Louis-Felix and Alexandrov, Theodore and Litaudon, Marc and Wolfender, Jean-Luc and Kyle, Jennifer E and Metz, Thomas O and Peryea, Tyler and Nguyen, Dac-Trung and {VanLeer}, Danielle and Shinn, Paul and Jadhav, Ajit and Müller, Rolf and Waters, Katrina M and Shi, Wenyuan and Liu, Xueting and Zhang, Lixin and Knight, Rob and Jensen, Paul R and Palsson, Bernhard O and Pogliano, Kit and Linington, Roger G and Gutiérrez, Marcelino and Lopes, Norberto P and Gerwick, William H and Moore, Bradley S and Dorrestein, Pieter C and Bandeira, Nuno},
pages = {828-837},
url = {http://dx.doi.org/10.1038/nbt.3597},
year = {2016},
month = {aug},
day = {9},
urldate = {2019-07-22},
journal = {Nature Biotechnology},
volume = {34},
number = {8},
doi = {10.1038/nbt.3597},
pmid = {27504778},
pmcid = {PMC5321674},
f1000-projects = {shared citations},
abstract = {The potential of the diverse chemistries present in natural products ({NP}) for biotechnology and medicine remains untapped because {NP} databases are not searchable with raw data and the {NP} community has no way to share data other than in published papers. Although mass spectrometry ({MS}) techniques are well-suited to high-throughput characterization of {NP}, there is a pressing need for an infrastructure to enable sharing and curation of data. We present Global Natural Products Social Molecular Networking ({GNPS}; http://gnps.ucsd.edu), an open-access knowledge base for community-wide organization and sharing of raw, processed or identified tandem mass ({MS}/{MS}) spectrometry data. In {GNPS}, crowdsourced curation of freely available community-wide reference {MS} libraries will underpin improved annotations. Data-driven social-networking should facilitate identification of spectra and foster collaborations. We also introduce the concept of 'living data' through continuous reanalysis of deposited data.}
}
@article{kramer_2014,
abstract = {Putting new findings into the context of available literature knowledge is one approach to deal with the surge of high-throughput data results. Furthermore, prior knowledge can increase the performance and stability of bioinformatic algorithms, for example, methods for network reconstruction. In this review, we examine software packages for the statistical computing framework R, which enable the integration of pathway data for further bioinformatic analyses. Different approaches to integrate and visualize pathway data are identified and packages are stratified concerning their features according to a number of different aspects: data import strategies, the extent of available data, dependencies on external tools, integration with further analysis steps and visualization options are considered. A total of 12 packages integrating pathway data are reviewed in this manuscript. These are supplemented by five R-specific packages for visualization and six connector packages, which provide access to external tools.},
author = {Kramer, Frank and Bayerlov{\'{a}}, Michaela and Bei{\ss}barth, Tim},
doi = {10.3390/biology3010085},
file = {:D$\backslash$:/Dokumenter/papers/Kramer, Bayerlov{\'{a}}, Bei{\ss}barth/2014/Kramer, Bayerlov{\'{a}}, Bei{\ss}barth - 2014 - R-based software for the integration of pathway data into bioinformatic algorithms.pdf:pdf},
issn = {2079-7737},
journal = {Biology},
month = {feb},
number = {1},
pages = {85--100},
pmid = {24833336},
publisher = {Multidisciplinary Digital Publishing Institute  (MDPI)},
title = {{R-based software for the integration of pathway data into bioinformatic algorithms.}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/24833336 http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC4009765},
volume = {3},
year = {2014}
}
@article{wang_2009,
title = {{PubChem}: a public information system for analyzing bioactivities of small molecules.},
author = {Wang, Yanli and Xiao, Jewen and Suzek, Tugba O and Zhang, Jian and Wang, Jiyao and Bryant, Stephen H},
pages = {W623-33},
url = {http://dx.doi.org/10.1093/nar/gkp456},
year = {2009},
month = {jul},
urldate = {2018-01-29},
journal = {Nucleic Acids Research},
volume = {37},
number = {Web Server issue},
doi = {10.1093/nar/gkp456},
pmid = {19498078},
pmcid = {PMC2703903},
f1000-projects = {shared citations},
abstract = {{PubChem} (http://pubchem.ncbi.nlm.nih.gov) is a public repository for biological properties of small molecules hosted by the {US} National Institutes of Health ({NIH}). {PubChem} {BioAssay} database currently contains biological test results for more than 700 000 compounds. The goal of {PubChem} is to make this information easily accessible to biomedical researchers. In this work, we present a set of web servers to facilitate and optimize the utility of biological activity information within {PubChem}. These web-based services provide tools for rapid data retrieval, integration and comparison of biological screening results, exploratory structure-activity analysis, and target selectivity examination. This article reviews these bioactivity analysis tools and discusses their uses. Most of the tools described in this work can be directly accessed at http://pubchem.ncbi.nlm.nih.gov/assay/. {URLs} for accessing other tools described in this work are specified individually.}
}
@article{castro_2012,
title = {{RedeR}: R/Bioconductor package for representing modular structures, nested networks and multiple levels of hierarchical associations.},
author = {Castro, Mauro A A and Wang, Xin and Fletcher, Michael N C and Meyer, Kerstin B and Markowetz, Florian},
pages = {R29},
url = {http://dx.doi.org/10.1186/gb-2012-13-4-r29},
year = {2012},
month = {apr},
day = {24},
urldate = {2019-05-03},
journal = {Genome Biology},
volume = {13},
number = {4},
doi = {10.1186/gb-2012-13-4-r29},
pmid = {22531049},
pmcid = {PMC3446303},
f1000-projects = {shared citations},
abstract = {Visualization and analysis of molecular networks are both central to systems biology. However, there still exists a large technological gap between them, especially when assessing multiple network levels or hierarchies. Here we present {RedeR}, an R/Bioconductor package combined with a Java core engine for representing modular networks. The functionality of {RedeR} is demonstrated in two different scenarios: hierarchical and modular organization in gene co-expression networks and nested structures in time-course gene expression subnetworks. Our results demonstrate {RedeR} as a new framework to deal with the multiple network levels that are inherent to complex biological systems. {RedeR} is available from http://bioconductor.org/packages/release/bioc/html/{RedeR}.html.}
}
@article{kuhn_2008,
title = {Building Predictive Models in R Using the caret Package},
author = {Kuhn, Max},
url = {http://www.jstatsoft.org/v28/i05/},
year = {2008},
month = {oct},
day = {11},
urldate = {2018-01-13},
journal = {Journal of statistical software},
volume = {28},
number = {5},
issn = {1548-7660},
doi = {10.18637/jss.v028.i05},
f1000-projects = {shared citations}
}
@article{kim_2011,
title = {Smith-Waterman peak alignment for comprehensive two-dimensional gas chromatography-mass spectrometry.},
author = {Kim, Seongho and Koo, Imhoi and Fang, Aiqin and Zhang, Xiang},
pages = {235},
url = {http://dx.doi.org/10.1186/1471-2105-12-235},
year = {2011},
month = {jun},
day = {15},
urldate = {2018-01-17},
journal = {{BMC} Bioinformatics},
volume = {12},
doi = {10.1186/1471-2105-12-235},
pmid = {21676240},
pmcid = {PMC3133553},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Comprehensive two-dimensional gas chromatography coupled with mass spectrometry ({GC} × {GC}-{MS}) is a powerful technique which has gained increasing attention over the last two decades. The {GC} × {GC}-{MS} provides much increased separation capacity, chemical selectivity and sensitivity for complex sample analysis and brings more accurate information about compound retention times and mass spectra. Despite these advantages, the retention times of the resolved peaks on the two-dimensional gas chromatographic columns are always shifted due to experimental variations, introducing difficulty in the data processing for metabolomics analysis. Therefore, the retention time variation must be adjusted in order to compare multiple metabolic profiles obtained from different conditions. {RESULTS}: We developed novel peak alignment algorithms for both homogeneous (acquired under the identical experimental conditions) and heterogeneous (acquired under the different experimental conditions) {GC} × {GC}-{MS} data using modified Smith-Waterman local alignment algorithms along with mass spectral similarity. Compared with literature reported algorithms, the proposed algorithms eliminated the detection of landmark peaks and the usage of retention time transformation. Furthermore, an automated peak alignment software package was established by implementing a likelihood function for optimal peak alignment. {CONCLUSIONS}: The proposed Smith-Waterman local alignment-based algorithms are capable of aligning both the homogeneous and heterogeneous data of multiple {GC} × {GC}-{MS} experiments without the transformation of retention times and the selection of landmark peaks. An optimal version of the {SW}-based algorithms was also established based on the associated likelihood function for the automatic peak alignment. The proposed alignment algorithms outperform the literature reported alignment method by analyzing the experiment data of a mixture of compound standards and a metabolite extract of mouse plasma with spiked-in compound standards.}
}
@article{kannan_2016,
title = {Public data and open source tools for multi-assay genomic investigation of disease.},
author = {Kannan, Lavanya and Ramos, Marcel and Re, Angela and El-Hachem, Nehme and Safikhani, Zhaleh and Gendoo, Deena M A and Davis, Sean and Gomez-Cabrero, David and Castelo, Robert and Hansen, Kasper D and Carey, Vincent J and Morgan, Martin and Culhane, Aedín C and Haibe-Kains, Benjamin and Waldron, Levi},
pages = {603-615},
url = {http://dx.doi.org/10.1093/bib/bbv080},
year = {2016},
urldate = {2018-01-13},
journal = {Briefings in Bioinformatics},
volume = {17},
number = {4},
doi = {10.1093/bib/bbv080},
pmid = {26463000},
pmcid = {PMC4945830},
f1000-projects = {shared citations},
abstract = {Molecular interrogation of a biological sample through {DNA} sequencing, {RNA} and {microRNA} profiling, proteomics and other assays, has the potential to provide a systems level approach to predicting treatment response and disease progression, and to developing precision therapies. Large publicly funded projects have generated extensive and freely available multi-assay data resources; however, bioinformatic and statistical methods for the analysis of such experiments are still nascent. We review multi-assay genomic data resources in the areas of clinical oncology, pharmacogenomics and other perturbation experiments, population genomics and regulatory genomics and other areas, and tools for data acquisition. Finally, we review bioinformatic tools that are explicitly geared toward integrative genomic data visualization and analysis. This review provides starting points for accessing publicly available data and tools to support development of needed integrative methods. \copyright The Author 2015. Published by Oxford University Press.}
}
@article{uppal_2013,
title = {{xMSanalyzer}: automated pipeline for improved feature detection and downstream analysis of large-scale, non-targeted metabolomics data.},
author = {Uppal, Karan and Soltow, Quinlyn A and Strobel, Frederick H and Pittard, W Stephen and Gernert, Kim M and Yu, Tianwei and Jones, Dean P},
pages = {15},
url = {http://dx.doi.org/10.1186/1471-2105-14-15},
year = {2013},
month = {jan},
day = {16},
urldate = {2018-01-15},
journal = {{BMC} Bioinformatics},
volume = {14},
doi = {10.1186/1471-2105-14-15},
pmid = {23323971},
pmcid = {PMC3562220},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Detection of low abundance metabolites is important for de novo mapping of metabolic pathways related to diet, microbiome or environmental exposures. Multiple algorithms are available to extract m/z features from liquid chromatography-mass spectral data in a conservative manner, which tends to preclude detection of low abundance chemicals and chemicals found in small subsets of samples. The present study provides software to enhance such algorithms for feature detection, quality assessment, and annotation. {RESULTS}: {xMSanalyzer} is a set of utilities for automated processing of metabolomics data. The utilites can be classified into four main modules to: 1) improve feature detection for replicate analyses by systematic re-extraction with multiple parameter settings and data merger to optimize the balance between sensitivity and reliability, 2) evaluate sample quality and feature consistency, 3) detect feature overlap between datasets, and 4) characterize high-resolution m/z matches to small molecule metabolites and biological pathways using multiple chemical databases. The package was tested with plasma samples and shown to more than double the number of features extracted while improving quantitative reliability of detection. {MS}/{MS} analysis of a random subset of peaks that were exclusively detected using {xMSanalyzer} confirmed that the optimization scheme improves detection of real metabolites. {CONCLUSIONS}: {xMSanalyzer} is a package of utilities for data extraction, quality control assessment, detection of overlapping and unique metabolites in multiple datasets, and batch annotation of metabolites. The program was designed to integrate with existing packages such as {apLCMS} and {XCMS}, but the framework can also be used to enhance data extraction for other {LC}/{MS} data software.}
}
@article{fukushima_2013,
title = {{DiffCorr}: an R package to analyze and visualize differential correlations in biological networks.},
author = {Fukushima, Atsushi},
pages = {209-214},
url = {http://dx.doi.org/10.1016/j.gene.2012.11.028},
year = {2013},
month = {apr},
day = {10},
urldate = {2019-04-30},
journal = {Gene},
volume = {518},
number = {1},
doi = {10.1016/j.gene.2012.11.028},
pmid = {23246976},
f1000-projects = {shared citations},
abstract = {Large-scale "omics" data, such as microarrays, can be used to infer underlying cellular regulatory networks in organisms, enabling us to better understand the molecular basis of disease and important traits. Correlation approaches, such as a hierarchical cluster analysis, have been widely used to analyze omics data. In addition to the changes in the mean levels of molecules in the omics data, it is important to know about the changes in the correlation relationship among molecules between 2 experimental conditions. The development of a tool to identify differential correlation patterns in omics data in an efficient and unbiased manner is therefore desirable. We developed the {DiffCorr} package, a simple method for identifying pattern changes between 2 experimental conditions in correlation networks, which builds on a commonly used association measure, such as Pearson's correlation coefficient. {DiffCorr} calculates correlation matrices for each dataset, identifies the first principal component-based "eigen-molecules" in the correlation networks, and tests differential correlation between the 2 groups based on Fisher's z-test. We illustrated its utility by demonstrating biologically relevant, differentially correlated molecules in transcriptome coexpression and metabolite-to-metabolite correlation networks. {DiffCorr} can explore differential correlations between 2 conditions in the context of post-genomics data types, namely transcriptomics and metabolomics. {DiffCorr} is simple to use in calculating differential correlations and is suitable for the first step towards inferring causal relationships and detecting biomarker candidates. The package can be downloaded from the following website: http://diffcorr.sourceforge.net/. Copyright \copyright 2012 Elsevier B.V. All rights reserved.}
}
@article{benton_2008,
title = {{XCMS2}: processing tandem mass spectrometry data for metabolite identification and structural characterization.},
author = {Benton, H P and Wong, D M and Trauger, S A and Siuzdak, G},
pages = {6382-6389},
url = {http://dx.doi.org/10.1021/ac800795f},
year = {2008},
month = {aug},
day = {15},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {80},
number = {16},
doi = {10.1021/ac800795f},
pmid = {18627180},
pmcid = {PMC2728033},
f1000-projects = {shared citations},
abstract = {Mass spectrometry based metabolomics represents a new area for bioinformatics technology development. While the computational tools currently available such as {XCMS} statistically assess and rank {LC}-{MS} features, they do not provide information about their structural identity. {XCMS}(2) is an open source software package which has been developed to automatically search tandem mass spectrometry ({MS}/{MS}) data against high quality experimental {MS}/{MS} data from known metabolites contained in a reference library ({METLIN}). Scoring of hits is based on a "shared peak count" method that identifies masses of fragment ions shared between the analytical and reference {MS}/{MS} spectra. Another functional component of {XCMS}(2) is the capability of providing structural information for unknown metabolites, which are not in the {METLIN} database. This "similarity search" algorithm has been developed to detect possible structural motifs in the unknown metabolite which may produce characteristic fragment ions and neutral losses to related reference compounds contained in {METLIN}, even if the precursor masses are not the same.}
}
@article{guha_2007,
title = {Chemical Informatics Functionality in R},
author = {Guha, Rajarshi},
url = {http://www.jstatsoft.org/v18/i05/},
year = {2007},
urldate = {2018-01-13},
journal = {Journal of statistical software},
volume = {18},
number = {5},
issn = {1548-7660},
doi = {10.18637/jss.v018.i05},
f1000-projects = {shared citations}
}
@article{stekhoven_2012,
title = {{MissForest}--non-parametric missing value imputation for mixed-type data.},
author = {Stekhoven, Daniel J and Bühlmann, Peter},
pages = {112-118},
url = {http://dx.doi.org/10.1093/bioinformatics/btr597},
year = {2012},
month = {jan},
day = {1},
urldate = {2019-07-30},
journal = {Bioinformatics},
volume = {28},
number = {1},
doi = {10.1093/bioinformatics/btr597},
pmid = {22039212},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Modern data acquisition based on high-throughput technology is often facing the problem of missing data. Algorithms commonly used in the analysis of such large-scale data often depend on a complete set. Missing value imputation offers a solution to this problem. However, the majority of available imputation methods are restricted to one type of variable only: continuous or categorical. For mixed-type data, the different types are usually handled separately. Therefore, these methods ignore possible relations between variable types. We propose a non-parametric method which can cope with different types of variables simultaneously. {RESULTS}: We compare several state of the art methods for the imputation of missing values. We propose and evaluate an iterative imputation method ({missForest}) based on a random forest. By averaging over many unpruned classification or regression trees, random forest intrinsically constitutes a multiple imputation scheme. Using the built-in out-of-bag error estimates of random forest, we are able to estimate the imputation error without the need of a test set. Evaluation is performed on multiple datasets coming from a diverse selection of biological fields with artificially introduced missing values ranging from 10\% to 30\%. We show that {missForest} can successfully handle missing values, particularly in datasets including different types of variables. In our comparative study, {missForest} outperforms other methods of imputation especially in data settings where complex interactions and non-linear relations are suspected. The out-of-bag imputation error estimates of {missForest} prove to be adequate in all settings. Additionally, {missForest} exhibits attractive computational efficiency and can cope with high-dimensional data. {AVAILABILITY}: The package {missForest} is freely available from http://stat.ethz.ch/{CRAN}/. {CONTACT}: stekhoven@stat.math.ethz.ch; buhlmann@stat.math.ethz.ch}
}
@article{kanehisa_2017,
title = {{KEGG}: new perspectives on genomes, pathways, diseases and drugs.},
author = {Kanehisa, Minoru and Furumichi, Miho and Tanabe, Mao and Sato, Yoko and Morishima, Kanae},
pages = {D353-D361},
url = {http://dx.doi.org/10.1093/nar/gkw1092},
year = {2017},
month = {jan},
day = {4},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {45},
number = {D1},
doi = {10.1093/nar/gkw1092},
pmid = {27899662},
pmcid = {PMC5210567},
f1000-projects = {shared citations},
abstract = {{KEGG} (http://www.kegg.jp/ or http://www.genome.jp/kegg/) is an encyclopedia of genes and genomes. Assigning functional meanings to genes and genomes both at the molecular and higher levels is the primary objective of the {KEGG} database project. Molecular-level functions are stored in the {KO} ({KEGG} Orthology) database, where each {KO} is defined as a functional ortholog of genes and proteins. Higher-level functions are represented by networks of molecular interactions, reactions and relations in the forms of {KEGG} pathway maps, {BRITE} hierarchies and {KEGG} modules. In the past the {KO} database was developed for the purpose of defining nodes of molecular networks, but now the content has been expanded and the quality improved irrespective of whether or not the {KOs} appear in the three molecular network databases. The newly introduced addendum category of the {GENES} database is a collection of individual proteins whose functions are experimentally characterized and from which an increasing number of {KOs} are defined. Furthermore, the {DISEASE} and {DRUG} databases have been improved by systematic analysis of drug labels for better integration of diseases and drugs with the {KEGG} molecular networks. {KEGG} is moving towards becoming a comprehensive knowledge base for both functional interpretation and practical application of genomic information. \copyright The Author(s) 2016. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{uppal_2017,
title = {{xMSannotator}: An R Package for Network-Based Annotation of High-Resolution Metabolomics Data.},
author = {Uppal, Karan and Walker, Douglas I and Jones, Dean P},
pages = {1063-1067},
url = {http://dx.doi.org/10.1021/acs.analchem.6b01214},
year = {2017},
month = {jan},
day = {17},
urldate = {2019-04-30},
journal = {Analytical Chemistry},
volume = {89},
number = {2},
doi = {10.1021/acs.analchem.6b01214},
pmid = {27977166},
pmcid = {PMC5447360},
f1000-projects = {shared citations},
abstract = {Improved analytical technologies and data extraction algorithms enable detection of \textgreater10 000 reproducible signals by liquid chromatography-high-resolution mass spectrometry, creating a bottleneck in chemical identification. In principle, measurement of more than one million chemicals would be possible if algorithms were available to facilitate utilization of the raw mass spectrometry data, especially low-abundance metabolites. Here we describe an automated computational framework to annotate ions for possible chemical identity using a multistage clustering algorithm in which metabolic pathway associations are used along with intensity profiles, retention time characteristics, mass defect, and isotope/adduct patterns. The algorithm uses high-resolution mass spectrometry data for a series of samples with common properties and publicly available chemical, metabolic, and environmental databases to assign confidence levels to annotation results. Evaluation results show that the algorithm achieves an F1-measure of 0.8 for a data set with known targets and is more robust than previously reported results for cases when database size is much greater than the actual number of metabolites. {MS}/{MS} evaluation of a set of randomly selected 210 metabolites annotated using {xMSannotator} in an untargeted metabolomics human data set shows that 80\% of features with high or medium confidence scores have ion dissociation patterns consistent with the {xMSannotator} annotation. The algorithm has been incorporated into an R package, {xMSannotator}, which includes utilities for querying local or online databases such as {ChemSpider}, {KEGG}, {HMDB}, {T3DB}, and {LipidMaps}.}
}
@article{nishimura_2001,
title = {BioCarta},
author = {Nishimura, Darryl},
pages = {117-120},
url = {http://online.liebertpub.com/doi/abs/10.1089/152791601750294344},
year = {2001},
month = {jun},
urldate = {2019-08-06},
journal = {Biotech Software \& Internet Report},
volume = {2},
number = {3},
issn = {1527-9162},
doi = {10.1089/152791601750294344},
f1000-projects = {shared citations}
}
@article{edmands_2017,
title = {{compMS2Miner}: An Automatable Metabolite Identification, Visualization, and Data-Sharing R Package for High-Resolution {LC}-{MS} Data Sets.},
author = {Edmands, William M B and Petrick, Lauren and Barupal, Dinesh K and Scalbert, Augustin and Wilson, Mark J and Wickliffe, Jeffrey K and Rappaport, Stephen M},
pages = {3919-3928},
url = {http://dx.doi.org/10.1021/acs.analchem.6b02394},
year = {2017},
month = {apr},
day = {4},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {89},
number = {7},
doi = {10.1021/acs.analchem.6b02394},
pmid = {28225587},
pmcid = {PMC6338221},
f1000-projects = {shared citations},
abstract = {A long-standing challenge of untargeted metabolomic profiling by ultrahigh-performance liquid chromatography-high-resolution mass spectrometry ({UHPLC}-{HRMS}) is efficient transition from unknown mass spectral features to confident metabolite annotations. The {compMS2Miner} (Comprehensive {MS2} Miner) package was developed in the R language to facilitate rapid, comprehensive feature annotation using a peak-picker-output and {MS2} data files as inputs. The number of {MS2} spectra that can be collected during a metabolomic profiling experiment far outweigh the amount of time required for pain-staking manual interpretation; therefore, a degree of software workflow autonomy is required for broad-scale metabolite annotation. {CompMS2Miner} integrates many useful tools in a single workflow for metabolite annotation and also provides a means to overview the {MS2} data with a Web application {GUI} {compMS2Explorer} (Comprehensive {MS2} Explorer) that also facilitates data-sharing and transparency. The automatable {compMS2Miner} workflow consists of the following steps: (i) matching unknown {MS1} features to precursor {MS2} scans, (ii) filtration of spectral noise (dynamic noise filter), (iii) generation of composite mass spectra by multiple similar spectrum signal summation and redundant/contaminant spectra removal, (iv) interpretation of possible fragment ion substructure using an internal database, (v) annotation of unknowns with chemical and spectral databases with prediction of mammalian biotransformation metabolites, wrapper functions for in silico fragmentation software, nearest neighbor chemical similarity scoring, random forest based retention time prediction, text-mining based false positive removal/true positive ranking, chemical taxonomic prediction and differential evolution based global annotation score optimization, and (vi) network graph visualizations, data curation, and sharing are made possible via the {compMS2Explorer} application. Metabolite identities and comments can also be recorded using an interactive table within {compMS2Explorer}. The utility of the package is illustrated with a data set of blood serum samples from 7 diet induced obese ({DIO}) and 7 nonobese ({NO}) {C57BL}/{6J} mice, which were also treated with an antibiotic (streptomycin) to knockdown the gut microbiota. The results of fully autonomous and objective usage of {compMS2Miner} are presented here. All automatically annotated spectra output by the workflow are provided in the Supporting Information and can alternatively be explored as publically available {compMS2Explorer} applications for both positive and negative modes ( https://wmbedmands.shinyapps.io/{compMS2\_mouseSera\_POS} and https://wmbedmands.shinyapps.io/{compMS2\_mouseSera\_NEG} ). The workflow provided rapid annotation of a diversity of endogenous and gut microbially derived metabolites affected by both diet and antibiotic treatment, which conformed to previously published reports. Composite spectra (n = 173) were autonomously matched to entries of the Massbank of North America ({MoNA}) spectral repository. These experimental and virtual ({lipidBlast}) spectra corresponded to 29 common endogenous compound classes (e.g., 51 lysophosphatidylcholines spectra) and were then used to calculate the ranking capability of 7 individual scoring metrics. It was found that an average of the 7 individual scoring metrics provided the most effective weighted average ranking ability of 3 for the {MoNA} matched spectra in spite of potential risk of false positive annotations emerging from automation. Minor structural differences such as relative carbon-carbon double bond positions were found in several cases to affect the correct rank of the {MoNA} annotated metabolite. The latest release and an example workflow is available in the package vignette ( https://github.com/{WMBEdmands}/{compMS2Miner} ) and a version of the published application is available on the shinyapps.io site ( https://wmbedmands.shinyapps.io/{compMS2Example} ).}
}
@article{gromski_2015,
title = {A tutorial review: Metabolomics and partial least squares-discriminant analysis--a marriage of convenience or a shotgun wedding.},
author = {Gromski, Piotr S and Muhamadali, Howbeer and Ellis, David I and Xu, Yun and Correa, Elon and Turner, Michael L and Goodacre, Royston},
pages = {10-23},
url = {http://dx.doi.org/10.1016/j.aca.2015.02.012},
year = {2015},
month = {jun},
day = {16},
urldate = {2018-01-30},
journal = {Analytica Chimica Acta},
volume = {879},
doi = {10.1016/j.aca.2015.02.012},
pmid = {26002472},
f1000-projects = {shared citations},
abstract = {The predominance of partial least squares-discriminant analysis ({PLS}-{DA}) used to analyze metabolomics datasets (indeed, it is the most well-known tool to perform classification and regression in metabolomics), can be said to have led to the point that not all researchers are fully aware of alternative multivariate classification algorithms. This may in part be due to the widespread availability of {PLS}-{DA} in most of the well-known statistical software packages, where its implementation is very easy if the default settings are used. In addition, one of the perceived advantages of {PLS}-{DA} is that it has the ability to analyze highly collinear and noisy data. Furthermore, the calibration model is known to provide a variety of useful statistics, such as prediction accuracy as well as scores and loadings plots. However, this method may provide misleading results, largely due to a lack of suitable statistical validation, when used by non-experts who are not aware of its potential limitations when used in conjunction with metabolomics. This tutorial review aims to provide an introductory overview to several straightforward statistical methods such as principal component-discriminant function analysis ({PC}-{DFA}), support vector machines ({SVM}) and random forests ({RF}), which could very easily be used either to augment {PLS} or as alternative supervised learning methods to {PLS}-{DA}. These methods can be said to be particularly appropriate for the analysis of large, highly-complex data sets which are common output(s) in metabolomics studies where the numbers of variables often far exceed the number of samples. In addition, these alternative techniques may be useful tools for generating parsimonious models through feature selection and data reduction, as well as providing more propitious results. We sincerely hope that the general reader is left with little doubt that there are several promising and readily available alternatives to {PLS}-{DA}, to analyze large and highly complex data sets. Copyright \copyright 2015 Elsevier B.V. All rights reserved.}
}
@article{dieterle_2006,
title = {Probabilistic quotient normalization as robust method to account for dilution of complex biological mixtures. Application in {1H} {NMR} metabonomics.},
author = {Dieterle, Frank and Ross, Alfred and Schlotterbeck, Götz and Senn, Hans},
pages = {4281-4290},
url = {http://dx.doi.org/10.1021/ac051632c},
year = {2006},
month = {jul},
day = {1},
urldate = {2019-04-17},
journal = {Analytical Chemistry},
volume = {78},
number = {13},
doi = {10.1021/ac051632c},
pmid = {16808434},
f1000-projects = {shared citations},
abstract = {For the analysis of the spectra of complex biofluids, preprocessing methods play a crucial role in rendering the subsequent data analyses more robust and accurate. Normalization is a preprocessing method, which accounts for different dilutions of samples by scaling the spectra to the same virtual overall concentration. In the field of {1H} {NMR} metabonomics integral normalization, which scales spectra to the same total integral, is the de facto standard. In this work, it is shown that integral normalization is a suboptimal method for normalizing spectra from metabonomic studies. Especially strong metabonomic changes, evident as massive amounts of single metabolites in samples, significantly hamper the integral normalization resulting in incorrectly scaled spectra. The probabilistic quotient normalization is introduced in this work. This method is based on the calculation of a most probable dilution factor by looking at the distribution of the quotients of the amplitudes of a test spectrum by those of a reference spectrum. Simulated spectra, spectra of urine samples from a metabonomic study with cyclosporin-A as the active compound, and spectra of more than 4000 samples of control animals demonstrate that the probabilistic quotient normalization is by far more robust and more accurate than the widespread integral normalization and vector length normalization.}
}
@article{beckonert_2007,
title = {Metabolic profiling, metabolomic and metabonomic procedures for {NMR} spectroscopy of urine, plasma, serum and tissue extracts.},
author = {Beckonert, Olaf and Keun, Hector C and Ebbels, Timothy M D and Bundy, Jacob and Holmes, Elaine and Lindon, John C and Nicholson, Jeremy K},
pages = {2692-2703},
url = {http://dx.doi.org/10.1038/nprot.2007.376},
year = {2007},
urldate = {2019-08-10},
journal = {Nature Protocols},
volume = {2},
number = {11},
doi = {10.1038/nprot.2007.376},
pmid = {18007604},
f1000-projects = {shared citations},
abstract = {Metabolic profiling, metabolomic and metabonomic studies mainly involve the multicomponent analysis of biological fluids, tissue and cell extracts using {NMR} spectroscopy and/or mass spectrometry ({MS}). We summarize the main {NMR} spectroscopic applications in modern metabolic research, and provide detailed protocols for biofluid (urine, serum/plasma) and tissue sample collection and preparation, including the extraction of polar and lipophilic metabolites from tissues. {1H} {NMR} spectroscopic techniques such as standard {1D} spectroscopy, relaxation-edited, diffusion-edited and {2D} J-resolved pulse sequences are widely used at the analysis stage to monitor different groups of metabolites and are described here. They are often followed by more detailed statistical analysis or additional {2D} {NMR} analysis for biomarker discovery. The standard acquisition time per sample is 4-5 min for a simple {1D} spectrum, and both preparation and analysis can be automated to allow application to high-throughput screening for clinical diagnostic and toxicological studies, as well as molecular phenotyping and functional genomics.}
}
@article{du_2006,
title = {Improved peak detection in mass spectrum by incorporating continuous wavelet transform-based pattern matching.},
author = {Du, Pan and Kibbe, Warren A and Lin, Simon M},
pages = {2059-2065},
url = {http://dx.doi.org/10.1093/bioinformatics/btl355},
year = {2006},
month = {sep},
day = {1},
urldate = {2019-07-24},
journal = {Bioinformatics},
volume = {22},
number = {17},
doi = {10.1093/bioinformatics/btl355},
pmid = {16820428},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: A major problem for current peak detection algorithms is that noise in mass spectrometry ({MS}) spectra gives rise to a high rate of false positives. The false positive rate is especially problematic in detecting peaks with low amplitudes. Usually, various baseline correction algorithms and smoothing methods are applied before attempting peak detection. This approach is very sensitive to the amount of smoothing and aggressiveness of the baseline correction, which contribute to making peak detection results inconsistent between runs, instrumentation and analysis methods. {RESULTS}: Most peak detection algorithms simply identify peaks based on amplitude, ignoring the additional information present in the shape of the peaks in a spectrum. In our experience, 'true' peaks have characteristic shapes, and providing a shape-matching function that provides a 'goodness of fit' coefficient should provide a more robust peak identification method. Based on these observations, a continuous wavelet transform ({CWT})-based peak detection algorithm has been devised that identifies peaks with different scales and amplitudes. By transforming the spectrum into wavelet space, the pattern-matching problem is simplified and in addition provides a powerful technique for identifying and separating the signal from the spike noise and colored noise. This transformation, with the additional information provided by the {2D} {CWT} coefficients can greatly enhance the effective signal-to-noise ratio. Furthermore, with this technique no baseline removal or peak smoothing preprocessing steps are required before peak detection, and this improves the robustness of peak detection under a variety of conditions. The algorithm was evaluated with {SELDI}-{TOF} spectra with known polypeptide positions. Comparisons with two other popular algorithms were performed. The results show the {CWT}-based algorithm can identify both strong and weak peaks while keeping false positive rate low. {AVAILABILITY}: The algorithm is implemented in R and will be included as an open source module in the Bioconductor project.}
}
@article{cacciatore_2017,
title = {{KODAMA}: an R package for knowledge discovery and data mining.},
author = {Cacciatore, Stefano and Tenori, Leonardo and Luchinat, Claudio and Bennett, Phillip R and {MacIntyre}, David A},
pages = {621-623},
url = {http://dx.doi.org/10.1093/bioinformatics/btw705},
year = {2017},
month = {feb},
day = {15},
urldate = {2018-01-29},
journal = {Bioinformatics},
volume = {33},
number = {4},
doi = {10.1093/bioinformatics/btw705},
pmid = {27993774},
pmcid = {PMC5408808},
f1000-projects = {shared citations},
abstract = {Summary: {KODAMA}, a novel learning algorithm for unsupervised feature extraction, is specifically designed for analysing noisy and high-dimensional datasets. Here we present an R package of the algorithm with additional functions that allow improved interpretation of high-dimensional data. The package requires no additional software and runs on all major platforms. Availability and Implementation: {KODAMA} is freely available from the R archive {CRAN} ( http://cran.r-project.org ). The software is distributed under the {GNU} General Public License (version 3 or later). Contact: s.cacciatore@imperial.ac.uk. Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{hernndezdediego_2014,
title = {{STATegra} {EMS}: an Experiment Management System for complex next-generation omics experiments.},
author = {Hernández-de-Diego, Rafael and Boix-Chova, Noemi and Gómez-Cabrero, David and Tegner, Jesper and Abugessaisa, Imad and Conesa, Ana},
pages = {S9},
url = {http://dx.doi.org/10.1186/1752-0509-8-S2-S9},
year = {2014},
month = {mar},
day = {13},
urldate = {2019-07-22},
journal = {{BMC} Systems Biology},
volume = {8 Suppl 2},
doi = {10.1186/1752-0509-8-S2-S9},
pmid = {25033091},
pmcid = {PMC4101697},
f1000-projects = {shared citations},
abstract = {High-throughput sequencing assays are now routinely used to study different aspects of genome organization. As decreasing costs and widespread availability of sequencing enable more laboratories to use sequencing assays in their research projects, the number of samples and replicates in these experiments can quickly grow to several dozens of samples and thus require standardized annotation, storage and management of preprocessing steps. As a part of the {STATegra} project, we have developed an Experiment Management System ({EMS}) for high throughput omics data that supports different types of sequencing-based assays such as {RNA}-seq, {ChIP}-seq, Methyl-seq, etc, as well as proteomics and metabolomics data. The {STATegra} {EMS} provides metadata annotation of experimental design, samples and processing pipelines, as well as storage of different types of data files, from raw data to ready-to-use measurements. The system has been developed to provide research laboratories with a freely-available, integrated system that offers a simple and effective way for experiment annotation and tracking of analysis procedures.}
}
@article{gatto_2012,
title = {{MSnbase}-an R/Bioconductor package for isobaric tagged mass spectrometry data visualization, processing and quantitation.},
author = {Gatto, Laurent and Lilley, Kathryn S},
pages = {288-289},
url = {http://dx.doi.org/10.1093/bioinformatics/btr645},
year = {2012},
month = {jan},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {28},
number = {2},
doi = {10.1093/bioinformatics/btr645},
pmid = {22113085},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: {MSnbase} is an R/Bioconductor package for the analysis of quantitative proteomics experiments that use isobaric tagging. It provides an exploratory data analysis framework for reproducible research, allowing raw data import, quality control, visualization, data processing and quantitation. {MSnbase} allows direct integration of quantitative proteomics data with additional facilities for statistical analysis provided by the Bioconductor project. {AVAILABILITY}: {MSnbase} is implemented in R (version ≥ 2.13.0) and available at the Bioconductor web site (http://www.bioconductor.org/). Vignettes outlining typical workflows, input/output capabilities and detailing underlying infrastructure are included in the package.}
}
@article{wen_2017,
title = {{metaX}: a flexible and comprehensive software for processing metabolomics data.},
author = {Wen, Bo and Mei, Zhanlong and Zeng, Chunwei and Liu, Siqi},
pages = {183},
url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-017-1579-y},
year = {2017},
month = {mar},
day = {21},
urldate = {2019-07-01},
journal = {{BMC} Bioinformatics},
volume = {18},
number = {1},
issn = {1471-2105},
doi = {10.1186/s12859-017-1579-y},
pmid = {28327092},
pmcid = {PMC5361702},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Non-targeted metabolomics based on mass spectrometry enables high-throughput profiling of the metabolites in a biological sample. The large amount of data generated from mass spectrometry requires intensive computational processing for annotation of mass spectra and identification of metabolites. Computational analysis tools that are fully integrated with multiple functions and are easily operated by users who lack extensive knowledge in programing are needed in this research field. {RESULTS}: We herein developed an R package, {metaX}, that is capable of end-to-end metabolomics data analysis through a set of interchangeable modules. Specifically, {metaX} provides several functions, such as peak picking and annotation, data quality assessment, missing value imputation, data normalization, univariate and multivariate statistics, power analysis and sample size estimation, receiver operating characteristic analysis, biomarker selection, pathway annotation, correlation network analysis, and metabolite identification. In addition, {metaX} offers a web-based interface ( http://metax.genomics.cn ) for data quality assessment and normalization method evaluation, and it generates an {HTML}-based report with a visualized interface. The {metaX} utilities were demonstrated with a published metabolomics dataset on a large scale. The software is available for operation as either a web-based graphical user interface ({GUI}) or in the form of command line functions. The package and the example reports are available at http://metax.genomics.cn/ . {CONCLUSIONS}: The pipeline of {metaX} is platform-independent and is easy to use for analysis of metabolomics data generated from mass spectrometry.}
}
@article{taschuk_2017,
title = {Ten simple rules for making research software more robust.},
author = {Taschuk, Morgan and Wilson, Greg},
pages = {e1005412},
url = {http://dx.doi.org/10.1371/journal.pcbi.1005412},
year = {2017},
month = {apr},
day = {13},
urldate = {2018-01-25},
journal = {{PLoS} Computational Biology},
volume = {13},
number = {4},
doi = {10.1371/journal.pcbi.1005412},
pmid = {28407023},
pmcid = {PMC5390961},
f1000-projects = {shared citations},
abstract = {Software produced for research, published and otherwise, suffers from a number of common problems that make it difficult or impossible to run outside the original institution or even off the primary developer's computer. We present ten simple rules to make such software robust enough to be run by anyone, anywhere, and thereby delight your users and collaborators.}
}
@article{wanichthanarak_2017,
title = {Metabox: A toolbox for metabolomic data analysis, interpretation and integrative exploration.},
author = {Wanichthanarak, Kwanjeera and Fan, Sili and Grapov, Dmitry and Barupal, Dinesh Kumar and Fiehn, Oliver},
pages = {e0171046},
url = {http://dx.doi.org/10.1371/journal.pone.0171046},
year = {2017},
month = {jan},
day = {31},
urldate = {2019-04-30},
journal = {Plos One},
volume = {12},
number = {1},
doi = {10.1371/journal.pone.0171046},
pmid = {28141874},
pmcid = {PMC5283729},
f1000-projects = {shared citations},
abstract = {Similar to genomic and proteomic platforms, metabolomic data acquisition and analysis is becoming a routine approach for investigating biological systems. However, computational approaches for metabolomic data analysis and integration are still maturing. Metabox is a bioinformatics toolbox for deep phenotyping analytics that combines data processing, statistical analysis, functional analysis and integrative exploration of metabolomic data within proteomic and transcriptomic contexts. With the number of options provided in each analysis module, it also supports data analysis of other 'omic' families. The toolbox is an R-based web application, and it is freely available at http://kwanjeeraw.github.io/metabox/ under the {GPL}-3 license.}
}
@article{li_2013,
title = {Predicting network activity from high throughput metabolomics.},
author = {Li, Shuzhao and Park, Youngja and Duraisingham, Sai and Strobel, Frederick H and Khan, Nooruddin and Soltow, Quinlyn A and Jones, Dean P and Pulendran, Bali},
pages = {e1003123},
url = {http://dx.doi.org/10.1371/journal.pcbi.1003123},
year = {2013},
month = {jul},
day = {4},
urldate = {2019-06-11},
journal = {{PLoS} Computational Biology},
volume = {9},
number = {7},
doi = {10.1371/journal.pcbi.1003123},
pmid = {23861661},
pmcid = {PMC3701697},
f1000-projects = {shared citations},
abstract = {The functional interpretation of high throughput metabolomics by mass spectrometry is hindered by the identification of metabolites, a tedious and challenging task. We present a set of computational algorithms which, by leveraging the collective power of metabolic pathways and networks, predict functional activity directly from spectral feature tables without a priori identification of metabolites. The algorithms were experimentally validated on the activation of innate immune cells.}
}
@article{robinson_2007,
title = {A dynamic programming approach for the alignment of signal peaks in multiple gas chromatography-mass spectrometry experiments.},
author = {Robinson, Mark D and De Souza, David P and Keen, Woon Wai and Saunders, Eleanor C and {McConville}, Malcolm J and Speed, Terence P and Likić, Vladimir A},
pages = {419},
url = {http://dx.doi.org/10.1186/1471-2105-8-419},
year = {2007},
month = {oct},
day = {29},
urldate = {2019-04-26},
journal = {{BMC} Bioinformatics},
volume = {8},
doi = {10.1186/1471-2105-8-419},
pmid = {17963529},
pmcid = {PMC2194738},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Gas chromatography-mass spectrometry ({GC}-{MS}) is a robust platform for the profiling of certain classes of small molecules in biological samples. When multiple samples are profiled, including replicates of the same sample and/or different sample states, one needs to account for retention time drifts between experiments. This can be achieved either by the alignment of chromatographic profiles prior to peak detection, or by matching signal peaks after they have been extracted from chromatogram data matrices. Automated retention time correction is particularly important in non-targeted profiling studies. {RESULTS}: A new approach for matching signal peaks based on dynamic programming is presented. The proposed approach relies on both peak retention times and mass spectra. The alignment of more than two peak lists involves three steps: (1) all possible pairs of peak lists are aligned, and similarity of each pair of peak lists is estimated; (2) the guide tree is built based on the similarity between the peak lists; (3) peak lists are progressively aligned starting with the two most similar peak lists, following the guide tree until all peak lists are exhausted. When two or more experiments are performed on different sample states and each consisting of multiple replicates, peak lists within each set of replicate experiments are aligned first (within-state alignment), and subsequently the resulting alignments are aligned themselves (between-state alignment). When more than two sets of replicate experiments are present, the between-state alignment also employs the guide tree. We demonstrate the usefulness of this approach on {GC}-{MS} metabolic profiling experiments acquired on wild-type and mutant Leishmania mexicana parasites. {CONCLUSION}: We propose a progressive method to match signal peaks across multiple {GC}-{MS} experiments based on dynamic programming. A sensitive peak similarity function is proposed to balance peak retention time and peak mass spectra similarities. This approach can produce the optimal alignment between an arbitrary number of peak lists, and models explicitly within-state and between-state peak alignment. The accuracy of the proposed method was close to the accuracy of manually-curated peak matching, which required tens of man-hours for the analyzed data sets. The proposed approach may offer significant advantages for processing of high-throughput metabolomics data, especially when large numbers of experimental replicates and multiple sample states are analyzed.}
}
@article{christin_2013,
title = {A critical assessment of feature selection methods for biomarker discovery in clinical proteomics.},
author = {Christin, Christin and Hoefsloot, Huub C J and Smilde, Age K and Hoekman, B and Suits, Frank and Bischoff, Rainer and Horvatovich, Peter},
pages = {263-276},
url = {http://dx.doi.org/10.1074/mcp.M112.022566},
year = {2013},
month = {jan},
urldate = {2019-07-01},
journal = {Molecular \& Cellular Proteomics},
volume = {12},
number = {1},
doi = {10.1074/mcp.M112.022566},
pmid = {23115301},
pmcid = {PMC3536906},
f1000-projects = {shared citations},
abstract = {In this paper, we compare the performance of six different feature selection methods for {LC}-{MS}-based proteomics and metabolomics biomarker discovery-t test, the Mann-Whitney-Wilcoxon test (mww test), nearest shrunken centroid ({NSC}), linear support vector machine-recursive features elimination ({SVM}-{RFE}), principal component discriminant analysis ({PCDA}), and partial least squares discriminant analysis ({PLSDA})-using human urine and porcine cerebrospinal fluid samples that were spiked with a range of peptides at different concentration levels. The ideal feature selection method should select the complete list of discriminating features that are related to the spiked peptides without selecting unrelated features. Whereas many studies have to rely on classification error to judge the reliability of the selected biomarker candidates, we assessed the accuracy of selection directly from the list of spiked peptides. The feature selection methods were applied to data sets with different sample sizes and extents of sample class separation determined by the concentration level of spiked compounds. For each feature selection method and data set, the performance for selecting a set of features related to spiked compounds was assessed using the harmonic mean of the recall and the precision (f-score) and the geometric mean of the recall and the true negative rate (g-score). We conclude that the univariate t test and the mww test with multiple testing corrections are not applicable to data sets with small sample sizes (n = 6), but their performance improves markedly with increasing sample size up to a point (n \textgreater 12) at which they outperform the other methods. {PCDA} and {PLSDA} select small feature sets with high precision but miss many true positive features related to the spiked peptides. {NSC} strikes a reasonable compromise between recall and precision for all data sets independent of spiking level and number of samples. Linear {SVM}-{RFE} performs poorly for selecting features related to the spiked compounds, even though the classification error is relatively low.}
}
@article{wishart_2015,
title = {{T3DB}: the toxic exposome database.},
author = {Wishart, David and Arndt, David and Pon, Allison and Sajed, Tanvir and Guo, An Chi and Djoumbou, Yannick and Knox, Craig and Wilson, Michael and Liang, Yongjie and Grant, Jason and Liu, Yifeng and Goldansaz, Seyed Ali and Rappaport, Stephen M},
pages = {D928-34},
url = {http://dx.doi.org/10.1093/nar/gku1004},
year = {2015},
month = {jan},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {43},
number = {Database issue},
doi = {10.1093/nar/gku1004},
pmid = {25378312},
pmcid = {PMC4383875},
f1000-projects = {shared citations},
abstract = {The exposome is defined as the totality of all human environmental exposures from conception to death. It is often regarded as the complement to the genome, with the interaction between the exposome and the genome ultimately determining one's phenotype. The 'toxic exposome' is the complete collection of chronically or acutely toxic compounds to which humans can be exposed. Considerable interest in defining the toxic exposome has been spurred on by the realization that most human injuries, deaths and diseases are directly or indirectly caused by toxic substances found in the air, water, food, home or workplace. The Toxin-Toxin-Target Database ({T3DB}--www.t3db.ca) is a resource that was specifically designed to capture information about the toxic exposome. Originally released in 2010, the first version of {T3DB} contained data on nearly 2900 common toxic substances along with detailed information on their chemical properties, descriptions, targets, toxic effects, toxicity thresholds, sequences (for both targets and toxins), mechanisms and references. To more closely align itself with the needs of epidemiologists, toxicologists and exposome scientists, the latest release of {T3DB} has been substantially upgraded to include many more compounds (\textgreater3600), targets (\textgreater2000) and gene expression datasets (\textgreater15,000 genes). It now includes extensive data on 'normal' toxic compound concentrations in human biofluids as well as detailed chemical taxonomies, informative chemical ontologies and a large number of referential {NMR}, {MS}/{MS} and {GC}-{MS} spectra. This manuscript describes the most recent update to the {T3DB}, which was previously featured in the 2010 {NAR} Database Issue. \copyright The Author(s) 2014. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{balcke_2017,
title = {Multi-Omics of Tomato Glandular Trichomes Reveals Distinct Features of Central Carbon Metabolism Supporting High Productivity of Specialized Metabolites.},
author = {Balcke, Gerd U and Bennewitz, Stefan and Bergau, Nick and Athmer, Benedikt and Henning, Anja and Majovsky, Petra and Jiménez-Gómez, José M and Hoehenwarter, Wolfgang and Tissier, Alain},
pages = {960-983},
url = {http://dx.doi.org/10.1105/tpc.17.00060},
year = {2017},
month = {may},
urldate = {2019-05-13},
journal = {The Plant Cell},
volume = {29},
number = {5},
doi = {10.1105/tpc.17.00060},
pmid = {28408661},
pmcid = {PMC5466034},
f1000-projects = {shared citations},
abstract = {Glandular trichomes are metabolic cell factories with the capacity to produce large quantities of secondary metabolites. Little is known about the connection between central carbon metabolism and metabolic productivity for secondary metabolites in glandular trichomes. To address this gap in our knowledge, we performed comparative metabolomics, transcriptomics, proteomics, and {13C}-labeling of type {VI} glandular trichomes and leaves from a cultivated (Solanum lycopersicum {LA4024}) and a wild (Solanum habrochaites {LA1777}) tomato accession. Specific features of glandular trichomes that drive the formation of secondary metabolites could be identified. Tomato type {VI} trichomes are photosynthetic but acquire their carbon essentially from leaf sucrose. The energy and reducing power from photosynthesis are used to support the biosynthesis of secondary metabolites, while the comparatively reduced Calvin-Benson-Bassham cycle activity may be involved in recycling metabolic {CO2} Glandular trichomes cope with oxidative stress by producing high levels of polyunsaturated fatty acids, oxylipins, and glutathione. Finally, distinct mechanisms are present in glandular trichomes to increase the supply of precursors for the isoprenoid pathways. Particularly, the citrate-malate shuttle supplies cytosolic acetyl-{CoA} and plastidic glycolysis and malic enzyme support the formation of plastidic pyruvate. A model is proposed on how glandular trichomes achieve high metabolic productivity. \copyright 2017 American Society of Plant Biologists. All rights reserved.}
}
@article{shen_2016,
title = {Normalization and integration of large-scale metabolomics data using support vector regression},
author = {Shen, Xiaotao and Gong, Xiaoyun and Cai, Yuping and Guo, Yuan and Tu, Jia and Li, Hao and Zhang, Tao and Wang, Jialin and Xue, Fuzhong and Zhu, Zheng-Jiang},
url = {http://link.springer.com/10.1007/s11306-016-1026-5},
year = {2016},
month = {may},
urldate = {2018-01-17},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {12},
number = {5},
issn = {1573-3882},
doi = {10.1007/s11306-016-1026-5},
f1000-projects = {shared citations},
abstract = {Introduction Untargeted metabolomics studies for biomarker discovery often have hundreds to thousands of human samples. Data acquisition of large-scale samples has to be divided into several batches and may span from months to as long as several years. The signal drift of metabolites during data acquisition (intra- and inter-batch) is unavoidable and is a major confounding factor for large-scale metabolomics studies.}
}
@article{karpievitch_2014,
title = {Metabolomics data normalization with {EigenMS}.},
author = {Karpievitch, Yuliya V and Nikolic, Sonja B and Wilson, Richard and Sharman, James E and Edwards, Lindsay M},
pages = {e116221},
url = {http://dx.doi.org/10.1371/journal.pone.0116221},
year = {2014},
month = {dec},
day = {30},
urldate = {2018-01-13},
journal = {Plos One},
volume = {9},
number = {12},
doi = {10.1371/journal.pone.0116221},
pmid = {25549083},
pmcid = {PMC4280143},
f1000-projects = {shared citations},
abstract = {Liquid chromatography mass spectrometry has become one of the analytical platforms of choice for metabolomics studies. However, {LC}-{MS} metabolomics data can suffer from the effects of various systematic biases. These include batch effects, day-to-day variations in instrument performance, signal intensity loss due to time-dependent effects of the {LC} column performance, accumulation of contaminants in the {MS} ion source and {MS} sensitivity among others. In this study we aimed to test a singular value decomposition-based method, called {EigenMS}, for normalization of metabolomics data. We analyzed a clinical human dataset where {LC}-{MS} serum metabolomics data and physiological measurements were collected from thirty nine healthy subjects and forty with type 2 diabetes and applied {EigenMS} to detect and correct for any systematic bias. {EigenMS} works in several stages. First, {EigenMS} preserves the treatment group differences in the metabolomics data by estimating treatment effects with an {ANOVA} model (multiple fixed effects can be estimated). Singular value decomposition of the residuals matrix is then used to determine bias trends in the data. The number of bias trends is then estimated via a permutation test and the effects of the bias trends are eliminated. {EigenMS} removed bias of unknown complexity from the {LC}-{MS} metabolomics data, allowing for increased sensitivity in differential analysis. Moreover, normalized samples better correlated with both other normalized samples and corresponding physiological data, such as blood glucose level, glycated haemoglobin, exercise central augmentation pressure normalized to heart rate of 75, and total cholesterol. We were able to report 2578 discriminatory metabolite peaks in the normalized data (p\textless 0.05) as compared to only 1840 metabolite signals in the raw data. Our results support the use of singular value decomposition-based normalization for metabolomics data.}
}
@article{eddelbuettel_2011,
title = {rcpp : seamless R and C++ Integration},
author = {Eddelbuettel, D and François, R},
url = {http://www.jstatsoft.org/v40/i08/},
year = {2011},
urldate = {2019-06-03},
journal = {Journal of statistical software},
volume = {40},
number = {8},
issn = {1548-7660},
doi = {10.18637/jss.v040.i08},
f1000-projects = {shared citations}
}
@article{wehrens_2014,
title = {{metaMS}: an open-source pipeline for {GC}-{MS}-based untargeted metabolomics.},
author = {Wehrens, Ron and Weingart, Georg and Mattivi, Fulvio},
pages = {109-116},
url = {http://dx.doi.org/10.1016/j.jchromb.2014.02.051},
year = {2014},
month = {sep},
day = {1},
urldate = {2018-01-13},
journal = {Journal of Chromatography. B, Analytical Technologies in the Biomedical and Life Sciences},
volume = {966},
doi = {10.1016/j.jchromb.2014.02.051},
pmid = {24656939},
f1000-projects = {shared citations},
abstract = {Untargeted metabolomics are rapidly becoming an important tool for studying complex biological samples. Gas chromatography-mass spectrometry ({GC}-{MS}) is the most widely used analytical technology for metabolomic analysis of compounds that are volatile or can be chemically derivatised into volatile compounds. Unfortunately, data processing and analysis are not straightforward and the field is dominated by vendor-supplied software that does not always allow easy integration for large laboratories with different instruments. This paper presents an open-source pipeline for high-throughput {GC}-{MS} data processing, written in the R language and available as package {metaMS}. It features rapid annotation using in-house databases, and also provides support for building and validating such databases. The results are presented in simple-to-use tables, summarising the relative concentrations of identified compounds and unknowns in all samples. The use of the pipeline is illustrated using three experimental data sets. Copyright \copyright 2014 Elsevier B.V. All rights reserved.}
}
@article{mevik_2007,
title = {The pls Package: Principal Component and Partial Least Squares Regression {inR}},
author = {Mevik, Bj\orn-Helge and Wehrens, Ron},
url = {http://www.jstatsoft.org/v18/i02/},
year = {2007},
urldate = {2018-01-16},
journal = {Journal of statistical software},
volume = {18},
number = {2},
issn = {1548-7660},
doi = {10.18637/jss.v018.i02},
f1000-projects = {shared citations}
}
@article{bohler_2015,
title = {Automatically visualise and analyse data on pathways using {PathVisioRPC} from any programming environment.},
author = {Bohler, Anwesha and Eijssen, Lars M T and van Iersel, Martijn P and Leemans, Christ and Willighagen, Egon L and Kutmon, Martina and Jaillard, Magali and Evelo, Chris T},
pages = {267},
url = {http://dx.doi.org/10.1186/s12859-015-0708-8},
year = {2015},
month = {aug},
day = {23},
urldate = {2018-01-16},
journal = {{BMC} Bioinformatics},
volume = {16},
doi = {10.1186/s12859-015-0708-8},
pmid = {26298294},
pmcid = {PMC4546821},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Biological pathways are descriptive diagrams of biological processes widely used for functional analysis of differentially expressed genes or proteins. Primary data analysis, such as quality control, normalisation, and statistical analysis, is often performed in scripting languages like R, Perl, and Python. Subsequent pathway analysis is usually performed using dedicated external applications. Workflows involving manual use of multiple environments are time consuming and error prone. Therefore, tools are needed that enable pathway analysis directly within the same scripting languages used for primary data analyses. Existing tools have limited capability in terms of available pathway content, pathway editing and visualisation options, and export file formats. Consequently, making the full-fledged pathway analysis tool {PathVisio} available from various scripting languages will benefit researchers. {RESULTS}: We developed {PathVisioRPC}, an {XMLRPC} interface for the pathway analysis software {PathVisio}. {PathVisioRPC} enables creating and editing biological pathways, visualising data on pathways, performing pathway statistics, and exporting results in several image formats in multiple programming environments. We demonstrate {PathVisioRPC} functionalities using examples in Python. Subsequently, we analyse a publicly available {NCBI} {GEO} gene expression dataset studying tumour bearing mice treated with cyclophosphamide in R. The R scripts demonstrate how calls to existing R packages for data processing and calls to {PathVisioRPC} can directly work together. To further support R users, we have created {RPathVisio} simplifying the use of {PathVisioRPC} in this environment. We have also created a pathway module for the microarray data analysis portal {ArrayAnalysis}.org that calls the {PathVisioRPC} interface to perform pathway analysis. This module allows users to use {PathVisio} functionality online without having to download and install the software and exemplifies how the {PathVisioRPC} interface can be used by data analysis pipelines for functional analysis of processed genomics data. {CONCLUSIONS}: {PathVisioRPC} enables data visualisation and pathway analysis directly from within various analytical environments used for preliminary analyses. It supports the use of existing pathways from {WikiPathways} or pathways created using the {RPC} itself. It also enables automation of tasks performed using {PathVisio}, making it useful to {PathVisio} users performing repeated visualisation and analysis tasks. {PathVisioRPC} is freely available for academic and commercial use at http://projects.bigcat.unimaas.nl/pathvisiorpc.}
}
@article{tautenhahn_2011,
title = {{metaXCMS}: second-order analysis of untargeted metabolomics data.},
author = {Tautenhahn, Ralf and Patti, Gary J and Kalisiak, Ewa and Miyamoto, Takashi and Schmidt, Manuela and Lo, Fang Yin and {McBee}, Joshua and Baliga, Nitin S and Siuzdak, Gary},
pages = {696-700},
url = {http://pubs.acs.org/doi/abs/10.1021/ac102980g},
year = {2011},
month = {feb},
day = {1},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {83},
number = {3},
issn = {0003-2700},
doi = {10.1021/ac102980g},
pmid = {21174458},
pmcid = {PMC3654666},
f1000-projects = {shared citations},
abstract = {Mass spectrometry-based untargeted metabolomics often results in the observation of hundreds to thousands of features that are differentially regulated between sample classes. A major challenge in interpreting the data is distinguishing metabolites that are causally associated with the phenotype of interest from those that are unrelated but altered in downstream pathways as an effect. To facilitate this distinction, here we describe new software called {metaXCMS} for performing second-order ("meta") analysis of untargeted metabolomics data from multiple sample groups representing different models of the same phenotype. While the original version of {XCMS} was designed for the direct comparison of two sample groups, {metaXCMS} enables meta-analysis of an unlimited number of sample classes to facilitate prioritization of the data and increase the probability of identifying metabolites causally related to the phenotype of interest. {metaXCMS} is used to import {XCMS} results that are subsequently filtered, realigned, and ultimately compared to identify shared metabolites that are up- or down-regulated across all sample groups. We demonstrate the software's utility by identifying histamine as a metabolite that is commonly altered in three different models of pain. {metaXCMS} is freely available at http://metlin.scripps.edu/metaxcms/.}
}
@article{giacomoni_2015,
title = {{Workflow4Metabolomics}: a collaborative research infrastructure for computational metabolomics.},
author = {Giacomoni, Franck and Le Corguillé, Gildas and Monsoor, Misharl and Landi, Marion and Pericard, Pierre and Pétéra, Mélanie and Duperier, Christophe and Tremblay-Franco, Marie and Martin, Jean-François and Jacob, Daniel and Goulitquer, Sophie and Thévenot, Etienne A and Caron, Christophe},
pages = {1493-1495},
url = {http://dx.doi.org/10.1093/bioinformatics/btu813},
year = {2015},
month = {may},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {31},
number = {9},
doi = {10.1093/bioinformatics/btu813},
pmid = {25527831},
pmcid = {PMC4410648},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: The complex, rapidly evolving field of computational metabolomics calls for collaborative infrastructures where the large volume of new algorithms for data pre-processing, statistical analysis and annotation can be readily integrated whatever the language, evaluated on reference datasets and chained to build ad hoc workflows for users. We have developed {Workflow4Metabolomics} ({W4M}), the first fully open-source and collaborative online platform for computational metabolomics. {W4M} is a virtual research environment built upon the Galaxy web-based platform technology. It enables ergonomic integration, exchange and running of individual modules and workflows. Alternatively, the whole {W4M} framework and computational tools can be downloaded as a virtual machine for local installation. {AVAILABILITY} {AND} {IMPLEMENTATION}: http://workflow4metabolomics.org homepage enables users to open a private account and access the infrastructure. {W4M} is developed and maintained by the French Bioinformatics Institute ({IFB}) and the French Metabolomics and Fluxomics Infrastructure ({MetaboHUB}). {CONTACT}: contact@workflow4metabolomics.org. \copyright The Author 2014. Published by Oxford University Press.}
}
@article{guitton_2017,
title = {Create, run, share, publish, and reference your {LC}-{MS}, {FIA}-{MS}, {GC}-{MS}, and {NMR} data analysis workflows with the {Workflow4Metabolomics} 3.0 Galaxy online infrastructure for metabolomics.},
author = {Guitton, Yann and Tremblay-Franco, Marie and Le Corguillé, Gildas and Martin, Jean-François and Pétéra, Mélanie and Roger-Mele, Pierrick and Delabrière, Alexis and Goulitquer, Sophie and Monsoor, Misharl and Duperier, Christophe and Canlet, Cécile and Servien, Rémi and Tardivel, Patrick and Caron, Christophe and Giacomoni, Franck and Thévenot, Etienne A},
pages = {89-101},
url = {http://dx.doi.org/10.1016/j.biocel.2017.07.002},
year = {2017},
month = {jul},
day = {12},
urldate = {2019-09-06},
journal = {The International Journal of Biochemistry \& Cell Biology},
volume = {93},
doi = {10.1016/j.biocel.2017.07.002},
pmid = {28710041},
f1000-projects = {shared citations},
abstract = {Metabolomics is a key approach in modern functional genomics and systems biology. Due to the complexity of metabolomics data, the variety of experimental designs, and the multiplicity of bioinformatics tools, providing experimenters with a simple and efficient resource to conduct comprehensive and rigorous analysis of their data is of utmost importance. In 2014, we launched the {Workflow4Metabolomics} ({W4M}; http://workflow4metabolomics.org) online infrastructure for metabolomics built on the Galaxy environment, which offers user-friendly features to build and run data analysis workflows including preprocessing, statistical analysis, and annotation steps. Here we present the new {W4M} 3.0 release, which contains twice as many tools as the first version, and provides two features which are, to our knowledge, unique among online resources. First, data from the four major metabolomics technologies (i.e., {LC}-{MS}, {FIA}-{MS}, {GC}-{MS}, and {NMR}) can be analyzed on a single platform. By using three studies in human physiology, alga evolution, and animal toxicology, we demonstrate how the 40 available tools can be easily combined to address biological issues. Second, the full analysis (including the workflow, the parameter values, the input data and output results) can be referenced with a permanent digital object identifier ({DOI}). Publication of data analyses is of major importance for robust and reproducible science. Furthermore, the publicly shared workflows are of high-value for e-learning and training. The {Workflow4Metabolomics} 3.0 e-infrastructure thus not only offers a unique online environment for analysis of data from the main metabolomics technologies, but it is also the first reference repository for metabolomics workflows. Copyright \copyright 2017 Elsevier Ltd. All rights reserved.}
}
@article{mehmood_2012,
title = {A review of variable selection methods in Partial Least Squares Regression},
author = {Mehmood, Tahir and Liland, Kristian Hovde and Snipen, Lars and S\aeb\o, Solve},
pages = {62-69},
url = {http://linkinghub.elsevier.com/retrieve/pii/S0169743912001542},
year = {2012},
month = {aug},
urldate = {2019-07-30},
journal = {Chemometrics and Intelligent Laboratory Systems},
volume = {118},
issn = {01697439},
doi = {10.1016/j.chemolab.2012.07.010},
f1000-projects = {shared citations},
abstract = {With the increasing ease of measuring multiple variables per object the importance of variable selection for data reduction and for improved interpretability is gaining importance. There are numerous suggested methods for variable selection in the literature of data analysis and statistics, and it is a challenge to stay updated on all the possibilities. We therefore present a review of available methods for variable selection within one of the many modeling approaches for high-throughput data, Partial Least Squares Regression. The aim of this paper is mainly to collect and shortly present the methods in such a way that the reader easily can get an understanding of the characteristics of the methods and to get a basis for selecting an appropriate method for own use. For each method we also give references to its use in the literature for further reading, and also to software availability.}
}
@article{meyer_2010,
title = {Automated mass spectral deconvolution and identification system for {GC}-{MS} screening for drugs, poisons, and metabolites in urine.},
author = {Meyer, Markus R and Peters, Frank T and Maurer, Hans H},
pages = {575-584},
url = {http://dx.doi.org/10.1373/clinchem.2009.135517},
year = {2010},
month = {apr},
urldate = {2019-08-01},
journal = {Clinical Chemistry},
volume = {56},
number = {4},
doi = {10.1373/clinchem.2009.135517},
pmid = {20185625},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: The challenge in systematic toxicological analysis using gas chromatography and/or liquid chromatography coupled to mass spectrometry is to identify compounds of interest from background noise. The large amount of spectral information collected in one full-scan {MS} run demands the use of automated evaluation of recorded data files. We evaluated the applicability of the freeware deconvolution software {AMDIS} (Automated Mass Spectral Deconvolution and Identification System) for {GC}-{MS}-based systematic toxicological analysis in urine for increasing the speed of evaluation and automating the daily routine workload. {METHODS}: We prepared a set of 111 urine samples for {GC}-{MS} analysis by acidic hydrolysis, liquid-liquid extraction, and acetylation. After analysis, the resulting data files were evaluated manually by an experienced toxicologist and automatically using {AMDIS} with deconvolution and identification settings previously optimized for this type of analysis. The results by manual and {AMDIS} evaluation were then compared. {RESULTS}: The deconvolution settings for the {AMDIS} evaluation were successfully optimized to obtain the highest possible number of components. Identification settings were evaluated and chosen for a compromise between most identified targets and general number of hits. With the use of these optimized settings, {AMDIS}-based data analysis was comparable or even superior to manual evaluation and reduced by half the overall analysis time. {CONCLUSIONS}: {AMDIS} proved to be a reliable and powerful tool for daily routine and emergency toxicology. Nevertheless, {AMDIS} can identify only targets present in the user-defined target library and may therefore not indicate unknown compounds that might be relevant in clinical and forensic toxicology.}
}
@article{backman_2011,
title = {{ChemMine} tools: an online service for analyzing and clustering small molecules.},
author = {Backman, Tyler W H and Cao, Yiqun and Girke, Thomas},
pages = {W486-91},
url = {http://dx.doi.org/10.1093/nar/gkr320},
year = {2011},
month = {jul},
urldate = {2018-01-29},
journal = {Nucleic Acids Research},
volume = {39},
number = {Web Server issue},
doi = {10.1093/nar/gkr320},
pmid = {21576229},
pmcid = {PMC3125754},
f1000-projects = {shared citations},
abstract = {{ChemMine} Tools is an online service for small molecule data analysis. It provides a web interface to a set of cheminformatics and data mining tools that are useful for various analysis routines performed in chemical genomics and drug discovery. The service also offers programmable access options via the R library {ChemmineR}. The primary functionalities of {ChemMine} Tools fall into five major application areas: data visualization, structure comparisons, similarity searching, compound clustering and prediction of chemical properties. First, users can upload compound data sets to the online Compound Workbench. Numerous utilities are provided for compound viewing, structure drawing and format interconversion. Second, pairwise structural similarities among compounds can be quantified. Third, interfaces to ultra-fast structure similarity search algorithms are available to efficiently mine the chemical space in the public domain. These include fingerprint and embedding/indexing algorithms. Fourth, the service includes a Clustering Toolbox that integrates cheminformatic algorithms with data mining utilities to enable systematic structure and activity based analyses of custom compound sets. Fifth, physicochemical property descriptors of custom compound sets can be calculated. These descriptors are important for assessing the bioactivity profile of compounds in silico and quantitative structure-activity relationship ({QSAR}) analyses. {ChemMine} Tools is available at: http://chemmine.ucr.edu.}
}
@article{djoumboufeunang_2016,
title = {{ClassyFire}: automated chemical classification with a comprehensive, computable taxonomy.},
author = {Djoumbou Feunang, Yannick and Eisner, Roman and Knox, Craig and Chepelev, Leonid and Hastings, Janna and Owen, Gareth and Fahy, Eoin and Steinbeck, Christoph and Subramanian, Shankar and Bolton, Evan and Greiner, Russell and Wishart, David S},
pages = {61},
url = {http://dx.doi.org/10.1186/s13321-016-0174-y},
year = {2016},
month = {nov},
day = {4},
urldate = {2018-04-23},
journal = {Journal of cheminformatics},
volume = {8},
doi = {10.1186/s13321-016-0174-y},
pmid = {27867422},
pmcid = {PMC5096306},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Scientists have long been driven by the desire to describe, organize, classify, and compare objects using taxonomies and/or ontologies. In contrast to biology, geology, and many other scientific disciplines, the world of chemistry still lacks a standardized chemical ontology or taxonomy. Several attempts at chemical classification have been made; but they have mostly been limited to either manual, or semi-automated proof-of-principle applications. This is regrettable as comprehensive chemical classification and description tools could not only improve our understanding of chemistry but also improve the linkage between chemistry and many other fields. For instance, the chemical classification of a compound could help predict its metabolic fate in humans, its druggability or potential hazards associated with it, among others. However, the sheer number (tens of millions of compounds) and complexity of chemical structures is such that any manual classification effort would prove to be near impossible. {RESULTS}: We have developed a comprehensive, flexible, and computable, purely structure-based chemical taxonomy ({ChemOnt}), along with a computer program ({ClassyFire}) that uses only chemical structures and structural features to automatically assign all known chemical compounds to a taxonomy consisting of \textgreater4800 different categories. This new chemical taxonomy consists of up to 11 different levels (Kingdom, {SuperClass}, Class, {SubClass}, etc.) with each of the categories defined by unambiguous, computable structural rules. Furthermore each category is named using a consensus-based nomenclature and described (in English) based on the characteristic common structural properties of the compounds it contains. The {ClassyFire} webserver is freely accessible at http://classyfire.wishartlab.com/. Moreover, a Ruby {API} version is available at https://bitbucket.org/wishartlab/classyfire\_api, which provides programmatic access to the {ClassyFire} server and database. {ClassyFire} has been used to annotate over 77 million compounds and has already been integrated into other software packages to automatically generate textual descriptions for, and/or infer biological properties of over 100,000 compounds. Additional examples and applications are provided in this paper. {CONCLUSION}: {ClassyFire}, in combination with {ChemOnt} ({ClassyFire}'s comprehensive chemical taxonomy), now allows chemists and cheminformaticians to perform large-scale, rapid and automated chemical classification. Moreover, a freely accessible {API} allows easy access to more than 77 million "{ClassyFire}" classified compounds. The results can be used to help annotate well studied, as well as lesser-known compounds. In addition, these chemical classifications can be used as input for data integration, and many other cheminformatics-related tasks.}
}
@article{bemis_2015,
title = {Cardinal: an R package for statistical analysis of mass spectrometry-based imaging experiments.},
author = {Bemis, Kyle D and Harry, April and Eberlin, Livia S and Ferreira, Christina and van de Ven, Stephanie M and Mallick, Parag and Stolowitz, Mark and Vitek, Olga},
pages = {2418-2420},
url = {http://dx.doi.org/10.1093/bioinformatics/btv146},
year = {2015},
month = {jul},
day = {15},
urldate = {2018-01-16},
journal = {Bioinformatics},
volume = {31},
number = {14},
doi = {10.1093/bioinformatics/btv146},
pmid = {25777525},
pmcid = {PMC4495298},
f1000-projects = {shared citations},
abstract = {Cardinal is an R package for statistical analysis of mass spectrometry-based imaging ({MSI}) experiments of biological samples such as tissues. Cardinal supports both Matrix-Assisted Laser Desorption/Ionization ({MALDI}) and Desorption Electrospray Ionization-based {MSI} workflows, and experiments with multiple tissues and complex designs. The main analytical functionalities include (1) image segmentation, which partitions a tissue into regions of homogeneous chemical composition, selects the number of segments and the subset of informative ions, and characterizes the associated uncertainty and (2) image classification, which assigns locations on the tissue to pre-defined classes, selects the subset of informative ions, and estimates the resulting classification error by (cross-) validation. The statistical methods are based on mixture modeling and regularization. \copyright The Author 2015. Published by Oxford University Press.}
}
@article{thvenot_2015,
title = {Analysis of the Human Adult Urinary Metabolome Variations with Age, Body Mass Index, and Gender by Implementing a Comprehensive Workflow for Univariate and {OPLS} Statistical Analyses.},
author = {Thévenot, Etienne A and Roux, Aurélie and Xu, Ying and Ezan, Eric and Junot, Christophe},
pages = {3322-3335},
url = {http://dx.doi.org/10.1021/acs.jproteome.5b00354},
year = {2015},
month = {aug},
day = {7},
urldate = {2018-01-13},
journal = {Journal of Proteome Research},
volume = {14},
number = {8},
doi = {10.1021/acs.jproteome.5b00354},
pmid = {26088811},
f1000-projects = {shared citations},
abstract = {Urine metabolomics is widely used for biomarker research in the fields of medicine and toxicology. As a consequence, characterization of the variations of the urine metabolome under basal conditions becomes critical in order to avoid confounding effects in cohort studies. Such physiological information is however very scarce in the literature and in metabolomics databases so far. Here we studied the influence of age, body mass index ({BMI}), and gender on metabolite concentrations in a large cohort of 183 adults by using liquid chromatography coupled with high-resolution mass spectrometry ({LC}-{HRMS}). We implemented a comprehensive statistical workflow for univariate hypothesis testing and modeling by orthogonal partial least-squares ({OPLS}), which we made available to the metabolomics community within the online {Workflow4Metabolomics}.org resource. We found 108 urine metabolites displaying concentration variations with either age, {BMI}, or gender, by integrating the results from univariate p-values and multivariate variable importance in projection ({VIP}). Several metabolite clusters were further evidenced by correlation analysis, and they allowed stratification of the cohort. In conclusion, our study highlights the impact of gender and age on the urinary metabolome, and thus it indicates that these factors should be taken into account for the design of metabolomics studies.}
}
@article{sud_2016,
title = {Metabolomics Workbench: An international repository for metabolomics data and metadata, metabolite standards, protocols, tutorials and training, and analysis tools.},
author = {Sud, Manish and Fahy, Eoin and Cotter, Dawn and Azam, Kenan and Vadivelu, Ilango and Burant, Charles and Edison, Arthur and Fiehn, Oliver and Higashi, Richard and Nair, K Sreekumaran and Sumner, Susan and Subramaniam, Shankar},
pages = {D463-70},
url = {http://dx.doi.org/10.1093/nar/gkv1042},
year = {2016},
month = {jan},
day = {4},
urldate = {2018-01-13},
journal = {Nucleic Acids Research},
volume = {44},
number = {D1},
doi = {10.1093/nar/gkv1042},
pmid = {26467476},
pmcid = {PMC4702780},
f1000-projects = {shared citations},
abstract = {The Metabolomics Workbench, available at www.metabolomicsworkbench.org, is a public repository for metabolomics metadata and experimental data spanning various species and experimental platforms, metabolite standards, metabolite structures, protocols, tutorials, and training material and other educational resources. It provides a computational platform to integrate, analyze, track, deposit and disseminate large volumes of heterogeneous data from a wide variety of metabolomics studies including mass spectrometry ({MS}) and nuclear magnetic resonance spectrometry ({NMR}) data spanning over 20 different species covering all the major taxonomic categories including humans and other mammals, plants, insects, invertebrates and microorganisms. Additionally, a number of protocols are provided for a range of metabolite classes, sample types, and both {MS} and {NMR}-based studies, along with a metabolite structure database. The metabolites characterized in the studies available on the Metabolomics Workbench are linked to chemical structures in the metabolite structure database to facilitate comparative analysis across studies. The Metabolomics Workbench, part of the data coordinating effort of the National Institute of Health ({NIH}) Common Fund's Metabolomics Program, provides data from the Common Fund's Metabolomics Resource Cores, metabolite standards, and analysis tools to the wider metabolomics community and seeks data depositions from metabolomics researchers across the world. \copyright The Author(s) 2015. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{franceschi_2014,
title = {{MetaDB} a Data Processing Workflow in Untargeted {MS}-Based Metabolomics Experiments.},
author = {Franceschi, Pietro and Mylonas, Roman and Shahaf, Nir and Scholz, Matthias and Arapitsas, Panagiotis and Masuero, Domenico and Weingart, Georg and Carlin, Silvia and Vrhovsek, Urska and Mattivi, Fulvio and Wehrens, Ron},
pages = {72},
url = {http://dx.doi.org/10.3389/fbioe.2014.00072},
year = {2014},
month = {dec},
day = {16},
urldate = {2018-01-16},
journal = {Frontiers in bioengineering and biotechnology},
volume = {2},
doi = {10.3389/fbioe.2014.00072},
pmid = {25566535},
pmcid = {PMC4267269},
f1000-projects = {shared citations},
abstract = {Due to their sensitivity and speed, mass-spectrometry based analytical technologies are widely used to in metabolomics to characterize biological phenomena. To address issues like metadata organization, quality assessment, data processing, data storage, and, finally, submission to public repositories, bioinformatic pipelines of a non-interactive nature are often employed, complementing the interactive software used for initial inspection and visualization of the data. These pipelines often are created as open-source software allowing the complete and exhaustive documentation of each step, ensuring the reproducibility of the analysis of extensive and often expensive experiments. In this paper, we will review the major steps which constitute such a data processing pipeline, discussing them in the context of an open-source software for untargeted {MS}-based metabolomics experiments recently developed at our institute. The software has been developed by integrating our {metaMS} R package with a user-friendly web-based application written in Grails. {MetaMS} takes care of data pre-processing and annotation, while the interface deals with the creation of the sample lists, the organization of the data storage, and the generation of survey plots for quality assessment. Experimental and biological metadata are stored in the {ISA}-Tab format making the proposed pipeline fully integrated with the Metabolights framework.}
}
@article{ruttkies_2016,
title = {{MetFrag} relaunched: incorporating strategies beyond in silico fragmentation.},
author = {Ruttkies, Christoph and Schymanski, Emma L and Wolf, Sebastian and Hollender, Juliane and Neumann, Steffen},
pages = {3},
url = {http://dx.doi.org/10.1186/s13321-016-0115-9},
year = {2016},
month = {jan},
day = {29},
urldate = {2018-01-13},
journal = {Journal of cheminformatics},
volume = {8},
doi = {10.1186/s13321-016-0115-9},
pmid = {26834843},
pmcid = {PMC4732001},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: The in silico fragmenter {MetFrag}, launched in 2010, was one of the first approaches combining compound database searching and fragmentation prediction for small molecule identification from tandem mass spectrometry data. Since then many new approaches have evolved, as has {MetFrag} itself. This article details the latest developments to {MetFrag} and its use in small molecule identification since the original publication. {RESULTS}: {MetFrag} has gone through algorithmic and scoring refinements. New features include the retrieval of reference, data source and patent information via {ChemSpider} and {PubChem} web services, as well as {InChIKey} filtering to reduce candidate redundancy due to stereoisomerism. Candidates can be filtered or scored differently based on criteria like occurence of certain elements and/or substructures prior to fragmentation, or presence in so-called "suspect lists". Retention time information can now be calculated either within {MetFrag} with a sufficient amount of user-provided retention times, or incorporated separately as "user-defined scores" to be included in candidate ranking. The changes to {MetFrag} were evaluated on the original dataset as well as a dataset of 473 merged high resolution tandem mass spectra ({HR}-{MS}/{MS}) and compared with another open source in silico fragmenter, {CFM}-{ID}. Using {HR}-{MS}/{MS} information only, {MetFrag2}.2 and {CFM}-{ID} had 30 and 43 Top 1 ranks, respectively, using {PubChem} as a database. Including reference and retention information in {MetFrag2}.2 improved this to 420 and 336 Top 1 ranks with {ChemSpider} and {PubChem} (89 and 71 \%), respectively, and even up to 343 Top 1 ranks ({PubChem}) when combining with {CFM}-{ID}. The optimal parameters and weights were verified using three additional datasets of 824 merged {HR}-{MS}/{MS} spectra in total. Further examples are given to demonstrate flexibility of the enhanced features. {CONCLUSIONS}: In many cases additional information is available from the experimental context to add to small molecule identification, which is especially useful where the mass spectrum alone is not sufficient for candidate selection from a large number of candidates. The results achieved with {MetFrag2}.2 clearly show the benefit of considering this additional information. The new functions greatly enhance the chance of identification success and have been incorporated into a command line interface in a flexible way designed to be integrated into high throughput workflows. Feedback on the command line version of {MetFrag2}.2 available at http://c-ruttkies.github.io/{MetFrag}/ is welcome.}
}
@article{rohart_2017,
title = {{mixOmics}: An R package for 'omics feature selection and multiple data integration.},
author = {Rohart, Florian and Gautier, Benoît and Singh, Amrit and Lê Cao, Kim-Anh},
pages = {e1005752},
url = {http://dx.doi.org/10.1371/journal.pcbi.1005752},
year = {2017},
month = {nov},
day = {3},
urldate = {2018-01-13},
journal = {{PLoS} Computational Biology},
volume = {13},
number = {11},
doi = {10.1371/journal.pcbi.1005752},
pmid = {29099853},
pmcid = {PMC5687754},
f1000-projects = {shared citations},
abstract = {The advent of high throughput technologies has led to a wealth of publicly available 'omics data coming from different sources, such as transcriptomics, proteomics, metabolomics. Combining such large-scale biological data sets can lead to the discovery of important biological insights, provided that relevant information can be extracted in a holistic manner. Current statistical approaches have been focusing on identifying small subsets of molecules (a 'molecular signature') to explain or predict biological conditions, but mainly for a single type of 'omics. In addition, commonly used methods are univariate and consider each biological feature independently. We introduce {mixOmics}, an R package dedicated to the multivariate analysis of biological data sets with a specific focus on data exploration, dimension reduction and visualisation. By adopting a systems biology approach, the toolkit provides a wide range of methods that statistically integrate several data sets at once to probe relationships between heterogeneous 'omics data sets. Our recent methods extend Projection to Latent Structure ({PLS}) models for discriminant analysis, for data integration across multiple 'omics data or across independent studies, and for the identification of molecular signatures. We illustrate our latest {mixOmics} integrative frameworks for the multivariate analyses of 'omics data available from the package.}
}
@article{fabregat_2018,
title = {The reactome pathway knowledgebase.},
author = {Fabregat, Antonio and Jupe, Steven and Matthews, Lisa and Sidiropoulos, Konstantinos and Gillespie, Marc and Garapati, Phani and Haw, Robin and Jassal, Bijay and Korninger, Florian and May, Bruce and Milacic, Marija and Roca, Corina Duenas and Rothfels, Karen and Sevilla, Cristoffer and Shamovsky, Veronica and Shorser, Solomon and Varusai, Thawfeek and Viteri, Guilherme and Weiser, Joel and Wu, Guanming and Stein, Lincoln and Hermjakob, Henning and D'Eustachio, Peter},
pages = {D649-D655},
url = {http://dx.doi.org/10.1093/nar/gkx1132},
year = {2018},
month = {jan},
day = {4},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {46},
number = {D1},
doi = {10.1093/nar/gkx1132},
pmid = {29145629},
pmcid = {PMC5753187},
f1000-projects = {shared citations},
abstract = {The Reactome Knowledgebase (https://reactome.org) provides molecular details of signal transduction, transport, {DNA} replication, metabolism, and other cellular processes as an ordered network of molecular transformations-an extended version of a classic metabolic map, in a single consistent data model. Reactome functions both as an archive of biological processes and as a tool for discovering unexpected functional relationships in data such as gene expression profiles or somatic mutation catalogues from tumor cells. To support the continued brisk growth in the size and complexity of Reactome, we have implemented a graph database, improved performance of data analysis tools, and designed new data structures and strategies to boost diagram viewer performance. To make our website more accessible to human users, we have improved pathway display and navigation by implementing interactive Enhanced High Level Diagrams ({EHLDs}) with an associated icon library, and subpathway highlighting and zooming, in a simplified and reorganized web site with adaptive design. To encourage re-use of our content, we have enabled export of pathway diagrams as '{PowerPoint}' files. \copyright The Author(s) 2017. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{slenter_2018,
title = {{WikiPathways}: a multifaceted pathway database bridging metabolomics to other omics research.},
author = {Slenter, Denise N and Kutmon, Martina and Hanspers, Kristina and Riutta, Anders and Windsor, Jacob and Nunes, Nuno and Mélius, Jonathan and Cirillo, Elisa and Coort, Susan L and Digles, Daniela and Ehrhart, Friederike and Giesbertz, Pieter and Kalafati, Marianthi and Martens, Marvin and Miller, Ryan and Nishida, Kozo and Rieswijk, Linda and Waagmeester, Andra and Eijssen, Lars M T and Evelo, Chris T and Pico, Alexander R and Willighagen, Egon L},
pages = {D661-D667},
url = {http://dx.doi.org/10.1093/nar/gkx1064},
year = {2018},
month = {jan},
day = {4},
urldate = {2019-06-03},
journal = {Nucleic Acids Research},
volume = {46},
number = {D1},
doi = {10.1093/nar/gkx1064},
pmid = {29136241},
pmcid = {PMC5753270},
f1000-projects = {shared citations},
abstract = {{WikiPathways} (wikipathways.org) captures the collective knowledge represented in biological pathways. By providing a database in a curated, machine readable way, omics data analysis and visualization is enabled. {WikiPathways} and other pathway databases are used to analyze experimental data by research groups in many fields. Due to the open and collaborative nature of the {WikiPathways} platform, our content keeps growing and is getting more accurate, making {WikiPathways} a reliable and rich pathway database. Previously, however, the focus was primarily on genes and proteins, leaving many metabolites with only limited annotation. Recent curation efforts focused on improving the annotation of metabolism and metabolic pathways by associating unmapped metabolites with database identifiers and providing more detailed interaction knowledge. Here, we report the outcomes of the continued growth and curation efforts, such as a doubling of the number of annotated metabolite nodes in {WikiPathways}. Furthermore, we introduce an {OpenAPI} documentation of our web services and the {FAIR} (Findable, Accessible, Interoperable and Reusable) annotation of resources to increase the interoperability of the knowledge encoded in these pathways and experimental omics data. New search options, monthly downloads, more links to metabolite databases, and new portals make pathway knowledge more effortlessly accessible to individual researchers and research communities. \copyright The Author(s) 2017. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{loos_2017,
title = {Nontargeted homologue series extraction from hyphenated high resolution mass spectrometry data.},
author = {Loos, Martin and Singer, Heinz},
pages = {12},
url = {http://dx.doi.org/10.1186/s13321-017-0197-z},
year = {2017},
month = {feb},
day = {23},
urldate = {2019-04-26},
journal = {Journal of cheminformatics},
volume = {9},
doi = {10.1186/s13321-017-0197-z},
pmid = {28286574},
pmcid = {PMC5323340},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: A large proportion of polar anthropogenic compounds routinely released into the environment comprises homologue series, i.e., sets of chemicals differing in a repeating chemical unit. Using analytical techniques such as liquid chromatography coupled to high-resolution mass spectrometry ({LC}-{HRMS}), these compounds are readily measurable as signal sets with characteristic differences in mass and typically retention time. However, and despite such distinct characteristics, no computational approach for the direct, simultaneous and untargeted detection of all such signal sets comprising both {LC} and {HRMS} information has to date been presented. {RESULTS}: A fast two-staged approach has been developed to extract {LC}-{HRMS} signal patterns which can be indicative of homologous analytes. In a first stage, a k-d tree representation of picked {LC}-{HRMS} peaks is used to extract all feasible 3-tuples of peaks with restrictions in, e.g., mass defect differences. A second stage then recombines these 3-tuples to larger series tuples while ensuring smooth changes in their retention time characteristics. This unsupervised approach was evaluated for ten effluent samples from Swiss sewage treatment plants ({STPs}), in both positive and negative electrospray-ionization. {CONCLUSIONS}: Beside recovering all continuous series of previously identified homologues, substantial fractions of nontargeted peaks could subsequently be assigned into very diverse peak series, although assignments were often not unique. The latter ambiguities were resolved by a self-organizing map technique and revealed both distinctive series meshing and rivaling combinatorial solutions in the presence of isobaric or gapped series peaks. When comparing {STPs}, several ubiquitous yet partially low-frequent series mass differences emerged and may prioritize future identification efforts. The presented algorithm is freely available as part of the R package nontarget and as a user-friendly web-interface at www.envihomolog.eawag.ch.Graphical {AbstractSearch} for systematic series indicative of homologous compounds is based on a partitioned representation of {LC}-{HRMS} signal characteristics. This nontargeted search first extracts series triplets in a nearest-neighbour walk and then recombines them to larger ones. For illustration, the two dimensions involving mass defect characteristics are represented by one only.}
}
@article{aggio_2011,
title = {Metab: an R package for high-throughput analysis of metabolomics data generated by {GC}-{MS}.},
author = {Aggio, Raphael and Villas-Bôas, Silas Granato and Ruggiero, Katya},
pages = {2316-2318},
url = {http://dx.doi.org/10.1093/bioinformatics/btr379},
year = {2011},
month = {aug},
day = {15},
urldate = {2018-01-16},
journal = {Bioinformatics},
volume = {27},
number = {16},
doi = {10.1093/bioinformatics/btr379},
pmid = {21697128},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: The Automated Mass Spectral Deconvolution and Identification System ({AMDIS}) is freeware extensively applied in metabolomics. However, datasets processed by {AMDIS} require extensive data correction, filtering and reshaping to create reliable datasets for further downstream analysis. Performed manually, these processes are laborious and extremely time consuming. Furthermore, manual corrections increase the chance of human error and can introduce additional technical variability to the data. Thus, an automated pipeline for curating {GC}-{MS} data is urgently needed. {RESULTS}: We present the Metab R package designed to automate the pipeline for analysis of metabolomics {GC}-{MS} datasets processed by {AMDIS}. {AVAILABILITY}: The Metab package, the {AMDIS} library and the reference ion library are available at www.metabolomics.auckland.ac.nz/index.php/downloads. {CONTACT}: k.ruggiero@auckland.ac.nz.}
}
@book{clarke_2009,
title = {Principles and theory for data mining and machine learning},
author = {Clarke, Bertrand and Fokoue, Ernest and Zhang, Hao Helen},
series = {Springer series in statistics},
publisher = {Springer New York},
url = {http://link.springer.com/10.1007/978-0-387-98135-2},
year = {2009},
urldate = {2019-07-01},
isbn = {978-0-387-98134-5},
issn = {0172-7397},
doi = {10.1007/978-0-387-98135-2},
address = {New York, {NY}},
f1000-projects = {shared citations}
}
@article{willighagen_2017,
title = {The Chemistry Development Kit ({CDK}) v2.0: atom typing, depiction, molecular formulas, and substructure searching.},
author = {Willighagen, Egon L and Mayfield, John W and Alvarsson, Jonathan and Berg, Arvid and Carlsson, Lars and Jeliazkova, Nina and Kuhn, Stefan and Pluskal, Tomáš and Rojas-Chertó, Miquel and Spjuth, Ola and Torrance, Gilleain and Evelo, Chris T and Guha, Rajarshi and Steinbeck, Christoph},
pages = {33},
url = {http://dx.doi.org/10.1186/s13321-017-0220-4},
year = {2017},
month = {jun},
day = {6},
urldate = {2018-01-29},
journal = {Journal of cheminformatics},
volume = {9},
number = {1},
doi = {10.1186/s13321-017-0220-4},
pmid = {29086040},
pmcid = {PMC5461230},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: The Chemistry Development Kit ({CDK}) is a widely used open source cheminformatics toolkit, providing data structures to represent chemical concepts along with methods to manipulate such structures and perform computations on them. The library implements a wide variety of cheminformatics algorithms ranging from chemical structure canonicalization to molecular descriptor calculations and pharmacophore perception. It is used in drug discovery, metabolomics, and toxicology. Over the last 10 years, the code base has grown significantly, however, resulting in many complex interdependencies among components and poor performance of many algorithms. {RESULTS}: We report improvements to the {CDK} v2.0 since the v1.2 release series, specifically addressing the increased functional complexity and poor performance. We first summarize the addition of new functionality, such atom typing and molecular formula handling, and improvement to existing functionality that has led to significantly better performance for substructure searching, molecular fingerprints, and rendering of molecules. Second, we outline how the {CDK} has evolved with respect to quality control and the approaches we have adopted to ensure stability, including a code review mechanism. {CONCLUSIONS}: This paper highlights our continued efforts to provide a community driven, open source cheminformatics library, and shows that such collaborative projects can thrive over extended periods of time, resulting in a high-quality and performant library. By taking advantage of community support and contributions, we show that an open source cheminformatics project can act as a peer reviewed publishing platform for scientific computing software. Graphical abstract {CDK} 2.0 provides new features and improved performance.}
}
@article{spicer_2017,
title = {Navigating freely-available software tools for metabolomics analysis.},
author = {Spicer, Rachel and Salek, Reza M and Moreno, Pablo and Cañueto, Daniel and Steinbeck, Christoph},
pages = {106},
url = {http://dx.doi.org/10.1007/s11306-017-1242-7},
year = {2017},
month = {aug},
day = {9},
urldate = {2018-01-13},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {13},
number = {9},
doi = {10.1007/s11306-017-1242-7},
pmid = {28890673},
pmcid = {PMC5550549},
f1000-projects = {shared citations},
abstract = {{INTRODUCTION}: The field of metabolomics has expanded greatly over the past two decades, both as an experimental science with applications in many areas, as well as in regards to data standards and bioinformatics software tools. The diversity of experimental designs and instrumental technologies used for metabolomics has led to the need for distinct data analysis methods and the development of many software tools. {OBJECTIVES}: To compile a comprehensive list of the most widely used freely available software and tools that are used primarily in metabolomics. {METHODS}: The most widely used tools were selected for inclusion in the review by either ≥ 50 citations on Web of Science (as of 08/09/16) or the use of the tool being reported in the recent Metabolomics Society survey. Tools were then categorised by the type of instrumental data (i.e. {LC}-{MS}, {GC}-{MS} or {NMR}) and the functionality (i.e. pre- and post-processing, statistical analysis, workflow and other functions) they are designed for. {RESULTS}: A comprehensive list of the most used tools was compiled. Each tool is discussed within the context of its application domain and in relation to comparable tools of the same domain. An extended list including additional tools is available at https://github.com/{RASpicer}/{MetabolomicsTools} which is classified and searchable via a simple controlled vocabulary. {CONCLUSION}: This review presents the most widely used tools for metabolomics analysis, categorised based on their main functionality. As future work, we suggest a direct comparison of tools' abilities to perform specific data analysis tasks e.g. peak picking.}
}
@article{misra_2016,
title = {Updates in metabolomics tools and resources: 2014-2015.},
author = {Misra, Biswapriya B and van der Hooft, Justin J J},
pages = {86-110},
url = {http://dx.doi.org/10.1002/elps.201500417},
year = {2016},
month = {jan},
urldate = {2018-01-13},
journal = {Electrophoresis},
volume = {37},
number = {1},
doi = {10.1002/elps.201500417},
pmid = {26464019},
f1000-projects = {shared citations},
abstract = {Data processing and interpretation represent the most challenging and time-consuming steps in high-throughput metabolomic experiments, regardless of the analytical platforms ({MS} or {NMR} spectroscopy based) used for data acquisition. Improved machinery in metabolomics generates increasingly complex datasets that create the need for more and better processing and analysis software and in silico approaches to understand the resulting data. However, a comprehensive source of information describing the utility of the most recently developed and released metabolomics resources--in the form of tools, software, and databases--is currently lacking. Thus, here we provide an overview of freely-available, and open-source, tools, algorithms, and frameworks to make both upcoming and established metabolomics researchers aware of the recent developments in an attempt to advance and facilitate data processing workflows in their metabolomics research. The major topics include tools and researches for data processing, data annotation, and data visualization in {MS} and {NMR}-based metabolomics. Most in this review described tools are dedicated to untargeted metabolomics workflows; however, some more specialist tools are described as well. All tools and resources described including their analytical and computational platform dependencies are summarized in an overview Table. \copyright 2015 {WILEY}-{VCH} Verlag {GmbH} \& Co. {KGaA}, Weinheim.}
}
@article{misra_2017,
title = {Review of emerging metabolomic tools and resources: 2015-2016.},
author = {Misra, Biswapriya B and Fahrmann, Johannes F and Grapov, Dmitry},
pages = {2257-2274},
url = {http://dx.doi.org/10.1002/elps.201700110},
year = {2017},
month = {aug},
day = {1},
urldate = {2018-01-13},
journal = {Electrophoresis},
volume = {38},
number = {18},
doi = {10.1002/elps.201700110},
pmid = {28621886},
f1000-projects = {shared citations},
abstract = {Data processing and analysis are major bottlenecks in high-throughput metabolomic experiments. Recent advancements in data acquisition platforms are driving trends toward increasing data size (e.g., petabyte scale) and complexity (multiple omic platforms). Improvements in data analysis software and in silico methods are similarly required to effectively utilize these advancements and link the acquired data with biological interpretations. Herein, we provide an overview of recently developed and freely available metabolomic tools, algorithms, databases, and data analysis frameworks. This overview of popular tools for {MS} and {NMR}-based metabolomics is organized into the following sections: data processing, annotation, analysis, and visualization. The following overview of newly developed tools helps to better inform researchers to support the emergence of metabolomics as an integral tool for the study of biochemistry, systems biology, environmental analysis, health, and personalized medicine. \copyright 2017 {WILEY}-{VCH} Verlag {GmbH} \& Co. {KGaA}, Weinheim.}
}
@article{misra_2018,
title = {New tools and resources in metabolomics: 2016-2017.},
author = {Misra, Biswapriya B},
pages = {909-923},
url = {http://dx.doi.org/10.1002/elps.201700441},
year = {2018},
month = {apr},
urldate = {2018-01-13},
journal = {Electrophoresis},
volume = {39},
number = {7},
doi = {10.1002/elps.201700441},
pmid = {29292835},
f1000-projects = {shared citations},
abstract = {Rapid advances in mass spectrometry ({MS}) and nuclear magnetic resonance ({NMR})-based platforms for metabolomics have led to an upsurge of data every single year. Newer high-throughput platforms, hyphenated technologies, miniaturization, and tool kits in data acquisition efforts in metabolomics have led to additional challenges in metabolomics data pre-processing, analysis, interpretation, and integration. Thanks to the informatics, statistics, and computational community, new resources continue to develop for metabolomics researchers. The purpose of this review is to provide a summary of the metabolomics tools, software, and databases that were developed or improved during 2016-2017, thus, enabling readers, developers, and researchers access to a succinct but thorough list of resources for further improvisation, implementation, and application in due course of time. \copyright 2018 {WILEY}-{VCH} Verlag {GmbH} \& Co. {KGaA}, Weinheim.}
}
@article{libiseller_2015,
title = {{IPO}: a tool for automated optimization of {XCMS} parameters.},
author = {Libiseller, Gunnar and Dvorzak, Michaela and Kleb, Ulrike and Gander, Edgar and Eisenberg, Tobias and Madeo, Frank and Neumann, Steffen and Trausinger, Gert and Sinner, Frank and Pieber, Thomas and Magnes, Christoph},
pages = {118},
url = {http://dx.doi.org/10.1186/s12859-015-0562-8},
year = {2015},
month = {apr},
day = {16},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {16},
doi = {10.1186/s12859-015-0562-8},
pmid = {25888443},
pmcid = {PMC4404568},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Untargeted metabolomics generates a huge amount of data. Software packages for automated data processing are crucial to successfully process these data. A variety of such software packages exist, but the outcome of data processing strongly depends on algorithm parameter settings. If they are not carefully chosen, suboptimal parameter settings can easily lead to biased results. Therefore, parameter settings also require optimization. Several parameter optimization approaches have already been proposed, but a software package for parameter optimization which is free of intricate experimental labeling steps, fast and widely applicable is still missing. {RESULTS}: We implemented the software package {IPO} ('Isotopologue Parameter Optimization') which is fast and free of labeling steps, and applicable to data from different kinds of samples and data from different methods of liquid chromatography - high resolution mass spectrometry and data from different instruments. {IPO} optimizes {XCMS} peak picking parameters by using natural, stable (13)C isotopic peaks to calculate a peak picking score. Retention time correction is optimized by minimizing relative retention time differences within peak groups. Grouping parameters are optimized by maximizing the number of peak groups that show one peak from each injection of a pooled sample. The different parameter settings are achieved by design of experiments, and the resulting scores are evaluated using response surface models. {IPO} was tested on three different data sets, each consisting of a training set and test set. {IPO} resulted in an increase of reliable groups (146\% - 361\%), a decrease of non-reliable groups (3\% - 8\%) and a decrease of the retention time deviation to one third. {CONCLUSIONS}: {IPO} was successfully applied to data derived from liquid chromatography coupled to high resolution mass spectrometry from three studies with different sample types and different chromatographic methods and devices. We were also able to show the potential of {IPO} to increase the reliability of metabolomics data. The source code is implemented in R, tested on Linux and Windows and it is freely available for download at https://github.com/glibiseller/{IPO} . The training sets and test sets can be downloaded from https://health.joanneum.at/{IPO} .}
}
@article{broeckling_2014,
title = {{RAMClust}: a novel feature clustering method enables spectral-matching-based annotation for metabolomics data.},
author = {Broeckling, C D and Afsar, F A and Neumann, S and Ben-Hur, A and Prenni, J E},
pages = {6812-6817},
url = {http://dx.doi.org/10.1021/ac501530d},
year = {2014},
month = {jul},
day = {15},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {86},
number = {14},
doi = {10.1021/ac501530d},
pmid = {24927477},
f1000-projects = {shared citations},
abstract = {Metabolomic data are frequently acquired using chromatographically coupled mass spectrometry ({MS}) platforms. For such datasets, the first step in data analysis relies on feature detection, where a feature is defined by a mass and retention time. While a feature typically is derived from a single compound, a spectrum of mass signals is more a more-accurate representation of the mass spectrometric signal for a given metabolite. Here, we report a novel feature grouping method that operates in an unsupervised manner to group signals from {MS} data into spectra without relying on predictability of the in-source phenomenon. We additionally address a fundamental bottleneck in metabolomics, annotation of {MS} level signals, by incorporating indiscriminant {MS}/{MS} ({idMS}/{MS}) data implicitly: feature detection is performed on both {MS} and {idMS}/{MS} data, and feature-feature relationships are determined simultaneously from the {MS} and {idMS}/{MS} data. This approach facilitates identification of metabolites using in-source {MS} and/or {idMS}/{MS} spectra from a single experiment, reduces quantitative analytical variation compared to single-feature measures, and decreases false positive annotations of unpredictable phenomenon as novel compounds. This tool is released as a freely available R package, called {RAMClustR}, and is sufficiently versatile to group features from any chromatographic-spectrometric platform or feature-finding software.}
}
@article{stravs_2013,
title = {Automatic recalibration and processing of tandem mass spectra using formula annotation.},
author = {Stravs, Michael A and Schymanski, Emma L and Singer, Heinz P and Hollender, Juliane},
pages = {89-99},
url = {http://dx.doi.org/10.1002/jms.3131},
year = {2013},
month = {jan},
urldate = {2018-01-13},
journal = {Journal of Mass Spectrometry},
volume = {48},
number = {1},
doi = {10.1002/jms.3131},
pmid = {23303751},
f1000-projects = {shared citations},
abstract = {High accuracy, high resolution tandem mass spectrometry ({MS}/{MS}) is becoming more common in analytical applications, yet databases of these spectra remain limited. Databases require good quality spectra with sufficient compound information, but processing, calibration, noise reduction and retrieval of compound information are time-consuming tasks that prevent many contributions. We present a comprehensive workflow for the automatic processing of {MS}/{MS} using formula annotation for recalibration and cleanup to generate high quality spectra of standard compounds for upload to {MassBank} (www.massbank.jp). Compound information is retrieved via Internet services. Reference standards of 70 pesticides were measured at various collision energies on an {LTQ}-Orbitrap {XL} to develop and evaluate the workflow. A total of 944 resulting spectra are now available on {MassBank}. Evidence of nitrogen adduct formation during {MS}/{MS} fragmentation processes was found, highlighting the benefits high accuracy {MS}/{MS} offers for spectral interpretation. A database of recalibrated, cleaned-up spectra resulted in the most correct spectra ranked in first place, regardless of whether the search spectra were recalibrated or not, whereas the average rank of the correct molecular formula was improved from 2.55 (uncalibrated) to 1.53 when using recalibrated {MS}/{MS} data. The workflow is available as an R package {RMassBank} capable of generating {MassBank} records from raw {MS} and {MS}/{MS} data and can be adjusted to process data acquired with different settings and instruments. This workflow is a vital step towards addressing the need for more high quality, high accuracy {MS}/{MS} spectra in spectral databases and provides important information for spectral interpretation. Copyright \copyright 2012 John Wiley \& Sons, Ltd.}
}
@article{bcker_2008,
title = {{DECOMP}--from interpreting Mass Spectrometry peaks to solving the Money Changing Problem.},
author = {Böcker, Sebastian and Lipták, Zsuzsanna and Martin, Marcel and Pervukhin, Anton and Sudek, Henner},
pages = {591-593},
url = {http://dx.doi.org/10.1093/bioinformatics/btm631},
year = {2008},
month = {feb},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {24},
number = {4},
issn = {1460-2059},
doi = {10.1093/bioinformatics/btm631},
pmid = {18174179},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: We introduce Decomp, a tool that computes the sum formula of all molecules whose mass equals the input mass. This problem arises frequently in biochemistry and mass spectrometry ({MS}), when we know the molecular mass of a protein, {DNA} or metabolite fragment but have no other information. A closely related problem is known as the Money Changing Problem ({MCP}), where all masses are positive integers. Recently, efficient algorithms have been developed for the {MCP}, in which Decomp applies to real-valued {MS} data. The excellent performance of this method on proteomic and metabolomic {MS} data has recently been demonstrated. Decomp has an easy-to-use graphical interface, which caters for both types of users: those interested in solving {MCP} instances and those submitting {MS} data. {AVAILABILITY}: Decomp is freely accessible at http://bibiserv.techfak.uni-bielefeld.de/decomp/.}
}
@article{nicol_2012,
title = {{MSeasy}: unsupervised and untargeted {GC}-{MS} data processing.},
author = {Nicolè, Florence and Guitton, Yann and Courtois, Elodie A and Moja, Sandrine and Legendre, Laurent and Hossaert-{McKey}, Martine},
pages = {2278-2280},
url = {http://dx.doi.org/10.1093/bioinformatics/bts427},
year = {2012},
month = {sep},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {28},
number = {17},
doi = {10.1093/bioinformatics/bts427},
pmid = {22782550},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: {MSeasy} performs unsupervised data mining on gas chromatography-mass spectrometry data. It detects putative compounds within complex metabolic mixtures through the clustering of mass spectra. Retention times or retention indices are used after clustering, together with other validation criteria, for quality control of putative compounds. The package generates a fingerprinting or profiling matrix compatible with {NIST} mass spectral search program and {ARISTO} webtool (Automatic Reduction of Ion Spectra To Ontology) for molecule identification. Most commonly used file formats, {NetCDF}, {mzXML} and {ASCII}, are acceptable. A graphical and user-friendly interface, {MSeasyTkGUI}, is available for R novices. {AVAILABILITY}: {MSeasy} and {MSeasytkGUI} are implemented as R packages available at http://cran.r-project.org/web/packages/{MSeasy}/index.html and http://cran.r-project.org/web/packages/{MSeasyTkGUI}/index.html.}
}
@article{domingoalmenara_2016,
title = {{eRah}: A Computational Tool Integrating Spectral Deconvolution and Alignment with Quantification and Identification of Metabolites in {GC}/{MS}-Based Metabolomics.},
author = {Domingo-Almenara, Xavier and Brezmes, Jesus and Vinaixa, Maria and Samino, Sara and Ramirez, Noelia and Ramon-Krauel, Marta and Lerin, Carles and Díaz, Marta and Ibáñez, Lourdes and Correig, Xavier and Perera-Lluna, Alexandre and Yanes, Oscar},
pages = {9821-9829},
url = {http://dx.doi.org/10.1021/acs.analchem.6b02927},
year = {2016},
month = {oct},
day = {4},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {88},
number = {19},
doi = {10.1021/acs.analchem.6b02927},
pmid = {27584001},
f1000-projects = {shared citations},
abstract = {Gas chromatography coupled to mass spectrometry ({GC}/{MS}) has been a long-standing approach used for identifying small molecules due to the highly reproducible ionization process of electron impact ionization ({EI}). However, the use of {GC}-{EI} {MS} in untargeted metabolomics produces large and complex data sets characterized by coeluting compounds and extensive fragmentation of molecular ions caused by the hard electron ionization. In order to identify and extract quantitative information on metabolites across multiple biological samples, integrated computational workflows for data processing are needed. Here we introduce {eRah}, a free computational tool written in the open language R composed of five core functions: (i) noise filtering and baseline removal of {GC}/{MS} chromatograms, (ii) an innovative compound deconvolution process using multivariate analysis techniques based on compound match by local covariance ({CMLC}) and orthogonal signal deconvolution ({OSD}), (iii) alignment of mass spectra across samples, (iv) missing compound recovery, and (v) identification of metabolites by spectral library matching using publicly available mass spectra. {eRah} outputs a table with compound names, matching scores and the integrated area of compounds for each sample. The automated capabilities of {eRah} are demonstrated by the analysis of {GC}-time-of-flight ({TOF}) {MS} data from plasma samples of adolescents with hyperinsulinaemic androgen excess and healthy controls. The quantitative results of {eRah} are compared to {centWave}, the peak-picking algorithm implemented in the widely used {XCMS} package, {MetAlign}, and {ChromaTOF} software. Significantly dysregulated metabolites are further validated using pure standards and targeted analysis by {GC}-triple quadrupole ({QqQ}) {MS}, {LC}-{QqQ}, and {NMR}. {eRah} is freely available at http://{CRAN}.R-project.org/package=erah .}
}
@article{wehrens_2015a,
title = {Metabolite profiling in {LC}–{DAD} using multivariate curve resolution: the alsace package for R},
author = {Wehrens, Ron and Carvalho, Elisabete and Fraser, Paul D.},
pages = {143-154},
url = {http://link.springer.com/10.1007/s11306-014-0683-5},
year = {2015},
month = {feb},
urldate = {2018-01-13},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {11},
number = {1},
issn = {1573-3882},
doi = {10.1007/s11306-014-0683-5},
f1000-projects = {shared citations},
abstract = {For those chemical compounds absorbing in the {UV}–Vis region and not readily applicable to routine mass spectrometry ionisation methods, liquid chromatography coupled to diode array detection is a convenient platform to perform metabolite profiling. Data processing by hand is labour-intensive and error prone. In the present study a strategy based on multivariate curve resolution, and its implementation in an R package called alsace are described. The final result of an analysis is a table containing peak heights or peak areas for all features of the individual injections. The capabilities of the software, providing elements such as splitting the data into separate, possibly overlapping time windows, merging the results of the individual time windows, and parametric time warping to align features, are illustrated using a cassava-derived data set.}
}
@article{delabrire_2017,
title = {{proFIA}: a data preprocessing workflow for flow injection analysis coupled to high-resolution mass spectrometry.},
author = {Delabrière, Alexis and Hohenester, Ulli M and Colsch, Benoit and Junot, Christophe and Fenaille, François and Thévenot, Etienne A},
pages = {3767-3775},
url = {http://dx.doi.org/10.1093/bioinformatics/btx458},
year = {2017},
month = {dec},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {33},
number = {23},
doi = {10.1093/bioinformatics/btx458},
pmid = {29036359},
f1000-projects = {shared citations},
abstract = {Motivation: Flow Injection Analysis coupled to High-Resolution Mass Spectrometry ({FIA}-{HRMS}) is a promising approach for high-throughput metabolomics. {FIA}-{HRMS} data, however, cannot be preprocessed with current software tools which rely on liquid chromatography separation, or handle low resolution data only. Results: We thus developed the {proFIA} package, which implements a suite of innovative algorithms to preprocess {FIA}-{HRMS} raw files, and generates the table of peak intensities. The workflow consists of 3 steps: (i) noise estimation, peak detection and quantification, (ii) peak grouping across samples and (iii) missing value imputation. In addition, we have implemented a new indicator to quantify the potential alteration of the feature peak shape due to matrix effect. The preprocessing is fast (less than 15 s per file), and the value of the main parameters (ppm and dmz) can be easily inferred from the mass resolution of the instrument. Application to two metabolomics datasets (including spiked serum samples) showed high precision (96\%) and recall (98\%) compared with manual integration. These results demonstrate that {proFIA} achieves very efficient and robust detection and quantification of {FIA}-{HRMS} data, and opens new opportunities for high-throughput phenotyping. Availability and implementation: The {proFIA} software (as well as the {plasFIA} dataset) is available as an R package on the Bioconductor repository (http://bioconductor.org/packages/{proFIA}), and as a Galaxy module on the Main Toolshed (https://toolshed.g2.bx.psu.edu), and on the {Workflow4Metabolomics} online infrastructure (http://workflow4metabolomics.org). Contact: alexis.delabriere@cea.fr or etienne.thevenot@cea.fr. Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{brunius_2016,
title = {Large-scale untargeted {LC}-{MS} metabolomics data correction using between-batch feature alignment and cluster-based within-batch signal intensity drift correction.},
author = {Brunius, Carl and Shi, Lin and Landberg, Rikard},
pages = {173},
url = {http://dx.doi.org/10.1007/s11306-016-1124-4},
year = {2016},
month = {sep},
day = {22},
urldate = {2018-01-13},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {12},
number = {11},
doi = {10.1007/s11306-016-1124-4},
pmid = {27746707},
pmcid = {PMC5031781},
f1000-projects = {shared citations},
abstract = {{INTRODUCTION}: Liquid chromatography-mass spectrometry ({LC}-{MS}) is a commonly used technique in untargeted metabolomics owing to broad coverage of metabolites, high sensitivity and simple sample preparation. However, data generated from multiple batches are affected by measurement errors inherent to alterations in signal intensity, drift in mass accuracy and retention times between samples both within and between batches. These measurement errors reduce repeatability and reproducibility and may thus decrease the power to detect biological responses and obscure interpretation. {OBJECTIVE}: Our aim was to develop procedures to address and correct for within- and between-batch variability in processing multiple-batch untargeted {LC}-{MS} metabolomics data to increase their quality. {METHODS}: Algorithms were developed for: (i) alignment and merging of features that are systematically misaligned between batches, through aggregating feature presence/missingness on batch level and combining similar features orthogonally present between batches; and (ii) within-batch drift correction using a cluster-based approach that allows multiple drift patterns within batch. Furthermore, a heuristic criterion was developed for the feature-wise choice of reference-based or population-based between-batch normalisation. {RESULTS}: In authentic data, between-batch alignment resulted in picking 15 \% more features and deconvoluting 15 \% of features previously erroneously aligned. Within-batch correction provided a decrease in median quality control feature coefficient of variation from 20.5 to 15.1 \%. Algorithms are open source and available as an R package ('{batchCorr}'). {CONCLUSIONS}: The developed procedures provide unbiased measures of improved data quality, with implications for improved data analysis. Although developed for {LC}-{MS} based metabolomics, these methods are generic and can be applied to other data suffering from similar limitations.}
}
@article{chawade_2014,
title = {Normalyzer: a tool for rapid evaluation of normalization methods for omics data sets.},
author = {Chawade, Aakash and Alexandersson, Erik and Levander, Fredrik},
pages = {3114-3120},
url = {http://dx.doi.org/10.1021/pr401264n},
year = {2014},
month = {jun},
day = {6},
urldate = {2018-01-13},
journal = {Journal of Proteome Research},
volume = {13},
number = {6},
doi = {10.1021/pr401264n},
pmid = {24766612},
pmcid = {PMC4053077},
f1000-projects = {shared citations},
abstract = {High-throughput omics data often contain systematic biases introduced during various steps of sample processing and data generation. As the source of these biases is usually unknown, it is difficult to select an optimal normalization method for a given data set. To facilitate this process, we introduce the open-source tool "Normalyzer". It normalizes the data with 12 different normalization methods and generates a report with several quantitative and qualitative plots for comparative evaluation of different methods. The usefulness of Normalyzer is demonstrated with three different case studies from quantitative proteomics and transcriptomics. The results from these case studies show that the choice of normalization method strongly influences the outcome of downstream quantitative comparisons. Normalyzer is an R package and can be used locally or through the online implementation at http://quantitativeproteomics.org/normalyzer .}
}
@article{fernndezalbert_2014a,
title = {Intensity drift removal in {LC}/{MS} metabolomics by common variance compensation.},
author = {Fernández-Albert, Francesc and Llorach, Rafael and Garcia-Aloy, Mar and Ziyatdinov, Andrey and Andres-Lacueva, Cristina and Perera, Alexandre},
pages = {2899-2905},
url = {http://dx.doi.org/10.1093/bioinformatics/btu423},
year = {2014},
month = {oct},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {20},
doi = {10.1093/bioinformatics/btu423},
pmid = {24990606},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: Liquid chromatography coupled to mass spectrometry ({LC}/{MS}) has become widely used in Metabolomics. Several artefacts have been identified during the acquisition step in large {LC}/{MS} metabolomics experiments, including ion suppression, carryover or changes in the sensitivity and intensity. Several sources have been pointed out as responsible for these effects. In this context, the drift effects of the peak intensity is one of the most frequent and may even constitute the main source of variance in the data, resulting in misleading statistical results when the samples are analysed. In this article, we propose the introduction of a methodology based on a common variance analysis before the data normalization to address this issue. This methodology was tested and compared with four other methods by calculating the Dunn and Silhouette indices of the quality control classes. The results showed that our proposed methodology performed better than any of the other four methods. As far as we know, this is the first time that this kind of approach has been applied in the metabolomics context. {AVAILABILITY} {AND} {IMPLEMENTATION}: The source code of the methods is available as the R package {intCor} at http://b2slab.upc.edu/software-and-downloads/intensity-drift-correction/. {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author 2014. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@incollection{pollard_2005,
booktitle = {Bioinformatics and computational biology solutions using R and bioconductor},
title = {Multiple Testing Procedures: the multtest Package and Applications to Genomics},
author = {Pollard, K. S. and Dudoit, S. and van der Laan, M. J.},
editor = {Gentleman, Robert and Carey, Vincent J. and Huber, Wolfgang and Irizarry, Rafael A. and Dudoit, Sandrine and Wong, Wing and Gail, M. and Krickeberg, K. and Tsiatis, A. and Samet, J.},
series = {Statistics for biology and health},
pages = {249-271},
publisher = {Springer New York},
url = {http://link.springer.com/10.1007/0-387-29362-0\_15},
year = {2005},
urldate = {2018-01-13},
isbn = {978-0-387-25146-2},
issn = {1431-8776},
doi = {10.1007/0-387-29362-0\_15},
address = {New York, {NY}},
f1000-projects = {shared citations}
}
@article{nyamundanda_2010,
title = {Probabilistic principal component analysis for metabolomic data.},
author = {Nyamundanda, Gift and Brennan, Lorraine and Gormley, Isobel Claire},
pages = {571},
url = {http://dx.doi.org/10.1186/1471-2105-11-571},
year = {2010},
month = {nov},
day = {23},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {11},
doi = {10.1186/1471-2105-11-571},
pmid = {21092268},
pmcid = {PMC3006395},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Data from metabolomic studies are typically complex and high-dimensional. Principal component analysis ({PCA}) is currently the most widely used statistical technique for analyzing metabolomic data. However, {PCA} is limited by the fact that it is not based on a statistical model. {RESULTS}: Here, probabilistic principal component analysis ({PPCA}) which addresses some of the limitations of {PCA}, is reviewed and extended. A novel extension of {PPCA}, called probabilistic principal component and covariates analysis ({PPCCA}), is introduced which provides a flexible approach to jointly model metabolomic data and additional covariate information. The use of a mixture of {PPCA} models for discovering the number of inherent groups in metabolomic data is demonstrated. The jackknife technique is employed to construct confidence intervals for estimated model parameters throughout. The optimal number of principal components is determined through the use of the Bayesian Information Criterion model selection tool, which is modified to address the high dimensionality of the data. {CONCLUSIONS}: The methods presented are illustrated through an application to metabolomic data sets. Jointly modeling metabolomic data and covariates was successfully achieved and has the potential to provide deeper insight to the underlying data structure. Examination of confidence intervals for the model parameters, such as loadings, allows for principled and clear interpretation of the underlying data structure. A software package called {MetabolAnalyze}, freely available through the R statistical software, has been developed to facilitate implementation of the presented methods in the metabolomics field.}
}
@article{determanjr_2014,
title = {Optimal Algorithm for Metabolomics Classification and Feature Selection varies by Dataset},
author = {Determan Jr, Charles E.},
url = {http://www.ccsenet.org/journal/index.php/ijb/article/view/41816},
year = {2014},
month = {dec},
day = {11},
urldate = {2018-01-13},
journal = {International journal of biology},
volume = {7},
number = {1},
issn = {1916-{968X}},
doi = {10.5539/ijb.v7n1p100},
f1000-projects = {shared citations}
}
@article{rinaudo_2016,
title = {biosigner: A New Method for the Discovery of Significant Molecular Signatures from Omics Data.},
author = {Rinaudo, Philippe and Boudah, Samia and Junot, Christophe and Thévenot, Etienne A},
pages = {26},
url = {http://dx.doi.org/10.3389/fmolb.2016.00026},
year = {2016},
month = {jun},
day = {21},
urldate = {2018-01-13},
journal = {Frontiers in molecular biosciences},
volume = {3},
doi = {10.3389/fmolb.2016.00026},
pmid = {27446929},
pmcid = {PMC4914951},
f1000-projects = {shared citations},
abstract = {High-throughput technologies such as transcriptomics, proteomics, and metabolomics show great promise for the discovery of biomarkers for diagnosis and prognosis. Selection of the most promising candidates between the initial untargeted step and the subsequent validation phases is critical within the pipeline leading to clinical tests. Several statistical and data mining methods have been described for feature selection: in particular, wrapper approaches iteratively assess the performance of the classifier on distinct subsets of variables. Current wrappers, however, do not estimate the significance of the selected features. We therefore developed a new methodology to find the smallest feature subset which significantly contributes to the model performance, by using a combination of resampling, ranking of variable importance, significance assessment by permutation of the feature values in the test subsets, and half-interval search. We wrapped our biosigner algorithm around three reference binary classifiers (Partial Least Squares-Discriminant Analysis, Random Forest, and Support Vector Machines) which have been shown to achieve specific performances depending on the structure of the dataset. By using three real biological and clinical metabolomics and transcriptomics datasets (containing up to 7000 features), complementary signatures were obtained in a few minutes, generally providing higher prediction accuracies than the initial full model. Comparison with alternative feature selection approaches further indicated that our method provides signatures of restricted size and high stability. Finally, by using our methodology to seek metabolites discriminating type 1 from type 2 diabetic patients, several features were selected, including a fragment from the taurochenodeoxycholic bile acid. Our methodology, implemented in the biosigner R/Bioconductor package and Galaxy/Workflow4metabolomics module, should be of interest for both experimenters and statisticians to identify robust molecular signatures from large omics datasets in the process of developing new diagnostics.}
}
@article{wehrens_2012,
title = {Meta-Statistics for Variable Selection: {TheR} {PackageBioMark}},
author = {Wehrens, Ron and Franceschi, Pietro},
url = {http://www.jstatsoft.org/v51/i10/},
year = {2012},
urldate = {2018-01-13},
journal = {Journal of statistical software},
volume = {51},
number = {10},
issn = {1548-7660},
doi = {10.18637/jss.v051.i10},
f1000-projects = {shared citations}
}
@article{hernandezferrer_2017,
title = {{MultiDataSet}: an R package for encapsulating multiple data sets with application to omic data integration.},
author = {Hernandez-Ferrer, Carles and Ruiz-Arenas, Carlos and Beltran-Gomila, Alba and González, Juan R},
pages = {36},
url = {http://dx.doi.org/10.1186/s12859-016-1455-1},
year = {2017},
month = {jan},
day = {17},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {18},
number = {1},
doi = {10.1186/s12859-016-1455-1},
pmid = {28095799},
pmcid = {PMC5240259},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Reduction in the cost of genomic assays has generated large amounts of biomedical-related data. As a result, current studies perform multiple experiments in the same subjects. While Bioconductor's methods and classes implemented in different packages manage individual experiments, there is not a standard class to properly manage different omic datasets from the same subjects. In addition, most R/Bioconductor packages that have been designed to integrate and visualize biological data often use basic data structures with no clear general methods, such as subsetting or selecting samples. {RESULTS}: To cover this need, we have developed {MultiDataSet}, a new R class based on Bioconductor standards, designed to encapsulate multiple data sets. {MultiDataSet} deals with the usual difficulties of managing multiple and non-complete data sets while offering a simple and general way of subsetting features and selecting samples. We illustrate the use of {MultiDataSet} in three common situations: 1) performing integration analysis with third party packages; 2) creating new methods and functions for omic data integration; 3) encapsulating new unimplemented data from any biological experiment. {CONCLUSIONS}: {MultiDataSet} is a suitable class for data integration under R and Bioconductor framework.}
}
@article{nodzenski_2014,
title = {Metabomxtr: an R package for mixture-model analysis of non-targeted metabolomics data.},
author = {Nodzenski, Michael and Muehlbauer, Michael J and Bain, James R and Reisetter, Anna C and Lowe, William L and Scholtens, Denise M},
pages = {3287-3288},
url = {http://dx.doi.org/10.1093/bioinformatics/btu509},
year = {2014},
month = {nov},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {22},
doi = {10.1093/bioinformatics/btu509},
pmid = {25075114},
pmcid = {PMC4221120},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: Non-targeted metabolomics technologies often yield data in which abundance for any given metabolite is observed and quantified for some samples and reported as missing for other samples. Apparent missingness can be due to true absence of the metabolite in the sample or presence at a level below detectability. Mixture-model analysis can formally account for metabolite 'missingness' due to absence or undetectability, but software for this type of analysis in the high-throughput setting is limited. The R package metabomxtr has been developed to facilitate mixture-model analysis of non-targeted metabolomics data in which only a portion of samples have quantifiable abundance for certain metabolites. {AVAILABILITY} {AND} {IMPLEMENTATION}: metabomxtr is available through Bioconductor. It is released under the {GPL}-2 license. {CONTACT}: dscholtens@northwestern.edu {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author 2014. Published by Oxford University Press.}
}
@article{zhan_2015,
title = {Kernel approaches for differential expression analysis of mass spectrometry-based metabolomics data.},
author = {Zhan, Xiang and Patterson, Andrew D and Ghosh, Debashis},
pages = {77},
url = {http://dx.doi.org/10.1186/s12859-015-0506-3},
year = {2015},
month = {mar},
day = {11},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {16},
doi = {10.1186/s12859-015-0506-3},
pmid = {25887233},
pmcid = {PMC4359587},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Data generated from metabolomics experiments are different from other types of "-omics" data. For example, a common phenomenon in mass spectrometry ({MS})-based metabolomics data is that the data matrix frequently contains missing values, which complicates some quantitative analyses. One way to tackle this problem is to treat them as absent. Hence there are two types of information that are available in metabolomics data: presence/absence of a metabolite and a quantitative value of the abundance level of a metabolite if it is present. Combining these two layers of information poses challenges to the application of traditional statistical approaches in differential expression analysis. {RESULTS}: In this article, we propose a novel kernel-based score test for the metabolomics differential expression analysis. In order to simultaneously capture both the continuous pattern and discrete pattern in metabolomics data, two new kinds of kernels are designed. One is the distance-based kernel and the other is the stratified kernel. While we initially describe the procedures in the case of single-metabolite analysis, we extend the methods to handle metabolite sets as well. {CONCLUSIONS}: Evaluation based on both simulated data and real data from a liver cancer metabolomics study indicates that our kernel method has a better performance than some existing alternatives. An implementation of the proposed kernel method in the R statistical computing environment is available at http://works.bepress.com/debashis\_ghosh/60/ .}
}
@article{loos_2015,
title = {Accelerated isotope fine structure calculation using pruned transition trees.},
author = {Loos, Martin and Gerber, Christian and Corona, Francesco and Hollender, Juliane and Singer, Heinz},
pages = {5738-5744},
url = {http://dx.doi.org/10.1021/acs.analchem.5b00941},
year = {2015},
month = {jun},
day = {2},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {87},
number = {11},
doi = {10.1021/acs.analchem.5b00941},
pmid = {25929282},
f1000-projects = {shared citations},
abstract = {A fast and memory-efficient calculation of theoretical isotope patterns is crucial for the routine interpretation of mass spectrometric data. For high-resolution experiments, calculations must procure the exact masses and probabilities of relevant isotopologues over a wide range of polyisotopic compounds, while pruning low-probable ones. Here, a novel albeit simple treelike structure is introduced to swiftly derive sets of relevant subisotopologues for each element in a molecule, which are then combined to the isotopologues of the full molecule. In contrast to existing approaches, transitions via single replacements of the most abundant isotope per element are used in separable tree branches to derive subisotopologues from each other. Moreover, the underlying transition trees prevent redundant replacements and permit the detection of the most probable isotopologue in a first phase. A relative threshold can then be exploited in a second parallelized phase for a precise prepruning of large fractions of the remaining subisotopologues. The gain in performance from such early pruning and the lower variation in the distortion of simulated data with use of relative rather than absolute thresholds were validated in a large-scale benchmark simulation, unprecedentedly comprising several thousand molecular formulas. Both the algorithm and a wealth of related features are freely available as R-package {enviPat} and as a user-friendly Web interface.}
}
@article{jaeger_2017,
title = {Compound annotation in liquid chromatography/high-resolution mass spectrometry based metabolomics: robust adduct ion determination as a prerequisite to structure prediction in electrospray ionization mass spectra.},
author = {Jaeger, Carsten and Méret, Michaël and Schmitt, Clemens A and Lisec, Jan},
pages = {1261-1266},
url = {http://dx.doi.org/10.1002/rcm.7905},
year = {2017},
month = {aug},
day = {15},
urldate = {2018-01-13},
journal = {Rapid Communications in Mass Spectrometry},
volume = {31},
number = {15},
doi = {10.1002/rcm.7905},
pmid = {28499062},
f1000-projects = {shared citations},
abstract = {{RATIONALE}: A bottleneck in metabolic profiling of complex biological extracts is confident, non-supervised annotation of ideally all contained, chemically highly diverse small molecules. Recent computational strategies combining sum formula prediction with in silico fragmentation achieve confident de novo annotation, once the correct neutral mass of a compound is known. Current software solutions for automated adduct ion assignment, however, are either publicly unavailable or have been validated against only few experimental electrospray ionization ({ESI}) mass spectra. {METHODS}: We here present {findMAIN} (find Main Adduct {IoN}), a new heuristic approach for interpreting {ESI} mass spectra. {findMAIN} scores {MS1} spectra based on explained intensity, mass accuracy and isotope charge agreement of adducts and related ionization products and annotates peaks of the (de)protonated molecule and adduct ions. The approach was validated against 1141 {ESI} positive mode spectra of chemically diverse standard compounds acquired on different high-resolution mass spectrometric instruments (Orbitrap and time-of-flight). Robustness against impure spectra was evaluated. {RESULTS}: Correct adduct ion assignment was achieved for up to 83\% of the spectra. Performance was independent of compound class and mass spectrometric platform. The algorithm proved highly tolerant against spectral contamination as demonstrated exemplarily for co-eluting compounds as well as systematically by pairwise mixing of spectra. When used in conjunction with {MS}-{FINDER}, a state-of-the-art sum formula tool, correct sum formulas were obtained for 77\% of spectra. It outperformed both 'brute force' approaches and current state-of-the-art annotation packages tested as potential alternatives. Limitations of the heuristic pertained to poorly ionizing compounds and cationic compounds forming [M]+ ions. {CONCLUSIONS}: A new, validated approach for interpreting {ESI} mass spectra is presented, filling a gap in the nontargeted metabolomics workflow. It is freely available in the latest version of R package {InterpretM\SSpectrum}. Copyright \copyright 2017 John Wiley \& Sons, Ltd.}
}
@article{wang_2013,
title = {{fmcsR}: mismatch tolerant maximum common substructure searching in R.},
author = {Wang, Yan and Backman, Tyler W H and Horan, Kevin and Girke, Thomas},
pages = {2792-2794},
url = {http://dx.doi.org/10.1093/bioinformatics/btt475},
year = {2013},
month = {nov},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {29},
number = {21},
doi = {10.1093/bioinformatics/btt475},
pmid = {23962615},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: The ability to accurately measure structural similarities among small molecules is important for many analysis routines in drug discovery and chemical genomics. Algorithms used for this purpose include fragment-based fingerprint and graph-based maximum common substructure ({MCS}) methods. {MCS} approaches provide one of the most accurate similarity measures. However, their rigid matching policies limit them to the identification of perfect {MCSs}. To eliminate this restriction, we introduce a new mismatch tolerant search method for identifying flexible {MCSs} ({FMCSs}) containing a user-definable number of atom and/or bond mismatches. {RESULTS}: The {fmcsR} package provides an R interface, with the time-consuming steps of the {FMCS} algorithm implemented in C++. It includes utilities for pairwise compound comparisons, structure similarity searching, clustering and visualization of {MCSs}. In comparison with an existing {MCS} tool, {fmcsR} shows better time performance over a wide range of compound sizes. When mismatching of atoms or bonds is turned on, the compute times increase as expected, and the resulting {FMCSs} are often substantially larger than their strict {MCS} counterparts. Based on extensive virtual screening ({VS}) tests, the flexible matching feature enhances the enrichment of active structures at the top of {MCS}-based similarity search results. With respect to overall and early enrichment performance, {FMCS} outperforms most of the seven other {VS} methods considered in these tests. {AVAILABILITY}: {fmcsR} is freely available for all common operating systems from the Bioconductor site (http://www.bioconductor.org/packages/devel/bioc/html/{fmcsR}.html). {CONTACT}: thomas.girke@ucr.edu. {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online.}
}
@article{cao_2013,
title = {Computational analyses of spectral trees from electrospray multi-stage mass spectrometry to aid metabolite identification.},
author = {Cao, Mingshu and Fraser, Karl and Rasmussen, Susanne},
pages = {1036-1050},
url = {http://dx.doi.org/10.3390/metabo3041036},
year = {2013},
month = {oct},
day = {31},
urldate = {2018-01-13},
journal = {Metabolites},
volume = {3},
number = {4},
doi = {10.3390/metabo3041036},
pmid = {24958264},
pmcid = {PMC3937840},
f1000-projects = {shared citations},
abstract = {Mass spectrometry coupled with chromatography has become the major technical platform in metabolomics. Aided by peak detection algorithms, the detected signals are characterized by mass-over-charge ratio (m/z) and retention time. Chemical identities often remain elusive for the majority of the signals. Multi-stage mass spectrometry based on electrospray ionization ({ESI}) allows collision-induced dissociation ({CID}) fragmentation of selected precursor ions. These fragment ions can assist in structural inference for metabolites of low molecular weight. Computational investigations of fragmentation spectra have increasingly received attention in metabolomics and various public databases house such data. We have developed an R package "iontree" that can capture, store and analyze {MS2} and {MS3} mass spectral data from high throughput metabolomics experiments. The package includes functions for ion tree construction, an algorithm ({distMS2}) for {MS2} spectral comparison, and tools for building platform-independent ion tree ({MS2}/{MS3}) libraries. We have demonstrated the utilization of the package for the systematic analysis and annotation of fragmentation spectra collected in various metabolomics platforms, including direct infusion mass spectrometry, and liquid chromatography coupled with either low resolution or high resolution mass spectrometry. Assisted by the developed computational tools, we have demonstrated that spectral trees can provide informative evidence complementary to retention time and accurate mass to aid with annotating unknown peaks. These experimental spectral trees once subjected to a quality control process, can be used for querying public {MS2} databases or de novo interpretation. The putatively annotated spectral trees can be readily incorporated into reference libraries for routine identification of metabolites.}
}
@article{uppal_2015,
title = {{MetabNet}: An R Package for Metabolic Association Analysis of High-Resolution Metabolomics Data.},
author = {Uppal, Karan and Soltow, Quinlyn A and Promislow, Daniel E L and Wachtman, Lynn M and Quyyumi, Arshed Ali and Jones, Dean P},
pages = {87},
url = {http://dx.doi.org/10.3389/fbioe.2015.00087},
year = {2015},
month = {jun},
day = {11},
urldate = {2018-01-13},
journal = {Frontiers in bioengineering and biotechnology},
volume = {3},
doi = {10.3389/fbioe.2015.00087},
pmid = {26125020},
pmcid = {PMC4464066},
f1000-projects = {shared citations},
abstract = {Liquid-chromatography high-resolution mass spectrometry provides capability to measure \textgreater40,000 ions derived from metabolites in biologic samples. This presents challenges to confirm identities of known chemicals and delineate potential metabolic pathway associations of unidentified chemicals. We provide an R package for metabolic network analysis, {MetabNet}, to perform targeted metabolome-wide association study of specific metabolites to facilitate detection of their related metabolic pathways and network structures.}
}
@article{lawson_2017,
title = {{msPurity}: Automated Evaluation of Precursor Ion Purity for Mass Spectrometry-Based Fragmentation in Metabolomics.},
author = {Lawson, Thomas N and Weber, Ralf J M and Jones, Martin R and Chetwynd, Andrew J and Rodrı Guez-Blanco, Giovanny and Di Guida, Riccardo and Viant, Mark R and Dunn, Warwick B},
pages = {2432-2439},
url = {http://dx.doi.org/10.1021/acs.analchem.6b04358},
year = {2017},
month = {feb},
day = {21},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {89},
number = {4},
doi = {10.1021/acs.analchem.6b04358},
pmid = {28194963},
f1000-projects = {shared citations},
abstract = {Tandem mass spectrometry ({MS}/{MS} or {MS2}) is a widely used approach for structural annotation and identification of metabolites in complex biological samples. The importance of assessing the contribution of the precursor ion within an isolation window for {MS2} experiments has been previously detailed in proteomics, where precursor ion purity influences the quality and accuracy of matching to mass spectral libraries, but to date, there has been little attention to this data-processing technique in metabolomics. Here, we present {msPurity}, a vendor-independent R package for liquid chromatography ({LC}) and direct infusion ({DI}) {MS2} that calculates a simple metric to describe the contribution of the selected precursor. The precursor purity metric is calculated as "intensity of a selected precursor divided by the summed intensity of the isolation window". The metric is interpolated at the recorded point of {MS2} acquisition using bordering full-scan spectra. Isotopic peaks of the selected precursor can be removed, and low abundance peaks that are believed to have limited contribution to the resulting {MS2} spectra are removed. Additionally, the isolation efficiency of the mass spectrometer can be taken into account. The package was applied to Data Dependent Acquisition ({DDA})-based {MS2} metabolomics data sets derived from three metabolomics data repositories. For the 10 {LC}-{MS2} {DDA} data sets with \textgreater ±1 Da isolation windows, the median precursor purity score ranged from 0.67 to 0.96 (scale = 0 to +1). The R package was also used to assess precursor purity of theoretical isolation windows from {LC}-{MS} data sets of differing sample types. The theoretical isolation windows being the same width used for an anticipated {DDA} experiment (±0.5 Da). The most complex sample had a median precursor purity score of 0.46 for the 64,498 {XCMS} determined features, in comparison to the less spectrally complex sample that had a purity score of 0.66 for 5071 {XCMS} features. It has been previously reported in proteomics that a purity score of \textless 0.5 can produce unreliable spectra matching results. With this assumption, we show that for complex samples there will be a large number of metabolites where traditional {DDA} approaches will struggle to provide reliable annotations or accurate matches to mass spectral libraries.}
}
@article{neumann_2013,
title = {Nearline acquisition and processing of liquid chromatography-tandem mass spectrometry data},
author = {Neumann, Steffen and Thum, Andrea and Böttcher, Christoph},
pages = {84-91},
url = {http://link.springer.com/10.1007/s11306-012-0401-0},
year = {2013},
month = {mar},
urldate = {2018-01-13},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {9},
number = {S1},
issn = {1573-3882},
doi = {10.1007/s11306-012-0401-0},
f1000-projects = {shared citations}
}
@article{jungreuthmayer_2016,
title = {{ICT}: isotope correction toolbox.},
author = {Jungreuthmayer, Christian and Neubauer, Stefan and Mairinger, Teresa and Zanghellini, Jürgen and Hann, Stephan},
pages = {154-156},
url = {http://dx.doi.org/10.1093/bioinformatics/btv514},
year = {2016},
month = {jan},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {32},
number = {1},
doi = {10.1093/bioinformatics/btv514},
pmid = {26382193},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: Isotope tracer experiments are an invaluable technique to analyze and study the metabolism of biological systems. However, isotope labeling experiments are often affected by naturally abundant isotopes especially in cases where mass spectrometric methods make use of derivatization. The correction of these additive interferences--in particular for complex isotopic systems--is numerically challenging and still an emerging field of research. When positional information is generated via collision-induced dissociation, even more complex calculations for isotopic interference correction are necessary. So far, no freely available tools can handle tandem mass spectrometry data. We present isotope correction toolbox, a program that corrects tandem mass isotopomer data from tandem mass spectrometry experiments. Isotope correction toolbox is written in the multi-platform programming language Perl and, therefore, can be used on all commonly available computer platforms. {AVAILABILITY} {AND} {IMPLEMENTATION}: Source code and documentation can be freely obtained under the Artistic License or the {GNU} General Public License from: https://github.com/jungreuc/isotope\_correction\_toolbox/ {CONTACT}: \{christian.jungreuthmayer@boku.ac.at,juergen.zanghellini@boku.ac.at\} {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author 2015. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{costa_2016,
title = {An R package for the integrated analysis of metabolomics and spectral data.},
author = {Costa, Christopher and Maraschin, Marcelo and Rocha, Miguel},
pages = {117-124},
url = {http://dx.doi.org/10.1016/j.cmpb.2016.01.008},
year = {2016},
month = {jun},
urldate = {2018-01-13},
journal = {Computer Methods and Programs in Biomedicine},
volume = {129},
doi = {10.1016/j.cmpb.2016.01.008},
pmid = {26853041},
f1000-projects = {shared citations},
abstract = {Recently, there has been a growing interest in the field of metabolomics, materialized by a remarkable growth in experimental techniques, available data and related biological applications. Indeed, techniques as nuclear magnetic resonance, gas or liquid chromatography, mass spectrometry, infrared and {UV}-visible spectroscopies have provided extensive datasets that can help in tasks as biological and biomedical discovery, biotechnology and drug development. However, as it happens with other omics data, the analysis of metabolomics datasets provides multiple challenges, both in terms of methodologies and in the development of appropriate computational tools. Indeed, from the available software tools, none addresses the multiplicity of existing techniques and data analysis tasks. In this work, we make available a novel R package, named specmine, which provides a set of methods for metabolomics data analysis, including data loading in different formats, pre-processing, metabolite identification, univariate and multivariate data analysis, machine learning, and feature selection. Importantly, the implemented methods provide adequate support for the analysis of data from diverse experimental techniques, integrating a large set of functions from several R packages in a powerful, yet simple to use environment. The package, already available in {CRAN}, is accompanied by a web site where users can deposit datasets, scripts and analysis reports to be shared with the community, promoting the efficient sharing of metabolomics data analysis pipelines. Copyright \copyright 2016 Elsevier Ireland Ltd. All rights reserved.}
}
@article{edmands_2015,
title = {{MetMSLine}: an automated and fully integrated pipeline for rapid processing of high-resolution {LC}-{MS} metabolomic datasets.},
author = {Edmands, William M B and Barupal, Dinesh K and Scalbert, Augustin},
pages = {788-790},
url = {http://dx.doi.org/10.1093/bioinformatics/btu705},
year = {2015},
month = {mar},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {31},
number = {5},
doi = {10.1093/bioinformatics/btu705},
pmid = {25348215},
pmcid = {PMC4341062},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: {MetMSLine} represents a complete collection of functions in the R programming language as an accessible {GUI} for biomarker discovery in large-scale liquid-chromatography high-resolution mass spectral datasets from acquisition through to final metabolite identification forming a backend to output from any peak-picking software such as {XCMS}. {MetMSLine} automatically creates subdirectories, data tables and relevant figures at the following steps: (i) signal smoothing, normalization, filtration and noise transformation ({PreProc}.{QC}.{LSC}.R); (ii) {PCA} and automatic outlier removal (Auto.{PCA}.R); (iii) automatic regression, biomarker selection, hierarchical clustering and cluster ion/artefact identification (Auto.{MV}.Regress.R); (iv) Biomarker-{MS}/{MS} fragmentation spectra matching and fragment/neutral loss annotation (Auto.{MS}.{MS}.match.R) and (v) semi-targeted metabolite identification based on a list of theoretical masses obtained from public databases ({DBAnnotate}.R). {AVAILABILITY} {AND} {IMPLEMENTATION}: All source code and suggested parameters are available in an un-encapsulated layout on http://wmbedmands.github.io/{MetMSLine}/. Readme files and a synthetic dataset of both X-variables (simulated {LC}-{MS} data), Y-variables (simulated continuous variables) and metabolite theoretical masses are also available on our {GitHub} repository. \copyright The Author 2014. Published by Oxford University Press.}
}
@article{theul_2009,
title = {Collaborative Software Development Using R-Forge},
author = {Theußl, Stefan and Zeileis, Achim},
pages = {9},
url = {https://journal.r-project.org/archive/2009/{RJ}-2009-007/index.html},
year = {2009},
urldate = {2019-08-01},
journal = {The R journal},
volume = {1},
number = {1},
issn = {2073-4859},
doi = {10.32614/{RJ}-2009-007},
f1000-projects = {shared citations}
}
@article{hedjazi_2015,
title = {{mQTL}.{NMR}: an integrated suite for genetic mapping of quantitative variations of (1)H {NMR}-based metabolic profiles.},
author = {Hedjazi, Lyamine and Gauguier, Dominique and Zalloua, Pierre A and Nicholson, Jeremy K and Dumas, Marc-Emmanuel and Cazier, Jean-Baptiste},
pages = {4377-4384},
url = {http://dx.doi.org/10.1021/acs.analchem.5b00145},
year = {2015},
month = {apr},
day = {21},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {87},
number = {8},
doi = {10.1021/acs.analchem.5b00145},
pmid = {25803548},
f1000-projects = {shared citations},
abstract = {High-throughput (1)H nuclear magnetic resonance ({NMR}) is an increasingly popular robust approach for qualitative and quantitative metabolic profiling, which can be used in conjunction with genomic techniques to discover novel genetic associations through metabotype quantitative trait locus ({mQTL}) mapping. There is therefore a crucial necessity to develop specialized tools for an accurate detection and unbiased interpretability of the genetically determined metabolic signals. Here we introduce and implement a combined chemoinformatic approach for objective and systematic analysis of untargeted (1)H {NMR}-based metabolic profiles in quantitative genetic contexts. The R/Bioconductor {mQTL}.{NMR} package was designed to (i) perform a series of preprocessing steps restoring spectral dependency in collinear {NMR} data sets to reduce the multiple testing burden, (ii) carry out robust and accurate {mQTL} mapping in human cohorts as well as in rodent models, (iii) statistically enhance structural assignment of genetically determined metabolites, and (iv) illustrate results with a series of visualization tools. Built-in flexibility and implementation in the powerful R/Bioconductor framework allow key preprocessing steps such as peak alignment, normalization, or dimensionality reduction to be tailored to specific problems. The {mQTL}.{NMR} package is freely available with its source code through the Comprehensive R/Bioconductor repository and its own website ( http://www.ican-institute.org/tools/ ). It represents a significant advance to facilitate untargeted metabolomic data processing and quantitative analysis and their genetic mapping.}
}
@article{martin_2017,
title = {{PepsNMR} for the {1H}-{NMR} metabolomic data pre-processing},
author = {Martin, M and Legat, B and Leenders, J and Vanwinsberghe, J and Rousseau, R and Boulanger, B and Eilers, P.H. and De Tullio, P and Govaerts, B},
url = {https://dial.uclouvain.be/pr/boreal/object/boreal\%{3A187159}/datastream/{PDF\_01}/view},
year = {2017},
urldate = {2018-01-15},
journal = {{UCL}-Université Catholique de Louvain},
f1000-projects = {shared citations}
}
@article{beirnaert_2017,
title = {speaq 2.0: a complete workflow for high-throughput {1D} {NMR} spectra processing and quantification},
author = {Beirnaert, Charlie and Meysman, Pieter and Vu, Trung Nghia and Hermans, Nina and Apers, Sandra and Pieters, Luc and Covaci, Adrian and Laukens, Kris},
url = {http://biorxiv.org/lookup/doi/10.1101/138503},
year = {2017},
month = {may},
day = {16},
urldate = {2018-01-15},
journal = {BioRxiv},
doi = {10.1101/138503},
f1000-projects = {shared citations},
abstract = {Nuclear Magnetic Resonance ({NMR}) spectroscopy is, together with liquid chromatography-mass spectrometry ({LC}-{MS}), the most established platform to perform metabolomics. In contrast to {LC}-{MS} however, {NMR} data is predominantly being processed with commercial software. This has the effect that its data processing remains tedious and dependent on user interventions. As a follow-up to speaq, a previously released workflow for {NMR} spectral alignment and quantitation, we present speaq 2.0. This completely revised framework to automatically analyze {1D} {NMR} spectra uses wavelets to efficiently summarize the raw spectra with minimal information loss or user interaction. The tool offers a fast and easy workflow that starts with the common approach of peak-picking, followed by grouping. This yields a matrix consisting of features, samples and peak values that can be conveniently processed either by using included multivariate statistical functions or by using many other recently developed methods for {NMR} data analysis. speaq 2.0 facilitates robust and high-throughput metabolomics based on {1D} {NMR} but is also compatible with other {NMR} frameworks or complementary {LC}-{MS} workflows. The methods are benchmarked using two publicly available datasets. speaq 2.0 is distributed through the existing speaq R package to provide a complete solution for {NMR} data processing. The package and the code for the presented case studies are freely available on {CRAN} (https://cran.r-project.org/package=speaq) and {GitHub} (https://github.com/beirnaert/speaq).}
}
@article{ranjbar_2015,
title = {{SIMAT}: {GC}-{SIM}-{MS} data analysis tool.},
author = {Ranjbar, Mohammad R Nezami and Di Poto, Cristina and Wang, Yue and Ressom, Habtom W},
pages = {259},
url = {http://dx.doi.org/10.1186/s12859-015-0681-2},
year = {2015},
month = {aug},
day = {19},
urldate = {2018-01-15},
journal = {{BMC} Bioinformatics},
volume = {16},
doi = {10.1186/s12859-015-0681-2},
pmid = {26283310},
pmcid = {PMC4539696},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Gas chromatography coupled with mass spectrometry ({GC}-{MS}) is one of the technologies widely used for qualitative and quantitative analysis of small molecules. In particular, {GC} coupled to single quadrupole {MS} can be utilized for targeted analysis by selected ion monitoring ({SIM}). However, to our knowledge, there are no software tools specifically designed for analysis of {GC}-{SIM}-{MS} data. In this paper, we introduce a new R/Bioconductor package called {SIMAT} for quantitative analysis of the levels of targeted analytes. {SIMAT} provides guidance in choosing fragments for a list of targets. This is accomplished through an optimization algorithm that has the capability to select the most appropriate fragments from overlapping chromatographic peaks based on a pre-specified library of background analytes. The tool also allows visualization of the total ion chromatograms ({TIC}) of runs and extracted ion chromatograms ({EIC}) of analytes of interest. Moreover, retention index ({RI}) calibration can be performed and raw {GC}-{SIM}-{MS} data can be imported in {netCDF} or {NIST} mass spectral library ({MSL}) formats. {RESULTS}: We evaluated the performance of {SIMAT} using two {GC}-{SIM}-{MS} datasets obtained by targeted analysis of: (1) plasma samples from 86 patients in a targeted metabolomic experiment; and (2) mixtures of internal standards spiked in plasma samples at varying concentrations in a method development study. Our results demonstrate that {SIMAT} offers alternative solutions to {AMDIS} and {MetaboliteDetector} to achieve accurate detection of targets and estimation of their relative intensities by analysis of {GC}-{SIM}-{MS} data. {CONCLUSIONS}: We introduce a new R package called {SIMAT} that allows the selection of the optimal set of fragments and retention time windows for target analytes in {GC}-{SIM}-{MS} based analysis. Also, various functions and algorithms are implemented in the tool to: (1) read and import raw data and spectral libraries; (2) perform {GC}-{SIM}-{MS} data preprocessing; and (3) plot and visualize {EICs} and {TICs}.}
}
@article{chen_2015,
title = {{MetTailor}: dynamic block summary and intensity normalization for robust analysis of mass spectrometry data in metabolomics.},
author = {Chen, Gengbo and Cui, Liang and Teo, Guo Shou and Ong, Choon Nam and Tan, Chuen Seng and Choi, Hyungwon},
pages = {3645-3652},
url = {http://dx.doi.org/10.1093/bioinformatics/btv434},
year = {2015},
month = {nov},
day = {15},
urldate = {2018-01-16},
journal = {Bioinformatics},
volume = {31},
number = {22},
doi = {10.1093/bioinformatics/btv434},
pmid = {26220962},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Accurate cross-sample peak alignment and reliable intensity normalization is a critical step for robust quantitative analysis in untargetted metabolomics since tandem mass spectrometry ({MS}/{MS}) is rarely used for compound identification. Therefore shortcomings in the data processing steps can easily introduce false positives due to misalignments and erroneous normalization adjustments in large sample studies. {RESULTS}: In this work, we developed a software package {MetTailor} featuring two novel data preprocessing steps to remedy drawbacks in the existing processing tools. First, we propose a novel dynamic block summarization ({DBS}) method for correcting misalignments from peak alignment algorithms, which alleviates missing data problem due to misalignments. For the purpose of verifying correct re-alignments, we propose to use the cross-sample consistency in isotopic intensity ratios as a quality metric. Second, we developed a flexible intensity normalization procedure that adjusts normalizing factors against the temporal variations in total ion chromatogram ({TIC}) along the chromatographic retention time ({RT}). We first evaluated the {DBS} algorithm using a curated metabolomics dataset, illustrating that the algorithm identifies misaligned peaks and correctly realigns them with good sensitivity. We next demonstrated the {DBS} algorithm and the {RT}-based normalization procedure in a large-scale dataset featuring \textgreater100 sera samples in primary Dengue infection study. Although the initial alignment was successful for the majority of peaks, the {DBS} algorithm still corrected ∼7000 misaligned peaks in this data and many recovered peaks showed consistent isotopic patterns with the peaks they were realigned to. In addition, the {RT}-based normalization algorithm efficiently removed visible local variations in {TIC} along the {RT}, without sacrificing the sensitivity of detecting differentially expressed metabolites. {AVAILABILITY} {AND} {IMPLEMENTATION}: The R package {MetTailor} is freely available at the {SourceForge} website http://mettailor.sourceforge.net/. {CONTACT}: hyung\_won\_choi@nuhs.edu.sg {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author 2015. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{takahashi_2011,
title = {{AMDORAP}: non-targeted metabolic profiling based on high-resolution {LC}-{MS}.},
author = {Takahashi, Hiroki and Morimoto, Takuya and Ogasawara, Naotake and Kanaya, Shigehiko},
pages = {259},
url = {http://dx.doi.org/10.1186/1471-2105-12-259},
year = {2011},
month = {jun},
day = {24},
urldate = {2018-01-16},
journal = {{BMC} Bioinformatics},
volume = {12},
doi = {10.1186/1471-2105-12-259},
pmid = {21702951},
pmcid = {PMC3149581},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Liquid chromatography-mass spectrometry ({LC}-{MS}) utilizing the high-resolution power of an orbitrap is an important analytical technique for both metabolomics and proteomics. Most important feature of the orbitrap is excellent mass accuracy. Thus, it is necessary to convert raw data to accurate and reliable m/z values for metabolic fingerprinting by high-resolution {LC}-{MS}. {RESULTS}: In the present study, we developed a novel, easy-to-use and straightforward m/z detection method, {AMDORAP}. For assessing the performance, we used real biological samples, Bacillus subtilis strains 168 and {MGB874}, in the positive mode by {LC}-orbitrap. For 14 identified compounds by measuring the authentic compounds, we compared obtained m/z values with other {LC}-{MS} processing tools. The errors by {AMDORAP} were distributed within ±3 ppm and showed the best performance in m/z value accuracy. {CONCLUSIONS}: Our method can detect m/z values of biological samples much more accurately than other {LC}-{MS} analysis tools. {AMDORAP} allows us to address the relationships between biological effects and cellular metabolites based on accurate m/z values. Obtaining the accurate m/z values from raw data should be indispensable as a starting point for comparative {LC}-orbitrap analysis. {AMDORAP} is freely available under an open-source license at http://amdorap.sourceforge.net/.}
}
@article{lisec_2016,
title = {Extending the dynamic range in metabolomics experiments by automatic correction of peaks exceeding the detection limit.},
author = {Lisec, Jan and Hoffmann, Friederike and Schmitt, Clemens and Jaeger, Carsten},
pages = {7487-7492},
url = {http://dx.doi.org/10.1021/acs.analchem.6b02515},
year = {2016},
month = {aug},
day = {2},
urldate = {2018-01-16},
journal = {Analytical Chemistry},
volume = {88},
number = {15},
doi = {10.1021/acs.analchem.6b02515},
pmid = {27377477},
f1000-projects = {shared citations},
abstract = {Metabolomics, the analysis of potentially all small molecules within a biological system, has become a valuable tool for biomarker identification and the elucidation of biological processes. While metabolites are often present in complex mixtures at extremely different concentrations, the dynamic range of available analytical methods to capture this variance is generally limited. Here, we show that gas chromatography coupled to atmospheric pressure chemical ionization mass spectrometry ({GC}-{APCI}-{MS}), a state of the art analytical technology applied in metabolomics analyses, shows an average linear range ({LR}) of 2.39 orders of magnitude for a set of 62 metabolites from a representative compound mixture. We further developed a computational tool to extend this dynamic range on average by more than 1 order of magnitude, demonstrated with a dilution series of the compound mixture, using robust and automatic reconstruction of intensity values exceeding the detection limit. The tool is freely available as an R package ({CorrectOverloadedPeaks}) from {CRAN} ( https://cran.r-project.org/ ) and can be incorporated in a metabolomics data processing pipeline facilitating large screening assays.}
}
@article{suvitaival_2014,
title = {Stronger findings from mass spectral data through multi-peak modeling.},
author = {Suvitaival, Tommi and Rogers, Simon and Kaski, Samuel},
pages = {208},
url = {http://dx.doi.org/10.1186/1471-2105-15-208},
year = {2014},
month = {jun},
day = {19},
urldate = {2018-01-16},
journal = {{BMC} Bioinformatics},
volume = {15},
doi = {10.1186/1471-2105-15-208},
pmid = {24947013},
pmcid = {PMC4080774},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Mass spectrometry-based metabolomic analysis depends upon the identification of spectral peaks by their mass and retention time. Statistical analysis that follows the identification currently relies on one main peak of each compound. However, a compound present in the sample typically produces several spectral peaks due to its isotopic properties and the ionization process of the mass spectrometer device. In this work, we investigate the extent to which these additional peaks can be used to increase the statistical strength of differential analysis. {RESULTS}: We present a Bayesian approach for integrating data of multiple detected peaks that come from one compound. We demonstrate the approach through a simulated experiment and validate it on ultra performance liquid chromatography-mass spectrometry ({UPLC}-{MS}) experiments for metabolomics and lipidomics. Peaks that are likely to be associated with one compound can be clustered by the similarity of their chromatographic shape. Changes of concentration between sample groups can be inferred more accurately when multiple peaks are available. {CONCLUSIONS}: When the sample-size is limited, the proposed multi-peak approach improves the accuracy at inferring covariate effects. An R implementation and data are available at http://research.ics.aalto.fi/mi/software/{peakANOVA}/.}
}
@article{collins_2016,
title = {{LOBSTAHS}: An Adduct-Based Lipidomics Strategy for Discovery and Identification of Oxidative Stress Biomarkers.},
author = {Collins, James R and Edwards, Bethanie R and Fredricks, Helen F and Van Mooy, Benjamin A S},
pages = {7154-7162},
url = {http://dx.doi.org/10.1021/acs.analchem.6b01260},
year = {2016},
month = {jul},
day = {19},
urldate = {2018-01-16},
journal = {Analytical Chemistry},
volume = {88},
number = {14},
doi = {10.1021/acs.analchem.6b01260},
pmid = {27322848},
f1000-projects = {shared citations},
abstract = {Discovery and identification of molecular biomarkers in large {LC}/{MS} data sets requires significant automation without loss of accuracy in the compound screening and annotation process. Here, we describe a lipidomics workflow and open-source software package for high-throughput annotation and putative identification of lipid, oxidized lipid, and oxylipin biomarkers in high-mass-accuracy {HPLC}-{MS} data. Lipid and oxylipin biomarker screening through adduct hierarchy sequences, or {LOBSTAHS}, uses orthogonal screening criteria based on adduct ion formation patterns and other properties to identify thousands of compounds while providing the user with a confidence score for each assignment. Assignments are made from one of two customizable databases; the default databases contain 14 068 unique entries. To demonstrate the software's functionality, we screened more than 340 000 mass spectral features from an experiment in which hydrogen peroxide was used to induce oxidative stress in the marine diatom Phaeodactylum tricornutum. {LOBSTAHS} putatively identified 1969 unique parent compounds in 21 869 features that survived the multistage screening process. While P. tricornutum maintained more than 92\% of its core lipidome under oxidative stress, patterns in biomarker distribution and abundance indicated remodeling was both subtle and pervasive. Treatment with 150 μM {H2O2} promoted statistically significant carbon-chain elongation across lipid classes, with the strongest elongation accompanying oxidation in moieties of monogalactosyldiacylglycerol, a lipid typically localized to the chloroplast. Oxidative stress also induced a pronounced reallocation of lipidome peak area to triacylglycerols. {LOBSTAHS} can be used with environmental or experimental data from a variety of systems and is freely available at https://github.com/vanmooylipidomics/{LOBSTAHS} .}
}
@article{aggio_2010,
title = {Pathway Activity Profiling ({PAPi}): from the metabolite profile to the metabolic pathway activity.},
author = {Aggio, Raphael B M and Ruggiero, Katya and Villas-Bôas, Silas Granato},
pages = {2969-2976},
url = {http://dx.doi.org/10.1093/bioinformatics/btq567},
year = {2010},
month = {dec},
day = {1},
urldate = {2018-01-17},
journal = {Bioinformatics},
volume = {26},
number = {23},
doi = {10.1093/bioinformatics/btq567},
pmid = {20929912},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Metabolomics is one of the most recent omics-technologies and uses robust analytical techniques to screen low molecular mass metabolites in biological samples. It has evolved very quickly during the last decade. However, metabolomics datasets are considered highly complex when used to relate metabolite levels to metabolic pathway activity. Despite recent developments in bioinformatics, which have improved the quality of metabolomics data, there is still no straightforward method capable of correlating metabolite level to the activity of different metabolic pathways operating within the cells. Thus, this kind of analysis still depends on extremely laborious and time-consuming processes. {RESULTS}: Here, we present a new algorithm Pathway Activity Profiling ({PAPi}) with which we are able to compare metabolic pathway activities from metabolite profiles. The applicability and potential of {PAPi} was demonstrated using a previously published data from the yeast Saccharomyces cerevisiae. {PAPi} was able to support the biological interpretations of the previously published observations and, in addition, generated new hypotheses in a straightforward manner. However, {PAPi} is time consuming to perform manually. Thus, we also present here a new R-software package ({PAPi}) which implements the {PAPi} algorithm and facilitates its usage to quickly compare metabolic pathways activities between different experimental conditions. Using the identified metabolites and their respective abundances as input, the {PAPi} package calculates pathways' Activity Scores, which represents the potential metabolic pathways activities and allows their comparison between conditions. {PAPi} also performs principal components analysis and analysis of variance or t-test to investigate differences in activity level between experimental conditions. In addition, {PAPi} generates comparative graphs highlighting up- and down-regulated pathway activity. {AVAILABILITY}: These datasets are available in http://www.4shared.com/file/{hTWyndYU}/extra.html and http://www.4shared.com/file/{VbQIIDeu}/intra.html. {PAPi} package is available in: http://www.4shared.com/file/{s0uIYWIg}/{PAPi\_10}.html {CONTACT}: s.villas-boas@auckland.ac.nz {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online.}
}
@article{caldernsantiago_2016,
title = {{MSCombine}: a tool for merging untargeted metabolomic data from high-resolution mass spectrometry in the positive and negative ionization modes},
author = {Calderón-Santiago, Mónica and Fernández-Peralbo, María A. and Priego-Capote, Feliciano and Luque de Castro, María D.},
pages = {43},
url = {http://link.springer.com/10.1007/s11306-016-0970-4},
year = {2016},
month = {mar},
urldate = {2018-01-17},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {12},
number = {3},
issn = {1573-3882},
doi = {10.1007/s11306-016-0970-4},
f1000-projects = {shared citations},
abstract = {The wide chemical diversity of metabolites present in biological samples hinders full characterization of their metabolomic profile by using a single analytical technique. However, {LC}–{MS} is a competitive technique for obtaining a comprehensive description of a wide range of metabolites by using a combination of the two ionization modes, positive and negative, the suitability of which for this purpose depends on the chemical structure of the particular metabolites. One of the main shortcomings of this approach is that it produces two independent data sets (one per polarity) that require two parallel data analyses to identify significant metabolites associated to the target biological effect. As a result, analyzing the data is a time-consuming step of metabolomic analysis. In this work, this shortcoming was overcome by developing a tool to combine the data matrices for the positive and negative ionization modes in {MS}. The most critical function of the tool is filtering off the molecular features of a given metabolite in whichever ionization mode they are detected with the less sensitivity in order to avoid using redundant information and complicating the statistical treatment as a result. The software allows the two data sets to be merged into a single one that can be statistically processed in the conventional metabolomics workflow. Its efficiency was assessed by application to data sets obtained from the analysis of human serum and urine.}
}
@article{capellades_2016,
title = {{geoRge}: A Computational Tool To Detect the Presence of Stable Isotope Labeling in {LC}/{MS}-Based Untargeted Metabolomics.},
author = {Capellades, Jordi and Navarro, Miriam and Samino, Sara and Garcia-Ramirez, Marta and Hernandez, Cristina and Simo, Rafael and Vinaixa, Maria and Yanes, Oscar},
pages = {621-628},
url = {http://dx.doi.org/10.1021/acs.analchem.5b03628},
year = {2016},
month = {jan},
day = {5},
urldate = {2018-01-17},
journal = {Analytical Chemistry},
volume = {88},
number = {1},
doi = {10.1021/acs.analchem.5b03628},
pmid = {26639619},
f1000-projects = {shared citations},
abstract = {Studying the flow of chemical moieties through the complex set of metabolic reactions that happen in the cell is essential to understanding the alterations in homeostasis that occur in disease. Recently, {LC}/{MS}-based untargeted metabolomics and isotopically labeled metabolites have been used to facilitate the unbiased mapping of labeled moieties through metabolic pathways. However, due to the complexity of the resulting experimental data sets few computational tools are available for data analysis. Here we introduce {geoRge}, a novel computational approach capable of analyzing untargeted {LC}/{MS} data from stable isotope-labeling experiments. {geoRge} is written in the open language R and runs on the output structure of the {XCMS} package, which is in widespread use. As opposed to the few existing tools, which use labeled samples to track stable isotopes by iterating over all {MS} signals using the theoretical mass difference between the light and heavy isotopes, {geoRge} uses unlabeled and labeled biologically equivalent samples to compare isotopic distributions in the mass spectra. Isotopically enriched compounds change their isotopic distribution as compared to unlabeled compounds. This is directly reflected in a number of new m/z peaks and higher intensity peaks in the mass spectra of labeled samples relative to the unlabeled equivalents. The automated untargeted isotope annotation and relative quantification capabilities of {geoRge} are demonstrated by the analysis of {LC}/{MS} data from a human retinal pigment epithelium cell line ({ARPE}-19) grown on normal and high glucose concentrations mimicking diabetic retinopathy conditions in vitro. In addition, we compared the results of {geoRge} with the outcome of X(13){CMS}, since both approaches rely entirely on {XCMS} parameters for feature selection, namely m/z and retention time values. To ensure data traceability and reproducibility, and enabling for comparison with other existing and future approaches, raw {LC}/{MS} files have been deposited in {MetaboLights} ({MTBLS213}) and {geoRge} is available as an R script at https://github.com/jcapelladesto/{geoRge}.}
}
@article{domingoalmenara_2015,
title = {Compound identification in gas chromatography/mass spectrometry-based metabolomics by blind source separation.},
author = {Domingo-Almenara, Xavier and Perera, Alexandre and Ramírez, Noelia and Cañellas, Nicolau and Correig, Xavier and Brezmes, Jesus},
pages = {226-233},
url = {http://dx.doi.org/10.1016/j.chroma.2015.07.044},
year = {2015},
month = {aug},
day = {28},
urldate = {2018-01-17},
journal = {Journal of Chromatography. A},
volume = {1409},
doi = {10.1016/j.chroma.2015.07.044},
pmid = {26210114},
f1000-projects = {shared citations},
abstract = {Metabolomics {GC}-{MS} samples involve high complexity data that must be effectively resolved to produce chemically meaningful results. Multivariate curve resolution-alternating least squares ({MCR}-{ALS}) is the most frequently reported technique for that purpose. More recently, independent component analysis ({ICA}) has been reported as an alternative to {MCR}. Those algorithms attempt to infer a model describing the observed data and, therefore, the least squares regression used in {MCR} assumes that the data is a linear combination of that model. However, due to the high complexity of real data, the construction of a model to describe optimally the observed data is a critical step and these algorithms should prevent the influence from outlier data. This study proves independent component regression ({ICR}) as an alternative for {GC}-{MS} compound identification. Both {ICR} and {MCR} though require least squares regression to correctly resolve the mixtures. In this paper, a novel orthogonal signal deconvolution ({OSD}) approach is introduced, which uses principal component analysis to determine the compound spectra. The study includes a compound identification comparison between the results by {ICA}-{OSD}, {MCR}-{OSD}, {ICR} and {MCR}-{ALS} using pure standards and human serum samples. Results shows that {ICR} may be used as an alternative to multivariate curve methods, as {ICR} efficiency is comparable to {MCR}-{ALS}. Also, the study demonstrates that the proposed {OSD} approach achieves greater spectral resolution accuracy than the traditional least squares approach when compounds elute under undue interference of biological matrices. Copyright \copyright 2015 Elsevier B.V. All rights reserved.}
}
@article{domingoalmenara_2016a,
title = {Automated resolution of chromatographic signals by independent component analysis-orthogonal signal deconvolution in comprehensive gas chromatography/mass spectrometry-based metabolomics.},
author = {Domingo-Almenara, Xavier and Perera, Alexandre and Ramírez, Noelia and Brezmes, Jesus},
pages = {135-141},
url = {http://dx.doi.org/10.1016/j.cmpb.2016.03.007},
year = {2016},
month = {jul},
urldate = {2018-01-17},
journal = {Computer Methods and Programs in Biomedicine},
volume = {130},
doi = {10.1016/j.cmpb.2016.03.007},
pmid = {27208528},
f1000-projects = {shared citations},
abstract = {Comprehensive gas chromatography-mass spectrometry ({GC}×{GC}-{MS}) provides a different perspective in metabolomics profiling of samples. However, algorithms for {GC}×{GC}-{MS} data processing are needed in order to automatically process the data and extract the purest information about the compounds appearing in complex biological samples. This study shows the capability of independent component analysis-orthogonal signal deconvolution ({ICA}-{OSD}), an algorithm based on blind source separation and distributed in an R package called osd, to extract the spectra of the compounds appearing in {GC}×{GC}-{MS} chromatograms in an automated manner. We studied the performance of {ICA}-{OSD} by the quantification of 38 metabolites through a set of 20 Jurkat cell samples analyzed by {GC}×{GC}-{MS}. The quantification by {ICA}-{OSD} was compared with a supervised quantification by selective ions, and most of the R(2) coefficients of determination were in good agreement (R(2)\textgreater0.90) while up to 24 cases exhibited an excellent linear relation (R(2)\textgreater0.95). We concluded that {ICA}-{OSD} can be used to resolve co-eluted compounds in {GC}×{GC}-{MS}. Copyright \copyright 2016 Elsevier Ireland Ltd. All rights reserved.}
}
@article{kim_2014,
title = {A new method of peak detection for analysis of comprehensive two-dimensional gas chromatography mass spectrometry data.},
author = {Kim, Seongho and Ouyang, Ming and Jeong, Jaesik and Shen, Changyu and Zhang, Xiang},
pages = {1209-1231},
url = {https://www.ncbi.nlm.nih.gov/pubmed/25264474},
year = {2014},
month = {jun},
urldate = {2018-01-17},
journal = {The annals of applied statistics},
volume = {8},
number = {2},
pmid = {25264474},
pmcid = {PMC4175529},
f1000-projects = {shared citations},
abstract = {We develop a novel peak detection algorithm for the analysis of comprehensive two-dimensional gas chromatography time-of-flight mass spectrometry ({GC}×{GC}-{TOF} {MS}) data using normal-exponential-Bernoulli ({NEB}) and mixture probability models. The algorithm first performs baseline correction and denoising simultaneously using the {NEB} model, which also defines peak regions. Peaks are then picked using a mixture of probability distribution to deal with the co-eluting peaks. Peak merging is further carried out based on the mass spectral similarities among the peaks within the same peak group. The algorithm is evaluated using experimental data to study the effect of different cut-offs of the conditional Bayes factors and the effect of different mixture models including Poisson, truncated Gaussian, Gaussian, Gamma, and exponentially modified Gaussian ({EMG}) distributions, and the optimal version is introduced using a trial-and-error approach. We then compare the new algorithm with two existing algorithms in terms of compound identification. Data analysis shows that the developed algorithm can detect the peaks with lower false discovery rates than the existing algorithms, and a less complicated peak picking model is a promising alternative to the more complicated and widely used {EMG} mixture models.}
}
@article{kim_2011a,
title = {An optimal peak alignment for comprehensive two-dimensional gas chromatography mass spectrometry using mixture similarity measure.},
author = {Kim, Seongho and Fang, Aiqin and Wang, Bing and Jeong, Jaesik and Zhang, Xiang},
pages = {1660-1666},
url = {http://dx.doi.org/10.1093/bioinformatics/btr188},
year = {2011},
month = {jun},
day = {15},
urldate = {2018-01-17},
journal = {Bioinformatics},
volume = {27},
number = {12},
doi = {10.1093/bioinformatics/btr188},
pmid = {21493650},
pmcid = {PMC3106184},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Comprehensive two-dimensional gas chromatography mass spectrometry ({GC} × {GC}-{MS}) brings much increased separation capacity, chemical selectivity and sensitivity for metabolomics and provides more accurate information about metabolite retention times and mass spectra. However, there is always a shift of retention times in the two columns that makes it difficult to compare metabolic profiles obtained from multiple samples exposed to different experimental conditions. {RESULTS}: The existing peak alignment algorithms for {GC} × {GC}-{MS} data use the peak distance and the spectra similarity sequentially and require predefined either distance-based window and/or spectral similarity-based window. To overcome the limitations of the current alignment methods, we developed an optimal peak alignment using a novel mixture similarity by employing the peak distance and the spectral similarity measures simultaneously without any variation windows. In addition, we examined the effect of the four different distance measures such as Euclidean, Maximum, Manhattan and Canberra distances on the peak alignment. The performance of our proposed peak alignment algorithm was compared with the existing alignment methods on the two sets of {GC} × {GC}-{MS} data. Our analysis showed that Canberra distance performed better than other distances and the proposed mixture similarity peak alignment algorithm prevailed against all literature reported methods. {AVAILABILITY}: The data and software {mSPA} are available at http://stage.louisville.edu/faculty/x0zhan17/software/software-development.}
}
@article{wei_2018,
title = {Missing Value Imputation Approach for Mass Spectrometry-based Metabolomics Data.},
author = {Wei, Runmin and Wang, Jingye and Su, Mingming and Jia, Erik and Chen, Shaoqiu and Chen, Tianlu and Ni, Yan},
pages = {663},
url = {http://dx.doi.org/10.1038/s41598-017-19120-0},
year = {2018},
month = {jan},
day = {12},
urldate = {2018-01-29},
journal = {Scientific reports},
volume = {8},
number = {1},
doi = {10.1038/s41598-017-19120-0},
pmid = {29330539},
pmcid = {PMC5766532},
f1000-projects = {shared citations},
abstract = {Missing values exist widely in mass-spectrometry ({MS}) based metabolomics data. Various methods have been applied for handling missing values, but the selection can significantly affect following data analyses. Typically, there are three types of missing values, missing not at random ({MNAR}), missing at random ({MAR}), and missing completely at random ({MCAR}). Our study comprehensively compared eight imputation methods (zero, half minimum ({HM}), mean, median, random forest ({RF}), singular value decomposition ({SVD}), k-nearest neighbors ({kNN}), and quantile regression imputation of left-censored data ({QRILC})) for different types of missing values using four metabolomics datasets. Normalized root mean squared error ({NRMSE}) and {NRMSE}-based sum of ranks ({SOR}) were applied to evaluate imputation accuracy. Principal component analysis ({PCA})/partial least squares ({PLS})-Procrustes analysis were used to evaluate the overall sample distribution. Student's t-test followed by correlation analysis was conducted to evaluate the effects on univariate statistics. Our findings demonstrated that {RF} performed the best for {MCAR}/{MAR} and {QRILC} was the favored one for left-censored {MNAR}. Finally, we proposed a comprehensive strategy and developed a public-accessible web-tool for the application of missing value imputation in metabolomics ( https://metabolomics.cc.hawaii.edu/software/{MetImp}/ ).}
}
@article{dill_2010,
title = {Multivariate statistical differentiation of renal cell carcinomas based on lipidomic analysis by ambient ionization imaging mass spectrometry.},
author = {Dill, Allison L and Eberlin, Livia S and Zheng, Cheng and Costa, Anthony B and Ifa, Demian R and Cheng, Liang and Masterson, Timothy A and Koch, Michael O and Vitek, Olga and Cooks, R Graham},
pages = {2969-2978},
url = {http://dx.doi.org/10.1007/s00216-010-4259-6},
year = {2010},
month = {dec},
urldate = {2018-01-28},
journal = {Analytical and Bioanalytical Chemistry},
volume = {398},
number = {7-8},
doi = {10.1007/s00216-010-4259-6},
pmid = {20953777},
f1000-projects = {shared citations},
abstract = {Desorption electrospray ionization ({DESI}) mass spectrometry ({MS}) was used in an imaging mode to interrogate the lipid profiles of thin tissue sections of 11 sample pairs of human papillary renal cell carcinoma ({RCC}) and adjacent normal tissue and nine sample pairs of clear cell {RCC} and adjacent normal tissue. {DESI}-{MS} images showing the spatial distributions of particular glycerophospholipids ({GPs}) and free fatty acids in the negative ion mode were compared to serial tissue sections stained with hematoxylin and eosin (H\&E). Increased absolute intensities as well as changes in relative abundance were seen for particular compounds in the tumor regions of the samples. Multivariate statistical analysis using orthogonal projection to latent structures treated partial least square discriminate analysis ({PLS}-{DA}) was used for visualization and classification of the tissue pairs using the full mass spectra as predictors. {PLS}-{DA} successfully distinguished tumor from normal tissue for both papillary and clear cell {RCC} with misclassification rates obtained from the validation set of 14.3\% and 7.8\%, respectively. It was also used to distinguish papillary and clear cell {RCC} from each other and from the combined normal tissues with a reasonable misclassification rate of 23\%, as determined from the validation set. Overall {DESI}-{MS} imaging combined with multivariate statistical analysis shows promise as a molecular pathology technique for diagnosing cancerous and normal tissue on the basis of {GP} profiles.}
}
@article{murrell_2015,
title = {Chemically Aware Model Builder (camb): an R package for property and bioactivity modelling of small molecules.},
author = {Murrell, Daniel S and Cortes-Ciriano, Isidro and van Westen, Gerard J P and Stott, Ian P and Bender, Andreas and Malliavin, Thérèse E and Glen, Robert C},
pages = {45},
url = {http://dx.doi.org/10.1186/s13321-015-0086-2},
year = {2015},
month = {aug},
day = {28},
urldate = {2018-01-29},
journal = {Journal of cheminformatics},
volume = {7},
doi = {10.1186/s13321-015-0086-2},
pmid = {26322135},
pmcid = {PMC4551546},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: In silico predictive models have proved to be valuable for the optimisation of compound potency, selectivity and safety profiles in the drug discovery process. {RESULTS}: camb is an R package that provides an environment for the rapid generation of quantitative Structure-Property and Structure-Activity models for small molecules (including {QSAR}, {QSPR}, {QSAM}, {PCM}) and is aimed at both advanced and beginner R users. camb's capabilities include the standardisation of chemical structure representation, computation of 905 one-dimensional and 14 fingerprint type descriptors for small molecules, 8 types of amino acid descriptors, 13 whole protein sequence descriptors, filtering methods for feature selection, generation of predictive models (using an interface to the R package caret), as well as techniques to create model ensembles using techniques from the R package {caretEnsemble}). Results can be visualised through high-quality, customisable plots (R package ggplot2). {CONCLUSIONS}: Overall, camb constitutes an open-source framework to perform the following steps: (1) compound standardisation, (2) molecular and protein descriptor calculation, (3) descriptor pre-processing and model training, visualisation and validation, and (4) bioactivity/property prediction for new molecules. camb aims to speed model generation, in order to provide reproducibility and tests of robustness. {QSPR} and proteochemometric case studies are included which demonstrate camb's application.Graphical {abstractFrom} compounds and data to models: a complete model building workflow in one package.}
}
@article{feng_2018,
title = {Antibiotic resistome in a large-scale healthy human gut microbiota deciphered by metagenomic and network analyses.},
author = {Feng, Jie and Li, Bing and Jiang, Xiaotao and Yang, Ying and Wells, George F and Zhang, Tong and Li, Xiaoyan},
pages = {355-368},
url = {http://dx.doi.org/10.1111/1462-2920.14009},
year = {2018},
urldate = {2019-07-01},
journal = {Environmental Microbiology},
volume = {20},
number = {1},
doi = {10.1111/1462-2920.14009},
pmid = {29194931},
f1000-projects = {shared citations},
abstract = {The human gut microbiota is an important reservoir of antibiotic resistance genes ({ARGs}). A metagenomic approach and network analysis were used to establish a comprehensive antibiotic resistome catalog and to obtain co-occurrence patterns between {ARGs} and microbial taxa in fecal samples from 180 healthy individuals from 11 different countries. In total, 507 {ARG} subtypes belonging to 20 {ARG} types were detected with abundances ranging from 7.12 × 10-7 to 2.72 × 10-1 copy of {ARG}/copy of {16S}-{rRNA} gene. Tetracycline, multidrug, macrolide-lincosamide-streptogramin, bacitracin, vancomycin, beta-lactam and aminoglycoside resistance genes were the top seven most abundant {ARG} types. The multidrug {ABC} transporter, {aadE}, {bacA}, {acrB}, {tetM}, {tetW}, {vanR} and {vanS} were shared by all 180 individuals, suggesting their common occurrence in the human gut. Compared to populations from the other 10 countries, the Chinese population harboured the most abundant {ARGs}. Moreover, {LEfSe} analysis suggested that the {MLS} resistance type and its subtype '{ermF}' were representative {ARGs} of the Chinese population. Antibiotic inactivation, antibiotic target alteration and antibiotic efflux were the dominant resistance mechanism categories in all populations. Procrustes analysis revealed that microbial phylogeny structured the antibiotic resistome. Co-occurrence patterns obtained via network analysis implied that 12 species might be potential hosts of 58 {ARG} subtypes. \copyright 2017 Society for Applied Microbiology and John Wiley \& Sons Ltd.}
}
@incollection{erxleben_2014,
booktitle = {The semantic web – {ISWC} 2014},
title = {Introducing wikidata to the linked data web},
author = {Erxleben, Fredo and Günther, Michael and Krötzsch, Markus and Mendez, Julian and Vrandečić, Denny},
editor = {Mika, Peter and Tudorache, Tania and Bernstein, Abraham and Welty, Chris and Knoblock, Craig and Vrandečić, Denny and Groth, Paul and Noy, Natasha and Janowicz, Krzysztof and Goble, Carole},
series = {Lecture notes in computer science},
pages = {50-65},
publisher = {Springer International Publishing},
url = {http://link.springer.com/10.1007/978-3-319-11964-9\_4},
year = {2014},
urldate = {2018-01-29},
volume = {8796},
isbn = {978-3-319-11963-2},
issn = {0302-9743},
doi = {10.1007/978-3-319-11964-9\_4},
address = {Cham},
f1000-projects = {shared citations}
}
@article{pence_2010,
title = {Chemspider: an online chemical information resource},
author = {Pence, Harry E. and Williams, Antony},
pages = {1123-1124},
url = {http://pubs.acs.org/doi/abs/10.1021/ed100697w},
year = {2010},
month = {nov},
urldate = {2018-01-29},
journal = {Journal of chemical education},
volume = {87},
number = {11},
issn = {0021-9584},
doi = {10.1021/ed100697w},
f1000-projects = {shared citations}
}
@article{bond_2017,
title = {{massPix}: an R package for annotation and interpretation of mass spectrometry imaging data for lipidomics.},
author = {Bond, Nicholas J and Koulman, Albert and Griffin, Julian L and Hall, Zoe},
pages = {128},
url = {http://dx.doi.org/10.1007/s11306-017-1252-5},
year = {2017},
month = {sep},
day = {21},
urldate = {2018-01-29},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {13},
number = {11},
doi = {10.1007/s11306-017-1252-5},
pmid = {28989334},
pmcid = {PMC5608769},
f1000-projects = {shared citations},
abstract = {{INTRODUCTION}: Mass spectrometry imaging ({MSI}) experiments result in complex multi-dimensional datasets, which require specialist data analysis tools. {OBJECTIVES}: We have developed {massPix}-an R package for analysing and interpreting data from {MSI} of lipids in tissue. {METHODS}: {massPix} produces single ion images, performs multivariate statistics and provides putative lipid annotations based on accurate mass matching against generated lipid libraries. {RESULTS}: Classification of tissue regions with high spectral similarly can be carried out by principal components analysis ({PCA}) or k-means clustering. {CONCLUSION}: {massPix} is an open-source tool for the analysis and statistical interpretation of {MSI} data, and is particularly useful for lipidomics applications.}
}
@article{edmands_2017a,
title = {{simExTargId}: An R package for real-time {LC}-{MS} metabolomic data analysis, instrument failure/drift notification and {MS2} target identification},
author = {Edmands, William M. B. and Rappaport, Stephen M.},
url = {http://biorxiv.org/lookup/doi/10.1101/151159},
year = {2017},
month = {jun},
day = {16},
urldate = {2018-01-29},
journal = {BioRxiv},
doi = {10.1101/151159},
f1000-projects = {shared citations},
abstract = {The {simExTargId} R package provides real-time, autonomous, within-laboratory data analysis during a metabolomic {LC}-{MS1}-profiling experiment. Of concern to metabolomic investigators are instrumentation failure (especially for precious samples), outlier identification, instrument signal attenuation and pre-emptive feature identification for {MS2} fragmentation. {SimExTargId} allows observation of an experiment in progress with {PCA} plot and peak table outputs and also two shiny applications {targetId} for {MS2} target identification and {peakMonitor} for signal attenuation monitoring. {SimExTargId} is ideally utilised on a (temporarily) dedicated work-station or server which is networked to a {LC}-{MS} data directory. Features include: email notification for instrument stoppage/drift, file format conversion, peak-picking, pre-processing, {PCA}-based out-lier identification and statistical analysis. Additional {MS1}/{MS2} experiments can be concatenated to a worklist or cleaning/recalibration undertaken if instrument drift is observed. All source code and a vignette with example data are available on {GitHub} https://github.com/{WMBEdmands}/{simExTargId}.}
}
@article{rodriguezmartinez_2018,
title = {{MWASTools}: an R/bioconductor package for metabolome-wide association studies.},
author = {Rodriguez-Martinez, Andrea and Posma, Joram M and Ayala, Rafael and Neves, Ana L and Anwar, Maryam and Petretto, Enrico and Emanueli, Costanza and Gauguier, Dominique and Nicholson, Jeremy K and Dumas, Marc-Emmanuel},
pages = {890-892},
url = {http://dx.doi.org/10.1093/bioinformatics/btx477},
year = {2018},
month = {mar},
day = {1},
urldate = {2018-01-29},
journal = {Bioinformatics},
volume = {34},
number = {5},
doi = {10.1093/bioinformatics/btx477},
pmid = {28961702},
pmcid = {PMC6049002},
f1000-projects = {shared citations},
abstract = {Summary: {MWASTools} is an R package designed to provide an integrated pipeline to analyse metabonomic data in large-scale epidemiological studies. Key functionalities of our package include: quality control analysis; metabolome-wide association analysis using various models (partial correlations, generalized linear models); visualization of statistical outcomes; metabolite assignment using statistical total correlation spectroscopy ({STOCSY}); and biological interpretation of metabolome-wide association studies results. Availability and implementation: The {MWASTools} R package is implemented in R (version  \textgreater =3.4) and is available from Bioconductor: https://bioconductor.org/packages/{MWASTools}/. Contact: m.dumas@imperial.ac.uk. Supplementary information: Supplementary data are available at Bioinformatics online. \copyright The Author(s) 2017. Published by Oxford University Press.}
}
@article{delcarratore_2017,
title = {{RankProd} 2.0: a refactored bioconductor package for detecting differentially expressed features in molecular profiling datasets.},
author = {Del Carratore, Francesco and Jankevics, Andris and Eisinga, Rob and Heskes, Tom and Hong, Fangxin and Breitling, Rainer},
pages = {2774-2775},
url = {http://dx.doi.org/10.1093/bioinformatics/btx292},
year = {2017},
month = {sep},
day = {1},
urldate = {2018-01-29},
journal = {Bioinformatics},
volume = {33},
number = {17},
doi = {10.1093/bioinformatics/btx292},
pmid = {28481966},
pmcid = {PMC5860065},
f1000-projects = {shared citations},
abstract = {Motivation: The Rank Product ({RP}) is a statistical technique widely used to detect differentially expressed features in molecular profiling experiments such as transcriptomics, metabolomics and proteomics studies. An implementation of the {RP} and the closely related Rank Sum ({RS}) statistics has been available in the {RankProd} Bioconductor package for several years. However, several recent advances in the understanding of the statistical foundations of the method have made a complete refactoring of the existing package desirable. Results: We implemented a completely refactored version of the {RankProd} package, which provides a more principled implementation of the statistics for unpaired datasets. Moreover, the permutation-based P -value estimation methods have been replaced by exact methods, providing faster and more accurate results. Availability and implementation: {RankProd} 2.0 is available at Bioconductor ( https://www.bioconductor.org/packages/devel/bioc/html/{RankProd}.html ) and as part of the {mzMatch} pipeline ( http://www.mzmatch.sourceforge.net ). Contact: rainer.breitling@manchester.ac.uk. Supplementary information: Supplementary data are available at Bioinformatics online. \copyright The Author(s) 2017. Published by Oxford University Press.}
}
@article{ferrazza_2017,
title = {{IsotopicLabelling}: an R package for the analysis of {MS} isotopic patterns of labelled analytes.},
author = {Ferrazza, Ruggero and Griffin, Julian L and Guella, Graziano and Franceschi, Pietro},
pages = {300-302},
url = {http://dx.doi.org/10.1093/bioinformatics/btw588},
year = {2017},
month = {jan},
day = {15},
urldate = {2018-01-29},
journal = {Bioinformatics},
volume = {33},
number = {2},
doi = {10.1093/bioinformatics/btw588},
pmid = {27634947},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Labelling experiments in biology usually make use of isotopically enriched substrates, with the two most commonly employed isotopes for metabolism being {2H} and {13C}. At the end of the experiment some metabolites will have incorporated the labelling isotope, to a degree that depends on the metabolic turnover. In order to propose a meaningful biological interpretation, it is necessary to estimate the amount of labelling, and one possible route is to exploit the fact that {MS} isotopic patterns reflect the isotopic distributions. {RESULTS}: We developed the {IsotopicLabelling} R package, a tool able to extract and analyze isotopic patterns from liquid chromatography-mass spectrometry ({LC}-{MS}) and gas chromatography-{MS} ({GC}-{MS}) data relative to labelling experiments. This package estimates the isotopic abundance of the employed stable isotope (either {2H} or {13C}) within a specified list of analytes. {AVAILABILITY} {AND} {IMPLEMENTATION}: The {IsotopicLabelling} R package is freely available at https://github.com/{RuggeroFerrazza}/{IsotopicLabelling} {CONTACTS}: r.ferrazza@unitn.{itSupplementary} information: Supplementary data are available at Bioinformatics online. \copyright The Author 2016. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{wishart_2018,
title = {{HMDB} 4.0: the human metabolome database for 2018.},
author = {Wishart, David S and Feunang, Yannick Djoumbou and Marcu, Ana and Guo, An Chi and Liang, Kevin and Vázquez-Fresno, Rosa and Sajed, Tanvir and Johnson, Daniel and Li, Carin and Karu, Naama and Sayeeda, Zinat and Lo, Elvis and Assempour, Nazanin and Berjanskii, Mark and Singhal, Sandeep and Arndt, David and Liang, Yonjie and Badran, Hasan and Grant, Jason and Serra-Cayuela, Arnau and Liu, Yifeng and Mandal, Rupa and Neveu, Vanessa and Pon, Allison and Knox, Craig and Wilson, Michael and Manach, Claudine and Scalbert, Augustin},
pages = {D608-D617},
url = {http://dx.doi.org/10.1093/nar/gkx1089},
year = {2018},
month = {jan},
day = {4},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {46},
number = {D1},
doi = {10.1093/nar/gkx1089},
pmid = {29140435},
pmcid = {PMC5753273},
f1000-projects = {shared citations},
abstract = {The Human Metabolome Database or {HMDB} (www.hmdb.ca) is a web-enabled metabolomic database containing comprehensive information about human metabolites along with their biological roles, physiological concentrations, disease associations, chemical reactions, metabolic pathways, and reference spectra. First described in 2007, the {HMDB} is now considered the standard metabolomic resource for human metabolic studies. Over the past decade the {HMDB} has continued to grow and evolve in response to emerging needs for metabolomics researchers and continuing changes in web standards. This year's update, {HMDB} 4.0, represents the most significant upgrade to the database in its history. For instance, the number of fully annotated metabolites has increased by nearly threefold, the number of experimental spectra has grown by almost fourfold and the number of illustrated metabolic pathways has grown by a factor of almost 60. Significant improvements have also been made to the {HMDB}'s chemical taxonomy, chemical ontology, spectral viewing, and spectral/text searching tools. A great deal of brand new data has also been added to {HMDB} 4.0. This includes large quantities of predicted {MS}/{MS} and {GC}-{MS} reference spectral data as well as predicted (physiologically feasible) metabolite structures to facilitate novel metabolite identification. Additional information on metabolite-{SNP} interactions and the influence of drugs on metabolite levels (pharmacometabolomics) has also been added. Many other important improvements in the content, the interface, and the performance of the {HMDB} website have been made and these should greatly enhance its ease of use and its potential applications in nutrition, biochemistry, clinical chemistry, clinical genetics, medicine, and metabolomics science. \copyright The Author(s) 2017. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{mylonas_2009,
title = {X-Rank: a robust algorithm for small molecule identification using tandem mass spectrometry.},
author = {Mylonas, Roman and Mauron, Yann and Masselot, Alexandre and Binz, Pierre-Alain and Budin, Nicolas and Fathi, Marc and Viette, Véronique and Hochstrasser, Denis F and Lisacek, Frederique},
pages = {7604-7610},
url = {http://dx.doi.org/10.1021/ac900954d},
year = {2009},
month = {sep},
day = {15},
urldate = {2018-01-30},
journal = {Analytical Chemistry},
volume = {81},
number = {18},
doi = {10.1021/ac900954d},
pmid = {19702277},
f1000-projects = {shared citations},
abstract = {The diversity of experimental workflows involving {LC}-{MS}/{MS} and the extended range of mass spectrometers tend to produce extremely variable spectra. Variability reduces the accuracy of compound identification produced by commonly available software for a spectral library search. We introduce here a new algorithm that successfully matches {MS}/{MS} spectra generated by a range of instruments, acquired under different conditions. Our algorithm called X-Rank first sorts peak intensities of a spectrum and second establishes a correlation between two sorted spectra. X-Rank then computes the probability that a rank from an experimental spectrum matches a rank from a reference library spectrum. In a training step, characteristic parameter values are generated for a given data set. We compared the efficiency of the X-Rank algorithm with the dot-product algorithm implemented by {MS} Search from the National Institute of Standards and Technology ({NIST}) on two test sets produced with different instruments. Overall the X-Rank algorithm accurately discriminates correct from wrong matches and detects more correct substances than the {MS} Search. Furthermore, X-Rank could correctly identify and top rank eight chemical compounds in a commercially available test mix. This confirms the ability of the algorithm to perform both a straight single-platform identification and a cross-platform library search in comparison to other tools. It also opens the possibility for efficient general unknown screening ({GUS}) against large compound libraries.}
}
@article{strozier_2016,
title = {Use of Comprehensive Two-Dimensional Gas Chromatography with Time-of-Flight Mass Spectrometric Detection and Random Forest Pattern Recognition Techniques for Classifying Chemical Threat Agents and Detecting Chemical Attribution Signatures.},
author = {Strozier, Erich D and Mooney, Douglas D and Friedenberg, David A and Klupinski, Theodore P and Triplett, Cheryl A},
pages = {7068-7075},
url = {http://dx.doi.org/10.1021/acs.analchem.6b00725},
year = {2016},
month = {jul},
day = {19},
urldate = {2018-01-30},
journal = {Analytical Chemistry},
volume = {88},
number = {14},
doi = {10.1021/acs.analchem.6b00725},
pmid = {27295356},
f1000-projects = {shared citations},
abstract = {In this proof of concept study, chemical threat agent ({CTA}) samples were classified to their sources with accuracies of 87-100\% by applying a random forest statistical pattern recognition technique to analytical data acquired by comprehensive two-dimensional gas chromatography with time-of-flight mass spectrometric detection ({GC} × {GC}-{TOFMS}). Three organophosphate pesticides, chlorpyrifos, dichlorvos, and dicrotophos, were used as the model {CTAs}, with data collected for 4-6 sources per {CTA} and 7-10 replicate analyses per source. The analytical data were also evaluated to determine tentatively identified chemical attribution signatures for the {CTAs} by comparing samples from different sources according to either the presence/absence of peaks or the relative responses of peaks. These results demonstrate that {GC} × {GC}-{TOFMS} analysis in combination with a random forest technique can be useful in sample classification and signature identification for pesticides. Furthermore, the results suggest that this combination of analytical chemistry and statistical approaches can be applied to forensic analysis of other chemicals for similar purposes.}
}
@article{gromski_2014,
title = {A comparative investigation of modern feature selection and classification approaches for the analysis of mass spectrometry data.},
author = {Gromski, Piotr S and Xu, Yun and Correa, Elon and Ellis, David I and Turner, Michael L and Goodacre, Royston},
pages = {1-8},
url = {http://dx.doi.org/10.1016/j.aca.2014.03.039},
year = {2014},
month = {jun},
day = {4},
urldate = {2018-01-30},
journal = {Analytica Chimica Acta},
volume = {829},
doi = {10.1016/j.aca.2014.03.039},
pmid = {24856395},
f1000-projects = {shared citations},
abstract = {Many analytical approaches such as mass spectrometry generate large amounts of data (input variables) per sample analysed, and not all of these variables are important or related to the target output of interest. The selection of a smaller number of variables prior to sample classification is a widespread task in many research studies, where attempts are made to seek the lowest possible set of variables that are still able to achieve a high level of prediction accuracy; in other words, there is a need to generate the most parsimonious solution when the number of input variables is huge but the number of samples/objects are smaller. Here, we compare several different variable selection approaches in order to ascertain which of these are ideally suited to achieve this goal. All variable selection approaches were applied to the analysis of a common set of metabolomics data generated by Curie-point pyrolysis mass spectrometry (Py-{MS}), where the goal of the study was to classify the Gram-positive bacteria Bacillus. These approaches include stepwise forward variable selection, used for linear discriminant analysis ({LDA}); variable importance for projection ({VIP}) coefficient, employed in partial least squares-discriminant analysis ({PLS}-{DA}); support vector machines-recursive feature elimination ({SVM}-{RFE}); as well as the mean decrease in accuracy and mean decrease in Gini, provided by random forests ({RF}). Finally, a double cross-validation procedure was applied to minimize the consequence of overfitting. The results revealed that {RF} with its variable selection techniques and {SVM} combined with {SVM}-{RFE} as a variable selection method, displayed the best results in comparison to other approaches. Copyright \copyright 2014. Published by Elsevier B.V.}
}
@article{caueto_2018,
title = {{rDolphin}: a {GUI} R package for proficient automatic profiling of {1D} {1H}-{NMR} spectra of study datasets.},
author = {Cañueto, Daniel and Gómez, Josep and Salek, Reza M and Correig, Xavier and Cañellas, Nicolau},
pages = {24},
url = {http://link.springer.com/10.1007/s11306-018-1319-y},
year = {2018},
month = {jan},
day = {31},
urldate = {2018-02-03},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {14},
number = {3},
issn = {1573-3882},
doi = {10.1007/s11306-018-1319-y},
pmid = {30830320},
f1000-projects = {shared citations},
abstract = {{INTRODUCTION}: Adoption of automatic profiling tools for {1H}-{NMR}-based metabolomic studies still lags behind other approaches in the absence of the flexibility and interactivity necessary to adapt to the properties of study data sets of complex matrices. {OBJECTIVES}: To provide an open source tool that fully integrates these needs and enables the reproducibility of the profiling process. {METHODS}: {rDolphin} incorporates novel techniques to optimize exploratory analysis, metabolite identification, and validation of profiling output quality. {RESULTS}: The information and quality achieved in two public datasets of complex matrices are maximized. {CONCLUSION}: {rDolphin} is an open-source R package ( http://github.com/danielcanueto/{rDolphin} ) able to provide the best balance between accuracy, reproducibility and ease of use.}
}
@article{domingoalmenara_2017,
title = {Baitmet, a computational approach for {GC}–{MS} library-driven metabolite profiling},
author = {Domingo-Almenara, Xavier and Brezmes, Jesus and Venturini, Gabriela and Vivó-Truyols, Gabriel and Perera, Alexandre and Vinaixa, Maria},
pages = {93},
url = {http://link.springer.com/10.1007/s11306-017-1223-x},
year = {2017},
month = {aug},
urldate = {2018-02-20},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {13},
number = {8},
issn = {1573-3882},
doi = {10.1007/s11306-017-1223-x},
f1000-projects = {shared citations},
abstract = {Introduction Current computational tools for gas chromatography\textemdashmass spectrometry ({GC}–{MS}) metabolomics profiling do not focus on metabolite identification, that still remains as the entire workflow bottleneck and it relies on manual data reviewing. Metabolomics advent has fostered the development of public metabolite repositories containing mass spectra and retention indices, two orthogonal properties needed for metabolite identification. Such libraries can be used for library-driven compound profiling of large datasets produced in metabolomics, a complementary approach to current {GC}–{MS} non-targeted data analysis solutions that can eventually help to assess metabolite identities more efficiently.}
}
@article{josse_2016,
title = {missmda : A package for handling missing values in multivariate data analysis},
author = {Josse, Julie and Husson, François},
pages = {1-31},
url = {http://www.jstatsoft.org/v70/i01/},
year = {2016},
urldate = {2019-07-30},
journal = {Journal of statistical software},
volume = {70},
number = {1},
issn = {1548-7660},
doi = {10.18637/jss.v070.i01},
f1000-projects = {shared citations},
abstract = {We present the R package {missMDA} which performs principal component methods on incomplete data sets, aiming to obtain scores, loadings and graphical representations despite missing values. Package methods include principal component analysis for continuous variables, multiple correspondence analysis for categorical variables, factorial analysis on mixed data for both continuous and categorical variables, and multiple factor analysis for multi-table data. Furthermore, {missMDA} can be used to perform single imputation to complete data involving continuous, categorical and mixed variables. A multiple imputation method is also available. In the principal component analysis framework, variability across different imputations is represented by confidence areas around the row and column positions on the graphical outputs. This allows assessment of the credibility of results obtained from incomplete data sets.}
}
@article{murtagh_2012,
title = {Algorithms for hierarchical clustering: an overview},
author = {Murtagh, Fionn and Contreras, Pedro},
pages = {86-97},
url = {http://doi.wiley.com/10.1002/widm.53},
year = {2012},
month = {jan},
urldate = {2019-07-01},
journal = {Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery},
volume = {2},
number = {1},
issn = {19424787},
doi = {10.1002/widm.53},
f1000-projects = {shared citations}
}
@article{myint_2017,
title = {Joint Bounding of Peaks Across Samples Improves Differential Analysis in Mass Spectrometry-Based Metabolomics.},
author = {Myint, Leslie and Kleensang, Andre and Zhao, Liang and Hartung, Thomas and Hansen, Kasper D},
pages = {3517-3523},
url = {http://dx.doi.org/10.1021/acs.analchem.6b04719},
year = {2017},
month = {mar},
day = {21},
urldate = {2019-02-19},
journal = {Analytical Chemistry},
volume = {89},
number = {6},
doi = {10.1021/acs.analchem.6b04719},
pmid = {28221771},
pmcid = {PMC5362739},
f1000-projects = {shared citations},
abstract = {As mass spectrometry-based metabolomics becomes more widely used in biomedical research, it is important to revisit existing data analysis paradigms. Existing data preprocessing efforts have largely focused on methods which start by extracting features separately from each sample, followed by a subsequent attempt to group features across samples to facilitate comparisons. We show that this preprocessing approach leads to unnecessary variability in peak quantifications that adversely impacts downstream analysis. We present a new method, bakedpi, for the preprocessing of both centroid and profile mode metabolomics data that relies on an intensity-weighted bivariate kernel density estimation on a pooling of all samples to detect peaks. This new method reduces this unnecessary quantification variability and increases power in downstream differential analysis.}
}
@article{li_2018,
title = {{polyPK}: an R package for pharmacokinetic analysis of multi-component drugs using a metabolomics approach.},
author = {Li, Mengci and Wang, Shouli and Xie, Guoxiang and Ma, Xiaohui and Chen, Tianlu and Jia, Wei},
pages = {1792-1794},
url = {http://dx.doi.org/10.1093/bioinformatics/btx834},
year = {2018},
month = {may},
day = {15},
urldate = {2019-09-04},
journal = {Bioinformatics},
volume = {34},
number = {10},
doi = {10.1093/bioinformatics/btx834},
pmid = {29293946},
f1000-projects = {shared citations},
abstract = {Summary: Pharmacokinetics ({PK}) is a long-standing bottleneck for botanical drug and traditional medicine research. By using an integrated phytochemical and metabolomics approach coupled with multivariate statistical analysis, we propose a new strategy, Poly-{PK}, to simultaneously monitor the performance of drug constituents and endogenous metabolites, taking into account both the diversity of the drug's chemical composition and its complex effects on the mammalian metabolic pathways. Poly-{PK} is independent of specific measurement platforms and has been successfully applied in the {PK} studies of Puerh tea, a traditional Chinese medicine Huangqi decoction and many other multi-component drugs. Here, we introduce an R package, {polyPK}, the first and only automation of the data analysis pipeline of Poly-{PK} strategy. {polyPK} provides 10 functions for data pre-processing, differential compound identification and grouping, traditional {PK} parameters calculation, multivariate statistical analysis, correlations, cluster analyses and resulting visualization. It may serve a wide range of users, including pharmacologists, biologists and doctors, in understanding the metabolic fate of multi-component drugs. Availability and implementation: {polyPK} package is freely available from the R archive {CRAN} (https://{CRAN}.R-project.org/package={polyPK}). Contact: wjia@cc.hawaii.edu or chentianlu@sjtu.edu.cn. Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{honaker_2011,
title = {amelia {II}: A program for missing data},
author = {Honaker, James and King, Gary and Blackwell, Matthew},
url = {http://www.jstatsoft.org/v45/i07/},
year = {2011},
urldate = {2019-07-30},
journal = {Journal of statistical software},
volume = {45},
number = {7},
issn = {1548-7660},
doi = {10.18637/jss.v045.i07},
f1000-projects = {shared citations}
}
@article{castillo_2011,
title = {Data analysis tool for comprehensive two-dimensional gas chromatography/time-of-flight mass spectrometry.},
author = {Castillo, Sandra and Mattila, Ismo and Miettinen, Jarkko and Orešič, Matej and Hyötyläinen, Tuulia},
pages = {3058-3067},
url = {http://dx.doi.org/10.1021/ac103308x},
year = {2011},
month = {apr},
day = {15},
urldate = {2019-05-28},
journal = {Analytical Chemistry},
volume = {83},
number = {8},
doi = {10.1021/ac103308x},
pmid = {21434611},
f1000-projects = {shared citations},
abstract = {Data processing and identification of unknown compounds in comprehensive two-dimensional gas chromatography combined with time-of-flight mass spectrometry ({GC}×{GC}/{TOFMS}) analysis is a major challenge, particularly when large sample sets are analyzed. Herein, we present a method for efficient treatment of large data sets produced by {GC}×{GC}/{TOFMS} implemented as a freely available open source software package, Guineu. To handle large data sets and to efficiently utilize all the features available in the vendor software (baseline correction, mass spectral deconvolution, peak picking, integration, library search, and signal-to-noise filtering), data preprocessed by instrument software are used as a starting point for further processing. Our software affords alignment of the data, normalization, data filtering, and utilization of retention indexes in the verification of identification as well as a novel tool for automated group-type identification of the compounds. Herein, different features of the software are studied in detail and the performance of the system is verified by the analysis of a large set of standard samples as well as of a large set of authentic biological samples, including the control samples. The quantitative features of our {GC}×{GC}/{TOFMS} methodology are also studied to further demonstrate the method performance and the experimental results confirm the reliability of the developed procedure. The methodology has already been successfully used for the analysis of several thousand samples in the field of metabolomics. \copyright 2011 American Chemical Society}
}
@article{naake_2017,
title = {{MetCirc}: navigating mass spectral similarity in high-resolution {MS}/{MS} metabolomics data.},
author = {Naake, Thomas and Gaquerel, Emmanuel},
pages = {2419-2420},
url = {http://dx.doi.org/10.1093/bioinformatics/btx159},
year = {2017},
month = {aug},
day = {1},
urldate = {2019-04-26},
journal = {Bioinformatics},
volume = {33},
number = {15},
doi = {10.1093/bioinformatics/btx159},
pmid = {28402393},
f1000-projects = {shared citations},
abstract = {Summary: Among the main challenges in metabolomics are the rapid dereplication of previously characterized metabolites across a range of biological samples and the structural prediction of unknowns from {MS}/{MS} data. Here, we developed {MetCirc} to comprehensively align and calculate pairwise similarity scores among {MS}/{MS} spectral data and visualize these across a range of biological samples. {MetCirc} comprises functionalities to interactively organize these data according to compound familial groupings and to accelerate the discovery of shared metabolites and hypothesis formulation for unknowns. As such, {MetCirc} provides a significant advance to address biological questions in areas where chemodiversity plays a role. Availability and Implementation: {MetCirc} , implemented in the open-source R language, together with its vignette are available in the Bioconductor project and at https://github.com/{PlantDefenseMetabolism}/{MetCirc} . Contact: thomasnaake@googlemail.com or emmanuel.gaquerel@cos.uni-heidelberg.de. Supplementary information: Supplementary data are available at Bioinformatics online. \copyright The Author (2017). Published by Oxford University Press. All rights reserved. For Permissions, please email: journals.permissions@oup.com}
}
@article{blaenovi_2018,
title = {Software Tools and Approaches for Compound Identification of {LC}-{MS}/{MS} Data in Metabolomics.},
author = {Blaženović, Ivana and Kind, Tobias and Ji, Jian and Fiehn, Oliver},
url = {http://dx.doi.org/10.3390/metabo8020031},
year = {2018},
month = {may},
day = {10},
urldate = {2019-04-26},
journal = {Metabolites},
volume = {8},
number = {2},
doi = {10.3390/metabo8020031},
pmid = {29748461},
pmcid = {PMC6027441},
f1000-projects = {shared citations},
abstract = {The annotation of small molecules remains a major challenge in untargeted mass spectrometry-based metabolomics. We here critically discuss structured elucidation approaches and software that are designed to help during the annotation of unknown compounds. Only by elucidating unknown metabolites first is it possible to biologically interpret complex systems, to map compounds to pathways and to create reliable predictive metabolic models for translational and clinical research. These strategies include the construction and quality of tandem mass spectral databases such as the coalition of {MassBank} repositories and investigations of {MS}/{MS} matching confidence. We present in silico fragmentation tools such as {MS}-{FINDER}, {CFM}-{ID}, {MetFrag}, {ChemDistiller} and {CSI}:{FingerID} that can annotate compounds from existing structure databases and that have been used in the {CASMI} (critical assessment of small molecule identification) contests. Furthermore, the use of retention time models from liquid chromatography and the utility of collision cross-section modelling from ion mobility experiments are covered. Workflows and published examples of successfully annotated unknown compounds are included.}
}
@article{peters_2018,
title = {Current Challenges in Plant Eco-Metabolomics.},
author = {Peters, Kristian and Worrich, Anja and Weinhold, Alexander and Alka, Oliver and Balcke, Gerd and Birkemeyer, Claudia and Bruelheide, Helge and Calf, Onno W and Dietz, Sophie and Dührkop, Kai and Gaquerel, Emmanuel and Heinig, Uwe and Kücklich, Marlen and Macel, Mirka and Müller, Caroline and Poeschl, Yvonne and Pohnert, Georg and Ristok, Christian and Rodríguez, Victor Manuel and Ruttkies, Christoph and Schuman, Meredith and Schweiger, Rabea and Shahaf, Nir and Steinbeck, Christoph and Tortosa, Maria and Treutler, Hendrik and Ueberschaar, Nico and Velasco, Pablo and Wei\ss, Brigitte M and Widdig, Anja and Neumann, Steffen and Dam, Nicole M van},
url = {http://dx.doi.org/10.3390/ijms19051385},
year = {2018},
month = {may},
day = {6},
urldate = {2019-04-23},
journal = {International Journal of Molecular Sciences},
volume = {19},
number = {5},
doi = {10.3390/ijms19051385},
pmid = {29734799},
pmcid = {PMC5983679},
f1000-projects = {shared citations},
abstract = {The relatively new research discipline of Eco-Metabolomics is the application of metabolomics techniques to ecology with the aim to characterise biochemical interactions of organisms across different spatial and temporal scales. Metabolomics is an untargeted biochemical approach to measure many thousands of metabolites in different species, including plants and animals. Changes in metabolite concentrations can provide mechanistic evidence for biochemical processes that are relevant at ecological scales. These include physiological, phenotypic and morphological responses of plants and communities to environmental changes and also interactions with other organisms. Traditionally, research in biochemistry and ecology comes from two different directions and is performed at distinct spatiotemporal scales. Biochemical studies most often focus on intrinsic processes in individuals at physiological and cellular scales. Generally, they take a bottom-up approach scaling up cellular processes from spatiotemporally fine to coarser scales. Ecological studies usually focus on extrinsic processes acting upon organisms at population and community scales and typically study top-down and bottom-up processes in combination. Eco-Metabolomics is a transdisciplinary research discipline that links biochemistry and ecology and connects the distinct spatiotemporal scales. In this review, we focus on approaches to study chemical and biochemical interactions of plants at various ecological levels, mainly plant⁻organismal interactions, and discuss related examples from other domains. We present recent developments and highlight advancements in Eco-Metabolomics over the last decade from various angles. We further address the five key challenges: (1) complex experimental designs and large variation of metabolite profiles; (2) feature extraction; (3) metabolite identification; (4) statistical analyses; and (5) bioinformatics software tools and workflows. The presented solutions to these challenges will advance connecting the distinct spatiotemporal scales and bridging biochemistry and ecology.}
}
@article{jacob_2017,
title = {{NMRProcFlow}: a graphical and interactive tool dedicated to {1D} spectra processing for {NMR}-based metabolomics.},
author = {Jacob, D and Deborde, C and Lefebvre, M and Maucourt, M and Moing, A},
pages = {36},
url = {http://dx.doi.org/10.1007/s11306-017-1178-y},
year = {2017},
month = {feb},
day = {17},
urldate = {2019-04-18},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {13},
number = {4},
doi = {10.1007/s11306-017-1178-y},
pmid = {28261014},
pmcid = {PMC5313591},
f1000-projects = {shared citations},
abstract = {{INTRODUCTION}: Concerning {NMR}-based metabolomics, {1D} spectra processing often requires an expert eye for disentangling the intertwined peaks. {OBJECTIVES}: The objective of {NMRProcFlow} is to assist the expert in this task in the best way without requirement of programming skills. {METHODS}: {NMRProcFlow} was developed to be a graphical and interactive {1D} {NMR} ({1H} \& {13C}) spectra processing tool. {RESULTS}: {NMRProcFlow} (http://nmrprocflow.org), dedicated to metabolic fingerprinting and targeted metabolomics, covers all spectra processing steps including baseline correction, chemical shift calibration and alignment. {CONCLUSION}: Biologists and {NMR} spectroscopists can easily interact and develop synergies by visualizing the {NMR} spectra along with their corresponding experimental-factor levels, thus setting a bridge between experimental design and subsequent statistical analyses.}
}
@article{wehrens_2016,
title = {Improved batch correction in untargeted {MS}-based metabolomics.},
author = {Wehrens, Ron and Hageman, Jos A and van Eeuwijk, Fred and Kooke, Rik and Flood, Pádraic J and Wijnker, Erik and Keurentjes, Joost J B and Lommen, Arjen and van Eekelen, Henriëtte D L M and Hall, Robert D and Mumm, Roland and de Vos, Ric C H},
pages = {88},
url = {http://dx.doi.org/10.1007/s11306-016-1015-8},
year = {2016},
month = {mar},
day = {18},
urldate = {2019-04-03},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {12},
doi = {10.1007/s11306-016-1015-8},
pmid = {27073351},
pmcid = {PMC4796354},
f1000-projects = {shared citations},
abstract = {{INTRODUCTION}: Batch effects in large untargeted metabolomics experiments are almost unavoidable, especially when sensitive detection techniques like mass spectrometry ({MS}) are employed. In order to obtain peak intensities that are comparable across all batches, corrections need to be performed. Since non-detects, i.e., signals with an intensity too low to be detected with certainty, are common in metabolomics studies, the batch correction methods need to take these into account. {OBJECTIVES}: This paper aims to compare several batch correction methods, and investigates the effect of different strategies for handling non-detects. {METHODS}: Batch correction methods usually consist of regression models, possibly also accounting for trends within batches. To fit these models quality control samples ({QCs}), injected at regular intervals, can be used. Also study samples can be used, provided that the injection order is properly randomized. Normalization methods, not using information on batch labels or injection order, can correct for batch effects as well. Introducing two easy-to-use quality criteria, we assess the merits of these batch correction strategies using three large {LC}-{MS} and {GC}-{MS} data sets of samples {fromArabidopsis} thaliana. {RESULTS}: The three data sets have very different characteristics, leading to clearly distinct behaviour of the batch correction strategies studied. Explicit inclusion of information on batch and injection order in general leads to very good corrections; when enough {QCs} are available, also general normalization approaches perform well. Several approaches are shown to be able to handle non-detects-replacing them with very small numbers such as zero seems the worst of the approaches considered. {CONCLUSION}: The use of quality control samples for batch correction leads to good results when enough {QCs} are available. If an experiment is properly set up, batch correction using the study samples usually leads to a similar high-quality correction, but has the advantage that more metabolites are corrected. The strategy for handling non-detects is important: choosing small values like zero can lead to suboptimal batch corrections.}
}
@article{ludwig_2010,
title = {Two-dimensional J-resolved {NMR} spectroscopy: review of a key methodology in the metabolomics toolbox.},
author = {Ludwig, Christian and Viant, Mark R},
pages = {22-32},
url = {http://dx.doi.org/10.1002/pca.1186},
year = {2010},
month = {feb},
urldate = {2019-05-01},
journal = {Phytochemical Analysis},
volume = {21},
number = {1},
doi = {10.1002/pca.1186},
pmid = {19904730},
f1000-projects = {shared citations},
abstract = {One-dimensional ({1D}) (1)H {NMR} spectroscopy remains a leading analytical technology in metabolomics. Advantages of this approach include relatively rapid spectral acquisition and {NMR} resonances that provide a direct measure of metabolite concentration based upon a single internal standard. Severe spectral congestion can, however, significantly hinder both metabolite identification and quantification. Two-dimensional (1)H J-resolved ({JRES}) {NMR} spectroscopy retains many of the benefits of {1D} {NMR}, but additionally disperses the overlapping resonances into a second dimension, reducing congestion and increasing metabolite specificity. The usefulness of this approach to metabolomics was first realised six years ago, and since then it has been used in biological, medical and environmental studies of plants and animals. Here we provide a basic introduction to the {2D} {JRES} {NMR} experiment and then discuss strategies for spectral acquisition and processing in the context of metabolomics applications, concluding with some key recommendations: acquisition using a double spin-echo sequence with excitation sculpting; processing using the {SEM} window function, tilting and symmetricising, optionally followed by a skyline projection. Strategies for implementing {JRES} spectroscopy into the metabolomics toolbox are then considered, including its roles in metabolic fingerprinting, metabolite identification and metabolite quantification. Public resources and data standards for {JRES} metabolomics are reviewed. We conclude by evaluating the advantages (e.g. increased spectral dispersion and confidence in metabolite identification; fully automated processing; reduced batch-to-batch variation) and disadvantages (e.g. longer acquisition times; higher technical variability; phase-twisted lineshapes resulting in quantification errors) of {2D} {JRES} {NMR} vs the established {1D} approach for metabolomics. (c) 2009 John Wiley \& Sons, Ltd.}
}
@article{pudakalakatti_2014,
title = {A fast {NMR} method for resonance assignments: application to metabolomics.},
author = {Pudakalakatti, Shivanand M and Dubey, Abhinav and Jaipuria, Garima and Shubhashree, U and Adiga, Satish Kumar and Moskau, Detlef and Atreya, Hanudatta S},
pages = {165-173},
url = {http://dx.doi.org/10.1007/s10858-014-9814-6},
year = {2014},
month = {mar},
urldate = {2019-05-01},
journal = {Journal of Biomolecular {NMR}},
volume = {58},
number = {3},
doi = {10.1007/s10858-014-9814-6},
pmid = {24488481},
f1000-projects = {shared citations},
abstract = {We present a new method for rapid {NMR} data acquisition and assignments applicable to unlabeled ((12)C) or (13)C-labeled biomolecules/organic molecules in general and metabolomics in particular. The method involves the acquisition of three two dimensional ({2D}) {NMR} spectra simultaneously using a dual receiver system. The three spectra, namely: (1) G-matrix Fourier transform ({GFT}) (3,2)D [(13)C, (1)H] {HSQC}-{TOCSY}, (2) {2D} (1)H-(1)H {TOCSY} and (3) {2D} (13)C-(1)H {HETCOR} are acquired in a single experiment and provide mutually complementary information to completely assign individual metabolites in a mixture. The {GFT} (3,2)D [(13)C, (1)H] {HSQC}-{TOCSY} provides {3D} correlations in a reduced dimensionality manner facilitating high resolution and unambiguous assignments. The experiments were applied for complete (1)H and (13)C assignments of a mixture of 21 unlabeled metabolites corresponding to a medium used in assisted reproductive technology. Taken together, the experiments provide time gain of order of magnitudes compared to the conventional data acquisition methods and can be combined with other fast {NMR} techniques such as non-uniform sampling and covariance spectroscopy. This provides new avenues for using multiple receivers and projection {NMR} techniques for high-throughput approaches in metabolomics.}
}
@article{gmez_2014,
title = {Dolphin: a tool for automatic targeted metabolite profiling using {1D} and {2D} (1)H-{NMR} data.},
author = {Gómez, Josep and Brezmes, Jesús and Mallol, Roger and Rodríguez, Miguel A and Vinaixa, Maria and Salek, Reza M and Correig, Xavier and Cañellas, Nicolau},
pages = {7967-7976},
url = {http://dx.doi.org/10.1007/s00216-014-8225-6},
year = {2014},
month = {dec},
urldate = {2019-04-17},
journal = {Analytical and Bioanalytical Chemistry},
volume = {406},
number = {30},
doi = {10.1007/s00216-014-8225-6},
pmid = {25370160},
f1000-projects = {shared citations},
abstract = {One of the main challenges in nuclear magnetic resonance ({NMR}) metabolomics is to obtain valuable metabolic information from large datasets of raw {NMR} spectra in a high throughput, automatic, and reproducible way. To date, established software packages used to match and quantify metabolites in {NMR} spectra remain mostly manually operated, leading to low resolution results and subject to inconsistencies not attributable to the {NMR} technique itself. Here, we introduce a new software package, called Dolphin, able to automatically quantify a set of target metabolites in multiple sample measurements using an approach based on {1D} and {2D} {NMR} techniques to overcome the inherent limitations of {1D} (1)H-{NMR} spectra in metabolomics. Dolphin takes advantage of the {2D} J-resolved {NMR} spectroscopy signal dispersion to avoid inconsistencies in signal position detection, enhancing the reliability and confidence in metabolite matching. Furthermore, in order to improve accuracy in quantification, Dolphin uses {2D} {NMR} spectra to obtain additional information on all neighboring signals surrounding the target metabolite. We have compared the targeted profiling results of Dolphin, recorded from standard biological mixtures, with those of two well established approaches in {NMR} metabolomics. Overall, Dolphin produced more accurate results with the added advantage of being a fully automated and high throughput processing package.}
}
@article{vaughan_2012,
title = {Liquid chromatography-mass spectrometry calibration transfer and metabolomics data fusion.},
author = {Vaughan, Andrew A and Dunn, Warwick B and Allwood, J William and Wedge, David C and Blackhall, Fiona H and Whetton, Anthony D and Dive, Caroline and Goodacre, Royston},
pages = {9848-9857},
url = {http://dx.doi.org/10.1021/ac302227c},
year = {2012},
month = {nov},
day = {20},
urldate = {2019-07-01},
journal = {Analytical Chemistry},
volume = {84},
number = {22},
doi = {10.1021/ac302227c},
pmid = {23072438},
f1000-projects = {shared citations},
abstract = {Metabolic profiling is routinely performed on multiple analytical platforms to increase the coverage of detected metabolites, and it is often necessary to distribute biological and clinical samples from a study between instruments of the same type to share the workload between different laboratories. The ability to combine metabolomics data arising from different sources is therefore of great interest, particularly for large-scale or long-term studies, where samples must be analyzed in separate blocks. This is not a trivial task, however, due to differing data structures, temporal variability, and instrumental drift. In this study, we employed blood serum and plasma samples collected from 29 subjects diagnosed with small cell lung cancer and analyzed each sample on two liquid chromatography-mass spectrometry ({LC}-{MS}) platforms. We describe a method for mapping retention times and matching metabolite features between platforms and approaches for fusing data acquired from both instruments. Calibration transfer models were developed and shown to be successful at mapping the response of one {LC}-{MS} instrument to another (Procrustes dissimilarity = 0.04; Mantel correlation = 0.95), allowing us to merge the data from different samples analyzed on different instruments. Data fusion was assessed in a clinical context by comparing the correlation of each metabolite with subject survival time in both the original and fused data sets: a simple autoscaling procedure (Pearson's R = 0.99) was found to improve upon a calibration transfer method based on partial least-squares regression (R = 0.94).}
}
@article{argelaguet_2018,
title = {Multi-Omics Factor Analysis-a framework for unsupervised integration of multi-omics data sets.},
author = {Argelaguet, Ricard and Velten, Britta and Arnol, Damien and Dietrich, Sascha and Zenz, Thorsten and Marioni, John C and Buettner, Florian and Huber, Wolfgang and Stegle, Oliver},
pages = {e8124},
url = {http://msb.embopress.org/lookup/doi/10.15252/msb.20178124},
year = {2018},
month = {jun},
day = {20},
urldate = {2019-06-14},
journal = {Molecular Systems Biology},
volume = {14},
number = {6},
issn = {1744-4292},
doi = {10.15252/msb.20178124},
pmid = {29925568},
pmcid = {PMC6010767},
f1000-projects = {shared citations},
abstract = {Multi-omics studies promise the improved characterization of biological processes across molecular layers. However, methods for the unsupervised integration of the resulting heterogeneous data sets are lacking. We present Multi-Omics Factor Analysis ({MOFA}), a computational method for discovering the principal sources of variation in multi-omics data sets. {MOFA} infers a set of (hidden) factors that capture biological and technical sources of variability. It disentangles axes of heterogeneity that are shared across multiple modalities and those specific to individual data modalities. The learnt factors enable a variety of downstream analyses, including identification of sample subgroups, data imputation and the detection of outlier samples. We applied {MOFA} to a cohort of 200 patient samples of chronic lymphocytic leukaemia, profiled for somatic mutations, {RNA} expression, {DNA} methylation and ex vivo drug responses. {MOFA} identified major dimensions of disease heterogeneity, including immunoglobulin heavy-chain variable region status, trisomy of chromosome 12 and previously underappreciated drivers, such as response to oxidative stress. In a second application, we used {MOFA} to analyse single-cell multi-omics data, identifying coordinated transcriptional and epigenetic changes along cell differentiation. \copyright 2018 The Authors. Published under the terms of the {CC} {BY} 4.0 license.}
}
@article{mock_2018,
title = {{MetaboDiff}: an R package for differential metabolomic analysis.},
author = {Mock, Andreas and Warta, Rolf and Dettling, Steffen and Brors, Benedikt and Jäger, Dirk and Herold-Mende, Christel},
pages = {3417-3418},
url = {http://dx.doi.org/10.1093/bioinformatics/bty344},
year = {2018},
month = {oct},
day = {1},
urldate = {2018-10-02},
journal = {Bioinformatics},
volume = {34},
number = {19},
doi = {10.1093/bioinformatics/bty344},
pmid = {29718102},
pmcid = {PMC6157071},
f1000-projects = {shared citations},
abstract = {Summary: Comparative metabolomics comes of age through commercial vendors offering metabolomics for translational researchers outside the mass spectrometry field. The {MetaboDiff} packages aims to provide a low-level entry to differential metabolomic analysis with R by starting off with the table of metabolite measurements. As a key functionality, {MetaboDiffs} offers the exploration of sample traits in a data-derived metabolic correlation network. Availability and implementation: The {MetaboDiff} R package is platform-independent, available at http://github.com/andreasmock/{MetaboDiff}/ and released under the {MIT} licence. The package documentation comprises a step-by-step markdown tutorial. Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{koelmel_2017,
title = {{LipidMatch}: an automated workflow for rule-based lipid identification using untargeted high-resolution tandem mass spectrometry data.},
author = {Koelmel, Jeremy P and Kroeger, Nicholas M and Ulmer, Candice Z and Bowden, John A and Patterson, Rainey E and Cochran, Jason A and Beecher, Christopher W W and Garrett, Timothy J and Yost, Richard A},
pages = {331},
url = {http://dx.doi.org/10.1186/s12859-017-1744-3},
year = {2017},
month = {jul},
day = {10},
urldate = {2019-05-13},
journal = {{BMC} Bioinformatics},
volume = {18},
number = {1},
doi = {10.1186/s12859-017-1744-3},
pmid = {28693421},
pmcid = {PMC5504796},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Lipids are ubiquitous and serve numerous biological functions; thus lipids have been shown to have great potential as candidates for elucidating biomarkers and pathway perturbations associated with disease. Methods expanding coverage of the lipidome increase the likelihood of biomarker discovery and could lead to more comprehensive understanding of disease etiology. {RESULTS}: We introduce {LipidMatch}, an R-based tool for lipid identification for liquid chromatography tandem mass spectrometry workflows. {LipidMatch} currently has over 250,000 lipid species spanning 56 lipid types contained in in silico fragmentation libraries. Unique fragmentation libraries, compared to other open source software, include oxidized lipids, bile acids, sphingosines, and previously uncharacterized adducts, including ammoniated cardiolipins. {LipidMatch} uses rule-based identification. For each lipid type, the user can select which fragments must be observed for identification. Rule-based identification allows for correct annotation of lipids based on the fragments observed, unlike typical identification based solely on spectral similarity scores, where over-reporting structural details that are not conferred by fragmentation data is common. Another unique feature of {LipidMatch} is ranking lipid identifications for a given feature by the sum of fragment intensities. For each lipid candidate, the intensities of experimental fragments with exact mass matches to expected in silico fragments are summed. The lipid identifications with the greatest summed intensity using this ranking algorithm were comparable to other lipid identification software annotations, {MS}-{DIAL} and Greazy. For example, for features with identifications from all 3 software, 92\% of {LipidMatch} identifications by fatty acyl constituents were corroborated by at least one other software in positive mode and 98\% in negative ion mode. {CONCLUSIONS}: {LipidMatch} allows users to annotate lipids across a wide range of high resolution tandem mass spectrometry experiments, including imaging experiments, direct infusion experiments, and experiments employing liquid chromatography. {LipidMatch} leverages the most extensive in silico fragmentation libraries of freely available software. When integrated into a larger lipidomics workflow, {LipidMatch} may increase the probability of finding lipid-based biomarkers and determining etiology of disease by covering a greater portion of the lipidome and using annotation which does not over-report biologically relevant structural details of identified lipid molecules.}
}
@article{chong_2018,
title = {{MetaboAnalystR}: an R package for flexible and reproducible analysis of metabolomics data.},
author = {Chong, Jasmine and Xia, Jianguo},
pages = {4313-4314},
url = {http://dx.doi.org/10.1093/bioinformatics/bty528},
year = {2018},
month = {dec},
day = {15},
urldate = {2018-07-07},
journal = {Bioinformatics},
volume = {34},
number = {24},
doi = {10.1093/bioinformatics/bty528},
pmid = {29955821},
pmcid = {PMC6289126},
f1000-projects = {shared citations},
abstract = {Summary: The {MetaboAnalyst} web application has been widely used for metabolomics data analysis and interpretation. Despite its user-friendliness, the web interface has presented its inherent limitations (especially for advanced users) with regard to flexibility in creating customized workflow, support for reproducible analysis, and capacity in dealing with large data. To address these limitations, we have developed a companion R package ({MetaboAnalystR}) based on the R code base of the web server. The package has been thoroughly tested to ensure that the same R commands will produce identical results from both interfaces. {MetaboAnalystR} complements the {MetaboAnalyst} web server to facilitate transparent, flexible and reproducible analysis of metabolomics data. Availability and implementation: {MetaboAnalystR} is freely available from https://github.com/xia-lab/{MetaboAnalystR}.}
}
@article{degenhardt_2019,
title = {Evaluation of variable selection methods for random forests and omics data sets.},
author = {Degenhardt, Frauke and Seifert, Stephan and Szymczak, Silke},
pages = {492-503},
url = {http://dx.doi.org/10.1093/bib/bbx124},
year = {2019},
month = {mar},
day = {22},
urldate = {2019-08-05},
journal = {Briefings in Bioinformatics},
volume = {20},
number = {2},
doi = {10.1093/bib/bbx124},
pmid = {29045534},
pmcid = {PMC6433899},
f1000-projects = {shared citations},
abstract = {Machine learning methods and in particular random forests are promising approaches for prediction based on high dimensional omics data sets. They provide variable importance measures to rank predictors according to their predictive power. If building a prediction model is the main goal of a study, often a minimal set of variables with good prediction performance is selected. However, if the objective is the identification of involved variables to find active networks and pathways, approaches that aim to select all relevant variables should be preferred. We evaluated several variable selection procedures based on simulated data as well as publicly available experimental methylation and gene expression data. Our comparison included the Boruta algorithm, the Vita method, recurrent relative variable importance, a permutation approach and its parametric variant (Altmann) as well as recursive feature elimination ({RFE}).  In our simulation studies, Boruta was the most powerful approach, followed closely by the Vita method. Both approaches demonstrated similar stability in variable selection, while Vita was the most robust approach under a pure null model without any predictor variables related to the outcome. In the analysis of the different experimental data sets, Vita demonstrated slightly better stability in variable selection and was less computationally intensive than Boruta. In conclusion, we recommend the Boruta and Vita approaches for the analysis of high-dimensional data sets. Vita is considerably faster than Boruta and thus more suitable for large data sets, but only Boruta can also be applied in low-dimensional settings. \copyright The Author 2017. Published by Oxford University Press.}
}
@article{inglese_2019,
title = {{SPUTNIK}: an R package for filtering of spatially related peaks in mass spectrometry imaging data.},
author = {Inglese, Paolo and Correia, Gonçalo and Takats, Zoltan and Nicholson, Jeremy K and Glen, Robert C},
pages = {178-180},
url = {http://dx.doi.org/10.1093/bioinformatics/bty622},
year = {2019},
month = {jan},
day = {1},
urldate = {2018-07-23},
journal = {Bioinformatics},
volume = {35},
number = {1},
doi = {10.1093/bioinformatics/bty622},
pmid = {30010780},
pmcid = {PMC6298046},
f1000-projects = {shared citations},
abstract = {Summary: {SPUTNIK} is an R package consisting of a series of tools to filter mass spectrometry imaging peaks characterized by a noisy or unlikely spatial distribution. {SPUTNIK} can produce mass spectrometry imaging datasets characterized by a smaller but more informative set of peaks, reduce the complexity of subsequent multi-variate analysis and increase the interpretability of the statistical results. Availability and implementation: {SPUTNIK} is freely available online from {CRAN} repository and at https://github.com/paoloinglese/{SPUTNIK}. The package is distributed under the {GNU} General Public License version 3 and is accompanied by example files and data. Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{do_2019,
title = {{MoDentify}: phenotype-driven module identification in metabolomics networks at different resolutions.},
author = {Do, Kieu Trinh and Rasp, David J N-P and Kastenmüller, Gabi and Suhre, Karsten and Krumsiek, Jan},
pages = {532-534},
url = {http://dx.doi.org/10.1093/bioinformatics/bty650},
year = {2019},
month = {feb},
day = {1},
urldate = {2019-05-03},
journal = {Bioinformatics},
volume = {35},
number = {3},
doi = {10.1093/bioinformatics/bty650},
pmid = {30032270},
pmcid = {PMC6361241},
f1000-projects = {shared citations},
abstract = {Summary: Associations of metabolomics data with phenotypic outcomes are expected to span functional modules, which are defined as sets of correlating metabolites that are coordinately regulated. Moreover, these associations occur at different scales, from entire pathways to only a few metabolites; an aspect that has not been addressed by previous methods. Here, we present {MoDentify}, a free R package to identify regulated modules in metabolomics networks at different layers of resolution. Importantly, {MoDentify} shows higher statistical power than classical association analysis. Moreover, the package offers direct interactive visualization of the results in Cytoscape. We present an application example using complex, multifluid metabolomics data. Due to its generic character, the method is widely applicable to other types of data. Availability and implementation: https://github.com/krumsieklab/{MoDentify} (vignette includes detailed workflow). Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@inproceedings{jupyter_2018,
title = {Binder 2.0 - Reproducible, interactive, sharable environments for science at scale},
author = {Jupyter, Project and Bussonnier, Matthias and Forde, Jessica and Freeman, Jeremy and Granger, Brian and Head, Tim and Holdgraf, Chris and Kelley, Kyle and Nalvarte, Gladys and Osheroff, Andrew and Pacer, M and Panda, Yuvi and Perez, Fernando and Ragan-Kelley, Benjamin and Willing, Carol},
series = {Proceedings of the python in science conference},
pages = {113-120},
publisher = {SciPy},
url = {https://conference.scipy.org/proceedings/scipy2018/project\_jupyter.html},
year = {2018},
urldate = {2019-07-22},
issn = {2575-9752},
doi = {10.25080/Majora-4af1f417-011},
f1000-projects = {shared citations},
booktitle = {Proceedings of the 17th Python in Science Conference}
}
@article{alakwaa_2018,
title = {Lilikoi: an R package for personalized pathway-based classification modeling using metabolomics data},
author = {Alakwaa, Fadhl and Huang, Sijia and Garmire, Lana},
url = {http://biorxiv.org/lookup/doi/10.1101/283408},
year = {2018},
month = {mar},
day = {16},
urldate = {2019-05-03},
journal = {BioRxiv},
doi = {10.1101/283408},
f1000-projects = {shared citations},
abstract = {Lilikoi (Hawaiian word for passion fruit) is a new and comprehensive R package for personalized pathway based classification modelling, using metabolomics data. Four basic modules are presented as the backbone of the package: 1) Feature mapping module, which standardizes the metabolite names provided by users, and map them to pathways. 2) Dimension transformation module, which transforms the metabolomic profiles to personalized pathway-based profiles using pathway deregulation scores ({PDS}). 3) Feature selection module which helps to select the significant pathway features related to the disease phenotypes, and 4) Classification and prediction module which offers various machine-learning classification algorithms. The package is freely available under the {GPLv3} license through the github repository at: https://github.com/lanagarmire/lilikoi}
}
@article{marbac_2018,
title = {{VarSelLCM}: an R/C ++ package for variable selection in model-based clustering of mixed-data with missing values.},
author = {Marbac, Matthieu and Sedki, Mohammed},
pages = {1255-1257},
url = {http://dx.doi.org/10.1093/bioinformatics/bty786},
year = {2018},
month = {sep},
day = {5},
urldate = {2018-09-20},
journal = {Bioinformatics},
volume = {35},
number = {7},
doi = {10.1093/bioinformatics/bty786},
pmid = {30192923},
f1000-projects = {shared citations},
abstract = {Summary: {VarSelLCM} allows a full model selection (detection of the relevant features for clustering and selection of the number of clusters) in model-based clustering, according to classical information criteria. Data to be analyzed can be composed of continuous, integer and/or categorical features. Moreover, missing values are managed, without any pre-processing, by the model used to cluster with the assumption that values are missing completely at random. Thus, {VarSelLCM} also allows data imputation by using mixture models. A Shiny application is implemented to easily interpret the clustering results. Availability: {VarSelLCM} is available to download at https://{CRAN}.R-project.org/package={VarSelLCM}/. Tutorial: vignette is available online at http://varsellcm.r-forge.r-project.org/. Supplementary information: Supplementary materials are available at Bioinformatics online.}
}
@article{sales_2018,
title = {{metaGraphite} - a new layer of pathway annotation to get metabolite networks.},
author = {Sales, Gabriele and Calura, Enrica and Romualdi, Chiara},
pages = {1258-1260},
url = {http://dx.doi.org/10.1093/bioinformatics/bty719},
year = {2018},
month = {sep},
day = {4},
urldate = {2018-09-25},
journal = {Bioinformatics},
volume = {35},
number = {7},
doi = {10.1093/bioinformatics/bty719},
pmid = {30184047},
f1000-projects = {shared citations},
abstract = {Motivation: Metabolomics is an emerging 'omics' science involving the characterization of metabolites and metabolism in biological systems. Few bioinformatic tools have been developed for the visualization, exploration and analysis of metabolomic data within the context of metabolic pathways: some of them became rapidly obsolete and are no longer supported, others are based on a single database. A systematic collection of existing annotations has the potential of considerably boosting the investigation and contextualization of metabolomic measurements. Results: We have released a major update of our Bioconductor package graphite which explicitly tracks small molecules within pathway topologies and their interactions with proteins. The package gathers the information stored in eight major databases, oriented both at genes and at metabolites, across 14 different species. Depending on user preferences, all pathways can be retrieved as gene-only, gene-metabolite or metabolite-only networks. Availability: The new graphite version (1.24) is available on Bioconductor. Supplementary information: Supplementary data are available at Bioinformatics online and at the {BioConductor} repository.}
}
@article{perrotdocks_2018,
title = {A variable selection approach in the multivariate linear model: an application to {LC}-{MS} metabolomics data.},
author = {Perrot-Dockès, Marie and Lévy-Leduc, Céline and Chiquet, Julien and Sansonnet, Laure and Brégère, Margaux and Étienne, Marie-Pierre and Robin, Stéphane and Genta-Jouve, Grégory},
url = {http://dx.doi.org/10.1515/sagmb-2017-0077},
year = {2018},
month = {sep},
day = {8},
urldate = {2018-09-25},
journal = {Statistical Applications in Genetics and Molecular Biology},
doi = {10.1515/sagmb-2017-0077},
pmid = {30205662},
f1000-projects = {shared citations}
}
@article{zha_2018,
title = {{SWATHtoMRM}: Development of High-Coverage Targeted Metabolomics Method Using {SWATH} Technology for Biomarker Discovery.},
author = {Zha, Haihong and Cai, Yuping and Yin, Yandong and Wang, Zhuozhong and Li, Kang and Zhu, Zheng-Jiang},
pages = {4062-4070},
url = {http://dx.doi.org/10.1021/acs.analchem.7b05318},
year = {2018},
month = {mar},
day = {20},
urldate = {2018-09-26},
journal = {Analytical Chemistry},
volume = {90},
number = {6},
doi = {10.1021/acs.analchem.7b05318},
pmid = {29485856},
f1000-projects = {shared citations},
abstract = {The complexity of metabolome presents a great analytical challenge for quantitative metabolite profiling, and restricts the application of metabolomics in biomarker discovery. Targeted metabolomics using multiple-reaction monitoring ({MRM}) technique has excellent capability for quantitative analysis, but suffers from the limited metabolite coverage. To address this challenge, we developed a new strategy, namely, {SWATHtoMRM}, which utilizes the broad coverage of {SWATH}-{MS} technology to develop high-coverage targeted metabolomics method. Specifically, {SWATH}-{MS} technique was first utilized to untargeted profile one pooled biological sample and to acquire the {MS2} spectra for all metabolites. Then, {SWATHtoMRM} was used to extract the large-scale {MRM} transitions for targeted analysis with coverage as high as 1000-2000 metabolites. Then, we demonstrated the advantages of {SWATHtoMRM} method in quantitative analysis such as coverage, reproducibility, sensitivity, and dynamic range. Finally, we applied our {SWATHtoMRM} approach to discover potential metabolite biomarkers for colorectal cancer ({CRC}) diagnosis. A high-coverage targeted metabolomics method with 1303 metabolites in one injection was developed to profile colorectal cancer tissues from {CRC} patients. A total of 20 potential metabolite biomarkers were discovered and validated for {CRC} diagnosis. In plasma samples from {CRC} patients, 17 out of 20 potential biomarkers were further validated to be associated with tumor resection, which may have a great potential in assessing the prognosis of {CRC} patients after tumor resection. Together, the {SWATHtoMRM} strategy provides a new way to develop high-coverage targeted metabolomics method, and facilitates the application of targeted metabolomics in disease biomarker discovery. The {SWATHtoMRM} program is freely available on the Internet ( http://www.zhulab.cn/software.php ).}
}
@article{leibovici_2010,
title = {Spatio-Temporal Multiway Decompositions Using Principal Tensor Analysis on-Modes: {TheR} {PackagePTAk}},
author = {Leibovici, Didier G.},
url = {http://www.jstatsoft.org/v34/i10/},
year = {2010},
urldate = {2018-10-02},
journal = {Journal of statistical software},
volume = {34},
number = {10},
issn = {1548-7660},
doi = {10.18637/jss.v034.i10},
f1000-projects = {shared citations}
}
@article{giordani_2014,
title = {Three-Way Component Analysis Using {theR} {PackageThreeWay}},
author = {Giordani, Paolo and Kiers, Henk A. L. and Ferraro, Maria Antonietta Del},
url = {http://www.jstatsoft.org/v57/i07/},
year = {2014},
urldate = {2018-10-02},
journal = {Journal of statistical software},
volume = {57},
number = {7},
issn = {1548-7660},
doi = {10.18637/jss.v057.i07},
f1000-projects = {shared citations}
}
@incollection{pedersen_2006,
title = {Application of Multi-Way Analysis to {2D} {NMR} Data},
author = {Pedersen, Henrik T. and Dyrby, Marianne and Engelsen, S\oren B. and Bro, Rasmus},
series = {Annual reports on {NMR} spectroscopy},
pages = {207-233},
publisher = {Elsevier},
url = {http://linkinghub.elsevier.com/retrieve/pii/S0066410306590040},
year = {2006},
urldate = {2018-10-02},
volume = {59},
isbn = {9780125054591},
issn = {00664103},
doi = {10.1016/S0066-4103(06)59004-0},
f1000-projects = {shared citations},
abstract = {Two- and three-dimensional or even higher-dimensional {NMR} spectroscopy is changing from specialised techniques to more commonly used ones. As the complexity of the acquired {NMR} data increases, the task of analysing these data constantly becomes more and more demanding and new methods are required to facilitate the analysis. With one-dimensional {NMR} data multivariate data analysis has proven to be a strong tool, but how should one analyse higher-dimensional {NMR} data in order to extract as much relevant information as possible without having to break data down into smaller dimensions and thus lose the inherent structure? A class of multivariate data analytical techniques called multi-way analysis encompass techniques that have been designed to handle and analyse such data structures directly. In this paper, the theory of some of the most commonly used multi-way methods will be described and examples of their application to three-way arrays of {NMR} data reported in the literature will be given. The focus will be on the generic principles of multi-way analysis using three-way data in order to gently introduce the new concepts.}
}
@article{bouhaddani_2018,
title = {Integrating omics datasets with the {OmicsPLS} package.},
author = {Bouhaddani, Said El and Uh, Hae-Won and Jongbloed, Geurt and Hayward, Caroline and Klarić, Lucija and Kiełbasa, Szymon M and Houwing-Duistermaat, Jeanine},
pages = {371},
url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2371-3},
year = {2018},
month = {oct},
day = {11},
urldate = {2018-10-21},
journal = {{BMC} Bioinformatics},
volume = {19},
number = {1},
issn = {1471-2105},
doi = {10.1186/s12859-018-2371-3},
pmid = {30309317},
pmcid = {PMC6182835},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: With the exponential growth in available biomedical data, there is a need for data integration methods that can extract information about relationships between the data sets. However, these data sets might have very different characteristics. For interpretable results, data-specific variation needs to be quantified. For this task, Two-way Orthogonal Partial Least Squares ({O2PLS}) has been proposed. To facilitate application and development of the methodology, free and open-source software is required. However, this is not the case with {O2PLS}. {RESULTS}: We introduce {OmicsPLS}, an open-source implementation of the {O2PLS} method in R. It can handle both low- and high-dimensional datasets efficiently. Generic methods for inspecting and visualizing results are implemented. Both a standard and faster alternative cross-validation methods are available to determine the number of components. A simulation study shows good performance of {OmicsPLS} compared to alternatives, in terms of accuracy and {CPU} runtime. We demonstrate {OmicsPLS} by integrating genetic and glycomic data. {CONCLUSIONS}: We propose the {OmicsPLS} R package: a free and open-source implementation of {O2PLS} for statistical data integration. {OmicsPLS} is available at https://cran.r-project.org/package={OmicsPLS} and can be installed in R via install.packages("{OmicsPLS}").}
}
@article{narduzzi_2018,
title = {The Compound Characteristics Comparison ({CCC}) approach: a tool for improving confidence in natural compound identification.},
author = {Narduzzi, Luca and Stanstrup, Jan and Mattivi, Fulvio and Franceschi, Pietro},
pages = {1-13},
url = {http://dx.doi.org/10.1080/19440049.2018.1523572},
year = {2018},
month = {oct},
day = {23},
urldate = {2018-11-01},
journal = {Food additives \& contaminants. Part A, Chemistry, analysis, control, exposure \& risk assessment},
volume = {35},
number = {11},
doi = {10.1080/19440049.2018.1523572},
pmid = {30352003},
f1000-projects = {shared citations},
abstract = {Compound identification is the main hurdle in {LC}-{HRMS}-based metabolomics, given the high number of 'unknown' metabolites. In recent years, numerous in silico fragmentation simulators have been developed to simplify and improve mass spectral interpretation and compound annotation. Nevertheless, expert mass spectrometry users and chemists are still needed to select the correct entry from the numerous candidates proposed by automatic tools, especially in the plant kingdom due to the huge structural diversity of natural compounds occurring in plants. In this work, we propose the use of a supervised machine learning approach to predict molecular substructures from isotopic patterns, training the model on a large database of grape metabolites. This approach, called 'Compounds Characteristics Comparison' ({CCC}) emulates the experience of a plant chemist who 'gains experience' from a (proof-of-principle) dataset of grape compounds. The results show that the {CCC} approach is able to predict with good accuracy most of the sub-structures proposed. In addition, after querying {MS}/{MS} spectra in Metfrag 2.2 and applying {CCC} predictions as scoring terms with real data, the {CCC} approach helped to give a better ranking to the correct candidates, improving users' confidence in candidate selection. Our results demonstrated that the proposed approach can complement current identification strategies based on fragmentation simulators and formula calculators, assisting compound identification. The {CCC} algorithm is freely available as R package (https://github.com/lucanard/{CCC}) which includes a seamless integration with Metfrag. The {CCC} package also permits uploading additional training data, which can be used to extend the proposed approach to other systems biological matrices. List of abbreviations: Acidic: acidic moiety; aliph: aliphatic chain; {AUC}: area under the {ROC} curve; bs: best glycosidic structure; {CCC}: Compounds' Characteristics Comparison; Cees: Carbons estimation errors; {CO}: Carbon to Oxygen ratio; Het: Heterocyclic moiety; {IMD}: Isotopic Mass Defect (and Pattern); {LC}-{HRMS}: Liquid Chromatography - High Resolution Mass Spectrometry; md: mass defect; {MM}: Monoisotopic Mass; {MS}: Mass Spectrometry; {MSE}: Mean Squared Error; {nC}: number of Carbons; {NN}: Nitrogen; {pC}: percentage of Carbon mass on the total mass; Pho: Phosphate; {PLSr}: Partial Least Square regression; ppm: parts per million; {QSRR}: Quantitative structure-retention relationship; {RMD}: Relative Mass Defect; {ROC}: Receiver Operating Characteristics; {rRMD}: residual Relative Mass Defect; {RT}: retention time; Sul: Sulphur; {UPLC}-{ESI}-Q-{TOF}-{MS}: Ultra Performance Liquid Chromatography - {ElectroSpray} Ionization -Quadropole - Time of Flight - Mass Spectrometry; {VAT}: Vitis arizonica Texas.}
}
@article{kanehisa_2019,
title = {New approach for understanding genome variations in {KEGG}.},
author = {Kanehisa, Minoru and Sato, Yoko and Furumichi, Miho and Morishima, Kanae and Tanabe, Mao},
pages = {D590-D595},
url = {https://academic.oup.com/nar/advance-article/doi/10.1093/nar/gky962/5128935},
year = {2019},
month = {jan},
day = {8},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {47},
number = {D1},
issn = {0305-1048},
doi = {10.1093/nar/gky962},
pmid = {30321428},
pmcid = {PMC6324070},
f1000-projects = {shared citations},
abstract = {{KEGG} (Kyoto Encyclopedia of Genes and Genomes; https://www.kegg.jp/ or https://www.genome.jp/kegg/) is a reference knowledge base for biological interpretation of genome sequences and other high-throughput data. It is an integrated database consisting of three generic categories of systems information, genomic information and chemical information, and an additional human-specific category of health information. {KEGG} pathway maps, {BRITE} hierarchies and {KEGG} modules have been developed as generic molecular networks with {KEGG} Orthology nodes of functional orthologs so that {KEGG} pathway mapping and other procedures can be applied to any cellular organism. Unfortunately, however, this generic approach was inadequate for knowledge representation in the health information category, where variations of human genomes, especially disease-related variations, had to be considered. Thus, we have introduced a new approach where human gene variants are explicitly incorporated into what we call 'network variants' in the recently released {KEGG} {NETWORK} database. This allows accumulation of knowledge about disease-related perturbed molecular networks caused not only by gene variants, but also by viruses and other pathogens, environmental factors and drugs. We expect that {KEGG} {NETWORK} will become another reference knowledge base for the basic understanding of disease mechanisms and practical use in clinical sequencing and drug development.}
}
@article{alcorizabalaguer_2018,
title = {{LipidMS}: an R package for lipid annotation in untargeted liquid chromatography-data independent acquisition-mass spectrometry lipidomics.},
author = {Alcoriza-Balaguer, Maria Isabel and García-Cañaveras, Juan Carlos and Lopez, Adrian and Conde, Isabel and Juan, Oscar and Carretero, Julian and Lahoz, Agustín},
pages = {836-845},
url = {http://dx.doi.org/10.1021/acs.analchem.8b03409},
year = {2018},
month = {nov},
day = {30},
urldate = {2018-12-04},
journal = {Analytical Chemistry},
volume = {91},
number = {1},
doi = {10.1021/acs.analchem.8b03409},
pmid = {30500173},
f1000-projects = {shared citations},
abstract = {High resolution {LC}-{MS}-untargeted lipidomics using data-independent acquisition ({DIA}) has the potential to increase lipidome coverage as it enables the continuous and unbiased acquisition of all eluting ions. However, the loss of the link between the precursor and the product ions combined with the high dimensionality of {DIA} data sets hinder accurate feature annotation. Here, we present {LipidMS}, an R-package aimed to confidently identify lipid species in untargeted {LC}-{DIA}-{MS}. To this end, {LipidMS} combines a coelution score, which links precursor and fragment ions, with fragmentation and intensity rules. Depending on the {MS} evidence reached by the identification function survey, {LipidMS} provides three levels of structural annotations: i) "subclass level", e.g., {PG}(34:1); ii) "fatty acyl level", e.g., {PG}(16:0\_18:1); and iii) "fatty acyl position level", e.g., {PG}(16:0/18:1). The comparison of {LipidMS} with freely available data-dependent acquisition ({DDA}) and {DIA} identification tools showed that {LipidMS} provides significantly more accurate and structural informative lipid identifications. Finally, to exemplify the utility of {LipidMS}, we investigated the lipidomic serum profile of patients diagnosed with non-alcoholic steatohepatitis ({NASH}), which is the progressive form of non-alcoholic fatty liver disease, a disorder underlying a strong lipid dysregulation. As previously published, a significant decrease in lyso- and phosphatidylcholines and cholesterol esters and an increase in phosphatidylethanolamines were observed in {NASH} patients. Remarkably, {LipidMS} allowed to identify a new set of lipids that may be used for {NASH} diagnosis. Altogether, {LipidMS} has been validated as a tool to assist lipid identification in the {LC}-{DIA}-{MS} untargeted analysis of complex biological samples.}
}
@article{ottensmann_2018,
title = {{GCalignR}: An R package for aligning gas-chromatography data for ecological and evolutionary studies.},
author = {Ottensmann, Meinolf and Stoffel, Martin A and Nichols, Hazel J and Hoffman, Joseph I},
pages = {e0198311},
url = {http://dx.doi.org/10.1371/journal.pone.0198311},
year = {2018},
month = {jun},
day = {7},
urldate = {2018-12-07},
journal = {Plos One},
volume = {13},
number = {6},
doi = {10.1371/journal.pone.0198311},
pmid = {29879149},
pmcid = {PMC5991698},
f1000-projects = {shared citations},
abstract = {Chemical cues are arguably the most fundamental means of animal communication and play an important role in mate choice and kin recognition. Consequently, there is growing interest in the use of gas chromatography ({GC}) to investigate the chemical basis of eco-evolutionary interactions. Both {GC}-{MS} (mass spectrometry) and {FID} (flame ionization detection) are commonly used to characterise the chemical composition of biological samples such as skin swabs. The resulting chromatograms comprise peaks that are separated according to their retention times and which represent different substances. Across chromatograms of different samples, homologous substances are expected to elute at similar retention times. However, random and often unavoidable experimental variation introduces noise, making the alignment of homologous peaks challenging, particularly with {GC}-{FID} data where mass spectral data are lacking. Here we present {GCalignR}, a user-friendly R package for aligning {GC}-{FID} data based on retention times. The package was developed specifically for ecological and evolutionary studies that seek to investigate similarity patterns across multiple and often highly variable biological samples, for example representing different sexes, age classes or reproductive stages. The package also implements dynamic visualisations to facilitate inspection and fine-tuning of the resulting alignments and can be integrated within a broader workflow in R to facilitate downstream multivariate analyses. We demonstrate an example workflow using empirical data from Antarctic fur seals and explore the impact of user-defined parameter values by calculating alignment error rates for multiple datasets. The resulting alignments had low error rates for most of the explored parameter space and we could also show that {GCalignR} performed equally well or better than other available software. We hope that {GCalignR} will help to simplify the processing of chemical datasets and improve the standardization and reproducibility of chemical analyses in studies of animal chemical communication and related fields.}
}
@article{shi_2019,
title = {Variable selection and validation in multivariate modelling.},
author = {Shi, Lin and Westerhuis, Johan A and Rosén, Johan and Landberg, Rikard and Brunius, Carl},
pages = {972-980},
url = {http://dx.doi.org/10.1093/bioinformatics/bty710},
year = {2019},
month = {mar},
day = {15},
urldate = {2019-08-05},
journal = {Bioinformatics},
volume = {35},
number = {6},
doi = {10.1093/bioinformatics/bty710},
pmid = {30165467},
pmcid = {PMC6419897},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Validation of variable selection and predictive performance is crucial in construction of robust multivariate models that generalize well, minimize overfitting and facilitate interpretation of results. Inappropriate variable selection leads instead to selection bias, thereby increasing the risk of model overfitting and false positive discoveries. Although several algorithms exist to identify a minimal set of most informative variables (i.e. the minimal-optimal problem), few can select all variables related to the research question (i.e. the all-relevant problem). Robust algorithms combining identification of both minimal-optimal and all-relevant variables with proper cross-validation are urgently needed. {RESULTS}: We developed the {MUVR} algorithm to improve predictive performance and minimize overfitting and false positives in multivariate analysis. In the {MUVR} algorithm, minimal variable selection is achieved by performing recursive variable elimination in a repeated double cross-validation ({rdCV}) procedure. The algorithm supports partial least squares and random forest modelling, and simultaneously identifies minimal-optimal and all-relevant variable sets for regression, classification and multilevel analyses. Using three authentic omics datasets, {MUVR} yielded parsimonious models with minimal overfitting and improved model performance compared with state-of-the-art {rdCV}. Moreover, {MUVR} showed advantages over other variable selection algorithms, i.e. Boruta and {VSURF}, including simultaneous variable selection and validation scheme and wider applicability. {AVAILABILITY} {AND} {IMPLEMENTATION}: Algorithms, data, scripts and tutorial are open source and available as an R package ('{MUVR}') at https://gitlab.com/{CarlBrunius}/{MUVR}.git. {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author(s) 2018. Published by Oxford University Press.}
}
@article{wgele_2012,
title = {{MassTRIX} reloaded: combined analysis and visualization of transcriptome and metabolome data.},
author = {Wägele, Brigitte and Witting, Michael and Schmitt-Kopplin, Philippe and Suhre, Karsten},
pages = {e39860},
url = {http://dx.doi.org/10.1371/journal.pone.0039860},
year = {2012},
month = {jul},
day = {6},
urldate = {2019-06-20},
journal = {Plos One},
volume = {7},
number = {7},
doi = {10.1371/journal.pone.0039860},
pmid = {22815716},
pmcid = {PMC3391204},
f1000-projects = {shared citations},
abstract = {Systems Biology is a field in biological science that focuses on the combination of several or all "omics"-approaches in order to find out how genes, transcripts, proteins and metabolites act together in the network of life. Metabolomics as analog to genomics, transcriptomics and proteomics is more and more integrated into biological studies and often transcriptomic and metabolomic experiments are combined in one setup. At a first glance both data types seem to be completely different, but both produce information on biological entities, either transcripts or metabolites. Both types can be overlaid on metabolic pathways to obtain biological information on the studied system. For the joint analysis of both data types the {MassTRIX} webserver was updated. {MassTRIX} is freely available at www.masstrix.org.}
}
@article{picartarmada_2018,
title = {{FELLA}: an R package to enrich metabolomics data.},
author = {Picart-Armada, Sergio and Fernández-Albert, Francesc and Vinaixa, Maria and Yanes, Oscar and Perera-Lluna, Alexandre},
pages = {538},
url = {http://dx.doi.org/10.1186/s12859-018-2487-5},
year = {2018},
month = {dec},
day = {22},
urldate = {2019-01-01},
journal = {{BMC} Bioinformatics},
volume = {19},
number = {1},
doi = {10.1186/s12859-018-2487-5},
pmid = {30577788},
pmcid = {PMC6303911},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Pathway enrichment techniques are useful for understanding experimental metabolomics data. Their purpose is to give context to the affected metabolites in terms of the prior knowledge contained in metabolic pathways. However, the interpretation of a prioritized pathway list is still challenging, as pathways show overlap and cross talk effects. {RESULTS}: We introduce {FELLA}, an R package to perform a network-based enrichment of a list of affected metabolites. {FELLA} builds a hierarchical representation of an organism biochemistry from the Kyoto Encyclopedia of Genes and Genomes ({KEGG}), containing pathways, modules, enzymes, reactions and metabolites. In addition to providing a list of pathways, {FELLA} reports intermediate entities (modules, enzymes, reactions) that link the input metabolites to them. This sheds light on pathway cross talk and potential enzymes or metabolites as targets for the condition under study. {FELLA} has been applied to six public datasets -three from Homo sapiens, two from Danio rerio and one from Mus musculus- and has reproduced findings from the original studies and from independent literature. {CONCLUSIONS}: The R package {FELLA} offers an innovative enrichment concept starting from a list of metabolites, based on a knowledge graph representation of the {KEGG} database that focuses on interpretability. Besides reporting a list of pathways, {FELLA} suggests intermediate entities that are of interest per se. Its usefulness has been shown at several molecular levels on six public datasets, including human and animal models. The user can run the enrichment analysis through a simple interactive graphical interface or programmatically. {FELLA} is publicly available in Bioconductor under the {GPL}-3 license.}
}
@article{alakwaa_2018a,
title = {Lilikoi: an R package for personalized pathway-based classification modeling using metabolomics data.},
author = {Al-Akwaa, Fadhl M and Yunits, Breck and Huang, Sijia and Alhajaji, Hassam and Garmire, Lana X},
url = {http://dx.doi.org/10.1093/gigascience/giy136},
year = {2018},
month = {dec},
day = {1},
urldate = {2019-01-10},
journal = {GigaScience},
volume = {7},
number = {12},
doi = {10.1093/gigascience/giy136},
pmid = {30535020},
pmcid = {PMC6290884},
f1000-projects = {shared citations},
abstract = {Lilikoi (the Hawaiian word for passion fruit) is a new and comprehensive R package for personalized pathway-based classification modeling using metabolomics data. Four basic modules are presented as the backbone of the package: feature mapping module, which standardizes the metabolite names provided by users and maps them to pathways; dimension transformation module, which transforms the metabolomic profiles to personalized pathway-based profiles using pathway deregulation scores; feature selection module, which helps to select the significant pathway features related to the disease phenotypes; and classification and prediction module, which offers various machine learning classification algorithms. The package is freely available under the {GPLv3} license through the github repository at: https://github.com/lanagarmire/lilikoi and {CRAN}: https://cran.r-project.org/web/packages/lilikoi/index.html.}
}
@article{siddiqui_2018,
title = {{IntLIM}: integration using linear models of metabolomics and gene expression data.},
author = {Siddiqui, Jalal K and Baskin, Elizabeth and Liu, Mingrui and Cantemir-Stone, Carmen Z and Zhang, Bofei and Bonneville, Russell and {McElroy}, Joseph P and Coombes, Kevin R and Mathé, Ewy A},
pages = {81},
url = {http://dx.doi.org/10.1186/s12859-018-2085-6},
year = {2018},
month = {mar},
day = {5},
urldate = {2019-07-18},
journal = {{BMC} Bioinformatics},
volume = {19},
number = {1},
doi = {10.1186/s12859-018-2085-6},
pmid = {29506475},
pmcid = {PMC5838881},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Integration of transcriptomic and metabolomic data improves functional interpretation of disease-related metabolomic phenotypes, and facilitates discovery of putative metabolite biomarkers and gene targets. For this reason, these data are increasingly collected in large (\textgreater 100 participants) cohorts, thereby driving a need for the development of user-friendly and open-source methods/tools for their integration. Of note, clinical/translational studies typically provide snapshot (e.g. one time point) gene and metabolite profiles and, oftentimes, most metabolites measured are not identified. Thus, in these types of studies, pathway/network approaches that take into account the complexity of transcript-metabolite relationships may neither be applicable nor readily uncover novel relationships. With this in mind, we propose a simple linear modeling approach to capture disease-(or other phenotype) specific gene-metabolite associations, with the assumption that co-regulation patterns reflect functionally related genes and metabolites. {RESULTS}: The proposed linear model, metabolite \~ gene + phenotype + gene:phenotype, specifically evaluates whether gene-metabolite relationships differ by phenotype, by testing whether the relationship in one phenotype is significantly different from the relationship in another phenotype (via a statistical interaction gene:phenotype p-value). Statistical interaction p-values for all possible gene-metabolite pairs are computed and significant pairs are then clustered by the directionality of associations (e.g. strong positive association in one phenotype, strong negative association in another phenotype). We implemented our approach as an R package, {IntLIM}, which includes a user-friendly R Shiny web interface, thereby making the integrative analyses accessible to non-computational experts. We applied {IntLIM} to two previously published datasets, collected in the {NCI}-60 cancer cell lines and in human breast tumor and non-tumor tissue, for which transcriptomic and metabolomic data are available. We demonstrate that {IntLIM} captures relevant tumor-specific gene-metabolite associations involved in known cancer-related pathways, including glutamine metabolism. Using {IntLIM}, we also uncover biologically relevant novel relationships that could be further tested experimentally. {CONCLUSIONS}: {IntLIM} provides a user-friendly, reproducible framework to integrate transcriptomic and metabolomic data and help interpret metabolomic data and uncover novel gene-metabolite relationships. The {IntLIM} R package is publicly available in {GitHub} ( https://github.com/mathelab/{IntLIM} ) and includes a user-friendly web application, vignettes, sample data and data/code to reproduce results.}
}
@article{depke_2019,
title = {{CluMSID}: an R package for similarity-based clustering of tandem mass spectra to aid feature annotation in metabolomics.},
author = {Depke, Tobias and Franke, Raimo and Brönstrup, Mark},
url = {http://dx.doi.org/10.1093/bioinformatics/btz005},
year = {2019},
month = {jan},
day = {12},
urldate = {2019-02-12},
journal = {Bioinformatics},
doi = {10.1093/bioinformatics/btz005},
pmid = {30649189},
f1000-projects = {shared citations},
abstract = {Summary: Compound identification is one of the most eminent challenges in the untargeted analysis of complex mixtures of small molecules by mass spectrometry. Similarity of tandem mass spectra can provide valuable information on putative structural similarities between known and unknown analytes and hence aids feature identification in the bioanalytical sciences. We have developed {CluMSID} (Clustering of {MS}² spectra for metabolite identification), an R package that enables researchers to make use of tandem mass spectra and neutral loss pattern similarities as a part of their metabolite annotation workflow. {CluMSID} offers functions for all analysis steps from import of raw data to data mining by unsupervised multivariate methods along with respective (interactive) visualisations. A detailed tutorial with example data is provided as supplementary information. Availability: {CluMSID} is available as R package from https://github.com/tdepke/{CluMSID}/. Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{dong_2019,
title = {Miso: an R package for multiple isotope labeling assisted metabolomics data analysis.},
author = {Dong, Yonghui and Feldberg, Liron and Aharoni, Asaph},
url = {http://dx.doi.org/10.1093/bioinformatics/btz092},
year = {2019},
month = {feb},
day = {6},
urldate = {2019-02-12},
journal = {Bioinformatics},
doi = {10.1093/bioinformatics/btz092},
pmid = {30726876},
f1000-projects = {shared citations},
abstract = {Motivation: The use of stable isotope labeling is highly advantageous for structure elucidation in metabolomics studies. However, computational tools dealing with multiple-precursor-based labeling studies are still missing. Hence, we developed Miso, an R package providing automated and efficient data analysis workflow to detect the complete repertoire of labeled molecules from multiple-precursor-based labeling experiments. Results: The capability of Miso is demonstrated by the analysis of liquid chromatography-mass spectrometry ({LC}-{MS}) data obtained from duckweed plants fed with one unlabeled and two differently labeled tyrosine (unlabeled tyrosine, tyrosine-{2H4} and tyrosine-{13C915N1}). The resulting data matrix generated by Miso contains sets of unlabeled and labeled ions with their retention time, m/z values and number of labeled atoms that can be directly utilized for database query and biological studies. Availability: Miso is publicly available on the {CRAN} repository (https://cran.r-project.org/web/packages/Miso). A reproducible case study and a detailed tutorial are available from {GitHub} (https://github.com/{YonghuiDong}/Miso\_example). Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@article{feldberg_2018,
title = {{DLEMMA}-{MS}-Imaging for Identification of Spatially Localized Metabolites and Metabolic Network Map Reconstruction.},
author = {Feldberg, Liron and Dong, Yonghui and Heinig, Uwe and Rogachev, Ilana and Aharoni, Asaph},
pages = {10231-10238},
url = {http://dx.doi.org/10.1021/acs.analchem.8b01644},
year = {2018},
month = {sep},
day = {4},
urldate = {2019-02-12},
journal = {Analytical Chemistry},
volume = {90},
number = {17},
doi = {10.1021/acs.analchem.8b01644},
pmid = {30063330},
f1000-projects = {shared citations},
abstract = {Regardless of major advances in mass spectrometry imaging ({MSI}), there are three intrinsic limitations associated with {MSI}, including intricate molecular identification, low molecular coverage, and incapability to obtain "true" spatial distribution due to isobaric and particularly isomeric ions interference. We developed a novel approach that integrates in vivo dual isotope labeling of precursor metabolites with {MSI} ({DLEMMA}-{MS}-Imaging) for identification of spatially localized metabolite and metabolic network map reconstruction. In a proof-of-concept study, we identified 59 and 6 novel metabolites in lemna and tomato fruit, respectively. Significantly, 20-30\% of the identified metabolites were found to contain at least one structural isomer that displays a different distribution pattern. The notable feature of this approach is the ability to differentiate localization of structural isomers, hence, providing the "true" distribution of molecules of interest.}
}
@article{wei_2018a,
title = {{GSimp}: A Gibbs sampler based left-censored missing value imputation approach for metabolomics studies.},
author = {Wei, Runmin and Wang, Jingye and Jia, Erik and Chen, Tianlu and Ni, Yan and Jia, Wei},
pages = {e1005973},
url = {http://dx.doi.org/10.1371/journal.pcbi.1005973},
year = {2018},
month = {jan},
day = {31},
urldate = {2019-09-04},
journal = {{PLoS} Computational Biology},
volume = {14},
number = {1},
doi = {10.1371/journal.pcbi.1005973},
pmid = {29385130},
pmcid = {PMC5809088},
f1000-projects = {shared citations},
abstract = {Left-censored missing values commonly exist in targeted metabolomics datasets and can be considered as missing not at random ({MNAR}). Improper data processing procedures for missing values will cause adverse impacts on subsequent statistical analyses. However, few imputation methods have been developed and applied to the situation of {MNAR} in the field of metabolomics. Thus, a practical left-censored missing value imputation method is urgently needed. We developed an iterative Gibbs sampler based left-censored missing value imputation approach ({GSimp}). We compared {GSimp} with other three imputation methods on two real-world targeted metabolomics datasets and one simulation dataset using our imputation evaluation pipeline. The results show that {GSimp} outperforms other imputation methods in terms of imputation accuracy, observation distribution, univariate and multivariate analyses, and statistical sensitivity. Additionally, a parallel version of {GSimp} was developed for dealing with large scale metabolomics datasets. The R code for {GSimp}, evaluation pipeline, tutorial, real-world and simulated targeted metabolomics datasets are available at: https://github.com/{WandeRum}/{GSimp}.}
}
@article{hoffmann_2019,
title = {{mzTab}-M: A Data Standard for Sharing Quantitative Results in Mass Spectrometry Metabolomics.},
author = {Hoffmann, Nils and Rein, Joel and Sachsenberg, Timo and Hartler, Jürgen and Haug, Kenneth and Mayer, Gerhard and Alka, Oliver and Dayalan, Saravanan and Pearce, Jake T M and Rocca-Serra, Philippe and Qi, Da and Eisenacher, Martin and Perez-Riverol, Yasset and Vizcaíno, Juan Antonio and Salek, Reza M and Neumann, Steffen and Jones, Andrew R},
pages = {3302-3310},
url = {http://dx.doi.org/10.1021/acs.analchem.8b04310},
year = {2019},
month = {mar},
day = {5},
urldate = {2019-04-26},
journal = {Analytical Chemistry},
volume = {91},
number = {5},
doi = {10.1021/acs.analchem.8b04310},
pmid = {30688441},
pmcid = {PMC6660005},
f1000-projects = {shared citations},
abstract = {Mass spectrometry ({MS}) is one of the primary techniques used for large-scale analysis of small molecules in metabolomics studies. To date, there has been little data format standardization in this field, as different software packages export results in different formats represented in {XML} or plain text, making data sharing, database deposition, and reanalysis highly challenging. Working within the consortia of the Metabolomics Standards Initiative, Proteomics Standards Initiative, and the Metabolomics Society, we have created {mzTab}-M to act as a common output format from analytical approaches using {MS} on small molecules. The format has been developed over several years, with input from a wide range of stakeholders. {mzTab}-M is a simple tab-separated text format, but importantly, the structure is highly standardized through the design of a detailed specification document, tightly coupled to validation software, and a mandatory controlled vocabulary of terms to populate it. The format is able to represent final quantification values from analyses, as well as the evidence trail in terms of features measured directly from {MS} (e.g., {LC}-{MS}, {GC}-{MS}, {DIMS}, etc.) and different types of approaches used to identify molecules. {mzTab}-M allows for ambiguity in the identification of molecules to be communicated clearly to readers of the files (both people and software). There are several implementations of the format available, and we anticipate widespread adoption in the field.}
}
@article{shen_2019,
title = {Metabolic reaction network-based recursive metabolite annotation for untargeted metabolomics.},
author = {Shen, Xiaotao and Wang, Ruohong and Xiong, Xin and Yin, Yandong and Cai, Yuping and Ma, Zaijun and Liu, Nan and Zhu, Zheng-Jiang},
pages = {1516},
url = {http://www.nature.com/articles/s41467-019-09550-x},
year = {2019},
month = {apr},
day = {3},
urldate = {2019-04-30},
journal = {Nature Communications},
volume = {10},
number = {1},
issn = {2041-1723},
doi = {10.1038/s41467-019-09550-x},
pmid = {30944337},
pmcid = {PMC6447530},
f1000-projects = {shared citations},
abstract = {Large-scale metabolite annotation is a challenge in liquid chromatogram-mass spectrometry ({LC}-{MS})-based untargeted metabolomics. Here, we develop a metabolic reaction network ({MRN})-based recursive algorithm ({MetDNA}) that expands metabolite annotations without the need for a comprehensive standard spectral library. {MetDNA} is based on the rationale that seed metabolites and their reaction-paired neighbors tend to share structural similarities resulting in similar {MS2} spectra. {MetDNA} characterizes initial seed metabolites using a small library of {MS2} spectra, and utilizes their experimental {MS2} spectra as surrogate spectra to annotate their reaction-paired neighbor metabolites, which subsequently serve as the basis for recursive analysis. Using different {LC}-{MS} platforms, data acquisition methods, and biological samples, we showcase the utility and versatility of {MetDNA} and demonstrate that about 2000 metabolites can cumulatively be annotated from one experiment. Our results demonstrate that {MetDNA} substantially expands metabolite annotation, enabling quantitative assessment of metabolic pathways and facilitating integrative multi-omics analysis.}
}
@article{lefort_2019,
title = {{ASICS}: an R package for a whole analysis workflow of {1D} {1H} {NMR} spectra.},
author = {Lefort, Gaëlle and Liaubet, Laurence and Canlet, Cécile and Tardivel, Patrick and Père, Marie-Christine and Quesnel, Hélène and Paris, Alain and Iannuccelli, Nathalie and Vialaneix, Nathalie and Servien, Rémi},
url = {http://dx.doi.org/10.1093/bioinformatics/btz248},
year = {2019},
month = {apr},
day = {12},
urldate = {2019-04-18},
journal = {Bioinformatics},
doi = {10.1093/bioinformatics/btz248},
pmid = {30977816},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: In metabolomics, the detection of new biomarkers from {NMR} spectra is a promising approach. However, this analysis remains difficult due to the lack of a whole workflow that handles spectra pre-processing, automatic identification and quantification of metabolites and statistical analyses, in a reproducible way. {RESULTS}: We present {ASICS}, an R package that contains a complete workflow to analyse spectra from {NMR} experiments. It contains an automatic approach to identify and quantify metabolites in a complex mixture spectrum and uses the results of the quantification in untargeted and targeted statistical analyses. {ASICS} was shown to improve the precision of quantification in comparison to existing methods on two independent datasets. In addition, {ASICS} successfully recovered most metabolites that were found important to explain a two level condition describing the samples by a manual and expert analysis based on bucketting. It also found new relevant metabolites involved in metabolic pathways related to risk factors associated with the condition. {AVAILABILITY}: {ASICS} is distributed as an R package, available on Bioconductor. {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author(s) (2019). Published by Oxford University Press. All rights reserved. For Permissions, please email: journals.permissions@oup.com.}
}
@book{hall_2011,
title = {Annual Plant Reviews, Biology of Plant Metabolomics},
author = {Hall, and Robert, D.},
pages = {448},
publisher = {Wiley, John \& Sons, Incorporated},
year = {2011},
month = {mar},
day = {5},
urldate = {2019-04-25},
edition = {1},
isbn = {978-1-4051-9954-4},
f1000-projects = {shared citations}
}
@article{yu_2017,
title = {Evaluation and reduction of the analytical uncertainties in {GC}-{MS} analysis using a boundary regression model.},
author = {Yu, Miao and Hou, Xingwang and Liu, Qian and Wang, Yawei and Liu, Jiyan and Jiang, Guibin},
pages = {141-147},
url = {http://dx.doi.org/10.1016/j.talanta.2016.11.046},
year = {2017},
month = {mar},
day = {1},
urldate = {2019-04-26},
journal = {Talanta},
volume = {164},
doi = {10.1016/j.talanta.2016.11.046},
pmid = {28107908},
f1000-projects = {shared citations},
abstract = {The uncertainties in analysis of trace environmental pollutants may come from sample matrix and sample pretreatment process. In this study, a boundary model was developed based on the visualized data of mass spectrometry to evaluate the influences from sample matrix and pretreatment process. The factors affecting the pretreatment procedures, such as the solvents, the extraction sorbents and the extraction process had limited influences compared with matrix effects. Using such boundary model, we found that selecting suitable qualitative and quantitative ions for {MS} detector is more important for reducing the matrix effect in {GC}-{MS} analysis than the traditionally used methods of optimizing the pretreatment process since some clean up sorbents might be useless to reduce the matrix effects. As for 2,2',4,4',5-pentabromodiphenyl ether ({BDE}-99), the fragmental ions were usually used for qualitative and quantitative analysis, which however was easily affected by the matrix effects. While, molecular ions would eliminate the influences from the sample matrix. Such a model could be used to decrease the uncertainty and increase the accuracy of environmental trace analysis. Copyright \copyright 2016 Elsevier B.V. All rights reserved.}
}
@article{meringer_2011,
title = {{MS}/{MS} data improves automated determination of molecular formulas by mass spectrometry},
author = {Meringer, Markus and Reinker, Stefan and Zhang, Juan and Muller, Alban},
pages = {259-290},
url = {http://match.pmf.kg.ac.rs/electronic\_versions/Match65/n2/match65n2\_259-290.pdf},
year = {2011},
urldate = {2019-04-26},
journal = {{MATCH} Commun. Math. Comput. Chem},
volume = {65},
f1000-projects = {shared citations}
}
@article{naake_2019,
title = {{MetNet}: Metabolite Network Prediction from High-Resolution Mass Spectrometry Data in R Aiding Metabolite Annotation.},
author = {Naake, Thomas and Fernie, Alisdair R},
pages = {1768-1772},
url = {http://pubs.acs.org/doi/10.1021/acs.analchem.8b04096},
year = {2019},
month = {feb},
day = {5},
urldate = {2019-04-30},
journal = {Analytical Chemistry},
volume = {91},
number = {3},
issn = {0003-2700},
doi = {10.1021/acs.analchem.8b04096},
pmid = {30500168},
f1000-projects = {shared citations},
abstract = {A major bottleneck of mass spectrometric metabolomic analysis is still the rapid detection and annotation of unknown m/ z features across biological matrices. This kind of analysis is especially cumbersome for complex samples with hundreds to thousands of unknown features. Traditionally, the annotation was done manually imposing constraints in reproducibility and automatization. Furthermore, different analysis tools are typically used at different steps which requires parsing of data and changing of environments. We present here {MetNet}, implemented in the R programming language and available as an open-source package via the Bioconductor project. {MetNet}, which is compatible with the output of the xcms/{CAMERA} suite, uses the data-rich output of mass spectrometry metabolomics to putatively link features on their relation to other features in the data set. {MetNet} uses both structural and quantitative information on metabolomics data for network inference and enables the annotation of unknown analytes.}
}
@article{picartarmada_2017,
title = {Null diffusion-based enrichment for metabolomics data.},
author = {Picart-Armada, Sergio and Fernández-Albert, Francesc and Vinaixa, Maria and Rodríguez, Miguel A and Aivio, Suvi and Stracker, Travis H and Yanes, Oscar and Perera-Lluna, Alexandre},
pages = {e0189012},
url = {http://dx.doi.org/10.1371/journal.pone.0189012},
year = {2017},
month = {dec},
day = {6},
urldate = {2019-04-30},
journal = {Plos One},
volume = {12},
number = {12},
doi = {10.1371/journal.pone.0189012},
pmid = {29211807},
pmcid = {PMC5718512},
f1000-projects = {shared citations},
abstract = {Metabolomics experiments identify metabolites whose abundance varies as the conditions under study change. Pathway enrichment tools help in the identification of key metabolic processes and in building a plausible biological explanation for these variations. Although several methods are available for pathway enrichment using experimental evidence, metabolomics does not yet have a comprehensive overview in a network layout at multiple molecular levels. We propose a novel pathway enrichment procedure for analysing summary metabolomics data based on sub-network analysis in a graph representation of a reference database. Relevant entries are extracted from the database according to statistical measures over a null diffusive process that accounts for network topology and pathway crosstalk. Entries are reported as a sub-pathway network, including not only pathways, but also modules, enzymes, reactions and possibly other compound candidates for further analyses. This provides a richer biological context, suitable for generating new study hypotheses and potential enzymatic targets. Using this method, we report results from cells depleted for an uncharacterised mitochondrial gene using {GC} and {LC}-{MS} data and employing {KEGG} as a knowledge base. Partial validation is provided with {NMR}-based tracking of {13C} glucose labelling of these cells.}
}
@article{zuo_2016,
title = {{INDEED}: Integrated differential expression and differential network analysis of omic data for biomarker discovery.},
author = {Zuo, Yiming and Cui, Yi and Di Poto, Cristina and Varghese, Rency S and Yu, Guoqiang and Li, Ruijiang and Ressom, Habtom W},
pages = {12-20},
url = {http://dx.doi.org/10.1016/j.ymeth.2016.08.015},
year = {2016},
month = {dec},
day = {1},
urldate = {2019-05-03},
journal = {Methods},
volume = {111},
doi = {10.1016/j.ymeth.2016.08.015},
pmid = {27592383},
pmcid = {PMC5135617},
f1000-projects = {shared citations},
abstract = {Differential expression ({DE}) analysis is commonly used to identify biomarker candidates that have significant changes in their expression levels between distinct biological groups. One drawback of {DE} analysis is that it only considers the changes on single biomolecule level. Recently, differential network ({DN}) analysis has become popular due to its capability to measure the changes on biomolecular pair level. In {DN} analysis, network is typically built based on correlation and biomarker candidates are selected by investigating the network topology. However, correlation tends to generate over-complicated networks and the selection of biomarker candidates purely based on network topology ignores the changes on single biomolecule level. In this paper, we propose a novel approach, {INDEED}, that builds sparse differential network based on partial correlation and integrates {DE} and {DN} analyses for biomarker discovery. We applied this approach on real proteomic and glycomic data generated by liquid chromatography coupled with mass spectrometry for hepatocellular carcinoma ({HCC}) biomarker discovery study. For each omic data, we used one dataset to select biomarker candidates, built a disease classifier and evaluated the performance of the classifier on an independent dataset. The biomarker candidates, selected by {INDEED}, were more reproducible across independent datasets, and led to a higher classification accuracy in predicting {HCC} cases and cirrhotic controls compared with those selected by separate {DE} and {DN} analyses. {INDEED} also identified some candidates previously reported to be relevant to {HCC}, such as intercellular adhesion molecule 2 ({ICAM2}) and c4b-binding protein alpha chain ({C4BPA}), which were missed by both {DE} and {DN} analyses. In addition, we applied {INDEED} for survival time prediction based on transcriptomic data acquired by analysis of samples from breast cancer patients. We selected biomarker candidates and built a regression model for survival time prediction based on a gene expression dataset and patients' survival records. We evaluated the performance of the regression model on an independent dataset. Compared with the biomarker candidates selected by {DE} and {DN} analyses, those selected through {INDEED} led to more accurate survival time prediction. Copyright Â\copyright 2016 Elsevier Inc. All rights reserved.}
}
@article{senan_2019,
title = {{CliqueMS}: A computational tool for annotating in-source metabolite ions from {LC}-{MS} untargeted metabolomics data based on a coelution similarity network.},
author = {Senan, Oriol and Aguilar-Mogas, Antoni and Navarro, Miriam and Capellades, Jordi and Noon, Luke and Burks, Deborah and Yanes, Oscar and Guimerà, Roger and Sales-Pardo, Marta},
url = {http://dx.doi.org/10.1093/bioinformatics/btz207},
year = {2019},
month = {mar},
day = {23},
urldate = {2019-05-03},
journal = {Bioinformatics},
doi = {10.1093/bioinformatics/btz207},
pmid = {30903689},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: The analysis of biological samples in untargeted metabolomic studies using {LC}-{MS} yields tens of thousands of ion signals. Annotating these features is of the utmost importance for answering questions as fundamental as, for example, how many metabolites are there in a given sample. {RESULTS}: Here, we introduce {CliqueMS}, a new algorithm for annotating in-source {LC}-{MS1} data. {CliqueMS} is based on the similarity between coelution profiles and therefore, as opposed to most methods, allows for the annotation of a single spectrum. Furthermore, {CliqueMS} improves upon the state of the art in several dimensions: (i) it uses a more discriminatory feature similarity metric; (ii) it treats the similarities between features in a transparent way by means of a simple generative model; (iii) it uses a well-grounded maximum likelihood inference approach to group features; (iv) it uses empirical adduct frequencies to identify the parental mass; and (v) it deals more flexibly with the identification of the parental mass by proposing and ranking alternative annotations. We validate our approach with simple mixtures of standards and with real complex biological samples. {CliqueMS} reduces the thousands of features typically obtained in complex samples to hundreds of metabolites, and it is able to correctly annotate more metabolites and adducts from a single spectrum than available tools. {AVAILABILITY}: https://{CRAN}.R-project.org/package={cliqueMS} and https://github.com/osenan/{cliqueMS}. {SUPPLEMENTARY} {INFORMATION}: Supplementary data, figures and text are available at Bioinformatics online. \copyright The Author(s) 2019. Published by Oxford University Press.}
}
@article{dhrkop_2019,
title = {{SIRIUS} 4: a rapid tool for turning tandem mass spectra into metabolite structure information.},
author = {Dührkop, Kai and Fleischauer, Markus and Ludwig, Marcus and Aksenov, Alexander A and Melnik, Alexey V and Meusel, Marvin and Dorrestein, Pieter C and Rousu, Juho and Böcker, Sebastian},
pages = {299-302},
url = {http://www.nature.com/articles/s41592-019-0344-8},
year = {2019},
month = {mar},
day = {18},
urldate = {2019-05-28},
journal = {Nature Methods},
volume = {16},
number = {4},
issn = {1548-7091},
doi = {10.1038/s41592-019-0344-8},
pmid = {30886413},
f1000-projects = {shared citations},
abstract = {Mass spectrometry is a predominant experimental technique in metabolomics and related fields, but metabolite structural elucidation remains highly challenging. We report {SIRIUS} 4 (https://bio.informatik.uni-jena.de/sirius/), which provides a fast computational approach for molecular structure identification. {SIRIUS} 4 integrates {CSI}:{FingerID} for searching in molecular structure databases. Using {SIRIUS} 4, we achieved identification rates of more than 70\% on challenging metabolomics datasets.}
}
@article{fredericfournier_2017,
title = {rTANDEM},
author = {Frederic Fournier, {CharlesJoly} Beauparlant , Rene Paradis , Arnaud Droit},
url = {https://bioconductor.org/packages/{rTANDEM}},
year = {2017},
urldate = {2019-05-07},
journal = {Bioconductor},
doi = {10.18129/b9.bioc.rtandem},
f1000-projects = {shared citations}
}
@article{pascovici_2017,
title = {SwathXtend},
author = {Pascovici, J {WU} And D},
url = {https://bioconductor.org/packages/{SwathXtend}},
year = {2017},
urldate = {2019-05-07},
journal = {Bioconductor},
doi = {10.18129/b9.bioc.swathxtend},
f1000-projects = {shared citations}
}
@article{csardi_2006,
title = {The igraph software package for complex network research},
author = {Csardi, Gabor and Nepusz, Tamas},
pages = {1695},
url = {http://igraph.org},
year = {2006},
urldate = {2019-05-08},
journal = {InterJournal},
volume = {Complex Systems},
f1000-projects = {shared citations}
}
@article{odom_2019,
title = {{pathwayPCA}: an R package for integrative pathway analysis with modern {PCA} methodology and gene selection},
author = {Odom, Gabriel and Ban, Yuguang and Liu, Lizhong and Sun, Xiaodian and Pico, Alex and Zhang, Bing and Wang, Lily and Chen, Xi},
url = {http://biorxiv.org/lookup/doi/10.1101/615435},
year = {2019},
month = {apr},
day = {22},
urldate = {2019-05-08},
journal = {BioRxiv},
doi = {10.1101/615435},
f1000-projects = {shared citations},
abstract = {With the advance in high-throughput technology for molecular assays, multi-omics datasets have become increasingly available. However, most currently available pathway analysis software provide little or no functionalities for analyzing multiple types of -omics data simultaneously. In addition, most tools do not provide sample-specific estimates of pathway activities, which are important for precision medicine. To address these challenges, we present {pathwayPCA}, a unique R package for integrative pathway analysis that utilizes modern statistical methodology including supervised {PCA} and adaptive elastic-net {PCA} for principal component analysis. {pathwayPCA} can analyze continuous, binary, and survival outcomes in studies with multiple covariate and/or interaction effects. We provide three case studies to illustrate pathway analysis with gene selection, integrative analysis of multi-omics datasets to identify driver genes, estimating and visualizing sample-specific pathway activities in ovarian cancer, and identifying sex-specific pathway effects in kidney cancer. {pathwayPCA} is an open source R package, freely available to the research community. We expect {pathwayPCA} to be a useful tool for empowering the wide scientific community on the analyses and interpretation of the wealth of multi-omics data recently made available by {TCGA}, {CPTAC} and other large consortiums.}
}
@article{liggi_2018,
title = {{KniMet}: a pipeline for the processing of chromatography-mass spectrometry metabolomics data.},
author = {Liggi, Sonia and Hinz, Christine and Hall, Zoe and Santoru, Maria Laura and Poddighe, Simone and Fjeldsted, John and Atzori, Luigi and Griffin, Julian L},
pages = {52},
url = {http://dx.doi.org/10.1007/s11306-018-1349-5},
year = {2018},
month = {mar},
day = {16},
urldate = {2019-05-09},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {14},
number = {4},
doi = {10.1007/s11306-018-1349-5},
pmid = {29576760},
pmcid = {PMC5856871},
f1000-projects = {shared citations},
abstract = {Introduction: Data processing is one of the biggest problems in metabolomics, given the high number of samples analyzed and the need of multiple software packages for each step of the processing workflow. Objectives: Merge in the same platform the steps required for metabolomics data processing. Methods: {KniMet} is a workflow for the processing of mass spectrometry-metabolomics data based on the {KNIME} Analytics platform. Results: The approach includes key steps to follow in metabolomics data processing: feature filtering, missing value imputation, normalization, batch correction and annotation. Conclusion: {KniMet} provides the user with a local, modular and customizable workflow for the processing of both {GC}-{MS} and {LC}-{MS} open profiling data.}
}
@article{verhoeven_2018,
title = {{KIMBLE}: A versatile visual {NMR} metabolomics workbench in {KNIME}.},
author = {Verhoeven, Aswin and Giera, Martin and Mayboroda, Oleg A},
pages = {66-76},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0003267018309346},
year = {2018},
month = {dec},
day = {31},
urldate = {2019-05-09},
journal = {Analytica Chimica Acta},
volume = {1044},
issn = {00032670},
doi = {10.1016/j.aca.2018.07.070},
pmid = {30442406},
f1000-projects = {shared citations},
abstract = {The problem of reproducibility of scientific research is a serious issue in biomedical sciences. In addition to experimental repeatability, limiting the (pre-) analytical variance is also essential. To address this problem in the field of metabolomics, we have designed {KIMBLE}, the {KNIME}-based Integrated {MetaBoLomics} Environment, a novel platform for the processing and analysis of {NMR} metabolomics data. It consists of an elaborate {NMR} metabolomics workflow in the {KNIME} workflow management system that handles both targeted and untargeted metabolomics. The workflow provides a self-documenting way of transforming raw time-domain {NMR} data into metabolic insights. Parameters for the quantification of a number of interesting metabolites in urine are included in the workflow, and several useful statistical analysis and visualization tools are incorporated as well. The workflow comes with an interesting sports-induced ketosis dataset so that new users can easily get acquainted with the platform. The user is free to adapt and extend the workflow to his or her personal needs. The {KIMBLE} workflow, the {KNIME} software and all the required libraries are installed in a {VirtualBox} virtual machine that allows for facile installation and use by non-experts. Copyright \copyright 2018 The Authors. Published by Elsevier B.V. All rights reserved.}
}
@article{huang_2014a,
title = {{MetaboNexus}: an interactive platform for integrated metabolomics analysis},
author = {Huang, Shao-Min and Toh, Weizhong and Benke, Peter Imre and Tan, Chuen Seng and Ong, Choon Nam},
pages = {1084-1093},
url = {http://link.springer.com/10.1007/s11306-014-0648-8},
year = {2014},
month = {dec},
urldate = {2019-05-16},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {10},
number = {6},
issn = {1573-3882},
doi = {10.1007/s11306-014-0648-8},
f1000-projects = {shared citations}
}
@article{treutler_2016,
title = {Discovering regulated metabolite families in untargeted metabolomics studies.},
author = {Treutler, Hendrik and Tsugawa, Hiroshi and Porzel, Andrea and Gorzolka, Karin and Tissier, Alain and Neumann, Steffen and Balcke, Gerd Ulrich},
pages = {8082-8090},
url = {http://dx.doi.org/10.1021/acs.analchem.6b01569},
year = {2016},
month = {aug},
day = {16},
urldate = {2019-08-10},
journal = {Analytical Chemistry},
volume = {88},
number = {16},
doi = {10.1021/acs.analchem.6b01569},
pmid = {27452369},
f1000-projects = {shared citations},
abstract = {The identification of metabolites by mass spectrometry constitutes a major bottleneck which considerably limits the throughput of metabolomics studies in biomedical or plant research. Here, we present a novel approach to analyze metabolomics data from untargeted, data-independent {LC}-{MS}/{MS} measurements. By integrated analysis of {MS}(1) abundances and {MS}/{MS} spectra, the identification of regulated metabolite families is achieved. This approach offers a global view on metabolic regulation in comparative metabolomics. We implemented our approach in the web application "{MetFamily}", which is freely available at http://msbi.ipb-halle.de/{MetFamily}/ . {MetFamily} provides a dynamic link between the patterns based on {MS}(1)-signal intensity and the corresponding structural similarity at the {MS}/{MS} level. Structurally related metabolites are annotated as metabolite families based on a hierarchical cluster analysis of measured {MS}/{MS} spectra. Joint examination with principal component analysis of {MS}(1) patterns, where this annotation is preserved in the loadings, facilitates the interpretation of comparative metabolomics data at the level of metabolite families. As a proof of concept, we identified two trichome-specific metabolite families from wild-type tomato Solanum habrochaites {LA1777} in a fully unsupervised manner and validated our findings based on earlier publications and with {NMR}.}
}
@misc{plotlytechnologiesinc_website_2015,
title = {Collaborative data science},
author = {Plotly Technologies Inc,},
url = {https://plot.ly},
year = {2015},
urldate = {2019-05-27},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{chang_website_2012,
title = {shiny: Web Application Framework for R},
author = {Chang, Winston and Cheng, Joe and Allaire, {JJ} and Xie, Yihui and {McPherson}, Jonathan},
url = {https://{CRAN}.R-project.org/package=shiny},
year = {2012},
month = {dec},
day = {1},
urldate = {2019-08-06},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@book{wickham_2016,
title = {Ggplot2: Elegant Graphics For Data Analysis},
author = {Wickham, Hadley},
publisher = {Springer International Publishing :},
url = {https://ggplot2.tidyverse.org},
year = {2016},
urldate = {2019-05-27},
isbn = {978-3-319-24277-4},
f1000-projects = {shared citations}
}
@inproceedings{gray_2017,
title = {Bioschemas: From Potato Salad to Protein Annotation},
author = {Gray, Alasdair J. G. and Goble, Carole A. and Jimenez, Rafael},
editor = {Nikitina, Nadeschda and Song, Dezhao and Fokoue, Achille and Haase, Peter},
series = {{CEUR} Workshop Proceedings},
publisher = {{CEUR}-{WS}.org},
url = {http://ceur-ws.org/Vol-1963/paper579.pdf},
year = {2017},
urldate = {2019-06-03},
volume = {1963},
f1000-projects = {shared citations},
booktitle = {Proceedings of the {ISWC} 2017 Posters \& Demonstrations and Industry Tracks co-located with 16th International Semantic Web Conference ({ISWC} 2017), Vienna, Austria, October 23rd - to - 25th, 2017.}
}
@misc{rcoredevelopmentteam_website_2018,
title = {R: A Language and Environment for Statistical Computing},
author = {R Core Development Team,},
url = {https://www.R-project.org},
year = {2018},
urldate = {2019-08-06},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@article{wills_2017,
title = {Assayr: A simple mass spectrometry software tool for targeted metabolic and stable isotope tracer analyses.},
author = {Wills, Jimi and Edwards-Hicks, Joy and Finch, Andrew J},
pages = {9616-9619},
url = {http://dx.doi.org/10.1021/acs.analchem.7b02401},
year = {2017},
month = {sep},
day = {19},
urldate = {2019-06-23},
journal = {Analytical Chemistry},
volume = {89},
number = {18},
doi = {10.1021/acs.analchem.7b02401},
pmid = {28850215},
pmcid = {PMC5628912},
f1000-projects = {shared citations},
abstract = {Metabolic analyses generally fall into two classes: unbiased metabolomic analyses and analyses that are targeted toward specific metabolites. Both techniques have been revolutionized by the advent of mass spectrometers with detectors that afford high mass accuracy and resolution, such as time-of-flights ({TOFs}) and Orbitraps. One particular area where this technology is key is in the field of metabolic flux analysis because the resolution of these spectrometers allows for discrimination between {13C}-containing isotopologues and those containing {15N} or other isotopes. While {XCMS}-based software is freely available for untargeted analysis of mass spectrometric data sets, it does not always identify metabolites of interest in a targeted assay. Furthermore, there is a paucity of vendor-independent software that deals with targeted analyses of metabolites and of isotopologues in particular. Here, we present {AssayR}, an R package that takes high resolution wide-scan liquid chromatography-mass spectrometry ({LC}-{MS}) data sets and tailors peak detection for each metabolite through a simple, iterative user interface. It automatically integrates peak areas for all isotopologues and outputs extracted ion chromatograms ({EICs}), absolute and relative stacked bar charts for all isotopologues, and a .csv data file. We demonstrate several examples where {AssayR} provides more accurate and robust quantitation than {XCMS}, and we propose that tailored peak detection should be the preferred approach for targeted assays. In summary, {AssayR} provides easy and robust targeted metabolite and stable isotope analyses on wide-scan data sets from high resolution mass spectrometers.}
}
@article{boettiger_2015,
title = {Building Software, Building Community: Lessons from the {rOpenSci} Project},
author = {Boettiger, Carl and Chamberlain, Scott and Hart, Edmund and Ram, Karthik},
url = {http://openresearchsoftware.metajnl.com/articles/10.5334/jors.bu/},
year = {2015},
month = {nov},
day = {16},
urldate = {2019-08-01},
journal = {Journal of open research software},
volume = {3},
issn = {2049-9647},
doi = {10.5334/jors.bu},
f1000-projects = {shared citations},
abstract = {{rOpenSci} is a developer collective originally formed in 2011 by graduate students and post-docs from ecology and evolutionary biology to collaborate on building software tools to facilitate a more open and synthetic approach in the face of transformative rise of large and heterogeneous data. Born on the internet (the collective only began through chance discussions over social media), we have grown into a widely recognized effort that supports an ecosystem of some 45 software packages, engages scores of collaborators, has taught dozens of workshops around the world, and has secured over \$480,000 in grant support. As young scientists working in an academic context largely without direct support for our efforts, we have first hand experience with most of the the technical and social challenges {W\SSSPE} seeks to address. In this paper we provide an experience report which describes our approach and success in building an effective and diverse community.}
}
@article{emwas_2019,
title = {{NMR} spectroscopy for metabolomics research.},
author = {Emwas, Abdul-Hamid and Roy, Raja and {McKay}, Ryan T and Tenori, Leonardo and Saccenti, Edoardo and Gowda, G A Nagana and Raftery, Daniel and Alahmari, Fatimah and Jaremko, Lukasz and Jaremko, Mariusz and Wishart, David S},
url = {http://dx.doi.org/10.3390/metabo9070123},
year = {2019},
month = {jun},
day = {27},
urldate = {2019-09-08},
journal = {Metabolites},
volume = {9},
number = {7},
doi = {10.3390/metabo9070123},
pmid = {31252628},
pmcid = {PMC6680826},
f1000-projects = {shared citations},
abstract = {Over the past two decades, nuclear magnetic resonance ({NMR}) has emerged as one of the three principal analytical techniques used in metabolomics (the other two being gas chromatography coupled to mass spectrometry ({GC}-{MS}) and liquid chromatography coupled with single-stage mass spectrometry ({LC}-{MS})). The relative ease of sample preparation, the ability to quantify metabolite levels, the high level of experimental reproducibility, and the inherently nondestructive nature of {NMR} spectroscopy have made it the preferred platform for long-term or large-scale clinical metabolomic studies. These advantages, however, are often outweighed by the fact that most other analytical techniques, including both {LC}-{MS} and {GC}-{MS}, are inherently more sensitive than {NMR}, with lower limits of detection typically being 10 to 100 times better. This review is intended to introduce readers to the field of {NMR}-based metabolomics and to highlight both the advantages and disadvantages of {NMR} spectroscopy for metabolomic studies. It will also explore some of the unique strengths of {NMR}-based metabolomics, particularly with regard to isotope selection/detection, mixture deconvolution via {2D} spectroscopy, automation, and the ability to noninvasively analyze native tissue specimens. Finally, this review will highlight a number of emerging {NMR} techniques and technologies that are being used to strengthen its utility and overcome its inherent limitations in metabolomic applications.}
}
@article{zaslavsky_2016,
title = {Clustering analysis of proteins from microbial genomes at multiple levels of resolution.},
author = {Zaslavsky, Leonid and Ciufo, Stacy and Fedorov, Boris and Tatusova, Tatiana},
pages = {276},
url = {http://dx.doi.org/10.1186/s12859-016-1112-8},
year = {2016},
month = {aug},
day = {31},
urldate = {2019-07-01},
journal = {{BMC} Bioinformatics},
volume = {17 Suppl 8},
doi = {10.1186/s12859-016-1112-8},
pmid = {27586436},
pmcid = {PMC5009818},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Microbial genomes at the National Center for Biotechnology Information ({NCBI}) represent a large collection of more than 35,000 assemblies. There are several complexities associated with the data: a great variation in sampling density since human pathogens are densely sampled while other bacteria are less represented; different protein families occur in annotations with different frequencies; and the quality of genome annotation varies greatly. In order to extract useful information from these sophisticated data, the analysis needs to be performed at multiple levels of phylogenomic resolution and protein similarity, with an adequate sampling strategy. {RESULTS}: Protein clustering is used to construct meaningful and stable groups of similar proteins to be used for analysis and functional annotation. Our approach is to create protein clusters at three levels. First, tight clusters in groups of closely-related genomes (species-level clades) are constructed using a combined approach that takes into account both sequence similarity and genome context. Second, clustroids of conservative in-clade clusters are organized into seed global clusters. Finally, global protein clusters are built around the the seed clusters. We propose filtering strategies that allow limiting the protein set included in global clustering. The in-clade clustering procedure, subsequent selection of clustroids and organization into seed global clusters provides a robust representation and high rate of compression. Seed protein clusters are further extended by adding related proteins. Extended seed clusters include a significant part of the data and represent all major known cell machinery. The remaining part, coming from either non-conservative (unique) or rapidly evolving proteins, from rare genomes, or resulting from low-quality annotation, does not group together well. Processing these proteins requires significant computational resources and results in a large number of questionable clusters. {CONCLUSION}: The developed filtering strategies allow to identify and exclude such peripheral proteins limiting the protein dataset in global clustering. Overall, the proposed methodology allows the relevant data at different levels of details to be obtained and data redundancy eliminated while keeping biologically interesting variations.}
}
@article{brusco_2017,
title = {A comparison of latent class, K-means, and K-median methods for clustering dichotomous data.},
author = {Brusco, Michael J and Shireman, Emilie and Steinley, Douglas},
pages = {563-580},
url = {http://dx.doi.org/10.1037/met0000095},
year = {2017},
urldate = {2019-07-01},
journal = {Psychological methods},
volume = {22},
number = {3},
doi = {10.1037/met0000095},
pmid = {27607543},
pmcid = {PMC5982597},
f1000-projects = {shared citations},
abstract = {The problem of partitioning a collection of objects based on their measurements on a set of dichotomous variables is a well-established problem in psychological research, with applications including clinical diagnosis, educational testing, cognitive categorization, and choice analysis. Latent class analysis and K-means clustering are popular methods for partitioning objects based on dichotomous measures in the psychological literature. The K-median clustering method has recently been touted as a potentially useful tool for psychological data and might be preferable to its close neighbor, K-means, when the variable measures are dichotomous. We conducted simulation-based comparisons of the latent class, K-means, and K-median approaches for partitioning dichotomous data. Although all 3 methods proved capable of recovering cluster structure, K-median clustering yielded the best average performance, followed closely by latent class analysis. We also report results for the 3 methods within the context of an application to transitive reasoning data, in which it was found that the 3 approaches can exhibit profound differences when applied to real data. ({PsycINFO} Database Record (c) 2017 {APA}, all rights reserved).}
}
@article{mllner_2011,
title = {Modern hierarchical, agglomerative clustering algorithms},
author = {Müllner, Daniel},
year = {2011},
urldate = {2019-07-01},
journal = {arXiv},
volume = {abs/1109.2378},
f1000-projects = {shared citations}
}
@book{bishop_2006,
title = {Pattern Recognition And Machine Learning (information Science And Statistics)},
author = {Bishop, Christopher M.},
pages = {738},
publisher = {Springer},
year = {2006},
urldate = {2019-07-01},
isbn = {978-0387310732},
address = {New York},
f1000-projects = {shared citations}
}
@book{felici_2007,
title = {Mathematical Methods For Knowledge Discovery And Data Mining},
author = {Felici, Giovanni},
pages = {371},
publisher = {Idea Group Reference},
year = {2007},
urldate = {2019-07-01},
isbn = {978-1599045283},
address = {Hershey},
f1000-projects = {shared citations}
}
@book{shaw_2003,
title = {Multivariate Statistics For The Environmental Sciences (mathematics)},
author = {Shaw, Peter J. A.},
pages = {233},
publisher = {Hodder Education Publishers},
year = {2003},
urldate = {2019-07-01},
edition = {1},
isbn = {0-340-80763-6},
address = {London},
f1000-projects = {shared citations}
}
@book{manly_2017,
title = {Multivariate Statistical Methods},
author = {Manly, Bryan F.j.},
pages = {270},
publisher = {Routledge},
year = {2017},
urldate = {2019-07-01},
edition = {4},
isbn = {9781498728966},
address = {Boca Raton},
f1000-projects = {shared citations}
}
@book{wei_2006,
title = {Time Series Analysis : Univariate And Multivariate Methods (2nd Edition)},
author = {Wei, William W.s.},
pages = {640},
publisher = {Pearson},
year = {2006},
urldate = {2019-07-01},
edition = {2},
isbn = {9780321322166},
address = {Boston},
f1000-projects = {shared citations}
}
@book{Routledge_2018,
title = {Introduction to multivariate analysis},
publisher = {Routledge},
url = {https://www.taylorfrancis.com/books/9780203749999},
year = {2018},
month = {feb},
day = {19},
urldate = {2019-07-01},
isbn = {9780203749999},
doi = {10.1201/9780203749999},
f1000-projects = {shared citations}
}
@article{cai_2017,
title = {Network Marker Selection for Untargeted {LC}-{MS} Metabolomics Data.},
author = {Cai, Qingpo and Alvarez, Jessica A and Kang, Jian and Yu, Tianwei},
pages = {1261-1269},
url = {http://dx.doi.org/10.1021/acs.jproteome.6b00861},
year = {2017},
month = {mar},
day = {3},
urldate = {2019-07-01},
journal = {Journal of Proteome Research},
volume = {16},
number = {3},
doi = {10.1021/acs.jproteome.6b00861},
pmid = {28168878},
pmcid = {PMC5441461},
f1000-projects = {shared citations},
abstract = {Untargeted metabolomics using high-resolution liquid chromatography-mass spectrometry ({LC}-{MS}) is becoming one of the major areas of high-throughput biology. Functional analysis, that is, analyzing the data based on metabolic pathways or the genome-scale metabolic network, is critical in feature selection and interpretation of metabolomics data. One of the main challenges in the functional analyses is the lack of the feature identity in the {LC}-{MS} data itself. By matching mass-to-charge ratio (m/z) values of the features to theoretical values derived from known metabolites, some features can be matched to one or more known metabolites. When multiple matchings occur, in most cases only one of the matchings can be true. At the same time, some known metabolites are missing in the measurements. Current network/pathway analysis methods ignore the uncertainty in metabolite identification and the missing observations, which could lead to errors in the selection of significant subnetworks/pathways. In this paper, we propose a flexible network feature selection framework that combines metabolomics data with the genome-scale metabolic network. The method adopts a sequential feature screening procedure and machine learning-based criteria to select important subnetworks and identify the optimal feature matching simultaneously. Simulation studies show that the proposed method has a much higher sensitivity than the commonly used maximal matching approach. For demonstration, we apply the method on a cohort of healthy subjects to detect subnetworks associated with the body mass index ({BMI}). The method identifies several subnetworks that are supported by the current literature, as well as detects some subnetworks with plausible new functional implications. The R code is available at http://web1.sph.emory.edu/users/tyu8/{M\SS}.}
}
@book{legendre_2012,
title = {Numerical Ecology, Volume 24 (developments In Environmental Modelling)},
author = {Legendre, P. and Legendre, Loic F J},
pages = {1006},
publisher = {Elsevier},
year = {2012},
month = {aug},
day = {20},
urldate = {2019-07-01},
edition = {3},
isbn = {978-0-444-53868-0},
f1000-projects = {shared citations}
}
@article{stravs_2017,
title = {Exploring micropollutant biotransformation in three freshwater phytoplankton species.},
author = {Stravs, Michael A and Pomati, Francesco and Hollender, Juliane},
pages = {822-832},
url = {http://dx.doi.org/10.1039/c7em00100b},
year = {2017},
month = {jun},
day = {21},
urldate = {2019-07-02},
journal = {Environmental science. Processes \& impacts},
volume = {19},
number = {6},
doi = {10.1039/c7em00100b},
pmid = {28485428},
f1000-projects = {shared citations},
abstract = {Phytoplankton constitute an important component of surface water ecosystems; however little is known about their contribution to biotransformation of organic micropollutants. To elucidate biotransformation processes, batch experiments with two cyanobacterial species (Microcystis aeruginosa and Synechococcus sp.) and one green algal species (Chlamydomonas reinhardtii) were conducted. Twenty-four micropollutants were studied, including 15 fungicides and 9 pharmaceuticals. Online solid phase extraction ({SPE}) coupled with liquid chromatography ({LC})-high resolution tandem mass spectrometry ({HRMS}/{MS}) was used together with suspect and nontarget screening to identify transformation products ({TPs}). 14 {TPs} were identified for 9 micropollutants, formed by cytochrome P450-mediated oxidation, conjugation and methylation reactions. The observed transformation pathways included reactions likely mediated by promiscuous enzymes, such as glutamate conjugation to mefenamic acid and pterin conjugation of sulfamethoxazole. For 15 compounds, including all azole fungicides tested, no {TPs} were identified. Environmentally relevant concentrations of chemical stressors had no influence on the transformation types and rates.}
}
@article{stravs_2019,
title = {Biodiversity drives micropollutant biotransformation in freshwater phytoplankton assemblages.},
author = {Stravs, Michael A and Pomati, Francesco and Hollender, Juliane},
pages = {4265-4273},
url = {http://dx.doi.org/10.1021/acs.est.8b07018},
year = {2019},
month = {apr},
day = {16},
urldate = {2019-07-02},
journal = {Environmental Science \& Technology},
volume = {53},
number = {8},
doi = {10.1021/acs.est.8b07018},
pmid = {30871313},
f1000-projects = {shared citations},
abstract = {Biotransformation of chemical pollutants is an ecological process requiring multifunctionality (multiple metabolic pathways) and, potentially, high biodiversity. Phytoplankton communities are highly diverse functionally and taxonomically and co-occur with complex mixtures of organic pollutants in aquatic environments. Here, we investigated how phytoplankton species richness ({SR}) and class richness ({CR}) determine the biotransformation of a mixture of 37 structurally diverse pollutants using laboratory experiments and analysis of high-resolution mass spectrometry data. Laboratory phytoplankton communities were assembled from pure cultures by creating a gradient from one to five taxonomic groups, and 5 to 11 total species, in defined medium. The biotransformation of pollutants over 6 days and the total number of transformed chemicals increased with {CR} for 13 considerably transformed compounds. The total number of transformation products ({TPs}, up to 42) was positively affected by both {CR} and {SR}: {CR} had a positive effect on stable {TPs} found, and {SR} led to more transient {TPs}. Our data indicate that both taxonomic and functional diversity are important for biotransformation of anthropogenic chemicals in phytoplankton and suggest that plankton biodiversity could play a role in the remediation of pollutant loads in aquatic ecosystems.}
}
@misc{devries_website_2015,
title = {{GitHub} - andrie/cran-network-structure: Scripts used for my {UseR}!2015 presentation on the network structure of {CRAN}},
author = {de Vries, Andrie},
url = {https://github.com/andrie/cran-network-structure},
year = {2015},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{spicer_website_2018,
title = {{GitHub} - {RASpicer}/{MetabolomicsTools}},
author = {Spicer, Rachel},
url = {https://github.com/{RASpicer}/{MetabolomicsTools}},
year = {2018},
month = {mar},
day = {5},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{misra_website_2017,
title = {{GitHub} - biswapriyamisra/metabolomics: Tools Databases Resources in Metabolomics \& Integrated Omics in 2015-2016},
author = {Misra, Biswapriya},
url = {https://github.com/biswapriyamisra/metabolomics},
year = {2017},
month = {sep},
day = {10},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{mullen_website_2019,
title = {{CRAN} Task View: Chemometrics and Computational Physics},
author = {Mullen, Katharine},
url = {https://cran.r-project.org/view={ChemPhys}},
year = {2019},
month = {jun},
day = {4},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{thecomprehensiverarchivenetwork_website_nd,
title = {{CRAN} Repository Policy},
author = {The Comprehensive R Archive Network,},
url = {https://cran.r-project.org/web/packages/policies.html},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{bioconductor_website_nd,
title = {Bioconductor - Developers},
author = {Bioconductor,},
url = {http://bioconductor.org/developers/},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{bioconductor_website_nda,
title = {Bioconductor - {BiocViews}},
author = {Bioconductor,},
url = {http://bioconductor.org/packages/release/{BiocViews}.html\#{\_\_\_Workflow}},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{devries_website_2015a,
title = {Differences in the network structure of {CRAN} and {BioConductor} (Revolutions)},
author = {de Vries, Andrie},
url = {https://blog.revolutionanalytics.com/2015/08/differences-in-the-network-structure-of-cran-and-bioconductor.html},
year = {2015},
month = {aug},
day = {12},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{devries_website_2015b,
title = {The Network structure of R packages on {CRAN} \& {BioConductor}},
author = {de Vries, Andrie and Rickert, Joseph},
url = {https://www.slideshare.net/{RevolutionAnalytics}/jsm-r-pkgs-2015-0809},
year = {2015},
month = {aug},
day = {11},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{neumann_website_2019,
title = {{GitHub} - sneumann/{metaRbolomics}: Metabolomics in R and Bioconductor},
author = {Neumann, Steffen},
url = {http://github.com/sneumann/{metaRbolomics}},
year = {2019},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@article{martinmorgan_2017,
title = {SummarizedExperiment},
author = {Martin Morgan, Valerie Obenchain},
url = {https://bioconductor.org/packages/{SummarizedExperiment}},
year = {2017},
urldate = {2019-07-10},
journal = {Bioconductor},
doi = {10.18129/b9.bioc.summarizedexperiment},
f1000-projects = {shared citations}
}
@misc{hoffmann_website_2019,
title = {{GitHub} - lifs-tools/{rmzTab}-m: The R-language bindings for {mzTab}-M},
author = {Hoffmann, Nils},
url = {https://github.com/lifs-tools/{rmzTab}-m},
year = {2019},
urldate = {2019-07-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@inproceedings{aalizadeh_2017,
title = {{AutoSuspect} : an R package to Perform Automatic Suspect Screening based on Regulatory Databases},
author = {Aalizadeh, Reza and Schymanski, Emma L and Thomaidis, Nikolaos S},
year = {2017},
urldate = {2019-07-18},
f1000-projects = {shared citations}
}
@article{chen_2018,
title = {Chemometric methods to quantify {1D} and {2D} {NMR} spectral differences among similar protein therapeutics.},
author = {Chen, Kang and Park, Junyong and Li, Feng and Patil, Sharadrao M and Keire, David A},
pages = {1011-1019},
url = {http://dx.doi.org/10.1208/s12249-017-0911-1},
year = {2018},
month = {apr},
urldate = {2019-07-22},
journal = {{AAPS} {PharmSciTech}},
volume = {19},
number = {3},
doi = {10.1208/s12249-017-0911-1},
pmid = {29110294},
f1000-projects = {shared citations},
abstract = {{NMR} spectroscopy is an emerging analytical tool for measuring complex drug product qualities, e.g., protein higher order structure ({HOS}) or heparin chemical composition. Most drug {NMR} spectra have been visually analyzed; however, {NMR} spectra are inherently quantitative and multivariate and thus suitable for chemometric analysis. Therefore, quantitative measurements derived from chemometric comparisons between spectra could be a key step in establishing acceptance criteria for a new generic drug or a new batch after manufacture change. To measure the capability of chemometric methods to differentiate comparator {NMR} spectra, we calculated inter-spectra difference metrics on {1D}/{2D} spectra of two insulin drugs, Humulin R\textregistered and Novolin R\textregistered, from different manufacturers. Both insulin drugs have an identical drug substance but differ in formulation. Chemometric methods (i.e., principal component analysis ({PCA}), 3-way Tucker3 or graph invariant ({GI})) were performed to calculate Mahalanobis distance (D M) between the two brands (inter-brand) and distance ratio (D R) among the different lots (intra-brand). The {PCA} on {1D} inter-brand spectral comparison yielded a D M value of 213. In comparing {2D} spectra, the Tucker3 analysis yielded the highest differentiability value (D M = 305) in the comparisons made followed by {PCA} (D M = 255) then the {GI} method (D M = 40). In conclusion, drug quality comparisons among different lots might benefit from {PCA} on {1D} spectra for rapidly comparing many samples, while higher resolution but more time-consuming {2D}-{NMR}-data-based comparisons using Tucker3 analysis or {PCA} provide a greater level of assurance for drug structural similarity evaluation between drug brands.}
}
@article{shinzawa_2012,
title = {Parallel factor ({PARAFAC}) kernel analysis of temperature- and composition-dependent {NMR} spectra of poly(lactic acid) nanocomposites.},
author = {Shinzawa, Hideyuki and Nishida, Masakazu and Kanematsu, Wataru and Tanaka, Toshiyuki and Suzuki, Kenzi and Noda, Isao},
pages = {1913-1921},
url = {http://dx.doi.org/10.1039/c2an16019f},
year = {2012},
month = {apr},
day = {21},
urldate = {2019-07-22},
journal = {The Analyst},
volume = {137},
number = {8},
doi = {10.1039/c2an16019f},
pmid = {22382744},
f1000-projects = {shared citations},
abstract = {The parallel factor ({PARAFAC}) kernel matrix to analyze a sample system stimulated by more than one type of perturbation is described. {PARAFAC} kernel is a quantitative representation of the synchronicity and asynchronicity observed within the {PARAFAC} score matrices generated by carrying out two-dimensional ({2D}) correlation analyses. Thus, kernel matrix representation provides more intuitively understandable interpretation to the conventional {PARAFAC} trilinear model. In this study, the utility of {PARAFAC} kernel is demonstrated by the study of poly(lactic acid)-nanocomposite undergoing a structural change depending on the temperature as well as the clay content in the sample. Seemingly complicated variation of nuclear magnetic resonance ({NMR}) spectra induced by the change in the temperature and clay content are readily analyzed by the multiple-perturbation {2D} correlation spectroscopy and {PARAFAC} kernel. {PARAFAC} kernel revealed that crystalline and amorphous structures of the {PLA} substantially undergo thermal deformation, and these variations are also influenced by the presence of the clay.}
}
@article{landrum_2016,
title = {{RDKit}: Open-Source Cheminformatics Software},
author = {Landrum, Greg},
url = {https://github.com/rdkit/rdkit/},
year = {2016},
urldate = {2019-07-22},
f1000-projects = {shared citations}
}
@article{zhang_2018,
title = {Ramp: A comprehensive relational database of metabolomics pathways for pathway enrichment analysis of genes and metabolites.},
author = {Zhang, Bofei and Hu, Senyang and Baskin, Elizabeth and Patt, Andrew and Siddiqui, Jalal K and Mathé, Ewy A},
url = {http://dx.doi.org/10.3390/metabo8010016},
year = {2018},
month = {feb},
day = {22},
urldate = {2019-07-22},
journal = {Metabolites},
volume = {8},
number = {1},
doi = {10.3390/metabo8010016},
pmid = {29470400},
pmcid = {PMC5876005},
f1000-projects = {shared citations},
abstract = {The value of metabolomics in translational research is undeniable, and metabolomics data are increasingly generated in large cohorts. The functional interpretation of disease-associated metabolites though is difficult, and the biological mechanisms that underlie cell type or disease-specific metabolomics profiles are oftentimes unknown. To help fully exploit metabolomics data and to aid in its interpretation, analysis of metabolomics data with other complementary omics data, including transcriptomics, is helpful. To facilitate such analyses at a pathway level, we have developed {RaMP} (Relational database of Metabolomics Pathways), which combines biological pathways from the Kyoto Encyclopedia of Genes and Genomes ({KEGG}), Reactome, {WikiPathways}, and the Human Metabolome {DataBase} ({HMDB}). To the best of our knowledge, an off-the-shelf, public database that maps genes and metabolites to biochemical/disease pathways and can readily be integrated into other existing software is currently lacking. For consistent and comprehensive analysis, {RaMP} enables batch and complex queries (e.g., list all metabolites involved in glycolysis and lung cancer), can readily be integrated into pathway analysis tools, and supports pathway overrepresentation analysis given a list of genes and/or metabolites of interest. For usability, we have developed a {RaMP} R package (https://github.com/Mathelab/{RaMP}-{DB}), including a user-friendly {RShiny} web application, that supports basic simple and batch queries, pathway overrepresentation analysis given a list of genes or metabolites of interest, and network visualization of gene-metabolite relationships. The package also includes the raw database file (mysql dump), thereby providing a stand-alone downloadable framework for public use and integration with other tools. In addition, the Python code needed to recreate the database on another system is also publicly available (https://github.com/Mathelab/{RaMP}-{BackEnd}). Updates for databases in {RaMP} will be checked multiple times a year and {RaMP} will be updated accordingly.}
}
@article{goble_2019,
title = {{FAIR} Computational Workflows},
author = {Goble, Carole and Cohen-Boulakia, Sarah and Soiland-Reyes, Stian and Garijo, Daniel and Gil, Yolanda and Crusoe, Michael R. and Peters, Kristian and Schober, Daniel},
url = {http://www.data-intelligence-journal.org/p/45/},
journal = {Data Intelligence},
volume = {2},
number = {1},
pages = {108-121},
year = {2020},
doi = {10.1162/dint_a_00033},
f1000-projects = {shared citations},
abstract = {Computational workflows describe the complex multi-step methods that are used for data collection, data preparation, analytics, predictive modelling, and simulation that lead to new data products. They can inherently contribute to the {FAIR} data principles: by processing data according to established metadata; by creating metadata themselves during the processing of data; and by tracking and recording data provenance. These properties aid data quality assessment and contribute to secondary data usage. Moreover, workflows are digital objects in their own right. This paper argues that {FAIR} principles for workflows need to address their specific nature in terms of their composition of executable software steps, their provenance, and their development.}
}
@article{askenazi_2011,
title = {{ARISTO}: ontological classification of small molecules by electron ionization-mass spectrometry.},
author = {Askenazi, Manor and Linial, Michal},
pages = {W505-10},
url = {http://dx.doi.org/10.1093/nar/gkr403},
year = {2011},
month = {jul},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {39},
number = {Web Server issue},
doi = {10.1093/nar/gkr403},
pmid = {21622952},
pmcid = {PMC3125788},
f1000-projects = {shared citations},
abstract = {Gas chromatography-mass spectrometry ({GC}-{MS}) acquisitions routinely yield hundreds to thousands of Electron Ionization ({EI}) mass spectra. The chemical identification of these spectra typically involves a search protocol that seeks an exact match to a reference spectrum. Reference spectra are found in comprehensive libraries of small molecule {EI} spectra curated by commercial and public entities. We developed {ARISTO} (Automatic Reduction of Ion Spectra To Ontology), a webtool, which provides information regarding the general chemical nature of the compound underlying an input {EI} mass spectrum. Importantly, {ARISTO} can provide such annotation without necessitating an exact match to a specific compound. {ARISTO} provides assignments to a subset of the {ChEBI} (Chemical Entities of Biological Interest) dictionary, an ontology, which aims to cover biologically relevant small molecules. Our system takes as input a mass spectrum represented as a series of mass and intensity pairs; the system returns a graphical representation of the supported ontology as well as a detailed table of suggested annotations along with their associated statistical evidence. {ARISTO} is accessible at this {URL}: http://www.ionspectra.org/aristo. The system is free, open to all and does not require registration of any sort.}
}
@article{szcs_2015,
title = {Webchem: Zenodo Release},
author = {Szöcs, Eduard},
url = {https://zenodo.org/record/33823},
year = {2015},
urldate = {2019-07-23},
journal = {Zenodo},
doi = {10.5281/zenodo.33823},
f1000-projects = {shared citations},
abstract = {zenodo release}
}
@article{heinrich_2018,
title = {Correcting for natural isotope abundance and tracer impurity in {MS}-, {MS}/{MS}- and high-resolution-multiple-tracer-data from stable isotope labeling experiments with {IsoCorrectoR}.},
author = {Heinrich, Paul and Kohler, Christian and Ellmann, Lisa and Kuerner, Paul and Spang, Rainer and Oefner, Peter J and Dettmer, Katja},
pages = {17910},
url = {http://dx.doi.org/10.1038/s41598-018-36293-4},
year = {2018},
month = {dec},
day = {17},
urldate = {2019-07-24},
journal = {Scientific reports},
volume = {8},
number = {1},
doi = {10.1038/s41598-018-36293-4},
pmid = {30559398},
pmcid = {PMC6297158},
f1000-projects = {shared citations},
abstract = {Experiments with stable isotope tracers such as {13C} and {15N} are increasingly used to gain insights into metabolism. However, mass spectrometric measurements of stable isotope labeling experiments should be corrected for the presence of naturally occurring stable isotopes and for impurities of the tracer substrate. Here, we analyzed the effect that such correction has on the data: omitting correction or performing invalid correction can result in largely distorted data, potentially leading to misinterpretation. {IsoCorrectoR} is the first R-based tool to offer said correction capabilities. It is easy-to-use and comprises all correction features that comparable tools can offer in a single solution: correction of {MS} and {MS}/{MS} data for natural stable isotope abundance and tracer impurity, applicability to any tracer isotope and correction of multiple-tracer data from high-resolution measurements. {IsoCorrectoR}'s correction performance agreed well with manual calculations and other available tools including Python-based {IsoCor} and Perl-based {ICT}. {IsoCorrectoR} can be downloaded as an R-package from: http://bioconductor.org/packages/release/bioc/html/{IsoCorrectoR}.html .}
}
@article{conley_2014,
title = {Massifquant: open-source Kalman filter-based {XC}-{MS} isotope trace feature detection.},
author = {Conley, Christopher J and Smith, Rob and Torgrip, Ralf J O and Taylor, Ryan M and Tautenhahn, Ralf and Prince, John T},
pages = {2636-2643},
url = {http://dx.doi.org/10.1093/bioinformatics/btu359},
year = {2014},
month = {sep},
day = {15},
urldate = {2019-07-24},
journal = {Bioinformatics},
volume = {30},
number = {18},
doi = {10.1093/bioinformatics/btu359},
pmid = {24872423},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Isotope trace ({IT}) detection is a fundamental step for liquid or gas chromatography mass spectrometry ({XC}-{MS}) data analysis that faces a multitude of technical challenges on complex samples. The Kalman filter ({KF}) application to {IT} detection addresses some of these challenges; it discriminates closely eluting {ITs} in the m/z dimension, flexibly handles heteroscedastic m/z variances and does not bin the m/z axis. Yet, the behavior of this {KF} application has not been fully characterized, as no cost-free open-source implementation exists and incomplete evaluation standards for {IT} detection persist. {RESULTS}: Massifquant is an open-source solution for {KF} {IT} detection that has been subjected to novel and rigorous methods of performance evaluation. The presented evaluation with accompanying annotations and optimization guide sets a new standard for comparative {IT} detection. Compared with {centWave}, {matchedFilter} and {MZMine2}-alternative {IT} detection engines-Massifquant detected more true {ITs} in a real {LC}-{MS} complex sample, especially low-intensity {ITs}. It also offers competitive specificity and equally effective quantitation accuracy. {AVAILABILITY} {AND} {IMPLEMENTATION}: Massifquant is integrated into {XCMS} with {GPL} license ≥ 2.0 and hosted by Bioconductor: http://bioconductor.org. Annotation data are archived at http://hdl.lib.byu.edu/1877/3232. Parameter optimization code and documentation is hosted at https://github.com/topherconley/optimize-it. \copyright The Author 2014. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{moritz_2017,
title = {{imputeTS}: Time Series Missing Value Imputation in R},
author = {Moritz, Steffen and Bartz-Beielstein, Thomas},
pages = {207},
url = {https://journal.r-project.org/archive/2017/{RJ}-2017-009/index.html},
year = {2017},
urldate = {2019-07-29},
journal = {The R journal},
volume = {9},
number = {1},
issn = {2073-4859},
doi = {10.32614/{RJ}-2017-009},
f1000-projects = {shared citations}
}
@incollection{weihs_2005,
booktitle = {Data analysis and decision support},
title = {{klaR} Analyzing German Business Cycles},
author = {Weihs, Claus and Ligges, Uwe and Luebke, Karsten and Raabe, Nils},
editor = {Baier, Daniel and Decker, Reinhold and Schmidt-Thieme, Lars},
pages = {335-343},
publisher = {Springer-Verlag},
url = {http://link.springer.com/10.1007/3-540-28397-8\_36},
year = {2005},
urldate = {2019-07-30},
isbn = {3-540-26007-2},
doi = {10.1007/3-540-28397-8\_36},
address = {Berlin/Heidelberg},
f1000-projects = {shared citations}
}
@article{kowarik_2016,
title = {Imputation with {theR} {PackageVIM}},
author = {Kowarik, Alexander and Templ, Matthias},
pages = {1-16},
url = {http://www.jstatsoft.org/v74/i07/},
year = {2016},
urldate = {2019-07-30},
journal = {Journal of statistical software},
volume = {74},
number = {7},
issn = {1548-7660},
doi = {10.18637/jss.v074.i07},
f1000-projects = {shared citations},
abstract = {The package {VIM} (Templ, Alfons, Kowarik, and Prantner 2016) is developed to explore and analyze the structure of missing values in data using visualization methods, to impute these missing values with the built-in imputation methods and to verify the imputation process using visualization tools, as well as to produce high-quality graphics for publications. This article focuses on the different imputation techniques available in the package. Four different imputation methods are currently implemented in {VIM}, namely hot-deck imputation, k-nearest neighbor imputation, regression imputation and iterative robust model-based imputation (Templ, Kowarik, and Filzmoser 2011). All of these methods are implemented in a flexible manner with many options for customization. Furthermore in this article practical examples are provided to highlight the use of the implemented methods on real-world applications. In addition, the graphical user interface of {VIM} has been re-implemented from scratch resulting in the package {VIMGUI} (Schopfhauser, Templ, Alfons, Kowarik, and Prantner 2016) to enable users without extensive R skills to access these imputation and visualization methods.}
}
@article{su_2011,
title = {Multiple Imputation with Diagnostics (mi ) {inR} : Opening Windows into the Black Box},
author = {Su, Yu-Sung and Gelman, Andrew and Hill, Jennifer and Yajima, Masanao},
url = {http://www.jstatsoft.org/v45/i02/},
year = {2011},
urldate = {2019-07-30},
journal = {Journal of statistical software},
volume = {45},
number = {2},
issn = {1548-7660},
doi = {10.18637/jss.v045.i02},
f1000-projects = {shared citations}
}
@misc{na_website_nd,
title = {{Te\SS} (Training {eSupport} System)},
url = {https://tess.oerc.ox.ac.uk/},
urldate = {2019-07-30},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{na_website_nda,
title = {Attempt to add {BioSchemas}.org {JSON}-{LD} to the vignette {HTML} · bridgedb/{BridgeDbR}@40e741a · {GitHub}},
url = {https://github.com/bridgedb/{BridgeDbR}/commit/40e741aed77765572e77f84f9fea0e0fb511d3f0},
urldate = {2019-07-30},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{na_website_ndb,
title = {Added template for {BioSchemas} Tool annotation by egonw · Pull Request \#25 · Bioconductor/bioconductor.org · {GitHub}},
url = {https://github.com/Bioconductor/bioconductor.org/pull/25},
urldate = {2019-07-30},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{thenationalinstituteofstandardsandtechnology_website_2019,
title = {{AMDIS} download},
author = {The National Institute of Standards and Technology,},
url = {https://chemdata.nist.gov/dokuwiki/doku.php?id=chemdata:downloads:start\#amdis},
year = {2019},
month = {feb},
day = {11},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{thenationalinstituteofstandardsandtechnology_website_2012,
title = {Library Conversion Tool},
author = {The National Institute of Standards and Technology,},
url = {https://chemdata.nist.gov/mass-spc/ms-search/Library\_conversion\_tool.html},
year = {2012},
month = {may},
day = {7},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{bioconductor_website_ndb,
title = {Bioconductor - {BiocViews}: Packages found under {StatisticalMethod}},
author = {Bioconductor,},
url = {http://bioconductor.org/packages/release/{BiocViews}.html\#{\_\_\_StatisticalMethod}},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{groemping_website_2019,
title = {{CRAN} Task View: Design of Experiments ({DoE}) \& Analysis of Experimental Data},
author = {Groemping, Ulrike},
url = {https://cran.r-project.org/web/views/{ExperimentalDesign}.html},
year = {2019},
month = {may},
day = {14},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{leisch_website_2019,
title = {{CRAN} Task View: Cluster Analysis \& Finite Mixture Models},
author = {Leisch, Friedrich and Gruen, Bettina},
url = {https://cran.r-project.org/web/views/Cluster.html},
year = {2019},
month = {jul},
day = {2},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{hewson_website_2018,
title = {{CRAN} Task View: Multivariate Statistics},
author = {Hewson, Paul},
url = {https://cran.r-project.org/web/views/Multivariate.html},
year = {2018},
month = {jul},
day = {21},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{hothorn_website_2019,
title = {{CRAN} Task View: Machine Learning \& Statistical Learning},
author = {Hothorn, Torsten},
url = {https://cran.r-project.org/web/views/{MachineLearning}.html},
year = {2019},
month = {jun},
day = {7},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{thecomprehensiverarchivenetwork_website_nda,
title = {{CRAN} Task Views},
author = {The Comprehensive R Archive Network,},
url = {https://cran.r-project.org/web/views/},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{srcinc_website_nd,
title = {Scientific Databases},
author = {{SRC}, Inc.,},
url = {https://www.srcinc.com/what-we-do/environmental/scientific-databases.html},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{ncicaddgroup_website_nd,
title = {{NCI}/{CADD} Chemical Identifier Resolver},
author = {{NCI}/{CADD} Group,},
url = {https://cactus.nci.nih.gov/chemical/structure},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{massbankofnorthamerica_website_nd,
title = {{MoNA} downloads},
author = {{MassBank} of North America,},
url = {http://mona.fiehnlab.ucdavis.edu/downloads},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{metabohub_website_2019,
title = {National infrastructure in metabolomics and fluxomics},
author = {Metabohub,},
url = {https://www.metabohub.fr/home.html},
year = {2019},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@article{tran_2018,
title = {Condition-specific series of metabolic sub-networks and its application for gene set enrichment analysis.},
author = {Tran, Van Du T and Moretti, Sébastien and Coste, Alix T and Amorim-Vaz, Sara and Sanglard, Dominique and Pagni, Marco},
url = {http://fdslive.oup.com/www.oup.com/pdf/production\_in\_progress.pdf},
year = {2018},
month = {nov},
day = {16},
urldate = {2019-08-01},
journal = {Bioinformatics},
issn = {1367-4803},
doi = {10.1093/bioinformatics/bty929},
pmid = {30445518},
pmcid = {PMC6596900},
f1000-projects = {shared citations},
abstract = {Motivation: Genome-scale metabolic networks and transcriptomic data represent complementary sources of knowledge about an organism's metabolism, yet their integration to achieve biological insight remains challenging. Results: We investigate here condition-specific series of metabolic sub-networks constructed by successively removing genes from a comprehensive network. The optimal order of gene removal is deduced from transcriptomic data. The sub-networks are evaluated via a fitness function, which estimates their degree of alteration. We then consider how a gene set, i.e. a group of genes contributing to a common biological function, is depleted in different series of sub-networks to detect the difference between experimental conditions. The method, named {metaboGSE}, is validated on public data for Yarrowia lipolytica and mouse. It is shown to produce {GO} terms of higher specificity compared to popular gene set enrichment methods like {GSEA} or {topGO}. Availability: The {metaboGSE} R package is available at https://cran.r-project.org/web/packages/{metaboGSE}. Supplementary information: Supplementary data are available at Bioinformatics online.}
}
@misc{workflow4metabolomics_website_nd,
title = {Referenced {W4M} Histories \textbar workflow4metabolomics.org},
author = {Workflow4metabolomics,},
url = {https://workflow4metabolomics.org/{referenced\_W4M\_histories}},
urldate = {2019-08-01},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@book{wehrens_2019,
title = {Metabolomics: Practical Guide to Design and Analysis},
editor = {Wehrens, Ron and Salek, Reza},
series = {Chapman \& Hall/{CRC} Mathematical and Computational Biology},
publisher = {Chapman and Hall/{CRC}},
url = {https://www.crcpress.com/Metabolomics-Practical-Guide-to-Design-and-Analysis/Wehrens-Salek/p/book/9781498725262},
year = {2019},
urldate = {2019-08-02},
isbn = {1498725260},
f1000-projects = {shared citations}
}
@misc{witting_website_nd,
title = {{GitHub} - michaelwitting/ms2dbworkflow},
author = {Witting, Michael},
url = {https://github.com/michaelwitting/ms2dbworkflow},
urldate = {2019-08-07},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{ram_website_nd,
title = {Configure Your R Project for binderhub • hole punch},
author = {Ram, Karthik},
url = {https://karthik.github.io/holepunch/},
urldate = {2019-08-08},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@article{yu_2019,
title = {Structure/reaction directed analysis for {LC}-{MS} based untargeted analysis.},
author = {Yu, Miao and Olkowicz, Mariola and Pawliszyn, Janusz},
pages = {16-24},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0003267018313047},
year = {2019},
month = {mar},
day = {7},
urldate = {2019-09-04},
journal = {Analytica Chimica Acta},
volume = {1050},
issn = {00032670},
doi = {10.1016/j.aca.2018.10.062},
pmid = {30661584},
f1000-projects = {shared citations},
abstract = {In {LC}-{MS} based untargeted analysis, data is collected at the peak or ion level, although the investigated biochemistry processes occur at the compound or reaction level. To this end, the presence of redundancy peaks such as co-eluted peaks, multi-chargers, adducts, neutral loss, isotopologues, and fragments ions often muddle subsequent statistical data analysis. In order to fill this gap, between peaks and compounds/reactions, independent components must first be found at the peak level, then evaluated at the compound or reaction levels. Based on paired mass distances ({PMD}), the algorithm {GlobalStd}, based on retention time hierarchical cluster analysis and global analysis of {PMDs} within clusters, is here proposed to extract independent peaks from raw {LC}-{MS} data. Following its application, a structure/reaction directed analysis can then be used to evaluate compounds at the structure or biochemistry reaction level, based on similar {PMDs} among different retention times clusters. As a proof-of-concept, the developed statistical method was applied to data obtained for in vivo {SPME} sampling on fish. In total, 277 independent peaks were demonstrated to stand for most of the variances found for the total 1459 ions detected via {LC}-{MS}. Following, both known homologous series or biological reactions along with unknown bio-processes, which may involve oxidation/reduction reactions or homologous series, were analyzed via a structure/reaction directed analysis. The findings of this analysis yielded interesting information regarding the data, for instance denoting the possible occurrence of a biosynthesis process involving l-Carnitine and its precursor 4-Trimethylammoniobutanoic acid. Such {PMD} relationships could also aid in the screening of annotation results. To this end, semi-quantitative analysis based on structure/reaction directed analysis is also here proposed for further investigation of unknown patterns or for removal of contaminants in metabolomics studies. The developed data-driven algorithm has been included in a {PMD} package with a {GUI} interactive document, and is freely available online (https://github.com/yufree/pmd). Copyright \copyright 2018 Elsevier B.V. All rights reserved.}
}
@article{yin_2019,
title = {{DecoMetDIA}: Deconvolution of Multiplexed {MS}/{MS} Spectra for Metabolite Identification in {SWATH}-{MS} based Untargeted Metabolomics.},
author = {Yin, Yandong and Wang, Ruohong and Cai, Yuping and Wang, Zhuozhong and Zhu, Zheng-Jiang},
url = {http://dx.doi.org/10.1021/acs.analchem.9b02655},
year = {2019},
month = {aug},
day = {22},
urldate = {2019-09-05},
journal = {Analytical Chemistry},
doi = {10.1021/acs.analchem.9b02655},
pmid = {31436405},
f1000-projects = {shared citations},
abstract = {{SWATH}-{MS} based data independent acquisition mass spectrometry ({DIA}-{MS}) technology has been recently developed for untargeted metabolomics due to its capability to acquire all {MS2} spectra and high quantitative accuracy. However, software tools for deconvolving multiplexed {MS}/{MS} spectra from {SWATH}-{MS} with high efficiency and high quality are still lacking in untargeted metabolomics. Here, we developed a new software tool, namely, {DecoMetDIA}, to deconvolve multiplexed {MS}/{MS} spectra for metabolite identification and support the {SWATH} based untargeted metabolomics. In {DecoMetDIA}, it selected multiple model peaks to model the co-eluted and unresolved chromatographic peaks of fragment ions in multiplexed spectra, and decomposed them into a linear combination of the model peaks. {DecoMetDIA} enabled to reconstruct the {MS2} spectra of metabolites from a variety of different biological samples with high coverages. We also demonstrated that the deconvolved {MS2} spectra from {DecoMetDIA} were of high accuracy through the comparison to the experimental {MS2} spectra from data dependent acquisition ({DDA}). Finally, about 90\% of deconvolved {MS2} spectra in various biological samples were successfully annotated using software tools such as {MetDNA} and Sirius. The results demonstrated that the deconvolved {MS2} spectra obtained from {DecoMetDIA} were accurate and valid for metabolite identification and structural elucidation. The comparison of {DecoMetDIA} to other deconvolution software such as {MS}-{DIAL} demonstrated that it performs very well for small polar metabolites. The {DecoMetDIA} software is freely available on the Internet (https://github.com/{ZhuMSLab}/{DecoMetDIA}).}
}
@article{caldernsantiago_2017,
title = {{MetaboQC}: A tool for correcting untargeted metabolomics data with mass spectrometry detection using quality controls.},
author = {Calderón-Santiago, Mónica and López-Bascón, María A and Peralbo-Molina, Ángela and Priego-Capote, Feliciano},
pages = {29-37},
url = {http://dx.doi.org/10.1016/j.talanta.2017.05.076},
year = {2017},
month = {nov},
day = {1},
urldate = {2019-09-04},
journal = {Talanta},
volume = {174},
doi = {10.1016/j.talanta.2017.05.076},
pmid = {28738582},
f1000-projects = {shared citations},
abstract = {Nowadays most metabolomic studies involve the analysis of large sets of samples to find a representative metabolite pattern associated to the factor under study. During a sequence of analyses the instrument signals can be subjected to the influence of experimental variability sources. Implementation of quality control ({QC}) samples to check the contribution of experimental variability is the most common approach in metabolomics. This practice is based on the filtration of molecular entities experiencing a variation coefficient higher than that measured in the {QC} data set. Although other robust correction algorithms have been proposed, none of them has provided an easy-to-use and easy-to-install tool capable of correcting experimental variability sources. In this research an R-package -the {MetaboQC}- has been developed to correct intra-day and inter-days variability using {QCs} analyzed within a pre-set sequence of experiments. {MetaboQC} has been tested in two data sets to assess the correction effects by comparing the metabolites variability before and after application of the proposed tool. As a result, the number of entities in {QCs} significantly different between days was reduced from 86\% to 19\% in the negative ionization mode and from 100\% to 13\% in the positive ionization mode. Furthermore, principal component analysis allowed detecting the filtration of instrumental variability associated to the injection order. Copyright \copyright 2017 Elsevier B.V. All rights reserved.}
}
@article{wolthuis_2019,
title = {{MetaboShiny} - interactive processing, analysis and identification of untargeted metabolomics data},
author = {Wolthuis, Joanna C. and Magnusdottir, Stefania and Pras-Raves, Mia and Jans, Judith J.M. and Burgering, Boudewijn and van Mil, Saskia and de Ridder, Jeroen},
url = {http://biorxiv.org/lookup/doi/10.1101/734236},
year = {2019},
month = {aug},
day = {15},
urldate = {2019-09-03},
journal = {BioRxiv},
doi = {10.1101/734236},
f1000-projects = {shared citations},
abstract = {Untargeted metabolomics by mass spectrometry in the form of mass over charge and intensity of ions, provides insight into the metabolic activity in a sample and is therefore essential to understand regulation and expression at the protein and transcription level. Problematically, it is often challenging to analyze untargeted metabolomics data as many m/z values are detected per sample and it is difficult to identify what compound they represent. We aimed to facilitate the process of finding m/z biomarkers through statistical analysis, machine learning and searching for their putative identities. To address this challenge, we developed {MetaboShiny}, a novel R and {RShiny} based metabolomics data analysis package. {MetaboShiny} features bi/multivariate and temporal statistics, an extensive machine learning module, interactive plotting and result exploration, and compound identification through a variety of chemical databases. As a result, {MetaboShiny} enables rapid and rigorous analysis of untargeted metabolomics data as well as target identification at unprecedented scale. To demonstrate its efficacy and ease-of-use, we apply {MetaboShiny} to a publicly accessible metabolomics dataset generated from the urine of smokers and non-smokers. Replication of the main results of the original publication, which includes importing, normalization and several statistical analyses, is achieved within minutes. Moreover, {MetaboShiny} enables deeper exploration of the data thereby revealing novel putative biomarkers and hypotheses. For instance, by using {MetaboShiny\textquoterights} subsetting feature, iodine is found to be significantly increased in non-smoking lung cancer patients. Furthermore, by allowing for custom adducts, {MetaboShiny} reveals a putative identification for an m/z value which could not be identified by the original authors. This validates {MetaboShiny} as a flexible and customizable data analysis package that greatly enhances metabolomics biomarker discovery.}
}
@article{chaffin_2019,
title = {{MetProc}: Separating Measurement Artifacts from True Metabolites in an Untargeted Metabolomics Experiment.},
author = {Chaffin, Mark D and Cao, Liu and Deik, Amy A and Clish, Clary B and Hu, Frank B and Martínez-González, Miguel A and Razquin, Cristina and Bullo, Monica and Corella, Dolores and Gómez-Gracia, Enrique and Fiol, Miquel and Estruch, Ramon and Lapetra, José and Fitó, Montserrat and Arós, Fernando and Serra-Majem, Lluís and Ros, Emilio and Liang, Liming},
pages = {1446-1450},
url = {http://dx.doi.org/10.1021/acs.jproteome.8b00893},
year = {2019},
month = {mar},
day = {1},
urldate = {2019-09-03},
journal = {Journal of Proteome Research},
volume = {18},
number = {3},
doi = {10.1021/acs.jproteome.8b00893},
pmid = {30562035},
f1000-projects = {shared citations},
abstract = {High-throughput metabolomics using liquid chromatography and mass spectrometry ({LC}/{MS}) provides a useful method to identify biomarkers of disease and explore biological systems. However, the majority of metabolic features detected from untargeted metabolomics experiments have unknown ion signatures, making it critical that data should be thoroughly quality controlled to avoid analyzing false signals. Here, we present a postalignment method relying on intermittent pooled study samples to separate genuine metabolic features from potential measurement artifacts. We apply the method to lipid metabolite data from the {PREDIMED} ({PREvenci}ón con {DIeta} {MEDi}-terránea) study to demonstrate clear removal of measurement artifacts. The method is publicly available as the R package {MetProc}, available on {CRAN} under the {GPL}-v2 license.}
}
@article{tiffany_2019,
title = {omu, a Metabolomics Count Data Analysis Tool for Intuitive Figures and Convenient Metadata Collection.},
author = {Tiffany, Connor R and Bäumler, Andreas J},
url = {http://dx.doi.org/10.1128/{MRA}.00129-19},
year = {2019},
month = {apr},
day = {11},
urldate = {2019-09-03},
journal = {Microbiology Resource Announcements},
volume = {8},
number = {15},
doi = {10.1128/{MRA}.00129-19},
pmid = {30975806},
pmcid = {PMC6460029},
f1000-projects = {shared citations},
abstract = {Metabolomics is a powerful tool for measuring the functional output of the microbiota. Currently, there are few established workflows for analysis downstream of metabolite identification. Here, we introduce omu, an R package designed for assigning compound hierarchies and linking compounds to corresponding enzyme and gene annotations for organisms of interest. Copyright \copyright 2019 Tiffany and Bäumler.}
}
@article{sentandreu_2018,
title = {A Survey of Orbitrap All Ion Fragmentation Analysis Assessed by an R {MetaboList} Package to Study Small-Molecule Metabolites},
author = {Sentandreu, Enrique and Peris-Díaz, Manuel D. and Sweeney, Shannon R. and Chiou, Jennifer and Muñoz, Nathalie and Tiziani, Stefano},
pages = {981-994},
url = {http://link.springer.com/10.1007/s10337-018-3536-y},
year = {2018},
month = {jul},
urldate = {2019-09-03},
journal = {Chromatographia},
volume = {81},
number = {7},
issn = {0009-5893},
doi = {10.1007/s10337-018-3536-y},
f1000-projects = {shared citations},
abstract = {Leukemia cell and melanoma tumor tissue extracts were studied for small (mostly m/z \textless 250) polar metabolites by {LC}-{ESI}-{HRMSn} analysis powered by a hybrid Quadrupole-Orbitrap. {MS} data were simultaneously acquired in fast polarity switching mode operating in {MS1} and {MS}/{MS} (All Ion Fragmentation, {AIF}) full-scan analyses at high mass resolution. Positive metabolite assignments were achieved by {AIF} analysis considering at least two characteristic transitions. Targeted metabolite profiling was achieved by the relative quantification of 18 metabolites through spiking of their respective deuterated counterparts. Manual data processing of {MS1} and {AIF} scans were compared for the accurate determination of natural metabolites and their deuterated analogs by chromatographic alignment and peak area integration. Evaluation of manual and automated ({MetaboList} R package) {AIF} data processing yielded comparable results. The versatility of {AIF} analysis also enabled the untargeted metabolite profiling of leukemia and melanoma samples in which 22 and 53 compounds were, respectively, identified outside those studied by labeling. The main limitation of this method was that low abundance metabolites with scan rates below 8 scans/peak could not be accurately quantified by {AIF} analysis. The combination of {AIF} analysis with {MetaboList} R package represents an opportunity to move towards automated, faster, and more global metabolomics approaches supported by an entirely flexible open source data processing platform freely available from Comprehensive R Archive Network ({CRAN}, https://{CRAN}.R-project.org/package={MetaboList}).}
}
@misc{zhu_website_2019,
title = {zhuchcn/Metabase: A R package to store, manipulate, analyze, and visualize Metabolomics data},
author = {Zhu, Chenghao},
url = {https://github.com/zhuchcn/Metabase},
year = {2019},
urldate = {2019-09-04},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@article{peluso_2018,
title = {Estimation of permutation-based metabolome-wide significance thresholds: Supplementary material},
author = {Peluso, Alina and Glen, Robert and Ebbels, Timothy M. D.},
url = {http://biorxiv.org/lookup/doi/10.1101/478370},
year = {2018},
month = {nov},
day = {27},
urldate = {2019-09-04},
journal = {BioRxiv},
doi = {10.1101/478370},
f1000-projects = {shared citations},
abstract = {A key issue in the omics literature is the search for statistically significant relationships between molecular markers and phenotype. The aim is to detect disease-related discriminatory features while controlling for false positive associations at adequate power. Metabolome-wide association studies have revealed significant relationships of metabolic phenotypes with disease risk by analysing hundreds to tens of thousands of molecular variables leading to multivariate data which are highly noisy and collinear. In this context, conventional Bonferroni or Sidak multiple testing corrections are rather useful as these are valid for independent tests, while permutation procedures allow for the estimation of significance levels from the null distribution without assuming independence among features. Nevertheless, under the permutation approach the distribution of p-values may present systematic deviations from the theoretical null distribution which leads to overly conservative adjusted threshold estimates i.e. smaller than a Bonferroni or Sidak correction. We make use of parametric approximation methods based on a multivariate Normal distribution to derive stable estimates of the metabolome-wide significance level. A univariate approach is applied based on a permutation procedure which effectively controls the overall type I error rate at the \alpha level. We illustrate the approach for different model parametrizations and distributional features of the outcome measure, using both simulated and real data. We also investigate different levels of correlation within the features and between the features and the outcome. {MWSL} is an open-source R software package for the empirical estimation of the metabolome-wide significance level available at https://github.com/{AlinaPeluso}/{MWSL}.}
}
@article{delivera_2018,
title = {{NormalizeMets}: assessing, selecting and implementing statistical methods for normalizing metabolomics data.},
author = {De Livera, Alysha M and Olshansky, Gavriel and Simpson, Julie A and Creek, Darren J},
pages = {54},
url = {http://link.springer.com/10.1007/s11306-018-1347-7},
year = {2018},
month = {mar},
day = {20},
urldate = {2019-09-04},
journal = {Metabolomics : Official journal of the Metabolomic Society},
volume = {14},
number = {5},
issn = {1573-3882},
doi = {10.1007/s11306-018-1347-7},
pmid = {30830328},
f1000-projects = {shared citations},
abstract = {{INTRODUCTION}: In metabolomics studies, unwanted variation inevitably arises from various sources. Normalization, that is the removal of unwanted variation, is an essential step in the statistical analysis of metabolomics data. However, metabolomics normalization is often considered an imprecise science due to the diverse sources of variation and the availability of a number of alternative strategies that may be implemented. {OBJECTIVES}: We highlight the need for comparative evaluation of different normalization methods and present software strategies to help ease this task for both data-oriented and biological researchers. {METHODS}: We present {NormalizeMets}-a joint graphical user interface within the familiar Microsoft Excel and freely-available R software for comparative evaluation of different normalization methods. The {NormalizeMets} R package along with the vignette describing the workflow can be downloaded from https://cran.r-project.org/web/packages/{NormalizeMets}/ . The Excel Interface and the Excel user guide are available on https://metabolomicstats.github.io/{ExNormalizeMets} . {RESULTS}: {NormalizeMets} allows for comparative evaluation of normalization methods using criteria that depend on the given dataset and the ultimate research question. Hence it guides researchers to assess, select and implement a suitable normalization method using either the familiar Microsoft Excel and/or freely-available R software. In addition, the package can be used for visualisation of metabolomics data using interactive graphical displays and to obtain end statistical results for clustering, classification, biomarker identification adjusting for confounding variables, and correlation analysis. {CONCLUSION}: {NormalizeMets} is designed for comparative evaluation of normalization methods, and can also be used to obtain end statistical results. The use of freely-available R software offers an attractive proposition for programming-oriented researchers, and the Excel interface offers a familiar alternative to most biological researchers. The package handles the data locally in the user's own computer allowing for reproducible code to be stored locally.}
}
@article{jaeger_2018,
title = {Statistical and Multivariate Analysis of {MS}-Based Plant Metabolomics Data.},
author = {Jaeger, Carsten and Lisec, Jan},
pages = {285-296},
url = {http://dx.doi.org/10.1007/978-1-4939-7819-9\_20},
year = {2018},
urldate = {2019-09-04},
journal = {Methods in Molecular Biology},
volume = {1778},
doi = {10.1007/978-1-4939-7819-9\_20},
pmid = {29761446},
f1000-projects = {shared citations},
abstract = {Raw data from metabolomics experiments are initially subjected to peak identification and signal deconvolution to generate raw data matrices m × n, where m are samples and n are metabolites. We describe here simple statistical procedures on such multivariate data matrices, all provided as functions in the programming environment R, useful to normalize data, detect biomarkers, and perform sample classification.}
}
@article{ramaker_2017,
title = {{R2DGC}: Threshold-free peak alignment and identification for {2D} gas chromatography mass spectrometry in R},
author = {Ramaker, Ryne C. and Gordon, Emily and Cooper, Sara J.},
url = {http://biorxiv.org/lookup/doi/10.1101/179168},
year = {2017},
month = {aug},
day = {21},
urldate = {2019-09-04},
journal = {BioRxiv},
doi = {10.1101/179168},
f1000-projects = {shared citations},
abstract = {Comprehensive two dimensional gas chromatography-mass spectrometry is a powerful method for analyzing complex mixtures of volatile compounds. This method produces a large amount of raw data that requires downstream processing to align signals of interest (peaks) across multiple samples and match peak characteristics to reference standard libraries prior to downstream statistical analysis. To address the paucity of applications addressing this need, we have developed an R package that implements retention time and mass spectra similarity threshold-free alignments, seamlessly integrates retention time standards for universally reproducible alignments, performs common ion filtering, and provides compatibility with multiple peak quantification methods. We demonstrate the packages utility on a controlled mix of metabolite standards separated under variable chromatography conditions and data generated from cell lines.}
}
@misc{tenenbaum_website_2019,
title = {Bioconductor - {KEGGREST}},
author = {Tenenbaum, Dan},
url = {http://bioconductor.org/packages/release/bioc/html/{KEGGREST}.html},
year = {2019},
urldate = {2019-09-06},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@book{lammerhofer_2013,
title = {Metabolomics in practice: successful strategies to generate and analyze metabolic data},
editor = {Lämmerhofer, Michael and Weckwerth, Wolfram},
publisher = {Wiley-{VCH} Verlag {GmbH} \& Co. {KGaA}},
url = {http://doi.wiley.com/10.1002/9783527655861},
year = {2013},
month = {jan},
day = {23},
urldate = {2019-09-08},
isbn = {9783527330898},
doi = {10.1002/9783527655861},
address = {Weinheim, Germany},
f1000-projects = {shared citations}
}
@misc{salek_website_nd,
title = {Metabolomics: An introduction \textbar {EMBL}-{EBI} Train online},
author = {Salek, Reza and Emery, Laura and Beisken, Stephan},
url = {https://www.ebi.ac.uk/training/online/course/introduction-metabolomics},
urldate = {2019-09-08},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@misc{stanstrup_website_nd,
title = {The {MetaRbolomics} book},
author = {Stanstrup, Jan and Broeckling, Corey D. and Helmus, Rick and Hoffmann, Nils and Mathé, Ewy and Naake, Thomas and Nicolotti, Luca and Peters, Kristian and Rainer, Johannes and Salek, Reza and Schulze, Tobias and Schymanski, Emma and Stravs, Michael A and Thévenot, Etienne and Treutler, Hendrik and Weber, Ralf and Willighagen, Egon and Witting, Michael and Neumann, Steffen},
url = {https://rformassspectrometry.github.io/{metaRbolomics}-book/},
urldate = {2019-09-10},
f1000-projects = {shared citations},
type = {WEBSITE}
}
@book{villasboas_2007,
title = {Metabolome Analysis: An Introduction},
author = {Villas-Boas, Silas G. and Nielsen, Jens and Smedsgaard, Jorn and Hansen, Michael A. E. and Roessner-Tunali, Ute},
pages = {319},
publisher = {Wiley, John \& Sons, Incorporated},
year = {2007},
month = {feb},
urldate = {2019-09-08},
edition = {1},
isbn = {978-0-471-74344-6},
f1000-projects = {shared citations}
}
@article{gaude_2013,
author = {Gaude, Edoardo and Chignola, Francesca and Spiliotopoulos, Dimitrios and Spitaleri, Andrea and Ghitti, Michela and {M Garcia-Manteiga}, Jose and Mari, Silvia and Musco, Giovanna},
doi = {10.2174/2213235X11301020005},
issn = {2213235X},
journal = {Current Metabolomics},
month = {mar},
number = {2},
pages = {180--189},
title = {{muma, An R Package for Metabolomics Univariate and Multivariate Statistical Analysis}},
url = {http://www.eurekaselect.com/openurl/content.php?genre=article{\&}issn=2213-235X{\&}volume=1{\&}issue=2{\&}spage=180},
volume = {1},
year = {2013}
}
@misc{InternationalMetabolomicsSociety_2019,
author = {{International Metabolomics Society}},
title = {{Free Tools {\&} Learning Resources - Metabolomics Society Wiki}},
url = {http://wiki.metabolomicssociety.org/index.php/Free{\_}Tools{\_}{\%}26{\_}Learning{\_}Resources},
urldate = {2019-09-14}
}
@Article{DelCarratore_2019,
annote = {doi: 10.1021/acs.analchem.9b02354},
author = {{Del Carratore}, Francesco and Schmidt, Kamila and Vinaixa, Maria and Hollywood, Katherine Anne and Greenland-Bews, Caitlin and Takano, Eriko and Rogers, Simon and Breitling, Rainer},
doi = {10.1021/acs.analchem.9b02354},
issn = {0003-2700},
journal = {Analytical Chemistry},
month = {sep},
publisher = {American Chemical Society},
title = {{Integrated Probabilistic Annotation (IPA): A Bayesian-based annotation method for metabolomic profiles integrating biochemical connections, isotope patterns and adduct relationships}},
url = {https://doi.org/10.1021/acs.analchem.9b02354},
year = {2019}
}
@article{kopczynski_2020,
author = {Kopczynski, Dominik and Hoffmann, Nils and Peng, Bing and Ahrends, Robert},
doi = {10.1021/acs.analchem.0c01690},
title = {{Goslin: A Grammar of Succinct Lipid Nomenclature}},
journal = {Analytical Chemistry},
volume = {92},
number = {16},
pages = {10957-10960},
year = {2020},
month = {jun},
publisher = {American Chemical Society},
note ={PMID: 32589019},
url = {https://doi.org/10.1021/acs.analchem.0c01690}
}
@misc{spectratutorial_2020,
author = {Rainer, Johannes and Witting, Michael and Gibb, Sebastian and Gatto, Laurent},
title = {Seamless Integration of Mass Spectrometry Data from Different Sources},
url = {https://jorainer.github.io/SpectraTutorials/},
urldate = {2020-12-21}
}
@article{gattoMSnbaseEfficientElegant2020a,
  title = {{{MSnbase}}, {{Efficient}} and {{Elegant R}}-{{Based Processing}} and {{Visualization}} of {{Raw Mass Spectrometry Data}}},
  author = {Gatto, Laurent and Gibb, Sebastian and Rainer, Johannes},
  year = {2020},
  month = sep,
  issn = {1535-3907},
  doi = {10.1021/acs.jproteome.0c00313},
  abstract = {We present version 2 of the MSnbase R/Bioconductor package. MSnbase provides infrastructure for the manipulation, processing, and visualization of mass spectrometry data. We focus on the new on-disk infrastructure, that allows the handling of large raw mass spectrometry experiments on commodity hardware and illustrate how the package is used for elegant data processing, method development, and visualization.},
  journal = {Journal of Proteome Research},
  keywords = {Bioconductor,mass spectrometry,metabolomics,proteomics,R,reproducible research,software,visualization},
  language = {eng},
  pmid = {32902283}
}