% paper.bib — bibliography database.
% (GitHub page furniture and a pasted line-number gutter were removed here;
% this text sits outside any @entry and is ignored by BibTeX.)
@ARTICLE{Stodden2010-cu,
  title     = "The Scientific Method in Practice: Reproducibility in the
               Computational Sciences",
  author    = "Stodden, Victoria",
  abstract  = "Since the 1660's the scientific method has included
               reproducibility as a mainstay in its effort to root error from
               scientific discovery. With the explosive grow",
  journal   = "{SSRN} Electronic Journal",
  publisher = "Social Science Research Network",
  month     = feb,
  year      = 2010
}
@ARTICLE{Ram2013-km,
  title       = "Git can facilitate greater reproducibility and increased
                 transparency in science",
  author      = "Ram, Karthik",
  affiliation = "Environmental Science, Policy, and Management, University of
                 California, Berkeley, Berkeley, CA 94720, USA.",
  abstract    = "BACKGROUND: Reproducibility is the hallmark of good science.
                 Maintaining a high degree of transparency in scientific
                 reporting is essential not just for gaining trust and
                 credibility within the scientific community but also for
                 facilitating the development of new ideas. Sharing data and
                 computer code associated with publications is becoming
                 increasingly common, motivated partly in response to data
                 deposition requirements from journals and mandates from
                 funders. Despite this increase in transparency, it is still
                 difficult to reproduce or build upon the findings of most
                 scientific publications without access to a more complete
                 workflow. FINDINGS: Version control systems (VCS), which have
                 long been used to maintain code repositories in the software
                 industry, are now finding new applications in science. One
                 such open source VCS, Git, provides a lightweight yet robust
                 framework that is ideal for managing the full suite of
                 research outputs such as datasets, statistical code, figures,
                 lab notes, and manuscripts. For individual researchers, Git
                 provides a powerful way to track and compare versions, retrace
                 errors, explore new approaches in a structured manner, while
                 maintaining a full audit trail. For larger collaborative
                 efforts, Git and Git hosting services make it possible for
                 everyone to work asynchronously and merge their contributions
                 at any time, all the while maintaining a complete authorship
                 trail. In this paper I provide an overview of Git along with
                 use-cases that highlight how this tool can be leveraged to
                 make science more reproducible and transparent, foster new
                 collaborations, and support novel uses.",
  journal     = "Source Code Biol. Med.",
  volume      = 8,
  number      = 1,
  pages       = "7",
  month       = feb,
  year        = 2013,
  doi         = "10.1186/1751-0473-8-7",
  language    = "en"
}
@MISC{noauthor_2015-ig,
  title        = {Docker-based solutions to reproducibility in science - Seven
                  Bridges},
  booktitle    = {Seven Bridges},
  abstract     = {Seven Bridges is launching a toolkit for creating fully
                  portable bioinformatics workflows, using Docker and the
                  Common Workflow Language.},
  howpublished = {\url{https://blog.sbgenomics.com/docker-based-solutions-to-reproducibility-in-science/}},
  note         = {Accessed: 2016-12-17},
  month        = jun,
  year         = 2015
}
@MISC{noauthor_undated-pi,
  title        = "expfactory-docker",
  author       = "Sochat, Vanessa V and Blair, Ross W",
  abstract     = "expfactory-docker - container for deploying behavioral
                  psychology experiments",
  howpublished = "GitHub"
}
@ARTICLE{Sochat2016-pu,
  title     = "The Experiment Factory: Standardizing Behavioral Experiments",
  author    = "Sochat, Vanessa V and Eisenberg, Ian W and Enkavi, A Zeynep and
               Li, Jamie and Bissett, Patrick G and Poldrack, Russell A",
  abstract  = "The administration of behavioral and experimental paradigms for
               psychology research is hindered by lack of a coordinated effort
               to develop and deploy standardized paradigms. While several
               frameworks (de Leeuw (2015); McDonnell et al. (2012); Mason and
               Suri (2011); Lange et al. (2015)) have provided infrastructure
               and methods for individual research groups to develop paradigms,
               missing is a coordinated effort to develop paradigms linked with
               a system to easily deploy them. This disorganization leads to
               redundancy in development, divergent implementations of
               conceptually identical tasks, disorganized and error-prone code
               lacking documentation, and difficulty in replication. The
               ongoing reproducibility crisis in psychology and neuroscience
               research (Baker (2015); Open Science Collaboration (2015))
               highlights the urgency of this challenge: reproducible research
               in behavioral psychology is conditional on deployment of
               equivalent experiments. A large, accessible repository of
               experiments for researchers to develop collaboratively is most
               efficiently accomplished through an open source framework. Here
               we present the Experiment Factory, an open source framework for
               the development and deployment of web-based experiments. The
               modular infrastructure includes experiments, virtual machines
               for local or cloud deployment, and an application to drive these
               components and provide developers with functions and tools for
               further extension. We release this infrastructure with a
               deployment (http://www.expfactory.org) that researchers are
               currently using to run a set of over 80 standardized web-based
               experiments on Amazon Mechanical Turk. By providing open source
               tools for both deployment and development, this novel
               infrastructure holds promise to bring reproducibility to the
               administration of experiments, and accelerate scientific
               progress by providing a shared community resource of
               psychological paradigms.",
  journal   = "Front. Psychol.",
  publisher = "Frontiers",
  volume    = 7,
  pages     = "610",
  month     = apr,
  year      = 2016,
  doi       = "10.3389/fpsyg.2016.00610",
  keywords  = "web-experiments; Behavior; Docker; assessment; reproducibility;
               experiments"
}
@MISC{noauthor_undated-sn,
  title        = "Science is in a reproducibility crisis: How do we resolve it?",
  abstract     = "Over the past few years, there has been a growing awareness
                  that many experimentally established ``facts'' don't seem to
                  hold up to repeated investigation.",
  howpublished = "\url{http://phys.org/news/2013-09-science-crisis.html}",
  note         = "Accessed: 2015-11-02"
}
@ARTICLE{Baker_undated-bx,
  title    = "Over half of psychology studies fail reproducibility test",
  author   = "Baker, Monya",
  abstract = "Largest replication study to date casts doubt on many published
              positive results.",
  journal  = "Nature News",
  month    = aug,
  year     = 2015,
  doi      = "10.1038/nature.2015.18248"
}
@ARTICLE{Open_Science_Collaboration2015-hb,
  title    = "{PSYCHOLOGY}. Estimating the reproducibility of psychological
              science",
  author   = "{Open Science Collaboration}",
  abstract = "Reproducibility is a defining feature of science, but the extent
              to which it characterizes current research is unknown. We
              conducted replications of 100 experimental and correlational
              studies published in three psychology journals using high-powered
              designs and original materials when available. Replication
              effects were half the magnitude of original effects, representing
              a substantial decline. Ninety-seven percent of original studies
              had statistically significant results. Thirty-six percent of
              replications had statistically significant results; 47\% of
              original effect sizes were in the 95\% confidence interval of the
              replication effect size; 39\% of effects were subjectively rated
              to have replicated the original result; and if no bias in
              original results is assumed, combining original and replication
              results left 68\% with statistically significant effects.
              Correlational tests suggest that replication success was better
              predicted by the strength of original evidence than by
              characteristics of the original and replication teams.",
  journal  = "Science",
  volume   = 349,
  number   = 6251,
  pages    = "aac4716",
  month    = aug,
  year     = 2015,
  doi      = "10.1126/science.aac4716"
}
@misc{vanessa_sochat_2017_1059119,
  author = {Sochat, Vanessa},
  title  = {{expfactory/expfactory: The Experiment Factory
            (v3.0) Release}},
  month  = nov,
  year   = 2017,
  note   = {The original published work is written about at
            \url{https://www.frontiersin.org/articles/10.3389/fpsyg.2016.00610/full}.
            This is the second version of that work, with a focus on
            reproducible containers.},
  doi    = {10.5281/zenodo.1059119},
  url    = {https://doi.org/10.5281/zenodo.1059119}
}
@MISC{McDonnell2012-ns,
  title  = "{psiTurk} (Version 1.02) [Software]. New York, {NY}: New York
            University",
  author = "McDonnell, J V and Martin, J B and Markant, D B and Coenen, A and
            Rich, A S and Gureckis, T M",
  year   = 2012
}
@ARTICLE{De_Leeuw2015-zw,
  title       = "jsPsych: a {JavaScript} library for creating behavioral
                 experiments in a Web browser",
  author      = "de Leeuw, Joshua R",
  affiliation = "Department of Psychological \& Brain Science, Cognitive
                 Science Program, Indiana University, Bloomington, IN, USA.",
  abstract    = "Online experiments are growing in popularity, and the
                 increasing sophistication of Web technology has made it
                 possible to run complex behavioral experiments online using
                 only a Web browser. Unlike with offline laboratory
                 experiments, however, few tools exist to aid in the
                 development of browser-based experiments. This makes the
                 process of creating an experiment slow and challenging,
                 particularly for researchers who lack a Web development
                 background. This article introduces jsPsych, a JavaScript
                 library for the development of Web-based experiments. jsPsych
                 formalizes a way of describing experiments that is much
                 simpler than writing the entire experiment from scratch.
                 jsPsych then executes these descriptions automatically,
                 handling the flow from one task to another. The jsPsych
                 library is open-source and designed to be expanded by the
                 research community. The project is available online at
                 www.jspsych.org .",
  journal     = "Behav. Res. Methods",
  volume      = 47,
  number      = 1,
  pages       = "1--12",
  month       = mar,
  year        = 2015,
  doi         = "10.3758/s13428-014-0458-y"
}
@BOOK{Smith2005-kg,
  author    = {Smith, J E and Nair, R},
  title     = {Virtual Machines: Versatile Platforms for Systems and Processes},
  series    = {The Morgan Kaufmann Series in Computer Architecture and Design
               Series},
  publisher = {Morgan Kaufmann Publishers},
  year      = 2005
}
@ARTICLE{Merkel2014-da,
  title     = "Docker: Lightweight Linux Containers for Consistent Development
               and Deployment",
  author    = "Merkel, Dirk",
  journal   = "Linux J.",
  publisher = "Belltown Media",
  volume    = 2014,
  number    = 239,
  month     = mar,
  year      = 2014,
  address   = "Houston, TX"
}
% Exact duplicate of the noauthor_2015-ig entry earlier in this file (same
% key, same fields). The leading "@" is removed so BibTeX ignores the text
% below instead of raising a "repeated entry" error; citations resolve to
% the first copy. Safe to delete entirely in a later cleanup.
MISC{noauthor_2015-ig,
  title = "Docker-based solutions to reproducibility in science - Seven
           Bridges",
  booktitle = "Seven Bridges",
  abstract = "Seven Bridges is launching a toolkit for creating fully
              portable bioinformatics workflows, using Docker and the
              Common Workflow Language.",
  month = jun,
  year = 2015,
  howpublished = "\url{https://blog.sbgenomics.com/docker-based-solutions-to-reproducibility-in-science/}",
  note = "Accessed: 2016-12-17"
}
@INPROCEEDINGS{Ali2016-rh,
  title     = "The Case for Docker in Multicloud Enabled Bioinformatics
               Applications",
  author    = "Ali, Ahmed Abdullah and El-Kalioby, Mohamed and Abouelhoda,
               Mohamed",
  abstract  = "The introduction of next generation sequencing technologies did
               not bring only huge amounts of biological data but also highly
               sophisticated and versatile analysis workflows and systems.
               These new cha",
  booktitle = "Bioinformatics and Biomedical Engineering ({IWBBIO} 2016)",
  publisher = "Springer International Publishing",
  pages     = "587--601",
  doi       = "10.1007/978-3-319-31744-1_52",
  month     = apr,
  year      = 2016,
  language  = "en"
}
@ARTICLE{Moreews2015-dy,
  title       = "{BioShaDock}: a community driven bioinformatics shared
                 Docker-based tools registry",
  author      = "Moreews, Fran{\c c}ois and Sallou, Olivier and M{\'e}nager,
                 Herv{\'e} and Le Bras, Yvan and Monjeaud, Cyril and Blanchet,
                 Christophe and Collin, Olivier",
  affiliation = "Genscale team, IRISA, Rennes, France. Genouest Bioinformatics
                 Facility, University of Rennes 1/IRISA, Rennes, France. Centre
                 d'Informatique pour la Biologie, C3BI, Institut Pasteur,
                 Paris, France. Genouest Bioinformatics Facility, University of
                 Rennes 1/IRISA, Rennes, France. Genouest Bioinformatics
                 Facility, University of Rennes 1/IRISA, Rennes, France.
                 Genouest Bioinformatics Facility, University of Rennes
                 1/IRISA, Rennes, France. French Institute of Bioinformatics,
                 CNRS IFB-Core, Gif-sur-Yvette, France.",
  abstract    = "Linux container technologies, as represented by Docker,
                 provide an alternative to complex and time-consuming
                 installation processes needed for scientific software. The ease
                 of deployment and the process isolation they enable, as well
                 as the reproducibility they permit across environments and
                 versions, are among the qualities that make them interesting
                 candidates for the construction of bioinformatic
                 infrastructures, at any scale from single workstations to high
                 throughput computing architectures. The Docker Hub is a public
                 registry which can be used to distribute bioinformatic
                 software as Docker images. However, its lack of curation and
                 its genericity make it difficult for a bioinformatics user to
                 find the most appropriate images needed. BioShaDock is a
                 bioinformatics-focused Docker registry, which provides a local
                 and fully controlled environment to build and publish
                 bioinformatic software as portable Docker images. It provides
                 a number of improvements over the base Docker registry on
                 authentication and permissions management, that enable its
                 integration in existing bioinformatic infrastructures such as
                 computing platforms. The metadata associated with the
                 registered images are domain-centric, including for instance
                 concepts defined in the EDAM ontology, a shared and structured
                 vocabulary of commonly used terms in bioinformatics. The
                 registry also includes user defined tags to facilitate its
                 discovery, as well as a link to the tool description in the
                 ELIXIR registry if it already exists. If it does not, the
                 BioShaDock registry will synchronize with the registry to
                 create a new description in the Elixir registry, based on the
                 BioShaDock entry metadata. This link will help users get more
                 information on the tool such as its EDAM operations, input and
                 output types. This allows integration with the ELIXIR Tools
                 and Data Services Registry, thus providing the appropriate
                 visibility of such images to the bioinformatics community.",
  journal     = "F1000Res.",
  publisher   = "F1000 Research",
  volume      = 4,
  pages       = "1443",
  month       = dec,
  year        = 2015,
  keywords    = "bioinformatics; community driven registry; container;
                 deployment; docker; interoperability; maintainability",
  language    = "en",
  doi         = "10.12688/f1000research.7536.1"
}
@ARTICLE{Belmann2015-eb,
  title       = "Bioboxes: standardised containers for interchangeable
                 bioinformatics software",
  author      = "Belmann, Peter and Dr{\"o}ge, Johannes and Bremges, Andreas
                 and McHardy, Alice C and Sczyrba, Alexander and Barton,
                 Michael D",
  affiliation = "Faculty of Technology and Center for Biotechnology, Bielefeld
                 University, 33615 Bielefeld, Germany. Computational Biology of
                 Infection Research, Helmholtz Centre for Infection Research,
                 38124 Braunschweig, Germany. Faculty of Technology and Center
                 for Biotechnology, Bielefeld University, 33615 Bielefeld,
                 Germany ; Computational Biology of Infection Research,
                 Helmholtz Centre for Infection Research, 38124 Braunschweig,
                 Germany. Computational Biology of Infection Research,
                 Helmholtz Centre for Infection Research, 38124 Braunschweig,
                 Germany. Faculty of Technology and Center for Biotechnology,
                 Bielefeld University, 33615 Bielefeld, Germany. DOE Joint
                 Genome Institute, Walnut Creek, CA 94598 USA.",
  abstract    = "Software is now both central and essential to modern biology,
                 yet lack of availability, difficult installations, and complex
                 user interfaces make software hard to obtain and use.
                 Containerisation, as exemplified by the Docker platform, has
                 the potential to solve the problems associated with sharing
                 software. We propose bioboxes: containers with standardised
                 interfaces to make bioinformatics software interchangeable.",
  journal     = "Gigascience",
  publisher   = "BioMed Central",
  volume      = 4,
  pages       = "47",
  month       = oct,
  year        = 2015,
  doi         = "10.1186/s13742-015-0087-0",
  keywords    = "Bioinformatics; Docker; Reproducibility; Software; Standards;
                 Usability",
  language    = "en"
}
@MISC{Boettiger2014-cz,
  title         = "An introduction to Docker for reproducible research, with
                   examples from the {R} environment",
  author        = "Boettiger, Carl",
  abstract      = "As computational work becomes more and more integral to many
                   aspects of scientific research, computational
                   reproducibility has become an issue of increasing importance
                   to computer systems researchers and domain scientists alike.
                   Though computational reproducibility seems more straight
                   forward than replicating physical experiments, the complex
                   and rapidly changing nature of computer environments makes
                   being able to reproduce and extend such work a serious
                   challenge. In this paper, I explore common reasons that code
                   developed for one research project cannot be successfully
                   executed or extended by subsequent researchers. I review
                   current approaches to these issues, including virtual
                   machines and workflow systems, and their limitations. I then
                   examine how the popular emerging technology Docker combines
                   several areas from systems research - such as operating
                   system virtualization, cross-platform portability, modular
                   re-usable elements, versioning, and a `DevOps' philosophy,
                   to address these challenges. I illustrate this with several
                   examples of Docker use with a focus on the R statistical
                   environment.",
  month         = oct,
  year          = 2014,
  archivePrefix = "arXiv",
  primaryClass  = "cs.SE",
  doi           = "10.1145/2723872.2723882",
  eprint        = "1410.0846"
}
@ARTICLE{Santana-Perez2015-wo,
  title     = "Towards Reproducibility in Scientific Workflows: An
               {Infrastructure-Based} Approach",
  author    = "Santana-Perez, Idafen and P{\'e}rez-Hern{\'a}ndez, Mar{\'\i}a S",
  abstract  = "It is commonly agreed that in silico scientific experiments
               should be executable and repeatable processes. Most of the
               current approaches for computational experiment conservation and
               reproducibility have focused so far on two of the main
               components of the experiment, namely, data and method. In this
               paper, we propose a new approach that addresses the third
               cornerstone of experimental reproducibility: the equipment. This
               work focuses on the equipment of a computational experiment,
               that is, the set of software and hardware components that are
               involved in the execution of a scientific workflow. In order to
               demonstrate the feasibility of our proposal, we describe a use
               case scenario on the Text Analytics domain and the application
               of our approach to it. From the original workflow, we document
               its execution environment, by means of a set of semantic models
               and a catalogue of resources, and generate an equivalent
               infrastructure for reexecuting it.",
  journal   = "Sci. Program.",
  publisher = "Hindawi Publishing Corporation",
  volume    = 2015,
  month     = feb,
  year      = 2015,
  doi       = "10.1155/2015/243180"
}
@MISC{Wandell2015-yt,
  title         = "Data management to support reproducible research",
  author        = "Wandell, B A and Rokem, A and Perry, L M and Schaefer, G and
                   Dougherty, R F",
  abstract      = "We describe the current state and future plans for a set of
                   tools for scientific data management (SDM) designed to
                   support scientific transparency and reproducible research.
                   SDM has been in active use at our MRI Center for more than
                   two years. We designed the system to be used from the
                   beginning of a research project, which contrasts with
                   conventional end-state databases that accept data as a
                   project concludes. A number of benefits accrue from using
                   scientific data management tools early and throughout the
                   project, including data integrity as well as reuse of the
                   data and of computational methods.",
  month         = feb,
  year          = 2015,
  archivePrefix = "arXiv",
  primaryClass  = "q-bio.QM",
  eprint        = "1502.06900"
}