% paper.bib — bibliography database.
% (GitHub page furniture and a pasted line-number gutter were removed here;
% this text sits outside any @entry and is ignored by BibTeX.)
@ARTICLE{Stodden2010-cu,
  title     = "The Scientific Method in Practice: Reproducibility in the
               Computational Sciences",
  author    = "Stodden, Victoria",
  abstract  = "Since the 1660's the scientific method has included
               reproducibility as a mainstay in its effort to root error from
               scientific discovery. With the explosive grow",
  journal   = "{SSRN} Electronic Journal",
  publisher = "Social Science Research Network",
  month     = feb,
  year      = 2010
}
@ARTICLE{Ram2013-km,
  title       = "Git can facilitate greater reproducibility and increased
                 transparency in science",
  author      = "Ram, Karthik",
  affiliation = "Environmental Science, Policy, and Management, University of
                 California, Berkeley, Berkeley, CA 94720, USA.",
  abstract    = "BACKGROUND: Reproducibility is the hallmark of good science.
                 Maintaining a high degree of transparency in scientific
                 reporting is essential not just for gaining trust and
                 credibility within the scientific community but also for
                 facilitating the development of new ideas. Sharing data and
                 computer code associated with publications is becoming
                 increasingly common, motivated partly in response to data
                 deposition requirements from journals and mandates from
                 funders. Despite this increase in transparency, it is still
                 difficult to reproduce or build upon the findings of most
                 scientific publications without access to a more complete
                 workflow. FINDINGS: Version control systems (VCS), which have
                 long been used to maintain code repositories in the software
                 industry, are now finding new applications in science. One
                 such open source VCS, Git, provides a lightweight yet robust
                 framework that is ideal for managing the full suite of
                 research outputs such as datasets, statistical code, figures,
                 lab notes, and manuscripts. For individual researchers, Git
                 provides a powerful way to track and compare versions, retrace
                 errors, explore new approaches in a structured manner, while
                 maintaining a full audit trail. For larger collaborative
                 efforts, Git and Git hosting services make it possible for
                 everyone to work asynchronously and merge their contributions
                 at any time, all the while maintaining a complete authorship
                 trail. In this paper I provide an overview of Git along with
                 use-cases that highlight how this tool can be leveraged to
                 make science more reproducible and transparent, foster new
                 collaborations, and support novel uses.",
  journal     = "Source Code Biol. Med.",
  volume      = 8,
  number      = 1,
  pages       = "7",
  month       = feb,
  year        = 2013,
  doi         = "10.1186/1751-0473-8-7",
  language    = "en"
}
@MISC{noauthor_2015-ig,
  title        = {Docker-based solutions to reproducibility in science - Seven
                  Bridges},
  booktitle    = {Seven Bridges},
  abstract     = {Seven Bridges is launching a toolkit for creating fully
                  portable bioinformatics workflows, using Docker and the
                  Common Workflow Language.},
  howpublished = {\url{https://blog.sbgenomics.com/docker-based-solutions-to-reproducibility-in-science/}},
  note         = {Accessed: 2016-12-17},
  month        = jun,
  year         = 2015
}
@MISC{noauthor_undated-pi,
  title        = "expfactory-docker",
  author       = "Sochat, Vanessa V and Blair, Ross W",
  abstract     = "expfactory-docker - container for deploying behavioral
                  psychology experiments",
  howpublished = "GitHub"
}
@ARTICLE{Sochat2016-pu,
  title     = "The Experiment Factory: Standardizing Behavioral Experiments",
  author    = "Sochat, Vanessa V and Eisenberg, Ian W and Enkavi, A Zeynep and
               Li, Jamie and Bissett, Patrick G and Poldrack, Russell A",
  abstract  = "The administration of behavioral and experimental paradigms for
               psychology research is hindered by lack of a coordinated effort
               to develop and deploy standardized paradigms. While several
               frameworks (de Leeuw (2015); McDonnell et al. (2012); Mason and
               Suri (2011); Lange et al. (2015)) have provided infrastructure
               and methods for individual research groups to develop paradigms,
               missing is a coordinated effort to develop paradigms linked with
               a system to easily deploy them. This disorganization leads to
               redundancy in development, divergent implementations of
               conceptually identical tasks, disorganized and error-prone code
               lacking documentation, and difficulty in replication. The
               ongoing reproducibility crisis in psychology and neuroscience
               research (Baker (2015); Open Science Collaboration (2015))
               highlights the urgency of this challenge: reproducible research
               in behavioral psychology is conditional on deployment of
               equivalent experiments. A large, accessible repository of
               experiments for researchers to develop collaboratively is most
               efficiently accomplished through an open source framework. Here
               we present the Experiment Factory, an open source framework for
               the development and deployment of web-based experiments. The
               modular infrastructure includes experiments, virtual machines
               for local or cloud deployment, and an application to drive these
               components and provide developers with functions and tools for
               further extension. We release this infrastructure with a
               deployment (http://www.expfactory.org) that researchers are
               currently using to run a set of over 80 standardized web-based
               experiments on Amazon Mechanical Turk. By providing open source
               tools for both deployment and development, this novel
               infrastructure holds promise to bring reproducibility to the
               administration of experiments, and accelerate scientific
               progress by providing a shared community resource of
               psychological paradigms.",
  journal   = "Front. Psychol.",
  publisher = "Frontiers",
  volume    = 7,
  pages     = "610",
  month     = apr,
  year      = 2016,
  doi       = "10.3389/fpsyg.2016.00610",
  keywords  = "web-experiments; Behavior; Docker; assessment; reproducibility;
               experiments"
}
@MISC{noauthor_undated-sn,
  title        = "Science is in a reproducibility crisis: How do we resolve it?",
  abstract     = "Over the past few years, there has been a growing awareness
                  that many experimentally established ``facts'' don't seem to
                  hold up to repeated investigation.",
  howpublished = "\url{http://phys.org/news/2013-09-science-crisis.html}",
  note         = "Accessed: 2015-11-02"
}
@ARTICLE{Baker_undated-bx,
  title    = "Over half of psychology studies fail reproducibility test",
  author   = "Baker, Monya",
  abstract = "Largest replication study to date casts doubt on many published
              positive results.",
  journal  = "Nature News",
  month    = aug,
  year     = 2015,
  doi      = "10.1038/nature.2015.18248"
}
@ARTICLE{Open_Science_Collaboration2015-hb,
  title    = "{PSYCHOLOGY}. Estimating the reproducibility of psychological
              science",
  author   = "{Open Science Collaboration}",
  abstract = "Reproducibility is a defining feature of science, but the extent
              to which it characterizes current research is unknown. We
              conducted replications of 100 experimental and correlational
              studies published in three psychology journals using high-powered
              designs and original materials when available. Replication
              effects were half the magnitude of original effects, representing
              a substantial decline. Ninety-seven percent of original studies
              had statistically significant results. Thirty-six percent of
              replications had statistically significant results; 47\% of
              original effect sizes were in the 95\% confidence interval of the
              replication effect size; 39\% of effects were subjectively rated
              to have replicated the original result; and if no bias in
              original results is assumed, combining original and replication
              results left 68\% with statistically significant effects.
              Correlational tests suggest that replication success was better
              predicted by the strength of original evidence than by
              characteristics of the original and replication teams.",
  journal  = "Science",
  volume   = 349,
  number   = 6251,
  pages    = "aac4716",
  month    = aug,
  year     = 2015,
  doi      = "10.1126/science.aac4716"
}
@misc{vanessa_sochat_2017_1059119,
  author = {Sochat, Vanessa},
  title  = {{expfactory/expfactory: The Experiment Factory
            (v3.0) Release}},
  month  = nov,
  year   = 2017,
  note   = {The original published work is written about at
            \url{https://www.frontiersin.org/articles/10.3389/fpsyg.2016.00610/full}.
            This is the second version of that work, with a focus on
            reproducible containers.},
  doi    = {10.5281/zenodo.1059119},
  url    = {https://doi.org/10.5281/zenodo.1059119}
}
@MISC{McDonnell2012-ns,
  title  = "{psiTurk} (Version 1.02) [Software]. New York, {NY}: New York
            University",
  author = "McDonnell, J V and Martin, J B and Markant, D B and Coenen, A and
            Rich, A S and Gureckis, T M",
  year   = 2012
}
@ARTICLE{De_Leeuw2015-zw,
  title       = "jsPsych: a {JavaScript} library for creating behavioral
                 experiments in a Web browser",
  author      = "de Leeuw, Joshua R",
  affiliation = "Department of Psychological \& Brain Science, Cognitive
                 Science Program, Indiana University, Bloomington, IN, USA.",
  abstract    = "Online experiments are growing in popularity, and the
                 increasing sophistication of Web technology has made it
                 possible to run complex behavioral experiments online using
                 only a Web browser. Unlike with offline laboratory
                 experiments, however, few tools exist to aid in the
                 development of browser-based experiments. This makes the
                 process of creating an experiment slow and challenging,
                 particularly for researchers who lack a Web development
                 background. This article introduces jsPsych, a JavaScript
                 library for the development of Web-based experiments. jsPsych
                 formalizes a way of describing experiments that is much
                 simpler than writing the entire experiment from scratch.
                 jsPsych then executes these descriptions automatically,
                 handling the flow from one task to another. The jsPsych
                 library is open-source and designed to be expanded by the
                 research community. The project is available online at
                 www.jspsych.org .",
  journal     = "Behav. Res. Methods",
  volume      = 47,
  number      = 1,
  pages       = "1--12",
  month       = mar,
  year        = 2015,
  doi         = "10.3758/s13428-014-0458-y"
}
@BOOK{Smith2005-kg,
  author    = {Smith, J E and Nair, R},
  title     = {Virtual Machines: Versatile Platforms for Systems and Processes},
  series    = {The Morgan Kaufmann Series in Computer Architecture and Design
               Series},
  publisher = {Morgan Kaufmann Publishers},
  year      = 2005
}
@ARTICLE{Merkel2014-da,
  title     = "Docker: Lightweight Linux Containers for Consistent Development
               and Deployment",
  author    = "Merkel, Dirk",
  journal   = "Linux J.",
  publisher = "Belltown Media",
  volume    = 2014,
  number    = 239,
  month     = mar,
  year      = 2014,
  address   = "Houston, TX"
}
% Exact duplicate of the noauthor_2015-ig entry earlier in this file (same
% key, same fields). The leading "@" is removed so BibTeX ignores the text
% below instead of raising a "repeated entry" error; citations resolve to
% the first copy. Safe to delete entirely in a later cleanup.
MISC{noauthor_2015-ig,
  title = "Docker-based solutions to reproducibility in science - Seven
           Bridges",
  booktitle = "Seven Bridges",
  abstract = "Seven Bridges is launching a toolkit for creating fully
              portable bioinformatics workflows, using Docker and the
              Common Workflow Language.",
  month = jun,
  year = 2015,
  howpublished = "\url{https://blog.sbgenomics.com/docker-based-solutions-to-reproducibility-in-science/}",
  note = "Accessed: 2016-12-17"
}
@INPROCEEDINGS{Ali2016-rh,
  title     = "The Case for Docker in Multicloud Enabled Bioinformatics
               Applications",
  author    = "Ali, Ahmed Abdullah and El-Kalioby, Mohamed and Abouelhoda,
               Mohamed",
  abstract  = "The introduction of next generation sequencing technologies did
               not bring only huge amounts of biological data but also highly
               sophisticated and versatile analysis workflows and systems.
               These new cha",
  booktitle = "Bioinformatics and Biomedical Engineering ({IWBBIO} 2016)",
  publisher = "Springer International Publishing",
  pages     = "587--601",
  doi       = "10.1007/978-3-319-31744-1_52",
  month     = apr,
  year      = 2016,
  language  = "en"
}
@ARTICLE{Moreews2015-dy,
  title       = "{BioShaDock}: a community driven bioinformatics shared
                 Docker-based tools registry",
  author      = "Moreews, Fran{\c c}ois and Sallou, Olivier and M{\'e}nager,
                 Herv{\'e} and Le Bras, Yvan and Monjeaud, Cyril and Blanchet,
                 Christophe and Collin, Olivier",
  affiliation = "Genscale team, IRISA, Rennes, France. Genouest Bioinformatics
                 Facility, University of Rennes 1/IRISA, Rennes, France. Centre
                 d'Informatique pour la Biologie, C3BI, Institut Pasteur,
                 Paris, France. Genouest Bioinformatics Facility, University of
                 Rennes 1/IRISA, Rennes, France. Genouest Bioinformatics
                 Facility, University of Rennes 1/IRISA, Rennes, France.
                 Genouest Bioinformatics Facility, University of Rennes
                 1/IRISA, Rennes, France. French Institute of Bioinformatics,
                 CNRS IFB-Core, Gif-sur-Yvette, France.",
  abstract    = "Linux container technologies, as represented by Docker,
                 provide an alternative to complex and time-consuming
                 installation processes needed for scientific software. The ease
                 of deployment and the process isolation they enable, as well
                 as the reproducibility they permit across environments and
                 versions, are among the qualities that make them interesting
                 candidates for the construction of bioinformatic
                 infrastructures, at any scale from single workstations to high
                 throughput computing architectures. The Docker Hub is a public
                 registry which can be used to distribute bioinformatic
                 software as Docker images. However, its lack of curation and
                 its genericity make it difficult for a bioinformatics user to
                 find the most appropriate images needed. BioShaDock is a
                 bioinformatics-focused Docker registry, which provides a local
                 and fully controlled environment to build and publish
                 bioinformatic software as portable Docker images. It provides
                 a number of improvements over the base Docker registry on
                 authentication and permissions management, that enable its
                 integration in existing bioinformatic infrastructures such as
                 computing platforms. The metadata associated with the
                 registered images are domain-centric, including for instance
                 concepts defined in the EDAM ontology, a shared and structured
                 vocabulary of commonly used terms in bioinformatics. The
                 registry also includes user defined tags to facilitate its
                 discovery, as well as a link to the tool description in the
                 ELIXIR registry if it already exists. If it does not, the
                 BioShaDock registry will synchronize with the registry to
                 create a new description in the Elixir registry, based on the
                 BioShaDock entry metadata. This link will help users get more
                 information on the tool such as its EDAM operations, input and
                 output types. This allows integration with the ELIXIR Tools
                 and Data Services Registry, thus providing the appropriate
                 visibility of such images to the bioinformatics community.",
  journal     = "F1000Res.",
  publisher   = "F1000 Research",
  volume      = 4,
  pages       = "1443",
  month       = dec,
  year        = 2015,
  keywords    = "bioinformatics; community driven registry; container;
                 deployment; docker; interoperability; maintainability",
  language    = "en",
  doi         = "10.12688/f1000research.7536.1"
}
@ARTICLE{Belmann2015-eb,
  title       = "Bioboxes: standardised containers for interchangeable
                 bioinformatics software",
  author      = "Belmann, Peter and Dr{\"o}ge, Johannes and Bremges, Andreas
                 and McHardy, Alice C and Sczyrba, Alexander and Barton,
                 Michael D",
  affiliation = "Faculty of Technology and Center for Biotechnology, Bielefeld
                 University, 33615 Bielefeld, Germany. Computational Biology of
                 Infection Research, Helmholtz Centre for Infection Research,
                 38124 Braunschweig, Germany. Faculty of Technology and Center
                 for Biotechnology, Bielefeld University, 33615 Bielefeld,
                 Germany ; Computational Biology of Infection Research,
                 Helmholtz Centre for Infection Research, 38124 Braunschweig,
                 Germany. Computational Biology of Infection Research,
                 Helmholtz Centre for Infection Research, 38124 Braunschweig,
                 Germany. Faculty of Technology and Center for Biotechnology,
                 Bielefeld University, 33615 Bielefeld, Germany. DOE Joint
                 Genome Institute, Walnut Creek, CA 94598 USA.",
  abstract    = "Software is now both central and essential to modern biology,
                 yet lack of availability, difficult installations, and complex
                 user interfaces make software hard to obtain and use.
                 Containerisation, as exemplified by the Docker platform, has
                 the potential to solve the problems associated with sharing
                 software. We propose bioboxes: containers with standardised
                 interfaces to make bioinformatics software interchangeable.",
  journal     = "Gigascience",
  publisher   = "BioMed Central",
  volume      = 4,
  pages       = "47",
  month       = oct,
  year        = 2015,
  doi         = "10.1186/s13742-015-0087-0",
  keywords    = "Bioinformatics; Docker; Reproducibility; Software; Standards;
                 Usability",
  language    = "en"
}
@MISC{Boettiger2014-cz,
  title         = "An introduction to Docker for reproducible research, with
                   examples from the {R} environment",
  author        = "Boettiger, Carl",
  abstract      = "As computational work becomes more and more integral to many
                   aspects of scientific research, computational
                   reproducibility has become an issue of increasing importance
                   to computer systems researchers and domain scientists alike.
                   Though computational reproducibility seems more straight
                   forward than replicating physical experiments, the complex
                   and rapidly changing nature of computer environments makes
                   being able to reproduce and extend such work a serious
                   challenge. In this paper, I explore common reasons that code
                   developed for one research project cannot be successfully
                   executed or extended by subsequent researchers. I review
                   current approaches to these issues, including virtual
                   machines and workflow systems, and their limitations. I then
                   examine how the popular emerging technology Docker combines
                   several areas from systems research - such as operating
                   system virtualization, cross-platform portability, modular
                   re-usable elements, versioning, and a `DevOps' philosophy,
                   to address these challenges. I illustrate this with several
                   examples of Docker use with a focus on the R statistical
                   environment.",
  month         = oct,
  year          = 2014,
  archivePrefix = "arXiv",
  primaryClass  = "cs.SE",
  doi           = "10.1145/2723872.2723882",
  eprint        = "1410.0846"
}
@ARTICLE{Santana-Perez2015-wo,
  title     = "Towards Reproducibility in Scientific Workflows: An
               {Infrastructure-Based} Approach",
  author    = "Santana-Perez, Idafen and P{\'e}rez-Hern{\'a}ndez, Mar{\'\i}a S",
  abstract  = "It is commonly agreed that in silico scientific experiments
               should be executable and repeatable processes. Most of the
               current approaches for computational experiment conservation and
               reproducibility have focused so far on two of the main
               components of the experiment, namely, data and method. In this
               paper, we propose a new approach that addresses the third
               cornerstone of experimental reproducibility: the equipment. This
               work focuses on the equipment of a computational experiment,
               that is, the set of software and hardware components that are
               involved in the execution of a scientific workflow. In order to
               demonstrate the feasibility of our proposal, we describe a use
               case scenario on the Text Analytics domain and the application
               of our approach to it. From the original workflow, we document
               its execution environment, by means of a set of semantic models
               and a catalogue of resources, and generate an equivalent
               infrastructure for reexecuting it.",
  journal   = "Sci. Program.",
  publisher = "Hindawi Publishing Corporation",
  volume    = 2015,
  month     = feb,
  year      = 2015,
  doi       = "10.1155/2015/243180"
}
@MISC{Wandell2015-yt,
  title         = "Data management to support reproducible research",
  author        = "Wandell, B A and Rokem, A and Perry, L M and Schaefer, G and
                   Dougherty, R F",
  abstract      = "We describe the current state and future plans for a set of
                   tools for scientific data management (SDM) designed to
                   support scientific transparency and reproducible research.
                   SDM has been in active use at our MRI Center for more than
                   two years. We designed the system to be used from the
                   beginning of a research project, which contrasts with
                   conventional end-state databases that accept data as a
                   project concludes. A number of benefits accrue from using
                   scientific data management tools early and throughout the
                   project, including data integrity as well as reuse of the
                   data and of computational methods.",
  month         = feb,
  year          = 2015,
  archivePrefix = "arXiv",
  primaryClass  = "q-bio.QM",
  eprint        = "1502.06900"
}