@article{louizos_variational_2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1511.00830},
primaryClass = {cs, stat},
title = {The {{Variational Fair Autoencoder}}},
abstract = {We investigate the problem of learning representations that are invariant to certain nuisance or sensitive factors of variation in the data while retaining as much of the remaining information as possible. Our model is based on a variational autoencoding architecture with priors that encourage independence between sensitive and latent factors of variation. Any subsequent processing, such as classification, can then be performed on this purged latent representation. To remove any remaining dependencies we incorporate an additional penalty term based on the "Maximum Mean Discrepancy" (MMD) measure. We discuss how these architectures can be efficiently trained on data and show in experiments that this method is more effective than previous work in removing unwanted sources of variation while maintaining informative latent representations.},
journal = {arXiv:1511.00830 [cs, stat]},
author = {Louizos, Christos and Swersky, Kevin and Li, Yujia and Welling, Max and Zemel, Richard},
month = nov,
year = {2015},
keywords = {Computer Science - Learning,Statistics - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/HVE6H8WF/Louizos et al. - 2015 - The Variational Fair Autoencoder.pdf;/home/jdayton3/.zotero/library/storage/XB9DBASL/1511.html},
annote = {Comment: Fixed typo in eq. 3 and 4}
}
@article{way_extracting_2017,
title = {Extracting a {{Biologically Relevant Latent Space}} from {{Cancer Transcriptomes}} with {{Variational Autoencoders}}},
copyright = {\textcopyright{} 2017, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
abstract = {The Cancer Genome Atlas (TCGA) has profiled over 10,000 tumors across 33 different cancer-types for many genomic features, including gene expression levels. Gene expression measurements capture substantial information about the state of each tumor. Certain classes of deep neural network models are capable of learning a meaningful latent space. Such a latent space could be used to explore and generate hypothetical gene expression profiles under various types of molecular and genetic perturbation. For example, one might wish to use such a model to predict a tumor's response to specific therapies or to characterize complex gene expression activations existing in differential proportions in different tumors. Variational autoencoders (VAEs) are a deep neural network approach capable of generating meaningful latent spaces for image and text data. In this work, we sought to determine the extent to which a VAE can be trained to model cancer gene expression, and whether or not such a VAE would capture biologically-relevant features. In the following report, we introduce a VAE trained on TCGA pan-cancer RNA-seq data, identify specific patterns in the VAE encoded features, and discuss potential merits of the approach. We name our method "Tybalt" after an instigative, cat-like character who sets a cascading chain of events in motion in Shakespeare's Romeo and Juliet. From a systems biology perspective, Tybalt could one day aid in cancer stratification or predict specific activated expression patterns that would result from genetic changes or treatment effects.},
language = {en},
journal = {bioRxiv},
doi = {10.1101/174474},
author = {Way, Gregory P. and Greene, Casey S.},
month = oct,
year = {2017},
pages = {174474},
file = {/home/jdayton3/.zotero/library/storage/5XHRV7KS/Way and Greene - 2017 - Extracting a Biologically Relevant Latent Space fr.pdf;/home/jdayton3/.zotero/library/storage/BK4W4HT8/174474.html}
}
@article{beaulieu-jones_privacy-preserving_2017,
title = {Privacy-Preserving Generative Deep Neural Networks Support Clinical Data Sharing},
copyright = {\textcopyright{} 2017, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
abstract = {Though it is widely recognized that data sharing enables faster scientific progress, the sensible need to protect participant privacy hampers this practice in medicine. We train deep neural networks that generate synthetic subjects closely resembling study participants. Using the SPRINT trial as an example, we show that machine-learning models built from simulated participants generalize to the original dataset. We incorporate differential privacy, which offers strong guarantees on the likelihood that a subject could be identified as a member of the trial. Investigators who have compiled a dataset can use our method to provide a freely accessible public version that enables other scientists to perform discovery-oriented analyses. Generated data can be released alongside analytical code to enable fully reproducible workflows, even when privacy is a concern. By addressing data sharing challenges, deep neural networks can facilitate the rigorous and reproducible investigation of clinical datasets.},
language = {en},
journal = {bioRxiv},
doi = {10.1101/159756},
author = {{Beaulieu-Jones}, Brett K. and Wu, Zhiwei Steven and Williams, Chris and Byrd, James Brian and Greene, Casey S.},
month = nov,
year = {2017},
pages = {159756},
file = {/home/jdayton3/.zotero/library/storage/Q5QRV72N/Beaulieu-Jones et al. - 2017 - Privacy-preserving generative deep neural networks.pdf;/home/jdayton3/.zotero/library/storage/E35FRR5I/159756.html}
}
@article{yue_comparative_2014,
title = {A Comparative Encyclopedia of {{DNA}} Elements in the Mouse Genome},
volume = {515},
issn = {1476-4687},
abstract = {The laboratory mouse shares the majority of its protein-coding genes with humans, making it the premier model organism in biomedical research, yet the two mammals differ in significant ways. To gain greater insights into both shared and species-specific transcriptional and cellular regulatory programs in the mouse, the Mouse ENCODE Consortium has mapped transcription, DNase I hypersensitivity, transcription factor binding, chromatin modifications and replication domains throughout the mouse genome in diverse cell and tissue types. By comparing with the human genome, we not only confirm substantial conservation in the newly annotated potential functional sequences, but also find a large degree of divergence of sequences involved in transcriptional regulation, chromatin state and higher order chromatin organization. Our results illuminate the wide range of evolutionary forces acting on genes and their regulatory regions, and provide a general resource for research into mammalian biology and mechanisms of human diseases.},
language = {eng},
number = {7527},
journal = {Nature},
doi = {10.1038/nature13992},
author = {Yue, Feng and Cheng, Yong and Breschi, Alessandra and Vierstra, Jeff and Wu, Weisheng and Ryba, Tyrone and Sandstrom, Richard and Ma, Zhihai and Davis, Carrie and Pope, Benjamin D. and Shen, Yin and Pervouchine, Dmitri D. and Djebali, Sarah and Thurman, Robert E. and Kaul, Rajinder and Rynes, Eric and Kirilusha, Anthony and Marinov, Georgi K. and Williams, Brian A. and Trout, Diane and Amrhein, Henry and {Fisher-Aylor}, Katherine and Antoshechkin, Igor and DeSalvo, Gilberto and See, Lei-Hoon and Fastuca, Meagan and Drenkow, Jorg and Zaleski, Chris and Dobin, Alex and Prieto, Pablo and Lagarde, Julien and Bussotti, Giovanni and Tanzer, Andrea and Denas, Olgert and Li, Kanwei and Bender, M. A. and Zhang, Miaohua and Byron, Rachel and Groudine, Mark T. and McCleary, David and Pham, Long and Ye, Zhen and Kuan, Samantha and Edsall, Lee and Wu, Yi-Chieh and Rasmussen, Matthew D. and Bansal, Mukul S. and Kellis, Manolis and Keller, Cheryl A. and Morrissey, Christapher S. and Mishra, Tejaswini and Jain, Deepti and Dogan, Nergiz and Harris, Robert S. and Cayting, Philip and Kawli, Trupti and Boyle, Alan P. and Euskirchen, Ghia and Kundaje, Anshul and Lin, Shin and Lin, Yiing and Jansen, Camden and Malladi, Venkat S. and Cline, Melissa S. and Erickson, Drew T. and Kirkup, Vanessa M. and Learned, Katrina and Sloan, Cricket A. and Rosenbloom, Kate R. and {Lacerda de Sousa}, Beatriz and Beal, Kathryn and Pignatelli, Miguel and Flicek, Paul and Lian, Jin and Kahveci, Tamer and Lee, Dongwon and Kent, W. James and Ramalho Santos, Miguel and Herrero, Javier and Notredame, Cedric and Johnson, Audra and Vong, Shinny and Lee, Kristen and Bates, Daniel and Neri, Fidencio and Diegel, Morgan and Canfield, Theresa and Sabo, Peter J. and Wilken, Matthew S. and Reh, Thomas A. and Giste, Erika and Shafer, Anthony and Kutyavin, Tanya and Haugen, Eric and Dunn, Douglas and Reynolds, Alex P. and Neph, Shane and Humbert, Richard and Hansen, R. Scott and De Bruijn, Marella and Selleri, Licia and Rudensky, Alexander and Josefowicz, Steven and Samstein, Robert and Eichler, Evan E. and Orkin, Stuart H. and Levasseur, Dana and Papayannopoulou, Thalia and Chang, Kai-Hsin and Skoultchi, Arthur and Gosh, Srikanta and Disteche, Christine and Treuting, Piper and Wang, Yanli and Weiss, Mitchell J. and Blobel, Gerd A. and Cao, Xiaoyi and Zhong, Sheng and Wang, Ting and Good, Peter J. and Lowdon, Rebecca F. and Adams, Leslie B. and Zhou, Xiao-Qiao and Pazin, Michael J. and Feingold, Elise A. and Wold, Barbara and Taylor, James and Mortazavi, Ali and Weissman, Sherman M. and Stamatoyannopoulos, John A. and Snyder, Michael P. and Guigo, Roderic and Gingeras, Thomas R. and Gilbert, David M. and Hardison, Ross C. and Beer, Michael A. and Ren, Bing and {Mouse ENCODE Consortium}},
month = nov,
year = {2014},
keywords = {Animals,Humans,Cell Lineage,Chromatin,Conserved Sequence,Deoxyribonuclease I,DNA Replication,Gene Expression Regulation,Gene Regulatory Networks,Genome,Genome-Wide Association Study,Genomics,Mice,Molecular Sequence Annotation,Regulatory Sequences; Nucleic Acid,RNA,Species Specificity,Transcription Factors,Transcriptome},
pages = {355-364},
pmid = {25409824},
pmcid = {PMC4266106}
}
@article{gilad_reanalysis_2015,
title = {A Reanalysis of Mouse {{ENCODE}} Comparative Gene Expression Data},
volume = {4},
issn = {2046-1402},
abstract = {Recently, the Mouse ENCODE Consortium reported that comparative gene expression data from human and mouse tend to cluster more by species rather than by tissue. This observation was surprising, as it contradicted much of the comparative gene regulatory data collected previously, as well as the common notion that major developmental pathways are highly conserved across a wide range of species, in particular across mammals. Here we show that the Mouse ENCODE gene expression data were collected using a flawed study design, which confounded sequencing batch (namely, the assignment of samples to sequencing flowcells and lanes) with species. When we account for the batch effect, the corrected comparative gene expression data from human and mouse tend to cluster by tissue, not by species.},
journal = {F1000Research},
doi = {10.12688/f1000research.6536.1},
author = {Gilad, Yoav and {Mizrahi-Man}, Orna},
month = may,
year = {2015},
file = {/home/jdayton3/.zotero/library/storage/PN9V7X22/Gilad and Mizrahi-Man - 2015 - A reanalysis of mouse ENCODE comparative gene expr.pdf},
pmid = {26236466},
pmcid = {PMC4516019}
}
@article{leek_tackling_2010,
title = {Tackling the Widespread and Critical Impact of Batch Effects in High-Throughput Data},
volume = {11},
issn = {1471-0064},
abstract = {High-throughput technologies are widely used, for example to assay genetic variants, gene and protein expression, and epigenetic modifications. One often overlooked complication with such studies is batch effects, which occur because measurements are affected by laboratory conditions, reagent lots and personnel differences. This becomes a major problem when batch effects are correlated with an outcome of interest and lead to incorrect conclusions. Using both published studies and our own analyses, we argue that batch effects (as well as other technical and biological artefacts) are widespread and critical to address. We review experimental and computational approaches for doing so.},
language = {eng},
number = {10},
journal = {Nature Reviews. Genetics},
doi = {10.1038/nrg2825},
author = {Leek, Jeffrey T. and Scharpf, Robert B. and Bravo, H{\'e}ctor Corrada and Simcha, David and Langmead, Benjamin and Johnson, W. Evan and Geman, Donald and Baggerly, Keith and Irizarry, Rafael A.},
month = oct,
year = {2010},
keywords = {Genomics,Biotechnology,Computational Biology,Oligonucleotide Array Sequence Analysis,Periodicals as Topic,Research Design,Sequence Analysis; DNA},
pages = {733-739},
pmid = {20838408},
pmcid = {PMC3880143}
}
@article{johnson_adjusting_2007,
title = {Adjusting Batch Effects in Microarray Expression Data Using Empirical {{Bayes}} Methods},
volume = {8},
issn = {1465-4644},
abstract = {Non-biological experimental variation or "batch effects" are commonly observed across multiple batches of microarray experiments, often rendering the task of combining data from these batches difficult. The ability to combine microarray data sets is advantageous to researchers to increase statistical power to detect biological phenomena from studies where logistical considerations restrict sample size or in studies that require the sequential hybridization of arrays. In general, it is inappropriate to combine data sets without adjusting for batch effects. Methods have been proposed to filter batch effects from data, but these are often complicated and require large batch sizes ( {$>$} 25) to implement. Because the majority of microarray studies are conducted using much smaller sample sizes, existing methods are not sufficient. We propose parametric and non-parametric empirical Bayes frameworks for adjusting data for batch effects that is robust to outliers in small sample sizes and performs comparable to existing methods for large samples. We illustrate our methods using two example data sets and show that our methods are justifiable, easy to apply, and useful in practice. Software for our method is freely available at: http://biosun1.harvard.edu/complab/batch/.},
language = {eng},
number = {1},
journal = {Biostatistics (Oxford, England)},
doi = {10.1093/biostatistics/kxj037},
author = {Johnson, W. Evan and Li, Cheng and Rabinovic, Ariel},
month = jan,
year = {2007},
keywords = {Humans,Oligonucleotide Array Sequence Analysis,Bayes Theorem,Data Interpretation; Statistical,Gene Expression Profiling},
pages = {118-127},
pmid = {16632515}
}
@article{louizos_causal_2017-2,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1705.08821},
primaryClass = {cs, stat},
title = {Causal {{Effect Inference}} with {{Deep Latent}}-{{Variable Models}}},
abstract = {Learning individual-level causal effects from observational data, such as inferring the most effective medication for a specific patient, is a problem of growing importance for policy makers. The most important aspect of inferring causal effects from observational data is the handling of confounders, factors that affect both an intervention and its outcome. A carefully designed observational study attempts to measure all important confounders. However, even if one does not have direct access to all confounders, there may exist noisy and uncertain measurement of proxies for confounders. We build on recent advances in latent variable modeling to simultaneously estimate the unknown latent space summarizing the confounders and the causal effect. Our method is based on Variational Autoencoders (VAE) which follow the causal structure of inference with proxies. We show our method is significantly more robust than existing methods, and matches the state-of-the-art on previous benchmarks focused on individual treatment effects.},
journal = {arXiv:1705.08821 [cs, stat]},
author = {Louizos, Christos and Shalit, Uri and Mooij, Joris and Sontag, David and Zemel, Richard and Welling, Max},
month = may,
year = {2017},
keywords = {Computer Science - Learning,Statistics - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/6AXNHRMR/Louizos et al. - 2017 - Causal Effect Inference with Deep Latent-Variable .pdf;/home/jdayton3/.zotero/library/storage/97NXU7CC/1705.html},
annote = {Comment: Published as a conference paper at NIPS 2017}
}
@article{tzeng_deep_2014-2,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1412.3474},
primaryClass = {cs},
title = {Deep {{Domain Confusion}}: {{Maximizing}} for {{Domain Invariance}}},
shorttitle = {Deep {{Domain Confusion}}},
abstract = {Recent reports suggest that a generic supervised deep CNN model trained on a large-scale dataset reduces, but does not remove, dataset bias on a standard benchmark. Fine-tuning deep models in a new domain can require a significant amount of data, which for many applications is simply not available. We propose a new CNN architecture which introduces an adaptation layer and an additional domain confusion loss, to learn a representation that is both semantically meaningful and domain invariant. We additionally show that a domain confusion metric can be used for model selection to determine the dimension of an adaptation layer and the best position for the layer in the CNN architecture. Our proposed adaptation method offers empirical performance which exceeds previously published results on a standard benchmark visual domain adaptation task.},
journal = {arXiv:1412.3474 [cs]},
author = {Tzeng, Eric and Hoffman, Judy and Zhang, Ning and Saenko, Kate and Darrell, Trevor},
month = dec,
year = {2014},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {/home/jdayton3/.zotero/library/storage/MWMN8ZJ6/Tzeng et al. - 2014 - Deep Domain Confusion Maximizing for Domain Invar.pdf;/home/jdayton3/.zotero/library/storage/WQWP2TZV/1412.html}
}
@article{leek_capturing_2007,
title = {Capturing {{Heterogeneity}} in {{Gene Expression Studies}} by {{Surrogate Variable Analysis}}},
volume = {3},
issn = {1553-7404},
abstract = {It has unambiguously been shown that genetic, environmental, demographic, and technical factors may have substantial effects on gene expression levels. In addition to the measured variable(s) of interest, there will tend to be sources of signal due to factors that are unknown, unmeasured, or too complicated to capture through simple models. We show that failing to incorporate these sources of heterogeneity into an analysis can have widespread and detrimental effects on the study. Not only can this reduce power or induce unwanted dependence across genes, but it can also introduce sources of spurious signal to many genes. This phenomenon is true even for well-designed, randomized studies. We introduce ``surrogate variable analysis'' (SVA) to overcome the problems caused by heterogeneity in expression studies. SVA can be applied in conjunction with standard analysis techniques to accurately capture the relationship between expression and any modeled variables of interest. We apply SVA to disease class, time course, and genetics of gene expression studies. We show that SVA increases the biological accuracy and reproducibility of analyses in genome-wide expression studies.},
number = {9},
journal = {PLOS Genetics},
doi = {10.1371/journal.pgen.0030161},
author = {Leek, Jeffrey T. and Storey, John D.},
month = sep,
year = {2007},
keywords = {Algorithms,Gene expression,Genetic causes of cancer,Genetic loci,Genomic signal processing,Microarrays,Trait locus analysis,Vector spaces},
pages = {e161},
file = {/home/jdayton3/.zotero/library/storage/Y7WRCJES/Leek and Storey - 2007 - Capturing Heterogeneity in Gene Expression Studies.pdf;/home/jdayton3/.zotero/library/storage/ZENL2MY6/article.html}
}
@article{schmidhuber_deep_2015,
title = {Deep Learning in Neural Networks: {{An}} Overview},
volume = {61},
issn = {0893-6080},
shorttitle = {Deep Learning in Neural Networks},
abstract = {In recent years, deep artificial neural networks (including recurrent ones) have won numerous contests in pattern recognition and machine learning. This historical survey compactly summarizes relevant work, much of it from the previous millennium. Shallow and Deep Learners are distinguished by the depth of their credit assignment paths, which are chains of possibly learnable, causal links between actions and effects. I review deep supervised learning (also recapitulating the history of backpropagation), unsupervised learning, reinforcement learning \& evolutionary computation, and indirect search for short programs encoding deep and large networks.},
journal = {Neural Networks},
doi = {10.1016/j.neunet.2014.09.003},
author = {Schmidhuber, J{\"u}rgen},
month = jan,
year = {2015},
keywords = {Deep learning,Evolutionary computation,Reinforcement learning,Supervised learning,Unsupervised learning},
pages = {85-117},
file = {/home/jdayton3/.zotero/library/storage/ISG49HCD/S0893608014002135.html}
}
@article{hinton_reducing_2006,
title = {Reducing the {{Dimensionality}} of {{Data}} with {{Neural Networks}}},
volume = {313},
copyright = {American Association for the Advancement of Science},
issn = {0036-8075, 1095-9203},
abstract = {High-dimensional data can be converted to low-dimensional codes by training a multilayer neural network with a small central layer to reconstruct high-dimensional input vectors. Gradient descent can be used for fine-tuning the weights in such ``autoencoder'' networks, but this works well only if the initial weights are close to a good solution. We describe an effective way of initializing the weights that allows deep autoencoder networks to learn low-dimensional codes that work much better than principal components analysis as a tool to reduce the dimensionality of data.
Neural networks can be used to reduce accurately high-dimensional data to lower dimensional representations for pattern recognition tasks.},
language = {en},
number = {5786},
journal = {Science},
doi = {10.1126/science.1127647},
author = {Hinton, G. E. and Salakhutdinov, R. R.},
month = jul,
year = {2006},
pages = {504-507},
file = {/home/jdayton3/.zotero/library/storage/9T4BJZN3/504.html},
pmid = {16873662}
}
@article{ganin_domain-adversarial_2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1505.07818},
primaryClass = {cs, stat},
title = {Domain-{{Adversarial Training}} of {{Neural Networks}}},
abstract = {We introduce a new representation learning approach for domain adaptation, in which data at training and test time come from similar but different distributions. Our approach is directly inspired by the theory on domain adaptation suggesting that, for effective domain transfer to be achieved, predictions must be made based on features that cannot discriminate between the training (source) and test (target) domains. The approach implements this idea in the context of neural network architectures that are trained on labeled data from the source domain and unlabeled data from the target domain (no labeled target-domain data is necessary). As the training progresses, the approach promotes the emergence of features that are (i) discriminative for the main learning task on the source domain and (ii) indiscriminate with respect to the shift between the domains. We show that this adaptation behaviour can be achieved in almost any feed-forward model by augmenting it with few standard layers and a new gradient reversal layer. The resulting augmented architecture can be trained using standard backpropagation and stochastic gradient descent, and can thus be implemented with little effort using any of the deep learning packages. We demonstrate the success of our approach for two distinct classification problems (document sentiment analysis and image classification), where state-of-the-art domain adaptation performance on standard benchmarks is achieved. We also validate the approach for descriptor learning task in the context of person re-identification application.},
journal = {arXiv:1505.07818 [cs, stat]},
author = {Ganin, Yaroslav and Ustinova, Evgeniya and Ajakan, Hana and Germain, Pascal and Larochelle, Hugo and Laviolette, Fran{\c c}ois and Marchand, Mario and Lempitsky, Victor},
month = may,
year = {2015},
keywords = {Computer Science - Learning,Statistics - Machine Learning,Computer Science - Neural and Evolutionary Computing},
file = {/home/jdayton3/.zotero/library/storage/QYT62QMZ/Ganin et al. - 2015 - Domain-Adversarial Training of Neural Networks.pdf;/home/jdayton3/.zotero/library/storage/8J7WXX2B/1505.html},
annote = {Comment: Published in JMLR: http://jmlr.org/papers/v17/15-239.html}
}
@article{shaham_removal_2017,
title = {Removal of Batch Effects Using Distribution-Matching Residual Networks},
volume = {33},
issn = {1367-4811},
abstract = {Motivation: Sources of variability in experimentally derived data include measurement error in addition to the physical phenomena of interest. This measurement error is a combination of systematic components, originating from the measuring instrument and random measurement errors. Several novel biological technologies, such as mass cytometry and single-cell RNA-seq (scRNA-seq), are plagued with systematic errors that may severely affect statistical analysis if the data are not properly calibrated.
Results: We propose a novel deep learning approach for removing systematic batch effects. Our method is based on a residual neural network, trained to minimize the Maximum Mean Discrepancy between the multivariate distributions of two replicates, measured in different batches. We apply our method to mass cytometry and scRNA-seq datasets, and demonstrate that it effectively attenuates batch effects.
Availability and Implementation: our codes and data are publicly available at https://github.com/ushaham/BatchEffectRemoval.git.
Contact: [email protected].
Supplementary information: Supplementary data are available at Bioinformatics online.},
language = {eng},
number = {16},
journal = {Bioinformatics (Oxford, England)},
doi = {10.1093/bioinformatics/btx196},
author = {Shaham, Uri and Stanton, Kelly P. and Zhao, Jun and Li, Huamin and Raddassi, Khadir and Montgomery, Ruth and Kluger, Yuval},
month = aug,
year = {2017},
keywords = {Humans,Computational Biology,Cytophotometry,Data Accuracy,Machine Learning,Sequence Analysis; RNA,Single-Cell Analysis,Statistics as Topic},
pages = {2539-2546},
pmid = {28419223},
pmcid = {PMC5870543}
}
@article{shaham_batch_2018,
title = {Batch {{Effect Removal}} via {{Batch}}-{{Free Encoding}}},
abstract = {Biological measurements often contain systematic errors, also known as ``batch effects'', which may invalidate downstream analysis when not handled correctly. The problem of removing batch effects is of major importance in the biological community. Despite recent advances in this direction via deep learning techniques, most current methods may not fully preserve the true biological patterns the data contains. In this work we propose a deep learning approach for batch effect removal. The crux of our approach is learning a batch-free encoding of the data, representing its intrinsic biological properties, but not batch effects. In addition, we also encode the systematic factors through a decoding mechanism and require accurate reconstruction of the data. Altogether, this allows us to fully preserve the true biological patterns represented in the data. Experimental results are reported on data obtained from two high throughput technologies, mass cytometry and single-cell RNA-seq. Beyond good performance on training data, we also observe that our system performs well on test data obtained from new patients, which was not available at training time. Our method is easy to handle, a publicly available code can be found at https://github.com/ushaham/BatchEffectRemoval2018.},
language = {en},
journal = {bioRxiv},
doi = {10.1101/380816},
author = {Shaham, Uri},
month = jul,
year = {2018},
file = {/home/jdayton3/.zotero/library/storage/L3S8TLDL/Shaham - 2018 - Batch Effect Removal via Batch-Free Encoding.pdf}
}
@incollection{baldi_understanding_2013,
title = {Understanding {{Dropout}}},
booktitle = {Advances in {{Neural Information Processing Systems}} 26},
publisher = {{Curran Associates, Inc.}},
author = {Baldi, Pierre and Sadowski, Peter J},
editor = {Burges, C. J. C. and Bottou, L. and Welling, M. and Ghahramani, Z. and Weinberger, K. Q.},
year = {2013},
pages = {2814--2822},
file = {/home/jdayton3/.zotero/library/storage/CZ55YD2U/Baldi and Sadowski - 2013 - Understanding Dropout.pdf;/home/jdayton3/.zotero/library/storage/78YNCB5S/4878-understanding-dropout.html}
}
@article{ronneberger_u-net_2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1505.04597},
primaryClass = {cs},
title = {U-{{Net}}: {{Convolutional Networks}} for {{Biomedical Image Segmentation}}},
shorttitle = {U-{{Net}}},
abstract = {There is large consent that successful training of deep networks requires many thousand annotated training samples. In this paper, we present a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. We show that such a network can be trained end-to-end from very few images and outperforms the prior best method (a sliding-window convolutional network) on the ISBI challenge for segmentation of neuronal structures in electron microscopic stacks. Using the same network trained on transmitted light microscopy images (phase contrast and DIC) we won the ISBI cell tracking challenge 2015 in these categories by a large margin. Moreover, the network is fast. Segmentation of a 512x512 image takes less than a second on a recent GPU. The full implementation (based on Caffe) and the trained networks are available at http://lmb.informatik.uni-freiburg.de/people/ronneber/u-net .},
journal = {arXiv:1505.04597 [cs]},
author = {Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas},
month = may,
year = {2015},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {/home/jdayton3/.zotero/library/storage/4LEVJ4MI/Ronneberger et al. - 2015 - U-Net Convolutional Networks for Biomedical Image.pdf;/home/jdayton3/.zotero/library/storage/WG5Q6IKC/1505.html},
annote = {Comment: conditionally accepted at MICCAI 2015}
}
@article{klambauer_self-normalizing_2017,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1706.02515},
primaryClass = {cs, stat},
title = {Self-{{Normalizing Neural Networks}}},
abstract = {Deep Learning has revolutionized vision via convolutional neural networks (CNNs) and natural language processing via recurrent neural networks (RNNs). However, success stories of Deep Learning with standard feed-forward neural networks (FNNs) are rare. FNNs that perform well are typically shallow and, therefore cannot exploit many levels of abstract representations. We introduce self-normalizing neural networks (SNNs) to enable high-level abstract representations. While batch normalization requires explicit normalization, neuron activations of SNNs automatically converge towards zero mean and unit variance. The activation function of SNNs are "scaled exponential linear units" (SELUs), which induce self-normalizing properties. Using the Banach fixed-point theorem, we prove that activations close to zero mean and unit variance that are propagated through many network layers will converge towards zero mean and unit variance -- even under the presence of noise and perturbations. This convergence property of SNNs allows to (1) train deep networks with many layers, (2) employ strong regularization, and (3) to make learning highly robust. Furthermore, for activations not close to unit variance, we prove an upper and lower bound on the variance, thus, vanishing and exploding gradients are impossible. We compared SNNs on (a) 121 tasks from the UCI machine learning repository, on (b) drug discovery benchmarks, and on (c) astronomy tasks with standard FNNs and other machine learning methods such as random forests and support vector machines. SNNs significantly outperformed all competing FNN methods at 121 UCI tasks, outperformed all competing methods at the Tox21 dataset, and set a new record at an astronomy data set. The winning SNN architectures are often very deep. Implementations are available at: github.com/bioinf-jku/SNNs.},
journal = {arXiv:1706.02515 [cs, stat]},
author = {Klambauer, G{\"u}nter and Unterthiner, Thomas and Mayr, Andreas and Hochreiter, Sepp},
month = jun,
year = {2017},
keywords = {Statistics - Machine Learning,Computer Science - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/SPB3FFYM/Klambauer et al. - 2017 - Self-Normalizing Neural Networks.pdf;/home/jdayton3/.zotero/library/storage/Y3P5FS4S/1706.html},
annote = {Comment: 9 pages (+ 93 pages appendix)}
}
@article{agarap_deep_2018,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1803.08375},
primaryClass = {cs, stat},
title = {Deep {{Learning}} Using {{Rectified Linear Units}} ({{ReLU}})},
abstract = {We introduce the use of rectified linear units (ReLU) as the classification function in a deep neural network (DNN). Conventionally, ReLU is used as an activation function in DNNs, with Softmax function as their classification function. However, there have been several studies on using a classification function other than Softmax, and this study is an addition to those. We accomplish this by taking the activation of the penultimate layer \$h\_\{n - 1\}\$ in a neural network, then multiply it by weight parameters \$\textbackslash{}theta\$ to get the raw scores \$o\_\{i\}\$. Afterwards, we threshold the raw scores \$o\_\{i\}\$ by \$0\$, i.e. \$f(o) = \textbackslash{}max(0, o\_\{i\})\$, where \$f(o)\$ is the ReLU function. We provide class predictions \$\textbackslash{}hat\{y\}\$ through argmax function, i.e. argmax \$f(x)\$.},
journal = {arXiv:1803.08375 [cs, stat]},
author = {Agarap, Abien Fred},
month = mar,
year = {2018},
keywords = {Computer Science - Computer Vision and Pattern Recognition,Statistics - Machine Learning,Computer Science - Neural and Evolutionary Computing,Computer Science - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/2JK2JXZ8/Agarap - 2018 - Deep Learning using Rectified Linear Units (ReLU).pdf;/home/jdayton3/.zotero/library/storage/5DJ8J5J6/1803.html},
annote = {Comment: 7 pages, 11 figures, 9 tables}
}
@article{kingma_auto-encoding_2013,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1312.6114},
primaryClass = {cs, stat},
title = {Auto-{{Encoding Variational Bayes}}},
abstract = {How can we perform efficient inference and learning in directed probabilistic models, in the presence of continuous latent variables with intractable posterior distributions, and large datasets? We introduce a stochastic variational inference and learning algorithm that scales to large datasets and, under some mild differentiability conditions, even works in the intractable case. Our contributions is two-fold. First, we show that a reparameterization of the variational lower bound yields a lower bound estimator that can be straightforwardly optimized using standard stochastic gradient methods. Second, we show that for i.i.d. datasets with continuous latent variables per datapoint, posterior inference can be made especially efficient by fitting an approximate inference model (also called a recognition model) to the intractable posterior using the proposed lower bound estimator. Theoretical advantages are reflected in experimental results.},
journal = {arXiv:1312.6114 [cs, stat]},
author = {Kingma, Diederik P. and Welling, Max},
month = dec,
year = {2013},
keywords = {Statistics - Machine Learning,Computer Science - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/484HCSZU/Kingma and Welling - 2013 - Auto-Encoding Variational Bayes.pdf;/home/jdayton3/.zotero/library/storage/WJVPPGIL/1312.html}
}
@article{kingma_adam_2014,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1412.6980},
primaryClass = {cs},
title = {Adam: {{A Method}} for {{Stochastic Optimization}}},
shorttitle = {Adam},
abstract = {We introduce Adam, an algorithm for first-order gradient-based optimization of stochastic objective functions, based on adaptive estimates of lower-order moments. The method is straightforward to implement, is computationally efficient, has little memory requirements, is invariant to diagonal rescaling of the gradients, and is well suited for problems that are large in terms of data and/or parameters. The method is also appropriate for non-stationary objectives and problems with very noisy and/or sparse gradients. The hyper-parameters have intuitive interpretations and typically require little tuning. Some connections to related algorithms, on which Adam was inspired, are discussed. We also analyze the theoretical convergence properties of the algorithm and provide a regret bound on the convergence rate that is comparable to the best known results under the online convex optimization framework. Empirical results demonstrate that Adam works well in practice and compares favorably to other stochastic optimization methods. Finally, we discuss AdaMax, a variant of Adam based on the infinity norm.},
journal = {arXiv:1412.6980 [cs]},
author = {Kingma, Diederik P. and Ba, Jimmy},
month = dec,
year = {2014},
keywords = {Computer Science - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/6AQ9XDPY/Kingma and Ba - 2014 - Adam A Method for Stochastic Optimization.pdf;/home/jdayton3/.zotero/library/storage/XFUWQ7VA/1412.html},
annote = {Comment: Published as a conference paper at the 3rd International Conference for Learning Representations, San Diego, 2015}
}
@incollection{krizhevsky_imagenet_2012-1,
title = {{{ImageNet Classification}} with {{Deep Convolutional Neural Networks}}},
booktitle = {Advances in {{Neural Information Processing Systems}} 25},
publisher = {{Curran Associates, Inc.}},
author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
editor = {Pereira, F. and Burges, C. J. C. and Bottou, L. and Weinberger, K. Q.},
year = {2012},
pages = {1097--1105},
file = {/home/jdayton3/.zotero/library/storage/DWHYRPEU/Krizhevsky et al. - 2012 - ImageNet Classification with Deep Convolutional Ne.pdf;/home/jdayton3/.zotero/library/storage/I9BLCZ3U/4824-imagenet-classification-with-deep-convolutional-neural-networks.html}
}
@article{wickham_tidy_2014-1,
title = {Tidy {{Data}}},
volume = {59},
copyright = {Copyright (c) 2013 Hadley Wickham},
issn = {1548-7660},
language = {en},
number = {1},
journal = {Journal of Statistical Software},
doi = {10.18637/jss.v059.i10},
author = {Wickham, Hadley},
month = sep,
year = {2014},
pages = {1-23},
file = {/home/jdayton3/.zotero/library/storage/7MC73JZN/Wickham - 2014 - Tidy Data.pdf;/home/jdayton3/.zotero/library/storage/JWZ6EMM8/v059i10.html}
}
@misc{lecun_mnist_nodate,
title = {The {{MNIST}} Database of Handwritten Digits},
howpublished = {http://yann.lecun.com/exdb/mnist/},
author = {LeCun, Yann},
file = {/home/jdayton3/.zotero/library/storage/R86ZPMVP/10027939599.html}
}
@article{dyrskjot_gene_2004,
title = {Gene Expression in the Urinary Bladder: A Common Carcinoma in Situ Gene Expression Signature Exists Disregarding Histopathological Classification},
volume = {64},
issn = {0008-5472},
shorttitle = {Gene Expression in the Urinary Bladder},
abstract = {The presence of carcinoma in situ (CIS) lesions in the urinary bladder is associated with a high risk of disease progression to a muscle invasive stage. In this study, we used microarray expression profiling to examine the gene expression patterns in superficial transitional cell carcinoma (sTCC) with surrounding CIS (13 patients), without surrounding CIS lesions (15 patients), and in muscle invasive carcinomas (mTCC; 13 patients). Hierarchical cluster analysis separated the sTCC samples according to the presence or absence of CIS in the surrounding urothelium. We identified a few gene clusters that contained genes with similar expression levels in transitional cell carcinoma (TCC) with surrounding CIS and invasive TCC. However, no close relationship between TCC with adjacent CIS and invasive TCC was observed using hierarchical cluster analysis. Expression profiling of a series of biopsies from normal urothelium and urothelium with CIS lesions from the same urinary bladder revealed that the gene expression found in sTCC with surrounding CIS is found also in CIS biopsies as well as in histologically normal samples adjacent to the CIS lesions. Furthermore, we also identified similar gene expression changes in mTCC samples. We used a supervised learning approach to build a 16-gene molecular CIS classifier. The classifier was able to classify sTCC samples according to the presence or absence of surrounding CIS with a high accuracy. This study demonstrates that a CIS gene expression signature is present not only in CIS biopsies but also in sTCC, mTCC, and, remarkably, in histologically normal urothelium from bladders with CIS. Identification of this expression signature could provide guidance for the selection of therapy and follow-up regimen in patients with early stage bladder cancer.},
language = {eng},
number = {11},
journal = {Cancer Research},
doi = {10.1158/0008-5472.CAN-03-3620},
author = {Dyrskj{\o}t, Lars and Kruh{\o}ffer, Mogens and Thykjaer, Thomas and Marcussen, Niels and Jensen, Jens L. and M{\o}ller, Klaus and {\O}rntoft, Torben F.},
month = jun,
year = {2004},
keywords = {Humans,Oligonucleotide Array Sequence Analysis,Gene Expression Profiling,Biopsy,Carcinoma in Situ,Carcinoma; Transitional Cell,Cluster Analysis,Neoplasm Staging,Urinary Bladder Neoplasms},
pages = {4040-4048},
file = {/home/jdayton3/.zotero/library/storage/7RKI8U54/Dyrskjøt et al. - 2004 - Gene expression in the urinary bladder a common c.pdf},
pmid = {15173019}
}
@misc{leek_bladderbatch_2017,
title = {{bladderbatch}},
publisher = {{Bioconductor}},
author = {Leek, Jeffrey T.},
year = {2017},
doi = {10.18129/B9.bioc.bladderbatch}
}
@misc{leek_sva_2017,
title = {{sva}},
publisher = {{Bioconductor}},
author = {Leek, Jeffrey T. and Johnson, W. Evan and Parker, Hilary S. and Fertig, Elana J. and Jaffe, Andrew E. and Storey, John D. and Zhang, Yuqing and Torres, Leonardo Collado},
year = {2017},
doi = {10.18129/B9.bioc.sva}
}
@article{olmos_prognostic_2012,
title = {Prognostic Value of Blood {{mRNA}} Expression Signatures in Castration-Resistant Prostate Cancer: A Prospective, Two-Stage Study},
volume = {13},
issn = {1474-5488},
shorttitle = {Prognostic Value of Blood {{mRNA}} Expression Signatures in Castration-Resistant Prostate Cancer},
abstract = {BACKGROUND: Biomarkers are urgently needed to dissect the heterogeneity of prostate cancer between patients to improve treatment and accelerate drug development. We analysed blood mRNA expression arrays to identify patients with metastatic castration-resistant prostate cancer with poorer outcome.
METHODS: Whole blood was collected into PAXgene tubes from patients with castration-resistant prostate cancer and patients with prostate cancer selected for active surveillance. In stage I (derivation set), patients with castration-resistant prostate cancer were used as cases and patients under active surveillance were used as controls. These patients were recruited from The Royal Marsden Hospital NHS Foundation Trust (Sutton, UK) and The Beatson West of Scotland Cancer Centre (Glasgow, UK). In stage II (validation-set), patients with castration-resistant prostate cancer recruited from the Memorial Sloan-Kettering Cancer Center (New York, USA) were assessed. Whole-blood RNA was hybridised to Affymetrix U133plus2 microarrays. Expression profiles were analysed with Bayesian latent process decomposition (LPD) to identify RNA expression profiles associated with castration-resistant prostate cancer subgroups; these profiles were then confirmed by quantative reverse transcriptase (qRT) PCR studies and correlated with overall survival in both the test-set and validation-set.
FINDINGS: LPD analyses of the mRNA expression data divided the evaluable patients in stage I (n=94) into four groups. All patients in LPD1 (14 of 14) and most in LPD2 (17 of 18) had castration-resistant prostate cancer. Patients with castration-resistant prostate cancer and those under active surveillance comprised LPD3 (15 of 31 castration-resistant prostate cancer) and LDP4 (12 of 21 castration-resistant prostate cancer). Patients with castration-resistant prostate cancer in the LPD1 subgroup had features associated with worse prognosis and poorer overall survival than patients with castration-resistant prostate cancer in other LPD subgroups (LPD1 overall survival 10{$\cdot$}7 months [95\% CI 4{$\cdot$}1-17{$\cdot$}2] vs non-LPD1 25{$\cdot$}6 months [18{$\cdot$}0-33{$\cdot$}4]; p{$<$}0{$\cdot$}0001). A nine-gene signature verified by qRT-PCR classified patients into this LPD1 subgroup with a very low percentage of misclassification (1{$\cdot$}2\%). The ten patients who were initially unclassifiable by the LPD analyses were subclassified by this signature. We confirmed the prognostic utility of this nine-gene signature in the validation castration-resistant prostate cancer cohort, where LPD1 membership was also associated with worse overall survival (LPD1 9{$\cdot$}2 months [95\% CI 2{$\cdot$}1-16{$\cdot$}4] vs non-LPD1 21{$\cdot$}6 months [7{$\cdot$}5-35{$\cdot$}6]; p=0{$\cdot$}001), and remained an independent prognostic factor in multivariable analyses for both cohorts.
INTERPRETATION: Our results suggest that whole-blood gene profiling could identify gene-expression signatures that stratify patients with castration-resistant prostate cancer into distinct prognostic groups.
FUNDING: AstraZeneca, Experimental Cancer Medicine Centre, Prostate Cancer Charity, Prostate Cancer Foundation.},
language = {eng},
number = {11},
journal = {The Lancet. Oncology},
doi = {10.1016/S1470-2045(12)70372-8},
author = {Olmos, David and Brewer, Daniel and Clark, Jeremy and Danila, Daniel C. and Parker, Chris and Attard, Gerhardt and Fleisher, Martin and Reid, Alison Hm and Castro, Elena and Sandhu, Shahneen K. and Barwell, Lorraine and Oommen, Nikhil Babu and Carreira, Suzanne and Drake, Charles G. and Jones, Robert and Cooper, Colin S. and Scher, Howard I. and {de Bono}, Johann S.},
month = nov,
year = {2012},
keywords = {Humans,Gene Expression Profiling,Neoplasm Staging,Aged,Aged; 80 and over,Biomarkers; Tumor,Castration,Gene Expression Regulation; Neoplastic,Inflammation,Male,Middle Aged,Neoplasm Grading,Neoplasm Metastasis,Prognosis,Prospective Studies,Prostatic Neoplasms,RNA; Messenger,Survival Analysis},
pages = {1114-1124},
file = {/home/jdayton3/.zotero/library/storage/3C95C69E/Olmos et al. - 2012 - Prognostic value of blood mRNA expression signatur.pdf},
pmid = {23059046},
pmcid = {PMC4878433}
}
@article{golightly_curated_2018,
title = {Curated Compendium of Human Transcriptional Biomarker Data},
volume = {5},
copyright = {2018 Nature Publishing Group},
issn = {2052-4463},
abstract = {One important use of genome-wide transcriptional profiles is to identify relationships between transcription levels and patient outcomes. These translational insights can guide the development of biomarkers for clinical application. Data from thousands of translational-biomarker studies have been deposited in public repositories, enabling reuse. However, data-reuse efforts require considerable time and expertise because transcriptional data are generated using heterogeneous profiling technologies, preprocessed using diverse normalization procedures, and annotated in non-standard ways. To address this problem, we curated 45 publicly available, translational-biomarker datasets from a variety of human diseases. To increase the data's utility, we reprocessed the raw expression data using a uniform computational pipeline, addressed quality-control problems, mapped the clinical annotations to a controlled vocabulary, and prepared consistently structured, analysis-ready data files. These data, along with scripts we used to prepare the data, are available in a public repository. We believe these data will be particularly useful to researchers seeking to perform benchmarking studies\textemdash{}for example, to compare and optimize machine-learning algorithms' ability to predict biomedical outcomes.},
language = {en},
journal = {Scientific Data},
doi = {10.1038/sdata.2018.66},
author = {Golightly, Nathan P. and Bell, Avery and Bischoff, Anna I. and Hollingsworth, Parker D. and Piccolo, Stephen R.},
month = apr,
year = {2018},
pages = {180066},
file = {/home/jdayton3/.zotero/library/storage/8FDS22S7/Golightly et al. - 2018 - Curated compendium of human transcriptional biomar.pdf;/home/jdayton3/.zotero/library/storage/ZWPALTUP/sdata201866.html}
}
@article{dayton_classifying_2017-1,
title = {Classifying Cancer Genome Aberrations by Their Mutually Exclusive Effects on Transcription},
volume = {10},
issn = {1755-8794},
abstract = {Background
Malignant tumors are typically caused by a conglomeration of genomic aberrations\textemdash{}including point mutations, small insertions, small deletions, and large copy-number variations. In some cases, specific chemotherapies and targeted drug treatments are effective against tumors that harbor certain genomic aberrations. However, predictive aberrations (biomarkers) have not been identified for many tumor types and treatments. One way to address this problem is to examine the downstream, transcriptional effects of genomic aberrations and to identify characteristic patterns. Even though two tumors harbor different genomic aberrations, the transcriptional effects of those aberrations may be similar. These patterns could be used to inform treatment choices.
Methods
We used data from 9300 tumors across 25 cancer types from The Cancer Genome Atlas. We used supervised machine learning to evaluate our ability to distinguish between tumors that had mutually exclusive genomic aberrations in specific genes. An ability to accurately distinguish between tumors with aberrations in these genes suggested that the genes have a relatively different downstream effect on transcription, and vice versa. We compared these findings against prior knowledge about signaling networks and drug responses.
Results
Our analysis recapitulates known relationships in cancer pathways and identifies gene pairs known to predict responses to the same treatments. For example, in lung adenocarcinomas, gene-expression profiles from tumors with somatic aberrations in EGFR or MET were negatively correlated with each other, in line with prior knowledge that MET amplification causes resistance to EGFR inhibition. In breast carcinomas, we observed high similarity between PTEN and PIK3CA, which play complementary roles in regulating cellular proliferation. In a pan-cancer analysis, we found that genomic aberrations in BRAF and VHL exhibit downstream effects that are clearly distinct from other genes.
Conclusion
We show that transcriptional data offer promise as a way to group genomic aberrations according to their downstream effects, and these groupings recapitulate known relationships. Our approach shows potential to help pharmacologists and clinical trialists narrow the search space for candidate gene/drug associations, including for rare mutations, and for identifying potential drug-repurposing opportunities.
Electronic supplementary material
The online version of this article (10.1186/s12920-017-0303-0) contains supplementary material, which is available to authorized users.},
number = {Suppl 4},
journal = {BMC Medical Genomics},
doi = {10.1186/s12920-017-0303-0},
author = {Dayton, Jonathan B. and Piccolo, Stephen R.},
month = dec,
year = {2017},
file = {/home/jdayton3/.zotero/library/storage/D8KA5YA4/Dayton and Piccolo - 2017 - Classifying cancer genome aberrations by their mut.pdf},
pmid = {29322935},
pmcid = {PMC5763295}
}
@article{the_cancer_genome_atlas_research_network_cancer_2013,
title = {The {{Cancer Genome Atlas Pan}}-{{Cancer}} Analysis Project},
volume = {45},
copyright = {2013 Nature Publishing Group},
issn = {1546-1718},
abstract = {The Cancer Genome Atlas (TCGA) Research Network has profiled and analyzed large numbers of human tumors to discover molecular aberrations at the DNA, RNA, protein and epigenetic levels. The resulting rich data provide a major opportunity to develop an integrated picture of commonalities, differences and emergent themes across tumor lineages. The Pan-Cancer initiative compares the first 12 tumor types profiled by TCGA. Analysis of the molecular aberrations and their functional roles across tumor types will teach us how to extend therapies effective in one cancer type to others with a similar genomic profile.},
language = {en},
journal = {Nature Genetics},
doi = {10.1038/ng.2764},
author = {{The Cancer Genome Atlas Research Network} and Weinstein, John N. and Collisson, Eric A. and Mills, Gordon B. and Shaw, Kenna R. Mills and Ozenberger, Brad A. and Ellrott, Kyle and Shmulevich, Ilya and Sander, Chris and Stuart, Joshua M.},
month = sep,
year = {2013},
pages = {1113-1120},
file = {/home/jdayton3/.zotero/library/storage/RQUNAYYZ/The Cancer Genome Atlas Research Network et al. - 2013 - The Cancer Genome Atlas Pan-Cancer analysis projec.pdf;/home/jdayton3/.zotero/library/storage/BDDXZ7V7/ng.html}
}
@book{r_core_team_r_2014,
address = {{Vienna, Austria}},
title = {R: {{A Language}} and {{Environment}} for {{Statistical Computing}}},
publisher = {{R Foundation for Statistical Computing}},
author = {{R Core Team}},
year = {2014}
}
@article{upadhyay_removal_2019,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1901.06654},
primaryClass = {cs, stat},
title = {Removal of {{Batch Effects}} Using {{Generative Adversarial Networks}}},
abstract = {Many biological data analysis processes like Cytometry or Next Generation Sequencing (NGS) produce massive amounts of data which needs to be processed in batches for down-stream analysis. Such datasets are prone to technical variations due to difference in handling the batches possibly at different times, by different experimenters or under other different conditions. This adds variation to the batches coming from the same source sample. These variations are known as Batch Effects. It is possible that these variations and natural variations due to biology confound but such situations can be avoided by performing experiments in a carefully planned manner. Batch effects can hamper down-stream analysis and may also cause results to be inconclusive. Thus, it is essential to correct for these effects. Some recent methods propose deep learning based solution to solve this problem. We demonstrate that this can be solved using a novel Generative Adversarial Networks (GANs) based framework. The advantage of using this framework over other prior approaches is that here we do not require to choose a reproducing kernel and define its parameters. We demonstrate results of our framework on a Mass Cytometry dataset.},
journal = {arXiv:1901.06654 [cs, stat]},
author = {Upadhyay, Uddeshya and Jain, Arjun},
month = jan,
year = {2019},
keywords = {Statistics - Machine Learning,Computer Science - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/TCP6S5LV/Upadhyay and Jain - 2019 - Removal of Batch Effects using Generative Adversar.pdf;/home/jdayton3/.zotero/library/storage/UVCDQMG8/1901.html},
annote = {Comment: 4 pages}
}
@misc{python_software_foundation_python_2019,
title = {The {{Python Language Reference}} \textemdash{} {{Python}} 3.6.8 Documentation},
howpublished = {https://docs.python.org/3.6/reference/index.html},
author = {Python Software Foundation},
month = apr,
year = {2019},
file = {/home/jdayton3/.zotero/library/storage/K89JGFUU/index.html}
}
@book{geron_hands-machine_2017,
edition = {1st},
title = {Hands-{{On Machine Learning}} with {{Scikit}}-{{Learn}} and {{TensorFlow}}: {{Concepts}}, {{Tools}}, and {{Techniques}} to {{Build Intelligent Systems}}},
isbn = {978-1-4919-6229-9},
shorttitle = {Hands-{{On Machine Learning}} with {{Scikit}}-{{Learn}} and {{TensorFlow}}},
abstract = {Graphics in this book are printed in black and white. Through a series of recent breakthroughs, deep learning has boosted the entire field of machine learning. Now, even programmers who know close to nothing about this technology can use simple, efficient tools to implement programs capable of learning from data. This practical book shows you how. By using concrete examples, minimal theory, and two production-ready Python frameworks\textemdash{}scikit-learn and TensorFlow\textemdash{}author Aur{\'e}lien G{\'e}ron helps you gain an intuitive understanding of the concepts and tools for building intelligent systems. You'll learn a range of techniques, starting with simple linear regression and progressing to deep neural networks. With exercises in each chapter to help you apply what you've learned, all you need is programming experience to get started. Explore the machine learning landscape, particularly neural nets. Use scikit-learn to track an example machine-learning project end-to-end. Explore several training models, including support vector machines, decision trees, random forests, and ensemble methods. Use the TensorFlow library to build and train neural nets. Dive into neural net architectures, including convolutional nets, recurrent nets, and deep reinforcement learning. Learn techniques for training and scaling deep neural nets. Apply practical code examples without acquiring excessive machine learning theory or algorithm details.},
publisher = {{O'Reilly Media, Inc.}},
author = {G{\'e}ron, Aur{\'e}lien},
year = {2017}
}
@article{srivastava_dropout_2014,
title = {Dropout: {{A Simple Way}} to {{Prevent Neural Networks}} from {{Overfitting}}},
volume = {15},
journal = {Journal of Machine Learning Research},
author = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
year = {2014},
pages = {1929-1958}
}
@article{ioffe_batch_2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1502.03167},
primaryClass = {cs},
title = {Batch {{Normalization}}: {{Accelerating Deep Network Training}} by {{Reducing Internal Covariate Shift}}},
shorttitle = {Batch {{Normalization}}},
abstract = {Training Deep Neural Networks is complicated by the fact that the distribution of each layer's inputs changes during training, as the parameters of the previous layers change. This slows down the training by requiring lower learning rates and careful parameter initialization, and makes it notoriously hard to train models with saturating nonlinearities. We refer to this phenomenon as internal covariate shift, and address the problem by normalizing layer inputs. Our method draws its strength from making normalization a part of the model architecture and performing the normalization for each training mini-batch. Batch Normalization allows us to use much higher learning rates and be less careful about initialization. It also acts as a regularizer, in some cases eliminating the need for Dropout. Applied to a state-of-the-art image classification model, Batch Normalization achieves the same accuracy with 14 times fewer training steps, and beats the original model by a significant margin. Using an ensemble of batch-normalized networks, we improve upon the best published result on ImageNet classification: reaching 4.9\% top-5 validation error (and 4.8\% test error), exceeding the accuracy of human raters.},
journal = {arXiv:1502.03167 [cs]},
author = {Ioffe, Sergey and Szegedy, Christian},
month = feb,
year = {2015},
keywords = {Computer Science - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/CCGQ4TQF/Ioffe and Szegedy - 2015 - Batch Normalization Accelerating Deep Network Tra.pdf;/home/jdayton3/.zotero/library/storage/Y8IKMTLV/1502.html}
}
@article{pedregosa_scikit-learn_2011,
title = {Scikit-Learn: {{Machine Learning}} in {{Python}}},
volume = {12},
issn = {1533-7928},
shorttitle = {Scikit-Learn},
number = {Oct},
journal = {Journal of Machine Learning Research},
author = {Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, {\'E}douard},
year = {2011},
pages = {2825-2830},
file = {/home/jdayton3/.zotero/library/storage/6TNXZ48Y/Pedregosa et al. - 2011 - Scikit-learn Machine Learning in Python.pdf;/home/jdayton3/.zotero/library/storage/Q2XS4QAF/pedregosa11a.html}
}
@article{zhao_towards_2017,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1702.08658},
primaryClass = {cs, stat},
title = {Towards {{Deeper Understanding}} of {{Variational Autoencoding Models}}},
abstract = {We propose a new family of optimization criteria for variational auto-encoding models, generalizing the standard evidence lower bound. We provide conditions under which they recover the data distribution and learn latent features, and formally show that common issues such as blurry samples and uninformative latent features arise when these conditions are not met. Based on these new insights, we propose a new sequential VAE model that can generate sharp samples on the LSUN image dataset based on pixel-wise reconstruction loss, and propose an optimization criterion that encourages unsupervised learning of informative latent features.},
journal = {arXiv:1702.08658 [cs, stat]},
author = {Zhao, Shengjia and Song, Jiaming and Ermon, Stefano},
month = feb,
year = {2017},
keywords = {Computer Science - Machine Learning,Statistics - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/LSA9288Z/Zhao et al. - 2017 - Towards Deeper Understanding of Variational Autoen.pdf;/home/jdayton3/.zotero/library/storage/YCLBXETC/1702.html}
}
@article{hou_deep_2016,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1610.00291},
primaryClass = {cs},
title = {Deep {{Feature Consistent Variational Autoencoder}}},
abstract = {We present a novel method for constructing Variational Autoencoder (VAE). Instead of using pixel-by-pixel loss, we enforce deep feature consistency between the input and the output of a VAE, which ensures the VAE's output to preserve the spatial correlation characteristics of the input, thus leading the output to have a more natural visual appearance and better perceptual quality. Based on recent deep learning works such as style transfer, we employ a pre-trained deep convolutional neural network (CNN) and use its hidden features to define a feature perceptual loss for VAE training. Evaluated on the CelebA face dataset, we show that our model produces better results than other methods in the literature. We also show that our method can produce latent vectors that can capture the semantic information of face expressions and can be used to achieve state-of-the-art performance in facial attribute prediction.},
journal = {arXiv:1610.00291 [cs]},
author = {Hou, Xianxu and Shen, Linlin and Sun, Ke and Qiu, Guoping},
month = oct,
year = {2016},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {/home/jdayton3/.zotero/library/storage/PDH8CX6G/Hou et al. - 2016 - Deep Feature Consistent Variational Autoencoder.pdf;/home/jdayton3/.zotero/library/storage/GU8E7RIE/1610.html}
}
@article{kullback_information_1951,
title = {On {{Information}} and {{Sufficiency}}},
volume = {22},
issn = {0003-4851, 2168-8990},
language = {en},
number = {1},
journal = {The Annals of Mathematical Statistics},
doi = {10.1214/aoms/1177729694},
author = {Kullback, S. and Leibler, R. A.},
month = mar,
year = {1951},
pages = {79-86},
file = {/home/jdayton3/.zotero/library/storage/XKJ9N5PR/Kullback and Leibler - 1951 - On Information and Sufficiency.pdf;/home/jdayton3/.zotero/library/storage/M6TUTK4X/1177729694.html}
}
@article{maron_automatic_1961,
title = {Automatic {{Indexing}}: {{An Experimental Inquiry}}},
volume = {8},
issn = {0004-5411},
shorttitle = {Automatic {{Indexing}}},
number = {3},
journal = {J. ACM},
doi = {10.1145/321075.321084},
author = {Maron, M. E.},
month = jul,
year = {1961},
pages = {404-417}
}
@inproceedings{tin_kam_ho_random_1995,
title = {Random Decision Forests},
volume = {1},
abstract = {Decision trees are attractive classifiers due to their high execution speed. But trees derived with traditional methods often cannot be grown to arbitrary complexity for possible loss of generalization accuracy on unseen data. The limitation on complexity usually means suboptimal accuracy on training data. Following the principles of stochastic modeling, we propose a method to construct tree-based classifiers whose capacity can be arbitrarily expanded for increases in accuracy for both training and unseen data. The essence of the method is to build multiple trees in randomly selected subspaces of the feature space. Trees in different subspaces generalize their classification in complementary ways, and their combined classification can be monotonically improved. The validity of the method is demonstrated through experiments on the recognition of handwritten digits.},
booktitle = {Proceedings of 3rd {{International Conference}} on {{Document Analysis}} and {{Recognition}}},
doi = {10.1109/ICDAR.1995.598994},
author = {Ho, Tin Kam},
month = aug,
year = {1995},
keywords = {Classification tree analysis,complexity,decision theory,decision trees,generalization accuracy,handwriting recognition,handwritten digits,Hidden Markov models,Multilayer perceptrons,optical character recognition,Optimization methods,random decision forests,stochastic modeling,Stochastic processes,suboptimal accuracy,Testing,Training data,tree-based classifiers},
pages = {278-282},
file = {/home/jdayton3/.zotero/library/storage/6J2TQXBZ/Tin Kam Ho - 1995 - Random decision forests.pdf;/home/jdayton3/.zotero/library/storage/YVF82W3R/598994.html}
}
@article{silverman_e._1989,
title = {E. {{Fix}} and {{J}}.{{L}}. {{Hodges}} (1951): {{An Important Contribution}} to {{Nonparametric Discriminant Analysis}} and {{Density Estimation}}: {{Commentary}} on {{Fix}} and {{Hodges}} (1951)},
volume = {57},
issn = {0306-7734},
shorttitle = {E. {{Fix}} and {{J}}.{{L}}. {{Hodges}} (1951)},
abstract = {In 1951, Evelyn Fix and J.L. Hodges, Jr. wrote a technical report which contained prophetic work on nonparametric discriminant analysis and probability density estimation, and which was never published by the authors. The report introduced several important concepts for the first time. It is of interest not only for historical reasons but also because it contains much material that is still of contemporary relevance. Here, the report is printed in full together with a commentary placing the paper in context and interpreting its ideas in the light of more modern developments.},
number = {3},
journal = {International Statistical Review / Revue Internationale de Statistique},
doi = {10.2307/1403796},
author = {Silverman, B. W. and Jones, M. C.},
year = {1989},
pages = {233-238}
}
@techreport{fix_discriminatory_1951,
address = {{Randolph Field, Texas}},
title = {Discriminatory {{Analysis}}, {{Nonparametric Discrimination}}: {{Consistency Properties}}},
language = {en},
number = {4},
institution = {{USAF School of Aviation Medicine}},
author = {Fix, Evelyn and Hodges, Jr., J.L.},
month = feb,
year = {1951},
file = {/home/jdayton3/.zotero/library/storage/KMFL7NAY/a800276.pdf}
}
@article{cortes_support-vector_1995,
title = {Support-Vector Networks},
volume = {20},
issn = {1573-0565},
abstract = {The support-vector network is a new learning machine for two-group classification problems. The machine conceptually implements the following idea: input vectors are non-linearly mapped to a very high-dimension feature space. In this feature space a linear decision surface is constructed. Special properties of the decision surface ensures high generalization ability of the learning machine. The idea behind the support-vector network was previously implemented for the restricted case where the training data can be separated without errors. We here extend this result to non-separable training data. High generalization ability of support-vector networks utilizing polynomial input transformations is demonstrated. We also compare the performance of the support-vector network to various classical learning algorithms that all took part in a benchmark study of Optical Character Recognition.},
language = {en},
number = {3},
journal = {Machine Learning},
doi = {10.1007/BF00994018},
author = {Cortes, Corinna and Vapnik, Vladimir},
month = sep,
year = {1995},
keywords = {efficient learning algorithms,neural networks,pattern recognition,polynomial classifiers,radial basis function classifiers},
pages = {273-297},
file = {/home/jdayton3/.zotero/library/storage/HZMD7TMJ/Cortes and Vapnik - 1995 - Support-vector networks.pdf}
}
@article{espin-perez_comparison_2018,
title = {Comparison of Statistical Methods and the Use of Quality Control Samples for Batch Effect Correction in Human Transcriptome Data},
volume = {13},
issn = {1932-6203},
abstract = {Batch effects are technical sources of variation introduced by the necessity of conducting gene expression analyses on different dates due to the large number of biological samples in population-based studies. The aim of this study is to evaluate the performances of linear mixed models (LMM) and Combat in batch effect removal. We also assessed the utility of adding quality control samples in the study design as technical replicates. In order to do so, we simulated gene expression data by adding ``treatment'' and batch effects to a real gene expression dataset. The performances of LMM and Combat, with and without quality control samples, are assessed in terms of sensitivity and specificity while correcting for the batch effect using a wide range of effect sizes, statistical noise, sample sizes and level of balanced/unbalanced designs. The simulations showed small differences among LMM and Combat. LMM identifies stronger relationships between big effect sizes and gene expression than Combat, while Combat identifies in general more true and false positives than LMM. However, these small differences can still be relevant depending on the research goal. When any of these methods are applied, quality control samples did not reduce the batch effect, showing no added value for including them in the study design.},
language = {en},
number = {8},
journal = {PLOS ONE},
doi = {10.1371/journal.pone.0202947},
author = {{Esp{\'i}n-P{\'e}rez}, Almudena and Portier, Chris and {Chadeau-Hyam}, Marc and van Veldhoven, Karin and Kleinjans, Jos C. S. and de Kok, Theo M. C. M.},
month = aug,
year = {2018},
keywords = {Biomarkers,Gene expression,Microarrays,Principal component analysis,Quality control,Simulation and modeling,Statistical data,Statistical methods},
pages = {e0202947},
file = {/home/jdayton3/.zotero/library/storage/SKA793Q9/Espín-Pérez et al. - 2018 - Comparison of statistical methods and the use of q.pdf;/home/jdayton3/.zotero/library/storage/TF22HBPS/article.html}
}
@article{brazma_gene_2000,
title = {Gene Expression Data Analysis},
volume = {480},
issn = {0014-5793},
abstract = {Microarrays are one of the latest breakthroughs in experimental molecular biology, which allow monitoring of gene expression for tens of thousands of genes in parallel and are already producing huge amounts of valuable data. Analysis and handling of such data is becoming one of the major bottlenecks in the utilization of the technology. The raw microarray data are images, which have to be transformed into gene expression matrices--tables where rows represent genes, columns represent various samples such as tissues or experimental conditions, and numbers in each cell characterize the expression level of the particular gene in the particular sample. These matrices have to be analyzed further, if any knowledge about the underlying biological processes is to be extracted. In this paper we concentrate on discussing bioinformatics methods used for such analysis. We briefly discuss supervised and unsupervised data analysis and its applications, such as predicting gene function classes and cancer classification. Then we discuss how the gene expression matrix can be used to predict putative regulatory signals in the genome sequences. In conclusion we discuss some possible future directions.},
language = {en},
number = {1},
journal = {FEBS Letters},
author = {Brazma, A. and Vilo, J.},
month = aug,
year = {2000},
keywords = {Animals,Computational Biology,Gene Expression Profiling,Genes,Humans,Neoplasms,Oligonucleotide Array Sequence Analysis,Phylogeny,Regulatory Sequences; Nucleic Acid,Statistics as Topic},
pages = {17-24},
pmid = {10967323}
}
@article{stuart_gene-coexpression_2003,
title = {A {{Gene}}-{{Coexpression Network}} for {{Global Discovery}} of {{Conserved Genetic Modules}}},
volume = {302},
copyright = {American Association for the Advancement of Science},
issn = {0036-8075, 1095-9203},
abstract = {To elucidate gene function on a global scale, we identified pairs of genes that are coexpressed over 3182 DNA microarrays from humans, flies, worms, and yeast. We found 22,163 such coexpression relationships, each of which has been conserved across evolution. This conservation implies that the coexpression of these gene pairs confers a selective advantage and therefore that these genes are functionally related. Many of these relationships provide strong evidence for the involvement of new genes in core biological functions such as the cell cycle, secretion, and protein expression. We experimentally confirmed the predictions implied by some of these links and identified cell proliferation functions for several genes. By assembling these links into a gene-coexpression network, we found several components that were animal-specific as well as interrelationships between newly evolved and ancient modules.
Twenty-two thousand pairs of genes that are coexpressed in organisms as diverse as yeast, flies, worms, and humans are used to construct a genetic network and predict the functions of proteins.},
language = {en},
number = {5643},
journal = {Science},
doi = {10.1126/science.1087447},
author = {Stuart, Joshua M. and Segal, Eran and Koller, Daphne and Kim, Stuart K.},
month = oct,
year = {2003},
pages = {249-255},
file = {/home/jdayton3/.zotero/library/storage/HY9A5EJX/Stuart et al. - 2003 - A Gene-Coexpression Network for Global Discovery o.pdf;/home/jdayton3/.zotero/library/storage/IKY3PDJZ/249.html},
pmid = {12934013}
}
@article{henriksen_application_2002,
title = {Application of Gene Expression Profiling to Cardiovascular Disease},
volume = {54},
issn = {0008-6363},
language = {en},
number = {1},
journal = {Cardiovascular Research},
doi = {10.1016/S0008-6363(01)00516-8},
author = {Henriksen, P. A. and Kotelevtsev, Y.},
month = apr,
year = {2002},
pages = {16-24},
file = {/home/jdayton3/.zotero/library/storage/I9E5L5XI/Henriksen and Kotelevtsev - 2002 - Application of gene expression profiling to cardio.pdf;/home/jdayton3/.zotero/library/storage/JMLIMZ2G/295328.html}
}
@article{veer_gene_2002,
title = {Gene Expression Profiling Predicts Clinical Outcome of Breast Cancer},
volume = {415},
copyright = {2002 Macmillan Magazines Ltd.},
issn = {1476-4687},
abstract = {Breast cancer patients with the same stage of disease can have markedly different treatment responses and overall outcome. The strongest predictors for metastases (for example, lymph node status and histological grade) fail to classify accurately breast tumours according to their clinical behaviour. Chemotherapy or hormonal therapy reduces the risk of distant metastases by approximately one-third; however, 70\textendash{}80\% of patients receiving this treatment would have survived without it. None of the signatures of breast cancer gene expression reported to date allow for patient-tailored therapy strategies. Here we used DNA microarray analysis on primary breast tumours of 117 young patients, and applied supervised classification to identify a gene expression signature strongly predictive of a short interval to distant metastases (`poor prognosis' signature) in patients without tumour cells in local lymph nodes at diagnosis (lymph node negative). In addition, we established a signature that identifies tumours of BRCA1 carriers. The poor prognosis signature consists of genes regulating cell cycle, invasion, metastasis and angiogenesis. This gene expression profile will outperform all currently used clinical parameters in predicting disease outcome. Our findings provide a strategy to select patients who would benefit from adjuvant therapy.},
language = {en},
number = {6871},
journal = {Nature},
doi = {10.1038/415530a},
author = {van 't Veer, Laura J. and Dai, Hongyue and van de Vijver, Marc J. and He, Yudong D. and Hart, Augustinus A. M. and Mao, Mao and Peterse, Hans L. and van der Kooy, Karin and Marton, Matthew J. and Witteveen, Anke T. and Schreiber, George J. and Kerkhoven, Ron M. and Roberts, Chris and Linsley, Peter S. and Bernards, Ren{\'e} and Friend, Stephen H.},
month = jan,
year = {2002},
pages = {530-536},
file = {/home/jdayton3/.zotero/library/storage/G4E883BM/Veer et al. - 2002 - Gene expression profiling predicts clinical outcom.pdf;/home/jdayton3/.zotero/library/storage/JBATNHFU/415530a.html}
}
@article{sirota_discovery_2011,
title = {Discovery and {{Preclinical Validation}} of {{Drug Indications Using Compendia}} of {{Public Gene Expression Data}}},
volume = {3},
copyright = {Copyright \textcopyright{} 2011, American Association for the Advancement of Science},
issn = {1946-6234, 1946-6242},
abstract = {Greening Drug Discovery
Recycling is good for the environment\textemdash{}and for drug development too. Repurposing existing, approved drugs can speed their adoption in the clinic because they can often take advantage of the existing rigorous safety testing required by the Food and Drug Administration and other regulatory agencies. In a pair of papers, Sirota et al. and Dudley et al. examined publicly available gene expression data and determined the genes affected in 100 diseases and 164 drugs. By pairing drugs that correct abnormal gene expression in diseases, they confirm known effective drug-disease pairs and predict new indications for already approved agents. Experimental validation that an antiulcer drug and an antiepileptic can be reused for lung cancer and inflammatory bowel disease reinforces the promise of this approach.
The authors scrutinized the data in Gene Expression Omnibus and identified a disease signature for 100 diseases, which they defined as the set of mRNAs that reliably increase or decrease in patients with that disease compared to normal individuals. They compared each of these disease signatures to each of the gene expression signatures for 164 drugs from the Connectivity Map, a collection of mRNA expression data from cultured human cells treated with bioactive small molecules that is maintained at the Broad Institute at Massachusetts Institute of Technology. A similarity score calculated by the authors for every possible pair of drug and disease ranged from +1 (a perfect correlation of signatures) to -1 (exactly opposite signatures). The investigators suggested that a similarity score of -1 would predict that the drug would ameliorate the abnormalities in the disease and thus be an effective therapy.
This proved to be true for a number of drugs already on the market. The corticosteroid prednisolone, a common treatment for Crohn's disease and ulcerative colitis, showed a strong similarity score for these two diseases. The histone deacetylase inhibitors trichostatin A, valproic acid, and vorinostat were predicted to work against brain tumors and other cancers (esophagus, lung, and colon), and there is experimental evidence that this is indeed the case. But in the ultimate test of method, the authors confirmed two new predictions in animal experiments: Cimetidine, an antiulcer drug, predicted by the authors to be effective against lung cancer, inhibited tumor cells in vitro and in vivo in mice. In addition, the antiepileptic topiramate, predicted to improve inflammatory bowel disease by similarity score, improved damage in colon tissue of rats treated with trinitrobenzenesulfonic acid, a model of the disease. These two drugs are therefore good candidates for recycling to treat two diseases in need of better therapies\textemdash{}lung cancer and inflammatory bowel disease\textemdash{}and we now have a way to mine available data for fast routes to new disease therapies.
A systematic computational method predicts new uses for existing drugs by integrating public gene expression signatures of drugs and diseases.},
language = {en},
number = {96},
journal = {Science Translational Medicine},
doi = {10.1126/scitranslmed.3001318},
author = {Sirota, Marina and Dudley, Joel T. and Kim, Jeewon and Chiang, Annie P. and Morgan, Alex A. and {Sweet-Cordero}, Alejandro and Sage, Julien and Butte, Atul J.},
month = aug,
year = {2011},
pages = {96ra77},
file = {/home/jdayton3/.zotero/library/storage/FSU2WWNB/Sirota et al. - 2011 - Discovery and Preclinical Validation of Drug Indic.pdf;/home/jdayton3/.zotero/library/storage/2UIF8XKY/96ra77.html},
pmid = {21849665}
}
@article{lazar_batch_2013,
title = {Batch Effect Removal Methods for Microarray Gene Expression Data Integration: A Survey},
volume = {14},
issn = {1467-5463},
shorttitle = {Batch Effect Removal Methods for Microarray Gene Expression Data Integration},
language = {en},
number = {4},
journal = {Briefings in Bioinformatics},
doi = {10.1093/bib/bbs037},
author = {Lazar, Cosmin and Meganck, Stijn and Taminau, Jonatan and Steenhoff, David and Coletta, Alain and Molter, Colin and {Weiss-Sol{\'i}s}, David Y. and Duque, Robin and Bersini, Hugues and Now{\'e}, Ann},
month = jul,
year = {2013},
pages = {469-490},
file = {/home/jdayton3/.zotero/library/storage/76URUBIH/Lazar et al. - 2013 - Batch effect removal methods for microarray gene e.pdf;/home/jdayton3/.zotero/library/storage/C5IQAP7B/191565.html}
}
@article{chen_gene_2016,
title = {Gene Expression Inference with Deep Learning},
volume = {32},
issn = {1367-4803},
language = {en},
number = {12},
journal = {Bioinformatics},
doi = {10.1093/bioinformatics/btw074},
author = {Chen, Yifei and Li, Yi and Narayan, Rajiv and Subramanian, Aravind and Xie, Xiaohui},
month = jun,
year = {2016},
pages = {1832-1839},
file = {/home/jdayton3/.zotero/library/storage/L9ERX4Q5/Chen et al. - 2016 - Gene expression inference with deep learning.pdf;/home/jdayton3/.zotero/library/storage/7DNG6LK3/1743989.html}
}
@article{lin_using_2017,
title = {Using Neural Networks for Reducing the Dimensions of Single-Cell {{RNA}}-{{Seq}} Data},
volume = {45},
issn = {0305-1048},
language = {en},
number = {17},
journal = {Nucleic Acids Research},
doi = {10.1093/nar/gkx681},
author = {Lin, Chieh and Jain, Siddhartha and Kim, Hannah and {Bar-Joseph}, Ziv},
month = sep,
year = {2017},
pages = {e156},
file = {/home/jdayton3/.zotero/library/storage/66KLDPQP/Lin et al. - 2017 - Using neural networks for reducing the dimensions .pdf;/home/jdayton3/.zotero/library/storage/QZ7ETTU6/4056711.html}
}
@article{aliper_deep_2016,
title = {Deep {{Learning Applications}} for {{Predicting Pharmacological Properties}} of {{Drugs}} and {{Drug Repurposing Using Transcriptomic Data}}},
volume = {13},
issn = {1543-8384},
abstract = {Deep learning is rapidly advancing many areas of science and technology with multiple success stories in image, text, voice and video recognition, robotics, and autonomous driving. In this paper we demonstrate how deep neural networks (DNN) trained on large transcriptional response data sets can classify various drugs to therapeutic categories solely based on their transcriptional profiles. We used the perturbation samples of 678 drugs across A549, MCF-7, and PC-3 cell lines from the LINCS Project and linked those to 12 therapeutic use categories derived from MeSH. To train the DNN, we utilized both gene level transcriptomic data and transcriptomic data processed using a pathway activation scoring algorithm, for a pooled data set of samples perturbed with different concentrations of the drug for 6 and 24 hours. In both pathway and gene level classification, DNN achieved high classification accuracy and convincingly outperformed the support vector machine (SVM) model on every multiclass classification problem, however, models based on pathway level data performed significantly better. For the first time we demonstrate a deep learning neural net trained on transcriptomic data to recognize pharmacological properties of multiple drugs across different biological systems and conditions. We also propose using deep neural net confusion matrices for drug repositioning. This work is a proof of principle for applying deep learning to drug discovery and development.},
number = {7},
journal = {Molecular Pharmaceutics},
doi = {10.1021/acs.molpharmaceut.6b00248},
author = {Aliper, Alexander and Plis, Sergey and Artemov, Artem and Ulloa, Alvaro and Mamoshina, Polina and Zhavoronkov, Alex},
month = jul,
year = {2016},
pages = {2524-2530},
file = {/home/jdayton3/.zotero/library/storage/Q6HEJ682/Aliper et al. - 2016 - Deep Learning Applications for Predicting Pharmaco.pdf;/home/jdayton3/.zotero/library/storage/YWYUVBMZ/acs.molpharmaceut.html}
}
@incollection{danaee_deep_2016,
title = {A Deep Learning Approach for Cancer Detection and Relevant Gene Identification},
isbn = {978-981-320-780-6},
booktitle = {Biocomputing 2017},
publisher = {{World Scientific}},
author = {Danaee, Padideh and Ghaeini, Reza and Hendrix, David A.},
month = nov,
year = {2016},
pages = {219-229},
file = {/home/jdayton3/.zotero/library/storage/ECBAN8DF/Danaee et al. - 2016 - A deep learning approach for cancer detection and .pdf;/home/jdayton3/.zotero/library/storage/TUZZ5FIG/9789813207813_0022.html},
doi = {10.1142/9789813207813_0022}
}
@book{rstudio_team_rstudio_2018,
address = {{Boston, MA}},
title = {{{RStudio}}: {{Integrated Development Environment}} for {{R}}},
publisher = {{RStudio, Inc.}},
author = {{RStudio Team}},
year = {2018}
}
@article{santurkar_how_2018,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1805.11604},
primaryClass = {cs, stat},
title = {How {{Does Batch Normalization Help Optimization}}?},
abstract = {Batch Normalization (BatchNorm) is a widely adopted technique that enables faster and more stable training of deep neural networks (DNNs). Despite its pervasiveness, the exact reasons for BatchNorm's effectiveness are still poorly understood. The popular belief is that this effectiveness stems from controlling the change of the layers' input distributions during training to reduce the so-called "internal covariate shift". In this work, we demonstrate that such distributional stability of layer inputs has little to do with the success of BatchNorm. Instead, we uncover a more fundamental impact of BatchNorm on the training process: it makes the optimization landscape significantly smoother. This smoothness induces a more predictive and stable behavior of the gradients, allowing for faster training.},
journal = {arXiv:1805.11604 [cs, stat]},
author = {Santurkar, Shibani and Tsipras, Dimitris and Ilyas, Andrew and Madry, Aleksander},
month = may,
year = {2018},
keywords = {Computer Science - Machine Learning,Computer Science - Neural and Evolutionary Computing,Statistics - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/YQB2Q63G/Santurkar et al. - 2018 - How Does Batch Normalization Help Optimization.pdf;/home/jdayton3/.zotero/library/storage/T8UWE38R/1805.html},
annote = {Comment: In NeurIPS'18}
}
@book{csaji_approximation_2001,
title = {Approximation with {{Artificial Neural Networks}}},
author = {Cs{\'a}ji, Bal{\'a}zs Csan{\'a}d and {ten Eikelder}, Huub},
year = {2001},
file = {/home/jdayton3/.zotero/library/storage/F2K9VFTA/Csáji and Eikelder - Consultant.pdf;/home/jdayton3/.zotero/library/storage/LMKLZAM9/summary.html}
}
@article{clevert_fast_2015,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1511.07289},
primaryClass = {cs},
title = {Fast and {{Accurate Deep Network Learning}} by {{Exponential Linear Units}} ({{ELUs}})},
abstract = {We introduce the "exponential linear unit" (ELU) which speeds up learning in deep neural networks and leads to higher classification accuracies. Like rectified linear units (ReLUs), leaky ReLUs (LReLUs) and parametrized ReLUs (PReLUs), ELUs alleviate the vanishing gradient problem via the identity for positive values. However, ELUs have improved learning characteristics compared to the units with other activation functions. In contrast to ReLUs, ELUs have negative values which allows them to push mean unit activations closer to zero like batch normalization but with lower computational complexity. Mean shifts toward zero speed up learning by bringing the normal gradient closer to the unit natural gradient because of a reduced bias shift effect. While LReLUs and PReLUs have negative values, too, they do not ensure a noise-robust deactivation state. ELUs saturate to a negative value with smaller inputs and thereby decrease the forward propagated variation and information. Therefore, ELUs code the degree of presence of particular phenomena in the input, while they do not quantitatively model the degree of their absence. In experiments, ELUs lead not only to faster learning, but also to significantly better generalization performance than ReLUs and LReLUs on networks with more than 5 layers. On CIFAR-100 ELUs networks significantly outperform ReLU networks with batch normalization while batch normalization does not improve ELU networks. ELU networks are among the top 10 reported CIFAR-10 results and yield the best published result on CIFAR-100, without resorting to multi-view evaluation or model averaging. On ImageNet, ELU networks considerably speed up learning compared to a ReLU network with the same architecture, obtaining less than 10\% classification error for a single crop, single model network.},
journal = {arXiv:1511.07289 [cs]},
author = {Clevert, Djork-Arn{\'e} and Unterthiner, Thomas and Hochreiter, Sepp},
month = nov,
year = {2015},
keywords = {Computer Science - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/SD72EVBV/Clevert et al. - 2015 - Fast and Accurate Deep Network Learning by Exponen.pdf;/home/jdayton3/.zotero/library/storage/5MC3QR96/1511.html},
annote = {Comment: Published as a conference paper at ICLR 2016}
}
@article{abadi_tensorflow_2016-1,
archivePrefix = {arXiv},
eprinttype = {arxiv},
eprint = {1603.04467},
primaryClass = {cs},
title = {{{TensorFlow}}: {{Large}}-{{Scale Machine Learning}} on {{Heterogeneous Distributed Systems}}},
shorttitle = {{{TensorFlow}}},
abstract = {TensorFlow is an interface for expressing machine learning algorithms, and an implementation for executing such algorithms. A computation expressed using TensorFlow can be executed with little or no change on a wide variety of heterogeneous systems, ranging from mobile devices such as phones and tablets up to large-scale distributed systems of hundreds of machines and thousands of computational devices such as GPU cards. The system is flexible and can be used to express a wide variety of algorithms, including training and inference algorithms for deep neural network models, and it has been used for conducting research and for deploying machine learning systems into production across more than a dozen areas of computer science and other fields, including speech recognition, computer vision, robotics, information retrieval, natural language processing, geographic information extraction, and computational drug discovery. This paper describes the TensorFlow interface and an implementation of that interface that we have built at Google. The TensorFlow API and a reference implementation were released as an open-source package under the Apache 2.0 license in November, 2015 and are available at www.tensorflow.org.},
journal = {arXiv:1603.04467 [cs]},
author = {Abadi, Mart{\'i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S. and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Goodfellow, Ian and Harp, Andrew and Irving, Geoffrey and Isard, Michael and Jia, Yangqing and Jozefowicz, Rafal and Kaiser, Lukasz and Kudlur, Manjunath and Levenberg, Josh and Mane, Dan and Monga, Rajat and Moore, Sherry and Murray, Derek and Olah, Chris and Schuster, Mike and Shlens, Jonathon and Steiner, Benoit and Sutskever, Ilya and Talwar, Kunal and Tucker, Paul and Vanhoucke, Vincent and Vasudevan, Vijay and Viegas, Fernanda and Vinyals, Oriol and Warden, Pete and Wattenberg, Martin and Wicke, Martin and Yu, Yuan and Zheng, Xiaoqiang},
month = mar,
year = {2016},
keywords = {Computer Science - Distributed; Parallel; and Cluster Computing,Computer Science - Machine Learning},
file = {/home/jdayton3/.zotero/library/storage/LXTP9FZC/Abadi et al. - 2016 - TensorFlow Large-Scale Machine Learning on Hetero.pdf;/home/jdayton3/.zotero/library/storage/XIXKC46Q/1603.html},
annote = {Comment: Version 2 updates only the metadata, to correct the formatting of Mart{\'i}n Abadi's name}
}
@article{freytag_systematic_2015,
title = {Systematic Noise Degrades Gene Co-Expression Signals but Can Be Corrected},
volume = {16},
issn = {1471-2105},
abstract = {In the past decade, the identification of gene co-expression has become a routine part of the analysis of high-dimensional microarray data. Gene co-expression, which is mostly detected via the Pearson correlation coefficient, has played an important role in the discovery of molecular pathways and networks. Unfortunately, the presence of systematic noise in high-dimensional microarray datasets corrupts estimates of gene co-expression. Removing systematic noise from microarray data is therefore crucial. Many cleaning approaches for microarray data exist, however these methods are aimed towards improving differential expression analysis and their performances have been primarily tested for this application. To our knowledge, the performances of these approaches have never been systematically compared in the context of gene co-expression estimation.},
number = {1},
journal = {BMC Bioinformatics},
doi = {10.1186/s12859-015-0745-3},
author = {Freytag, Saskia and {Gagnon-Bartsch}, Johann and Speed, Terence P. and Bahlo, Melanie},
month = sep,
year = {2015},
pages = {309},
file = {/home/jdayton3/.zotero/library/storage/9NNB9DZ4/Freytag et al. - 2015 - Systematic noise degrades gene co-expression signa.pdf;/home/jdayton3/.zotero/library/storage/I3TQJAS9/s12859-015-0745-3.html}
}
@article{liu_nonlinear_2016,
title = {Nonlinear {{Network Reconstruction}} from {{Gene Expression Data Using Marginal Dependencies Measured}} by {{DCOL}}},
volume = {11},
issn = {1932-6203},
abstract = {Reconstruction of networks from high-throughput expression data is an important tool to identify new regulatory relations. Given that nonlinear and complex relations exist between biological units, methods that can utilize nonlinear dependencies may yield insights that are not provided by methods using linear associations alone. We have previously developed a distance to measure predictive nonlinear relations, the Distance based on Conditional Ordered List (DCOL), which is sensitive and computationally efficient on large matrices. In this study, we explore the utility of DCOL in the reconstruction of networks, by combining it with local false discovery rate (lfdr)\textendash{}based inference. We demonstrate in simulations that the new method named nlnet is effective in recovering hidden nonlinear modules. We also demonstrate its utility using a single cell RNA seq dataset. The method is available as an R package at https://cran.r-project.org/web/packages/nlnet.},
number = {7},
journal = {PLoS ONE},
doi = {10.1371/journal.pone.0158247},
author = {Liu, Haodong and Li, Peng and Zhu, Mengyao and Wang, Xiaofei and Lu, Jianwei and Yu, Tianwei},
month = jul,
year = {2016},
file = {/home/jdayton3/.zotero/library/storage/B3CLD65B/Liu et al. - 2016 - Nonlinear Network Reconstruction from Gene Express.pdf},
pmid = {27380516},
pmcid = {PMC4933395}
}
@incollection{lu_expressive_2017,
title = {The {{Expressive Power}} of {{Neural Networks}}: {{A View}} from the {{Width}}},
shorttitle = {The {{Expressive Power}} of {{Neural Networks}}},
booktitle = {Advances in {{Neural Information Processing Systems}} 30},
publisher = {{Curran Associates, Inc.}},
author = {Lu, Zhou and Pu, Hongming and Wang, Feicheng and Hu, Zhiqiang and Wang, Liwei},
editor = {Guyon, I. and Luxburg, U. V. and Bengio, S. and Wallach, H. and Fergus, R. and Vishwanathan, S. and Garnett, R.},
year = {2017},
pages = {6231-6239},
file = {/home/jdayton3/.zotero/library/storage/5XJ85Q3V/Lu et al. - 2017 - The Expressive Power of Neural Networks A View fr.pdf;/home/jdayton3/.zotero/library/storage/NTM77949/7203-the-expressive-power-of-neural-networks-a-view-from-the-width.html}
}
@article{maaten_visualizing_2008,
title = {Visualizing {{Data}} Using {{t-SNE}}},
volume = {9},
issn = {1533-7928},
number = {Nov},
journal = {Journal of Machine Learning Research},
author = {van der Maaten, Laurens and Hinton, Geoffrey},
year = {2008},
pages = {2579-2605},
file = {/home/jdayton3/.zotero/library/storage/C3VCMD7M/Maaten and Hinton - 2008 - Visualizing Data using t-SNE.pdf;/home/jdayton3/.zotero/library/storage/Y7YWNUFM/vandermaaten08a.html}
}