docs/2015_08_msc_thesis/refs.bib

File generated automatically by 'cleanbib' script.

**********************************
* DO NOT EDIT THIS FILE BY HAND! *
**********************************

% Book on algorithmic skeletons (Cole, 1989). Publisher name and city are
% stored in separate fields so styles can format them independently.
@book{      cole1989,
address = {London},
author = {Cole, Murray I},
keyword = {Algorithmic skeletons},
link = {http://homepages.inf.ed.ac.uk/mic/Pubs/skeletonbook.pdf},
mendeley-tags = {Algorithmic skeletons},
publisher = {Pitman},
title = {Algorithmic Skeletons: Structured Management of Parallel
Computation},
year = {1989}
}

% Thesis on the PetaBricks language and compiler.
% NOTE(review): ansel2014 is also typed as a phdthesis by the same author;
% confirm the entry type of this one.
@phdthesis{    ansel2009,
author = {Ansel, Jason},
school = {Massachusetts Institute of Technology},
title = {{PetaBricks}: a language and compiler for algorithmic
choice},
year = {2009}
}

% Magazine article on PetaBricks. The month uses the predefined macro and
% the camel-case name is brace-protected against style recasing.
@article{    ansel2010,
author = {Ansel, Jason and Chan, Cy},
doi = {10.1145/1836543.1836554},
issn = {15284972},
journal = {XRDS: Crossroads, The ACM Magazine for Students},
link = {http://dl.acm.org/citation.cfm?doid=1836543.1836554},
month = sep,
number = {1},
pages = {32},
title = {{PetaBricks}},
volume = {17},
year = {2010}
}

% Conference paper on the SiblingRivalry online autotuner.
% The second author's surname is O'Reilly; the export had split it.
% NOTE(review): the author list may be incomplete -- verify against the DOI.
@inproceedings{    ansel2012,
author = {Ansel, Jason and O'Reilly, Una-May},
booktitle = {Proceedings of the 2012 International Conference on
Compilers, Architectures and Synthesis for Embedded
Systems},
doi = {10.1145/2380403.2380425},
isbn = {9781450314244},
keyword = {autotuning,evolutionary algorithm,genetic algorithm},
link = {http://doi.acm.org/10.1145/2380403.2380425},
pages = {91--100},
publisher = {ACM},
title = {{SiblingRivalry}: Online Autotuning Through Local
Competitions},
year = {2012}
}

% PhD thesis on autotuning programs with algorithmic choice.
@phdthesis{ansel2014,
  author  = {Ansel, Jason},
  title   = {Autotuning Programs with Algorithmic Choice},
  school  = {Massachusetts Institute of Technology},
  year    = {2014},
  keyword = {OpenTuner,PetaBricks},
}

% Conference paper on the PHiPAC matrix-multiply methodology.
% Acronyms in the title are brace-protected against style recasing.
@inproceedings{    bilmes1997,
address = {New York, NY, USA},
author = {Bilmes, Jeff and Asanovic, Krste and Chin, Chee-Whye and
Demmel, Jim},
booktitle = {Proceedings of the 11th International Conference on
Supercomputing},
doi = {10.1145/263580.263662},
link = {http://doi.acm.org/10.1145/263580.263662},
pages = {340--347},
publisher = {ACM},
title = {Optimizing Matrix Multiply Using {PHiPAC}: A Portable,
High-performance, {ANSI C} Coding Methodology},
year = {1997}
}

% Conference paper on machine-learning-based resource management in chip
% multiprocessors. The exported "keyword" field held truncated abstract
% fragments rather than keywords and has been dropped.
@inproceedings{    bitirgen2008,
author = {Bitirgen, Ramazan and Ipek, Engin and Martinez, Jose F.},
booktitle = {2008 41st IEEE/ACM International Symposium on
Microarchitecture},
doi = {10.1109/MICRO.2008.4771801},
isbn = {978-1-4244-2836-6},
link = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4771801},
month = nov,
pages = {318--329},
publisher = {IEEE Computer Society},
title = {Coordinated Management of Multiple Interacting Resources
in Chip Multiprocessors: A Machine Learning Approach},
year = {2008}
}

% Journal article on stencil computations with SkelCL.
% The doi, issn, volume and number match Parallel Processing Letters (same
% ISSN and DOI prefix as collins2012), so the journal field names it.
% NOTE(review): pages 23--30 and the isbn value appear to come from a
% different (workshop) record -- verify against the DOI.
@article{    breuer2014,
author = {Breuer, Stefan and Steuwer, Michel and Gorlatch, Sergei},
doi = {10.1142/S0129626414410059},
isbn = {0129626414},
issn = {0129-6264},
journal = {Parallel Processing Letters},
keyword = {gpu,manycores,opencl,skelcl,skeletons,stencils},
number = {3},
pages = {23--30},
title = {High-Level Programming of Stencil Computations on
Multi-{GPU} Systems Using the {SkelCL} Library},
volume = {24},
year = {2014}
}

% Survey article on hyper-heuristics.
% The umlaut in the sixth author's surname is written as a BibTeX special
% character so it parses and sorts as one letter. The exported "isbn" only
% repeated the ISSN and has been dropped.
@article{    burke2013,
author = {Burke, Edmund K and Gendreau, Michel and Hyde, Matthew and
Kendall, Graham and Ochoa, Gabriela and {\"O}zcan, Ender
and Qu, Rong},
doi = {10.1057/jors.2013.71},
issn = {0160-5682},
journal = {Journal of the Operational Research Society},
keyword = {combinatorial,evolutionary
computation,hyper-heuristics,machine
learning,metaheuristics,optimisation,scheduling},
link = {http://www.palgrave-journals.com/doifinder/10.1057/jors.2013.71},
pages = {1695--1724},
title = {Hyper-heuristics: a survey of the state of the art},
volume = {64},
year = {2013}
}

% Conference paper on autotuning multigrid solvers with PetaBricks.
% The camel-case name is brace-protected against style recasing.
@inproceedings{    chan2009,
address = {New York, New York, USA},
author = {Chan, Cy and Ansel, Jason and Wong, Yee Lok and
Amarasinghe, Saman and Edelman, Alan},
booktitle = {ACM/IEEE Conference on Supercomputing},
doi = {10.1145/1654059.1654065},
isbn = {9781605587448},
link = {http://dl.acm.org/citation.cfm?doid=1654059.1654065},
publisher = {ACM Press},
title = {Autotuning multigrid with {PetaBricks}},
year = {2009}
}

% Conference paper on the PORPLE data-placement optimizer.
% Acronyms in the title are brace-protected against style recasing.
@inproceedings{    chen2014,
author = {Chen, Guoyang and Wu, Bo},
booktitle = {Microarchitecture (MICRO), 2014 47th Annual IEEE/ACM
International Symposium on},
keyword = {GPU,cache,compiler,data placement,hardware specification
language},
pages = {88--100},
publisher = {IEEE},
title = {{PORPLE}: An Extensible Optimizer for Portable Data
Placement on {GPU}},
year = {2014}
}

% Conference paper on the PATUS stencil code generation framework.
% The month uses the predefined macro; the acronym is brace-protected.
@inproceedings{    christen2011,
author = {Christen, Matthias and Schenk, Olaf and Burkhart, Helmar},
booktitle = {Parallel \& Distributed Processing Symposium (IPDPS), 2011
IEEE International},
doi = {10.1109/IPDPS.2011.70},
isbn = {978-1-61284-372-8},
keyword = {autotuning,code generation,high performance
computing,stencil computations},
link = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6012879},
month = may,
pages = {676--687},
publisher = {IEEE},
title = {{PATUS}: A Code Generation and Autotuning Framework for
Parallel Iterative Stencil Computations on Modern
Microarchitectures},
year = {2011}
}

% Journal article: a manifesto for skeletal parallel programming.
% The month uses the predefined macro so styles can expand it.
@article{    cole2004,
author = {Cole, Murray I},
doi = {10.1016/j.parco.2003.12.002},
issn = {01678191},
journal = {Parallel Computing},
link = {http://linkinghub.elsevier.com/retrieve/pii/S0167819104000080},
month = mar,
number = {3},
pages = {389--406},
publisher = {Elsevier},
title = {Bringing skeletons out of the closet: a pragmatic
manifesto for skeletal parallel programming},
volume = {30},
year = {2004}
}

% Journal article on auto-tuning parallel skeletons.
% The month uses the predefined macro; keywords that differed only in
% letter case have been merged.
@article{    collins2012,
author = {Collins, Alexander and Fensch, Christian and Leather, Hugh},
doi = {10.1142/S0129626412400051},
issn = {0129-6264},
journal = {Parallel Processing Letters},
keyword = {FastFlow,Multicore,Optimization space exploration,Parallel
skeletons},
link = {http://www.worldscientific.com/doi/abs/10.1142/S0129626412400051},
mendeley-tags = {FastFlow,Multicore,Optimization space exploration,Parallel
skeletons},
month = jun,
number = {02},
pages = {1240005},
title = {Auto-Tuning Parallel Skeletons},
volume = {22},
year = {2012}
}

% Conference paper on MaSiF, machine-learning-guided skeleton auto-tuning.
% Typed as inproceedings with the conference name in booktitle, since the
% venue is a conference rather than a journal.
@inproceedings{    collins2013,
author = {Collins, Alexander and Fensch, Christian and Leather, Hugh
and Cole, Murray},
booktitle = {20th Annual International Conference on High Performance
Computing (HiPC)},
doi = {10.1109/HiPC.2013.6799098},
isbn = {978-1-4799-0730-4},
link = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6799098},
month = dec,
pages = {186--195},
publisher = {IEEE},
title = {{MaSiF}: Machine Learning Guided Auto-tuning of Parallel
Skeletons},
year = {2013}
}

% Conference paper characterizing Intel Threading Building Blocks.
% The month uses the predefined macro; the product name is brace-protected.
@inproceedings{    contreras2008,
author = {Contreras, Gilberto and Martonosi, Margaret},
booktitle = {Workload Characterization, 2008. IISWC 2008. IEEE
International Symposium on},
doi = {10.1109/IISWC.2008.4636091},
isbn = {978-1-4244-2777-2},
link = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4636091},
month = oct,
pages = {57--66},
publisher = {IEEE},
title = {Characterizing and improving the performance of {Intel
Threading Building Blocks}},
year = {2008}
}

% Workshop paper on auto-tuning the SkePU skeleton framework.
% Proper nouns and acronyms in the title are brace-protected.
@inproceedings{    dastgeer2011,
author = {Dastgeer, Usman and Enmyren, Johan and Kessler, Christoph
W.},
booktitle = {Proceedings of the 4th International Workshop on Multicore
Software Engineering},
isbn = {9781450305778},
link = {http://dl.acm.org/citation.cfm?id=1984697},
pages = {25--32},
publisher = {ACM},
title = {Auto-tuning {SkePU}: a multi-backend skeleton programming
framework for multi-{GPU} systems},
year = {2011}
}

% Thesis on skeleton programming for GPU-based systems.
% The key carries an "a" suffix because "dastgeer2011" is already used by
% the workshop paper; a repeated key makes BibTeX discard this entry.
% NOTE(review): school and type were absent from the export and are filled
% in from the thesis series number -- confirm against the publication.
@phdthesis{    dastgeer2011a,
author = {Dastgeer, Usman},
isbn = {9789173930666},
number = {1504},
pages = {107},
school = {Link{\"o}ping University},
title = {Skeleton Programming for Heterogeneous {GPU}-based
Systems},
type = {Licentiate thesis},
year = {2011}
}

% Journal article on smart containers and skeleton programming.
% The acronym in the title is brace-protected against style recasing.
@article{    dastgeer2015a,
author = {Dastgeer, Usman and Kessler, Christoph},
doi = {10.1007/s10766-015-0357-6},
issn = {0885-7458},
journal = {International Journal of Parallel Programming},
keyword = {gpu-based systems,memory management,runtime
optimizations,skeleton programming,skepu,smart containers},
link = {http://link.springer.com/10.1007/s10766-015-0357-6},
pages = {1--25},
publisher = {Springer},
title = {Smart Containers and Skeleton Programming for {GPU}-Based
Systems},
year = {2015}
}

% Conference paper on online machine learning for multicore data structures.
@inproceedings{eastep2011,
  author    = {Eastep, Jonathan and Wingate, David and Agarwal, Anant},
  title     = {Smart Data Structures: An Online Machine Learning Approach to Multicore Data Structures},
  booktitle = {Proceedings of the 8th ACM International Conference on Autonomic Computing},
  publisher = {ACM},
  address   = {New York, NY, USA},
  pages     = {11--20},
  year      = {2011},
  doi       = {10.1145/1998582.1998587},
  isbn      = {9781450306072},
  link      = {http://doi.acm.org/10.1145/1998582.1998587},
  keyword   = {auto-tuning,autonomic,concurrent data structures,performance optimization,self-aware,synchronization},
}

% Workshop paper introducing the SkePU skeleton library.
% Author names are given in full, matching the spelling used for the same
% people in dastgeer2011; acronyms in the title are brace-protected.
@inproceedings{    enmyren2010,
author = {Enmyren, Johan and Kessler, Christoph W.},
booktitle = {Proceedings of the fourth international workshop on
High-level parallel programming and applications},
keyword = {CUDA,Data Parallelism,GPU,OpenCL,Skeleton Programming},
link = {http://dl.acm.org/citation.cfm?id=1863487},
pages = {5--14},
publisher = {ACM},
title = {{SkePU}: a multi-backend skeleton programming library for
multi-{GPU} systems},
year = {2010}
}

% Journal article on summarizing benchmark results correctly.
@article{fleming1986,
  author  = {Fleming, Philip J. and Wallace, John J.},
  title   = {How not to lie with statistics: the correct way to summarize benchmark results},
  journal = {Communications of the ACM},
  volume  = {29},
  number  = {3},
  pages   = {218--221},
  year    = {1986},
  doi     = {10.1145/5666.5673},
  issn    = {00010782},
}

% Journal article on Milepost GCC, a machine-learning-enabled compiler.
% The last author's apostrophe is plain ASCII (as in the other O'Boyle
% entries) so classic 8-bit BibTeX handles it; the month uses the macro.
@article{    fursin2011,
author = {Fursin, Grigori and Kashnikov, Yuriy and Memon, Abdul
Wahid and Chamski, Zbigniew and Temam, Olivier and
Namolaru, Mircea and Yom-Tov, Elad and Mendelson, Bilha and
Zaks, Ayal and Courtois, Eric and Bodin, Francois and
Barnard, Phil and Ashton, Elton and Bonilla, Edwin and
Thomson, John and Williams, Christopher K. I. and
O'Boyle, Michael},
doi = {10.1007/s10766-010-0161-2},
issn = {0885-7458},
journal = {International Journal of Parallel Programming},
keyword = {Adaptive compilation,Adaptive compiler,Automatic
performance tuning,Collective optimization,Continuous
optimization,Empirical performance tuning,Feedback-directed
compilation,Iterative compilation,Machine learning,Machine
learning compiler,Multi-objective optimization,Optimization
prediction,Optimization repository,Portable
optimization,Program characterization,Program
features,Self-tuning compiler},
link = {http://link.springer.com/10.1007/s10766-010-0161-2},
month = jan,
number = {3},
pages = {296--327},
publisher = {Springer},
title = {{Milepost GCC}: Machine Learning Enabled Self-tuning
Compiler},
volume = {39},
year = {2011}
}

% Workshop paper arguing for machine learning in multicore optimization.
% The apostrophe in the workshop acronym is plain ASCII so classic 8-bit
% BibTeX handles it.
@inproceedings{    ganapathi2009,
author = {Ganapathi, Archana and Datta, Kaushik and Fox, Armando and
Patterson, David},
booktitle = {First USENIX Workshop on Hot Topics in Parallelism
(HotPar'09)},
title = {A case for machine learning to optimize multicore
performance},
year = {2009}
}

% Conference paper on statistically rigorous Java performance evaluation.
% Corrects the ordinal casing ("22nd") and a keyword typo; the month uses
% the predefined macro and the language name is brace-protected.
@inproceedings{    georges2007,
address = {New York, NY, USA},
author = {Georges, Andy and Buytaert, Dries and Eeckhout, Lieven},
booktitle = {Proceedings of the 22nd Annual ACM SIGPLAN Conference on
Object-oriented Programming Systems and Applications},
doi = {10.1145/1297027.1297033},
isbn = {9781595937865},
issn = {03621340},
keyword = {benchmarking,data analysis,java,methodology,statistics},
link = {http://doi.acm.org/10.1145/1297027.1297033},
month = oct,
number = {10},
pages = {57},
publisher = {ACM},
title = {Statistically Rigorous {Java} Performance Evaluation},
volume = {42},
year = {2007}
}

% Symposium paper on data-transfer cost in CPU vs. GPU comparisons.
% Typed as inproceedings with the symposium in booktitle, since the venue
% is a conference rather than a journal; acronyms are brace-protected.
@inproceedings{    gregg2011,
author = {Gregg, Chris and Hazelwood, Kim},
booktitle = {ISPASS 2011 - IEEE International Symposium on Performance
Analysis of Systems and Software},
doi = {10.1109/ISPASS.2011.5762730},
isbn = {9781612843681},
pages = {134--144},
title = {Where is the data? Why you cannot debate {CPU} vs. {GPU}
performance without the answer},
year = {2011}
}

% Conference paper on mapping data-parallel programs to OpenCL.
% The third author's initials had a stray "Mfp" token appended by the
% export; the link field had two URLs glued together with an escaped
% newline and now keeps only the publisher URL.
@inproceedings{    grewe2013,
author = {Grewe, Dominik and Wang, Zheng and O'Boyle, Michael F. P.},
booktitle = {Code Generation and Optimization (CGO), 2013 IEEE/ACM
International Symposium on},
doi = {10.1109/CGO.2013.6494993},
isbn = {9781467355254},
keyword = {gpu,machine-learning mapping,opencl},
link = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=6494993},
pages = {1--10},
publisher = {IEEE},
title = {Portable mapping of data parallel programs to {OpenCL} for
heterogeneous systems},
year = {2013}
}

% Conference paper on stencil code generation for GPUs.
% The diaeresis in the second author's given name is written as a BibTeX
% special character; the proceedings title sits in booktitle under an
% inproceedings type, since the venue is a conference.
@inproceedings{    holewinski2012,
author = {Holewinski, Justin and Pouchet, Louis-No{\"e}l and
Sadayappan, P.},
booktitle = {Proceedings of the 26th ACM International Conference on
Supercomputing},
doi = {10.1145/2304576.2304619},
isbn = {978-1-4503-1316-2},
keyword = {gpu,opencl,overlapped tiling,stencil},
link = {http://doi.acm.org/10.1145/2304576.2304619},
pages = {311--320},
title = {High-performance Code Generation for Stencil Computations
on {GPU} Architectures},
year = {2012}
}

% Magazine article: "The Vision of Autonomic Computing".
% The export had stored the authors' given names as surnames; the names
% are restored in "Last, First" form. The citation key is left unchanged
% so existing citations keep working.
@article{    jeffrey2003,
author = {Kephart, Jeffrey O. and Chess, David M.},
journal = {Computer},
number = {1},
pages = {41--50},
publisher = {IEEE},
title = {The Vision of Autonomic Computing},
volume = {36},
year = {2003}
}

% Article on the Denali superoptimizer.
% The system name is brace-protected against style recasing.
@article{    joshi2002,
author = {Joshi, Rajeev and Nelson, Greg and Randall, Keith},
doi = {10.1145/543552.512566},
isbn = {1-58113-463-0},
issn = {03621340},
journal = {ACM SIGPLAN Notices},
keyword = {optimizing compiler,superoptimizer},
number = {5},
pages = {304},
publisher = {ACM},
title = {{Denali}: a goal-directed superoptimizer},
volume = {37},
year = {2002}
}

% Conference paper on an auto-tuning framework for stencil computations.
% The proceedings title sits in booktitle under an inproceedings type
% (matching phillips2010, which cites the same proceedings); the fourth
% author's surname is corrected from "Shall" to "Shalf".
@inproceedings{    kamil2010,
author = {Kamil, Shoaib and Chan, Cy and Oliker, Leonid and Shalf, John
and Williams, Samuel},
booktitle = {Proceedings of the 2010 IEEE International Symposium on
Parallel and Distributed Processing, IPDPS 2010},
doi = {10.1109/IPDPS.2010.5470421},
isbn = {9781424464432},
issn = {15302075},
link = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=5470421},
title = {An auto-tuning framework for parallel multicore stencil
computations},
year = {2010}
}

% Conference paper comparing CUDA and OpenCL performance.
% Acronyms in the title are brace-protected against style recasing.
% NOTE(review): the citation key and the arXiv id (1005.2581, dated 2010)
% do not match the first author or the 2011 conference metadata -- this
% record may merge two different papers; verify which one is intended.
@inproceedings{    karimi2010,
archiveprefix = {arXiv},
arxivid = {1005.2581},
author = {Fang, Jianbin and Varbanescu, Ana Lucia and Sips, Henk},
booktitle = {Parallel Processing (ICPP), 2011 International Conference
on},
doi = {10.1109/ICPP.2011.45},
eprint = {1005.2581},
isbn = {978-1-4577-1336-1},
link = {http://arxiv.org/abs/1005.2581},
pages = {216--225},
publisher = {IEEE},
title = {A Comprehensive Performance Comparison of {CUDA} and
{OpenCL}},
year = {2011}
}

% Workshop paper on performance and portability of OpenCL programs.
% The API name is brace-protected against style recasing.
@inproceedings{    komatsu2010,
author = {Komatsu, Kazuhiko and Sato, Katsuto and Arai, Yusuke and
Koyama, Kentaro and Takizawa, Hiroyuki and Kobayashi, Hiroaki},
booktitle = {The fifth international workshop on automatic performance
tuning},
pages = {7},
title = {Evaluating performance and portability of {OpenCL}
programs},
year = {2010}
}

% Conference paper on selecting between competing compiler optimizations.
@inproceedings{leather2009,
  author    = {Leather, Hugh and O'Boyle, Michael and Worton, Bruce},
  title     = {Raced Profiles: Efficient Selection of Competing Compiler Optimizations},
  booktitle = {LCTES '09: Proceedings of the ACM SIGPLAN/SIGBED 2009 Conference on Languages, Compilers, and Tools for Embedded Systems},
  address   = {Dublin},
  pages     = {1--10},
  year      = {2009},
}

% Conference paper on mapping nested parallel patterns onto GPUs.
% The acronym in the title is brace-protected against style recasing.
% NOTE(review): the key has no year suffix, unlike the rest of the file;
% it is left unchanged so existing citations keep working.
@inproceedings{    lee,
author = {Lee, Hyoukjoong and Brown, Kevin J and Sujeeth, Arvind K
and Rompf, Tiark and Olukotun, Kunle},
booktitle = {Microarchitecture (MICRO), 2014 47th Annual IEEE/ACM
International Symposium on},
doi = {10.1109/MICRO.2014.23},
pages = {63--74},
publisher = {IEEE},
title = {Locality-Aware Mapping of Nested Parallel Patterns on
{GPUs}},
year = {2014}
}

% Article comparing GPU and CPU throughput.
% Keywords that the export had split at line-break hyphens ("perfor-" /
% "mance measurement", "throughput comput-") are rejoined; acronyms in the
% title are brace-protected.
@article{    lee2010,
author = {Lee, Victor W. and Hammarlund, Per and Singhal, Ronak and
Dubey, Pradeep and Kim, Changkyu and Chhugani, Jatin and
Deisher, Michael and Kim, Daehyun and Nguyen, Anthony D.
and Satish, Nadathur and Smelyanskiy, Mikhail and
Chennupaty, Srinivas},
doi = {10.1145/1816038.1816021},
isbn = {9781450300537},
issn = {01635964},
journal = {ACM SIGARCH Computer Architecture News},
keyword = {cpu architecture,gpu architecture,performance
measurement,performance analysis,software
optimization,throughput computing},
pages = {451},
title = {Debunking the {100X} {GPU} vs. {CPU} myth},
volume = {38},
year = {2010}
}

% Journal article on the PARTANS stencil autotuning framework.
% Acronyms in the title are brace-protected against style recasing.
@article{    lutz2013,
author = {Lutz, Thibaut and Fensch, Christian and Cole, Murray},
doi = {10.1145/2400682.2400718},
issn = {15443566},
journal = {ACM Transactions on Architecture and Code Optimization},
link = {http://dl.acm.org/citation.cfm?doid=2400682.2400718},
number = {4},
pages = {1--24},
title = {{PARTANS}: An Autotuning Framework for Stencil Computation
on Multi-{GPU} Systems},
volume = {9},
year = {2013}
}

% Conference paper on automatic thread-coarsening for graphics processors.
@inproceedings{magni2014,
  author    = {Magni, Alberto and Dubach, Christophe and O'Boyle, Michael},
  title     = {Automatic optimization of thread-coarsening for graphics processors},
  booktitle = {International Conference on Parallel Architectures and Compilation},
  pages     = {455--466},
  year      = {2014},
  doi       = {10.1145/2628071.2628087},
  isbn      = {9781450328098},
  link      = {http://dl.acm.org/citation.cfm?doid=2628071.2628087},
}

% Article on the superoptimizer.
@article{massalin1987,
  author  = {Massalin, Henry},
  title   = {Superoptimizer -- A Look at the Smallest Program},
  journal = {ACM SIGPLAN Notices},
  volume  = {22},
  number  = {10},
  pages   = {122--126},
  year    = {1987},
  doi     = {10.1145/36206.36194},
  isbn    = {0897912381},
}

% Workshop paper on heuristic construction with active learning.
@inproceedings{ogilvie2015,
  author    = {Ogilvie, William F. and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh},
  title     = {Intelligent Heuristic Construction with Active Learning},
  booktitle = {18th International Workshop on Compilers for Parallel Computing},
  year      = {2015},
}

% Conference paper on the Himeno benchmark on GPU clusters.
% The proper noun and acronyms in the title are brace-protected.
@inproceedings{    phillips2010,
author = {Phillips, Everett H. and Fatica, Massimiliano},
booktitle = {Proceedings of the 2010 IEEE International Symposium on
Parallel and Distributed Processing, IPDPS 2010},
doi = {10.1109/IPDPS.2010.5470394},
isbn = {9781424464432},
issn = {1530-2075},
pages = {1--10},
title = {Implementing the {Himeno} benchmark with {CUDA} on {GPU}
clusters},
year = {2010}
}

% Symposium paper on performance portability of OpenCL kernels.
% The export had split two surnames ("D'Haene", "De Bosschere") into
% surname plus stray initial; both are restored in "Last, First" form.
@inproceedings{    rul2010,
author = {Rul, Sean and Vandierendonck, Hans and D'Haene, Joris and
De Bosschere, Koen},
booktitle = {2010 Symposium on Application Accelerators in High
Performance Computing (SAAHPC'10)},
pages = {4--6},
title = {An Experimental Study on Performance Portability of
{OpenCL} Kernels},
year = {2010}
}

% Paper on an iterative compiler for implicit parallelism.
% The first author's compound surname is kept together before the comma
% and accents are written as BibTeX special characters.
% NOTE(review): confirm the surname split ("Calderon Trilla") with the
% published paper.
@inproceedings{    runciman2014,
author = {Calder{\'o}n Trilla, Jos{\'e} Manuel and Runciman, Colin},
booktitle = {IFL},
keyword = {Automatic parallelism,Feedback Directed
Compilation,Implicit Parallelism,Iterative Compilation,Lazy
Functional Languages,Projections,Strictness Analysis},
title = {An Iterative Compiler for Implicit Parallelism},
year = {2014}
}

% Conference paper on pruning the optimization space for a GPU.
% A lowercase middle initial ("John a.") is capitalised; the acronym in
% the title is brace-protected.
@inproceedings{    ryoo2008,
address = {New York, New York, USA},
author = {Ryoo, Shane and Rodrigues, Christopher I. and Stone, Sam
S. and Baghsorkhi, Sara S. and Ueng, Sain-Zee and Stratton,
John A. and Hwu, Wen-mei W.},
booktitle = {Proceedings of the 6th annual IEEE/ACM international
symposium on Code generation and optimization},
doi = {10.1145/1356058.1356084},
isbn = {9781595939784},
keyword = {gpgpu,optimization,parallel computing},
link = {http://portal.acm.org/citation.cfm?doid=1356058.1356084},
pages = {195--204},
publisher = {ACM Press},
title = {Program optimization space pruning for a multithreaded
{GPU}},
year = {2008}
}

% Symposium paper on optimization principles for a GPU using CUDA.
% The proceedings title sits in booktitle under an inproceedings type,
% since the venue is a symposium; acronyms are brace-protected.
@inproceedings{    ryoo2008a,
author = {Ryoo, Shane and Rodrigues, Christopher I. and Baghsorkhi,
Sara S. and Stone, Sam S. and Kirk, David B. and Hwu,
Wen-mei W.},
booktitle = {Proceedings of the 13th ACM SIGPLAN Symposium on
Principles and practice of parallel programming - PPoPP
'08},
doi = {10.1145/1345206.1345220},
isbn = {9781595937957},
issn = {00778923},
keyword = {GPU computing,parallel computing},
link = {http://portal.acm.org/citation.cfm?doid=1345206.1345220},
pages = {73},
title = {Optimization principles and application performance
evaluation of a multithreaded {GPU} using {CUDA}},
year = {2008}
}

% Article on improving compiler heuristics with machine learning.
% The last author's surname is O'Reilly; the export had split it.
% NOTE(review): the author list may be incomplete -- verify.
@article{    stephenson2003,
author = {Stephenson, Mark and Martin, Martin and O'Reilly,
Una-May},
isbn = {1581136625},
journal = {ACM SIGPLAN Notices},
number = {5},
pages = {77--90},
title = {Meta Optimization: Improving Compiler Heuristics with
Machine Learning},
volume = {38},
year = {2003}
}

% Workshop paper introducing the SkelCL skeleton library.
% The month uses the predefined macro; "PhD" casing matches steuwer2012;
% the title dash is an en-dash and proper nouns are brace-protected.
@inproceedings{    steuwer2011,
author = {Steuwer, Michel and Kegel, Philipp and Gorlatch, Sergei},
booktitle = {Parallel and Distributed Processing Workshops and PhD
Forum (IPDPSW), 2011 IEEE International Symposium on},
doi = {10.1109/IPDPS.2011.269},
isbn = {978-1-61284-425-1},
keyword = {Algorithmic Skeletons,CUDA,GPU Computing,GPU
Programming,Multi-GPU Systems,OpenCL,SkelCL},
link = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6008967},
month = may,
pages = {1176--1182},
publisher = {IEEE},
title = {{SkelCL} -- A Portable Skeleton Library for High-Level {GPU}
Programming},
year = {2011}
}

% Workshop paper on multi-GPU programming with SkelCL.
% The month uses the predefined macro; the publisher acronym is upper
% case like the other IEEE entries; proper nouns are brace-protected.
@inproceedings{    steuwer2012,
author = {Steuwer, Michel and Kegel, Philipp and Gorlatch, Sergei},
booktitle = {Parallel and Distributed Processing Symposium Workshops \&
PhD Forum (IPDPSW), 2012 IEEE 26th International},
doi = {10.1109/IPDPSW.2012.229},
isbn = {978-1-4673-0974-5},
keyword = {Algorithmic Skeletons,GPU Computing,GPU
Programming,Multi-GPU,OpenCL,SkelCL,Systems},
link = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6270864},
month = may,
pages = {1858--1865},
publisher = {IEEE},
title = {Towards High-Level Programming of Multi-{GPU} Systems Using
the {SkelCL} Library},
year = {2012}
}

% Journal article on medical imaging with SkelCL.
% The month uses the predefined macro; proper nouns and acronyms in the
% title are brace-protected.
@article{    steuwer2013,
author = {Steuwer, Michel and Gorlatch, Sergei},
doi = {10.1016/j.procs.2013.05.239},
issn = {18770509},
journal = {Procedia Computer Science},
keyword = {Algorithmic Skeletons,Image Reconstruction,LM OSEM
Algorithm,Multi-GPU Computing,SkelCL},
link = {http://linkinghub.elsevier.com/retrieve/pii/S1877050913003827},
month = jan,
pages = {749--758},
publisher = {Elsevier B.V.},
title = {High-level Programming for Medical Imaging on Multi-{GPU}
Systems Using the {SkelCL} Library},
volume = {18},
year = {2013}
}

% Proceedings paper on SkelCL as an enhancement of OpenCL.
% Typed as inproceedings with the volume title in booktitle, since the
% DOI is a book-chapter DOI rather than a journal one.
% NOTE(review): the series name is inferred from the publisher and volume
% number -- confirm.
@inproceedings{    steuwer2013a,
author = {Steuwer, Michel and Gorlatch, Sergei},
booktitle = {Parallel Computing Technologies},
doi = {10.1007/978-3-642-39958-9\_24},
link = {http://dx.doi.org/10.1007/978-3-642-39958-9\_24},
pages = {258--272},
publisher = {Springer Berlin Heidelberg},
series = {Lecture Notes in Computer Science},
title = {{SkelCL}: Enhancing {OpenCL} for High-Level Programming of
Multi-{GPU} Systems},
volume = {7979},
year = {2013}
}

% Journal article introducing the allpairs skeleton.
% The acronym in the title is brace-protected against style recasing.
@article{    steuwer2014,
author = {Steuwer, Michel and Friese, Malte and Albers, Sebastian
and Gorlatch, Sergei},
doi = {10.1007/s10766-013-0265-6},
issn = {08857458},
journal = {International Journal of Parallel Programming},
keyword = {Algorithmic skeletons,Allpairs computation,GPU
computing,High-level programming models,SkelCL},
pages = {601--618},
title = {Introducing and implementing the allpairs skeleton for
programming multi-{GPU} Systems},
volume = {42},
year = {2014}
}

% arXiv preprint on rewrite rules for OpenCL code generation.
% eprint and arxivid hold the bare identifier; the "arXiv:" prefix comes
% from archiveprefix and the version suffix is not part of the id.
% The journal field is kept so classic styles still print the venue.
@article{    steuwer2015,
archiveprefix = {arXiv},
arxivid = {1502.02389},
author = {Steuwer, Michel and Fensch, Christian and Dubach, Christophe},
eprint = {1502.02389},
journal = {arXiv preprint arXiv:1502.02389},
keyword = {algorithmic patterns,code
generation,gpu,opencl,performance,portability,rewrite
rules},
title = {Patterns and Rewrite Rules for Systematic Code Generation
From High-Level Functional Patterns to High-Performance
{OpenCL} Code},
year = {2015}
}

% Conference paper on resource allocation with reinforcement learning.
@inproceedings{tesauro2005,
  author    = {Tesauro, Gerald},
  title     = {Online Resource Allocation Using Decompositional Reinforcement Learning},
  booktitle = {AAAI},
  pages     = {886--891},
  year      = {2005},
}

% Conference paper on partitioning streaming parallelism with machine
% learning. The second author's surname is O'Boyle (spelled that way in
% grewe2013 and magni2014); the export had moved the "O" behind the
% initials.
@inproceedings{    wang2010,
author = {Wang, Zheng and O'Boyle, Michael F. P.},
booktitle = {Proceedings of the 19th international conference on
Parallel architectures and compilation techniques},
isbn = {9781450301787},
keyword = {Compiler Optimization,Machine Learning,Partitioning
Streaming Parallelism},
pages = {307--318},
publisher = {ACM},
title = {Partitioning Streaming Parallelism for Multi-cores: A
Machine Learning Based Approach},
year = {2010}
}

% Paper on auto-generating and auto-tuning 3D stencil codes for GPUs.
% Acronyms in the title are brace-protected against style recasing.
% NOTE(review): the key says 2013 but year is 2012, and the doi/issn look
% like a journal record while booktitle names a symposium -- this entry
% may merge a conference and a journal version; verify.
@inproceedings{    zhang2013a,
author = {Zhang, Yongpeng and Mueller, Frank},
booktitle = {Proceedings of the Tenth International Symposium on Code
Generation and Optimization},
doi = {10.1109/TPDS.2012.160},
isbn = {9781450312066},
issn = {10459219},
keyword = {Accelerators,GPGPU programming,GPU clusters,stencil
codes},
pages = {155--164},
title = {Auto-generation and Auto-tuning of {3D} Stencil Codes on
{GPU} clusters},
year = {2012}
}