-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathrefs.bib
801 lines (744 loc) · 24.6 KB
/
refs.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
File generated automatically by 'cleanbib' script.
**********************************
* DO NOT EDIT THIS FILE BY HAND! *
**********************************
@book{cole1989,
  author        = {Cole, Murray I.},
  title         = {Algorithmic Skeletons: Structured Management of Parallel Computation},
  publisher     = {Pitman},
  address       = {London},
  year          = {1989},
  link          = {http://homepages.inf.ed.ac.uk/mic/Pubs/skeletonbook.pdf},
  keyword       = {Algorithmic skeletons},
  mendeley-tags = {Algorithmic skeletons}
}
@phdthesis{ansel2009,
  author = {Ansel, Jason},
  title  = {{PetaBricks}: a language and compiler for algorithmic choice},
  school = {Massachusetts Institute of Technology},
  year   = {2009}
}
@article{ansel2010,
  author  = {Ansel, Jason and Chan, Cy},
  title   = {{PetaBricks}},
  journal = {XRDS: Crossroads, The ACM Magazine for Students},
  volume  = {17},
  number  = {1},
  pages   = {32},
  month   = sep,
  year    = {2010},
  doi     = {10.1145/1836543.1836554},
  issn    = {15284972},
  link    = {http://dl.acm.org/citation.cfm?doid=1836543.1836554}
}
@inproceedings{ansel2012,
  author    = {Ansel, Jason and O'Reilly, Una-May},
  title     = {{SiblingRivalry}: Online Autotuning Through Local Competitions},
  booktitle = {Proceedings of the 2012 International Conference on Compilers, Architectures and Synthesis for Embedded Systems},
  pages     = {91--100},
  publisher = {ACM},
  year      = {2012},
  doi       = {10.1145/2380403.2380425},
  isbn      = {9781450314244},
  link      = {http://doi.acm.org/10.1145/2380403.2380425},
  keyword   = {autotuning,evolutionary algorithm,genetic algorithm}
}
@phdthesis{ansel2014,
  author  = {Ansel, Jason},
  title   = {Autotuning Programs with Algorithmic Choice},
  school  = {Massachusetts Institute of Technology},
  year    = {2014},
  keyword = {OpenTuner,PetaBricks}
}
@inproceedings{bilmes1997,
  author    = {Bilmes, Jeff and Asanovic, Krste and Chin, Chee-Whye and Demmel, Jim},
  title     = {Optimizing Matrix Multiply Using {PHiPAC}: A Portable, High-performance, {ANSI C} Coding Methodology},
  booktitle = {Proceedings of the 11th International Conference on Supercomputing},
  pages     = {340--347},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {1997},
  doi       = {10.1145/263580.263662},
  link      = {http://doi.acm.org/10.1145/263580.263662}
}
% bitirgen2008: the former keyword field held truncated fragments of the
% abstract rather than keywords, so it was dropped.
@inproceedings{bitirgen2008,
  author    = {Bitirgen, Ramazan and Ipek, Engin and Martinez, Jose F.},
  title     = {Coordinated Management of Multiple Interacting Resources in Chip Multiprocessors: A Machine Learning Approach},
  booktitle = {2008 41st IEEE/ACM International Symposium on Microarchitecture},
  pages     = {318--329},
  publisher = {IEEE Computer Society},
  month     = nov,
  year      = {2008},
  doi       = {10.1109/MICRO.2008.4771801},
  isbn      = {978-1-4244-2836-6},
  link      = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4771801}
}
% NOTE(review) breuer2014: the doi, issn, volume and number match
% Parallel Processing Letters (same ISSN as collins2012), while journal and
% pages look like the HiStencils 2014 workshop version -- this record seems to
% merge two publications; confirm which one is meant. The former isbn value
% (0129626414) was not an ISBN (digits copied from the DOI) and was removed.
@article{breuer2014,
  author  = {Breuer, Stefan and Steuwer, Michel and Gorlatch, Sergei},
  title   = {High-Level Programming of Stencil Computations on Multi-{GPU} Systems Using the {SkelCL} Library},
  journal = {HiStencils 2014},
  volume  = {24},
  number  = {3},
  pages   = {23--30},
  year    = {2014},
  doi     = {10.1142/S0129626414410059},
  issn    = {0129-6264},
  keyword = {gpu,manycores,opencl,skelcl,skeletons,stencils}
}
@article{burke2013,
  author  = {Burke, Edmund K. and Gendreau, Michel and Hyde, Matthew and Kendall, Graham and Ochoa, Gabriela and {\"O}zcan, Ender and Qu, Rong},
  title   = {Hyper-heuristics: a survey of the state of the art},
  journal = {Journal of the Operational Research Society},
  volume  = {64},
  pages   = {1695--1724},
  year    = {2013},
  doi     = {10.1057/jors.2013.71},
  issn    = {0160-5682},
  link    = {http://www.palgrave-journals.com/doifinder/10.1057/jors.2013.71},
  keyword = {combinatorial,evolutionary computation,hyper-heuristics,machine learning,metaheuristics,optimisation,scheduling}
}
@inproceedings{chan2009,
  author    = {Chan, Cy and Ansel, Jason and Wong, Yee Lok and Amarasinghe, Saman and Edelman, Alan},
  title     = {Autotuning multigrid with {PetaBricks}},
  booktitle = {ACM/IEEE Conference on Supercomputing},
  publisher = {ACM Press},
  address   = {New York, New York, USA},
  year      = {2009},
  doi       = {10.1145/1654059.1654065},
  isbn      = {9781605587448},
  link      = {http://dl.acm.org/citation.cfm?doid=1654059.1654065}
}
@inproceedings{chen2014,
  author    = {Chen, Guoyang and Wu, Bo},
  title     = {{PORPLE}: An Extensible Optimizer for Portable Data Placement on {GPU}},
  booktitle = {Microarchitecture (MICRO), 2014 47th Annual IEEE/ACM International Symposium on},
  pages     = {88--100},
  publisher = {IEEE},
  year      = {2014},
  keyword   = {GPU,cache,compiler,data placement,hardware specification language}
}
@inproceedings{christen2011,
  author    = {Christen, Matthias and Schenk, Olaf and Burkhart, Helmar},
  title     = {{PATUS}: A Code Generation and Autotuning Framework for Parallel Iterative Stencil Computations on Modern Microarchitectures},
  booktitle = {Parallel \& Distributed Processing Symposium (IPDPS), 2011 IEEE International},
  pages     = {676--687},
  publisher = {IEEE},
  month     = may,
  year      = {2011},
  doi       = {10.1109/IPDPS.2011.70},
  isbn      = {978-1-61284-372-8},
  link      = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6012879},
  keyword   = {autotuning,code generation,high performance computing,stencil computations}
}
@article{cole2004,
  author    = {Cole, Murray I.},
  title     = {Bringing skeletons out of the closet: a pragmatic manifesto for skeletal parallel programming},
  journal   = {Parallel Computing},
  volume    = {30},
  number    = {3},
  pages     = {389--406},
  publisher = {Elsevier},
  month     = mar,
  year      = {2004},
  doi       = {10.1016/j.parco.2003.12.002},
  issn      = {01678191},
  link      = {http://linkinghub.elsevier.com/retrieve/pii/S0167819104000080}
}
@article{collins2012,
  author        = {Collins, Alexander and Fensch, Christian and Leather, Hugh},
  title         = {Auto-Tuning Parallel Skeletons},
  journal       = {Parallel Processing Letters},
  volume        = {22},
  number        = {2},
  pages         = {1240005},
  month         = jun,
  year          = {2012},
  doi           = {10.1142/S0129626412400051},
  issn          = {0129-6264},
  link          = {http://www.worldscientific.com/doi/abs/10.1142/S0129626412400051},
  keyword       = {FastFlow,Multicore,Optimization space exploration,Parallel skeletons},
  mendeley-tags = {FastFlow,Multicore,Optimization space exploration,Parallel skeletons}
}
% collins2013: conference paper, so it is typed as inproceedings with the
% venue in booktitle (it was previously an article with the venue in journal).
@inproceedings{collins2013,
  author    = {Collins, Alexander and Fensch, Christian and Leather, Hugh and Cole, Murray},
  title     = {{MaSiF}: Machine Learning Guided Auto-tuning of Parallel Skeletons},
  booktitle = {20th Annual International Conference on High Performance Computing - HiPC},
  pages     = {186--195},
  publisher = {IEEE},
  month     = dec,
  year      = {2013},
  doi       = {10.1109/HiPC.2013.6799098},
  isbn      = {978-1-4799-0730-4},
  link      = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6799098}
}
@inproceedings{contreras2008,
  author    = {Contreras, Gilberto and Martonosi, Margaret},
  title     = {Characterizing and improving the performance of {Intel Threading Building Blocks}},
  booktitle = {Workload Characterization, 2008. IISWC 2008. IEEE International Symposium on},
  pages     = {57--66},
  publisher = {IEEE},
  month     = oct,
  year      = {2008},
  doi       = {10.1109/IISWC.2008.4636091},
  isbn      = {978-1-4244-2777-2},
  link      = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4636091}
}
@inproceedings{dastgeer2011,
  author    = {Dastgeer, Usman and Enmyren, Johan and Kessler, Christoph W.},
  title     = {Auto-tuning {SkePU}: a multi-backend skeleton programming framework for multi-{GPU} systems},
  booktitle = {Proceedings of the 4th International Workshop on Multicore Software Engineering},
  pages     = {25--32},
  publisher = {ACM},
  year      = {2011},
  isbn      = {9781450305778},
  link      = {http://dl.acm.org/citation.cfm?id=1984697}
}
% This thesis previously reused the key of the inproceedings entry above it.
% BibTeX reports a repeated entry and discards the second one, so no existing
% citation could resolve to this record; it now has its own key with an "a"
% suffix.
% NOTE(review): the required school field was missing; Linkoping University
% is assumed here -- please confirm.
@phdthesis{dastgeer2011a,
  author = {Dastgeer, Usman},
  title  = {Skeleton Programming for Heterogeneous {GPU}-based Systems},
  school = {Link{\"o}ping University},
  number = {1504},
  pages  = {107},
  year   = {2011},
  isbn   = {9789173930666}
}
@article{dastgeer2015a,
  author    = {Dastgeer, Usman and Kessler, Christoph},
  title     = {Smart Containers and Skeleton Programming for {GPU}-Based Systems},
  journal   = {International Journal of Parallel Programming},
  pages     = {1--25},
  publisher = {Springer},
  year      = {2015},
  doi       = {10.1007/s10766-015-0357-6},
  issn      = {0885-7458},
  link      = {http://link.springer.com/10.1007/s10766-015-0357-6},
  keyword   = {gpu-based systems,memory management,runtime optimizations,skeleton programming,skepu,smart containers}
}
@inproceedings{eastep2011,
  author    = {Eastep, Jonathan and Wingate, David and Agarwal, Anant},
  title     = {Smart Data Structures: An Online Machine Learning Approach to Multicore Data Structures},
  booktitle = {Proceedings of the 8th ACM International Conference on Autonomic Computing},
  pages     = {11--20},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2011},
  doi       = {10.1145/1998582.1998587},
  isbn      = {9781450306072},
  link      = {http://doi.acm.org/10.1145/1998582.1998587},
  keyword   = {auto-tuning,autonomic,concurrent data structures,performance optimization,self-aware,synchronization}
}
@inproceedings{enmyren2010,
  author    = {Enmyren, Johan and Kessler, Christoph W.},
  title     = {{SkePU}: a multi-backend skeleton programming library for multi-{GPU} systems},
  booktitle = {Proceedings of the fourth international workshop on High-level parallel programming and applications},
  pages     = {5--14},
  publisher = {ACM},
  year      = {2010},
  link      = {http://dl.acm.org/citation.cfm?id=1863487},
  keyword   = {CUDA,Data Parallelism,GPU,OpenCL,Skeleton Programming}
}
@article{fleming1986,
  author  = {Fleming, Philip J. and Wallace, John J.},
  title   = {How not to lie with statistics: the correct way to summarize benchmark results},
  journal = {Communications of the ACM},
  volume  = {29},
  number  = {3},
  pages   = {218--221},
  year    = {1986},
  doi     = {10.1145/5666.5673},
  issn    = {00010782}
}
@article{fursin2011,
  author    = {Fursin, Grigori and Kashnikov, Yuriy and Memon, Abdul Wahid and Chamski, Zbigniew and Temam, Olivier and Namolaru, Mircea and Yom-Tov, Elad and Mendelson, Bilha and Zaks, Ayal and Courtois, Eric and Bodin, Francois and Barnard, Phil and Ashton, Elton and Bonilla, Edwin and Thomson, John and Williams, Christopher K. I. and O'Boyle, Michael},
  title     = {{Milepost GCC}: Machine Learning Enabled Self-tuning Compiler},
  journal   = {International Journal of Parallel Programming},
  volume    = {39},
  number    = {3},
  pages     = {296--327},
  publisher = {Springer},
  month     = jan,
  year      = {2011},
  doi       = {10.1007/s10766-010-0161-2},
  issn      = {0885-7458},
  link      = {http://link.springer.com/10.1007/s10766-010-0161-2},
  keyword   = {Adaptive compilation,Adaptive compiler,Automatic performance tuning,Collective optimization,Continuous optimization,Empirical performance tuning,Feedback-directed compilation,Iterative compilation,Machine learning,Machine learning compiler,Multi-objective optimization,Optimization prediction,Optimization repository,Portable optimization,Program characterization,Program features,Self-tuning compiler}
}
@inproceedings{ganapathi2009,
  author    = {Ganapathi, Archana and Datta, Kaushik and Fox, Armando and Patterson, David},
  title     = {A case for machine learning to optimize multicore performance},
  booktitle = {First USENIX Workshop on Hot Topics in Parallelism (HotPar'09)},
  year      = {2009}
}
@inproceedings{georges2007,
  author    = {Georges, Andy and Buytaert, Dries and Eeckhout, Lieven},
  title     = {Statistically Rigorous {Java} Performance Evaluation},
  booktitle = {Proceedings of the 22nd Annual ACM SIGPLAN Conference on Object-oriented Programming Systems and Applications},
  volume    = {42},
  number    = {10},
  pages     = {57},
  publisher = {ACM},
  address   = {New York, NY, USA},
  month     = oct,
  year      = {2007},
  doi       = {10.1145/1297027.1297033},
  isbn      = {9781595937865},
  issn      = {03621340},
  link      = {http://doi.acm.org/10.1145/1297027.1297033},
  keyword   = {benchmarking,data analysis,java,methodology,statistics}
}
% gregg2011: conference paper, so it is typed as inproceedings with the
% venue in booktitle (it was previously an article with the venue in journal).
@inproceedings{gregg2011,
  author    = {Gregg, Chris and Hazelwood, Kim},
  title     = {Where is the data? {Why} you cannot debate {CPU} vs. {GPU} performance without the answer},
  booktitle = {ISPASS 2011 - IEEE International Symposium on Performance Analysis of Systems and Software},
  pages     = {134--144},
  year      = {2011},
  doi       = {10.1109/ISPASS.2011.5762730},
  isbn      = {9781612843681}
}
% grewe2013: the link field previously held two URLs joined by an escaped
% newline; only the publisher URL is kept.
@inproceedings{grewe2013,
  author    = {Grewe, Dominik and Wang, Zheng and O'Boyle, Michael F. P.},
  title     = {Portable mapping of data parallel programs to {OpenCL} for heterogeneous systems},
  booktitle = {Code Generation and Optimization (CGO), 2013 IEEE/ACM International Symposium on},
  pages     = {1--10},
  publisher = {IEEE},
  year      = {2013},
  doi       = {10.1109/CGO.2013.6494993},
  isbn      = {9781467355254},
  link      = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=6494993},
  keyword   = {gpu,machine-learning mapping,opencl}
}
% holewinski2012: conference paper, so it is typed as inproceedings with the
% venue in booktitle (it was previously an article with the venue in journal).
@inproceedings{holewinski2012,
  author    = {Holewinski, Justin and Pouchet, Louis-No{\"e}l and Sadayappan, P.},
  title     = {High-performance Code Generation for Stencil Computations on {GPU} Architectures},
  booktitle = {Proceedings of the 26th ACM International Conference on Supercomputing},
  pages     = {311--320},
  year      = {2012},
  doi       = {10.1145/2304576.2304619},
  isbn      = {978-1-4503-1316-2},
  link      = {http://doi.acm.org/10.1145/2304576.2304619},
  keyword   = {gpu,opencl,overlapped tiling,stencil}
}
% jeffrey2003: the citation key is kept for existing citations. The author
% field previously used the given names (Jeffrey O., David M.) as surnames.
@article{jeffrey2003,
  author    = {Kephart, Jeffrey O. and Chess, David M.},
  title     = {The Vision of Autonomic Computing},
  journal   = {Computer},
  volume    = {36},
  number    = {1},
  pages     = {41--50},
  publisher = {IEEE},
  year      = {2003}
}
@article{joshi2002,
  author    = {Joshi, Rajeev and Nelson, Greg and Randall, Keith},
  title     = {{Denali}: a goal-directed superoptimizer},
  journal   = {ACM SIGPLAN Notices},
  volume    = {37},
  number    = {5},
  pages     = {304},
  publisher = {ACM},
  year      = {2002},
  doi       = {10.1145/543552.512566},
  isbn      = {1-58113-463-0},
  issn      = {03621340},
  keyword   = {optimizing compiler,superoptimizer}
}
% kamil2010: conference paper, so it is typed as inproceedings with the
% venue in booktitle (it was previously an article with the venue in journal).
@inproceedings{kamil2010,
  author    = {Kamil, Shoaib and Chan, Cy and Oliker, Leonid and Shalf, John and Williams, Samuel},
  title     = {An auto-tuning framework for parallel multicore stencil computations},
  booktitle = {Proceedings of the 2010 IEEE International Symposium on Parallel and Distributed Processing, IPDPS 2010},
  year      = {2010},
  doi       = {10.1109/IPDPS.2010.5470421},
  isbn      = {9781424464432},
  issn      = {15302075},
  link      = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=5470421}
}
% NOTE(review) karimi2010: this record appears to merge two papers. The key
% and the arXiv fields (1005.2581) presumably refer to Karimi et al., "A
% Performance Comparison of CUDA and OpenCL" (2010), while the author, title,
% booktitle, doi, pages and year describe the ICPP 2011 paper by Fang et al.
% Confirm which work is meant and drop the fields of the other one.
@inproceedings{karimi2010,
  author        = {Fang, Jianbin and Varbanescu, Ana Lucia and Sips, Henk},
  title         = {A Comprehensive Performance Comparison of {CUDA} and {OpenCL}},
  booktitle     = {Parallel Processing (ICPP), 2011 International Conference on},
  pages         = {216--225},
  publisher     = {IEEE},
  year          = {2011},
  doi           = {10.1109/ICPP.2011.45},
  isbn          = {978-1-4577-1336-1},
  archiveprefix = {arXiv},
  arxivid       = {1005.2581},
  eprint        = {1005.2581},
  link          = {http://arxiv.org/abs/1005.2581}
}
@inproceedings{komatsu2010,
  author    = {Komatsu, Kazuhiko and Sato, Katsuto and Arai, Yusuke and Koyama, Kentaro and Takizawa, Hiroyuki and Kobayashi, Hiroaki},
  title     = {Evaluating performance and portability of {OpenCL} programs},
  booktitle = {The fifth international workshop on automatic performance tuning},
  pages     = {7},
  year      = {2010}
}
@inproceedings{leather2009,
  author    = {Leather, Hugh and O'Boyle, Michael and Worton, Bruce},
  title     = {Raced Profiles: Efficient Selection of Competing Compiler Optimizations},
  booktitle = {LCTES '09: Proceedings of the ACM SIGPLAN/SIGBED 2009 Conference on Languages, Compilers, and Tools for Embedded Systems},
  pages     = {1--10},
  address   = {Dublin},
  year      = {2009}
}
@inproceedings{lee,
  author    = {Lee, Hyoukjoong and Brown, Kevin J. and Sujeeth, Arvind K. and Rompf, Tiark and Olukotun, Kunle},
  title     = {Locality-Aware Mapping of Nested Parallel Patterns on {GPUs}},
  booktitle = {Microarchitecture (MICRO), 2014 47th Annual IEEE/ACM International Symposium on},
  pages     = {63--74},
  publisher = {IEEE},
  year      = {2014},
  doi       = {10.1109/MICRO.2014.23}
}
% lee2010: the keywords were rejoined where PDF hyphenation had split them
% ("perfor-" + "mance measurement", "throughput comput-").
@article{lee2010,
  author  = {Lee, Victor W. and Hammarlund, Per and Singhal, Ronak and Dubey, Pradeep and Kim, Changkyu and Chhugani, Jatin and Deisher, Michael and Kim, Daehyun and Nguyen, Anthony D. and Satish, Nadathur and Smelyanskiy, Mikhail and Chennupaty, Srinivas},
  title   = {Debunking the {100X GPU} vs. {CPU} myth},
  journal = {ACM SIGARCH Computer Architecture News},
  volume  = {38},
  pages   = {451},
  year    = {2010},
  doi     = {10.1145/1816038.1816021},
  isbn    = {9781450300537},
  issn    = {01635964},
  keyword = {cpu architecture,gpu architecture,performance analysis,performance measurement,software optimization,throughput computing}
}
@article{lutz2013,
  author  = {Lutz, Thibaut and Fensch, Christian and Cole, Murray},
  title   = {{PARTANS}: An Autotuning Framework for Stencil Computation on Multi-{GPU} Systems},
  journal = {ACM Transactions on Architecture and Code Optimization},
  volume  = {9},
  number  = {4},
  pages   = {1--24},
  year    = {2013},
  doi     = {10.1145/2400682.2400718},
  issn    = {15443566},
  link    = {http://dl.acm.org/citation.cfm?doid=2400682.2400718}
}
@inproceedings{magni2014,
  author    = {Magni, Alberto and Dubach, Christophe and O'Boyle, Michael},
  title     = {Automatic optimization of thread-coarsening for graphics processors},
  booktitle = {International Conference on Parallel Architectures and Compilation},
  pages     = {455--466},
  year      = {2014},
  doi       = {10.1145/2628071.2628087},
  isbn      = {9781450328098},
  link      = {http://dl.acm.org/citation.cfm?doid=2628071.2628087}
}
@article{massalin1987,
  author  = {Massalin, Henry},
  title   = {Superoptimizer -- A Look at the Smallest Program},
  journal = {ACM SIGPLAN Notices},
  volume  = {22},
  number  = {10},
  pages   = {122--126},
  year    = {1987},
  doi     = {10.1145/36206.36194},
  isbn    = {0897912381}
}
@inproceedings{ogilvie2015,
  author    = {Ogilvie, William F. and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh},
  title     = {Intelligent Heuristic Construction with Active Learning},
  booktitle = {18th International Workshop on Compilers for Parallel Computing},
  year      = {2015}
}
@inproceedings{phillips2010,
  author    = {Phillips, Everett H. and Fatica, Massimiliano},
  title     = {Implementing the {Himeno} benchmark with {CUDA} on {GPU} clusters},
  booktitle = {Proceedings of the 2010 IEEE International Symposium on Parallel and Distributed Processing, IPDPS 2010},
  pages     = {1--10},
  year      = {2010},
  doi       = {10.1109/IPDPS.2010.5470394},
  isbn      = {9781424464432},
  issn      = {1530-2075}
}
% rul2010: the surnames D'Haene and De Bosschere had been split so that the
% particle ended up among the given names.
@inproceedings{rul2010,
  author    = {Rul, Sean and Vandierendonck, Hans and D'Haene, Joris and De Bosschere, Koen},
  title     = {An Experimental Study on Performance Portability of {OpenCL} Kernels},
  booktitle = {2010 Symposium on Application Accelerators in High Performance Computing (SAAHPC'10)},
  pages     = {4--6},
  year      = {2010}
}
% NOTE(review) runciman2014: the first author's compound surname is assumed
% to be Calderon Trilla (it was previously parsed as surname "Trilla") --
% confirm.
@inproceedings{runciman2014,
  author    = {Calder{\'o}n Trilla, Jos{\'e} Manuel and Runciman, Colin},
  title     = {An Iterative Compiler for Implicit Parallelism},
  booktitle = {IFL},
  year      = {2014},
  keyword   = {Automatic parallelism,Feedback Directed Compilation,Implicit Parallelism,Iterative Compilation,Lazy Functional Languages,Projections,Strictness Analysis}
}
@inproceedings{ryoo2008,
  author    = {Ryoo, Shane and Rodrigues, Christopher I. and Stone, Sam S. and Baghsorkhi, Sara S. and Ueng, Sain-Zee and Stratton, John A. and Hwu, Wen-mei W.},
  title     = {Program optimization space pruning for a multithreaded {GPU}},
  booktitle = {Proceedings of the 6th annual IEEE/ACM international symposium on Code generation and optimization},
  pages     = {195--204},
  publisher = {ACM Press},
  address   = {New York, New York, USA},
  year      = {2008},
  doi       = {10.1145/1356058.1356084},
  isbn      = {9781595939784},
  link      = {http://portal.acm.org/citation.cfm?doid=1356058.1356084},
  keyword   = {gpgpu,optimization,parallel computing}
}
% ryoo2008a: conference paper, so it is typed as inproceedings with the
% venue in booktitle (it was previously an article with the venue in journal).
@inproceedings{ryoo2008a,
  author    = {Ryoo, Shane and Rodrigues, Christopher I. and Baghsorkhi, Sara S. and Stone, Sam S. and Kirk, David B. and Hwu, Wen-mei W.},
  title     = {Optimization principles and application performance evaluation of a multithreaded {GPU} using {CUDA}},
  booktitle = {Proceedings of the 13th ACM SIGPLAN Symposium on Principles and practice of parallel programming - PPoPP '08},
  pages     = {73},
  year      = {2008},
  doi       = {10.1145/1345206.1345220},
  isbn      = {9781595937957},
  issn      = {00778923},
  link      = {http://portal.acm.org/citation.cfm?doid=1345206.1345220},
  keyword   = {GPU computing,parallel computing}
}
@article{stephenson2003,
  author  = {Stephenson, Mark and Martin, Martin and O'Reilly, Una-May},
  title   = {Meta Optimization: Improving Compiler Heuristics with Machine Learning},
  journal = {ACM SIGPLAN Notices},
  volume  = {38},
  number  = {5},
  pages   = {77--90},
  year    = {2003},
  isbn    = {1581136625}
}
@inproceedings{steuwer2011,
  author    = {Steuwer, Michel and Kegel, Philipp and Gorlatch, Sergei},
  title     = {{SkelCL} - A Portable Skeleton Library for High-Level {GPU} Programming},
  booktitle = {Parallel and Distributed Processing Workshops and Phd Forum (IPDPSW), 2011 IEEE International Symposium on},
  pages     = {1176--1182},
  publisher = {IEEE},
  month     = may,
  year      = {2011},
  doi       = {10.1109/IPDPS.2011.269},
  isbn      = {978-1-61284-425-1},
  link      = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6008967},
  keyword   = {Algorithmic Skeletons,CUDA,GPU Computing,GPU Programming,Multi-GPU Systems,OpenCL,SkelCL}
}
@inproceedings{steuwer2012,
  author    = {Steuwer, Michel and Kegel, Philipp and Gorlatch, Sergei},
  title     = {Towards High-Level Programming of Multi-{GPU} Systems Using the {SkelCL} Library},
  booktitle = {Parallel and Distributed Processing Symposium Workshops \& PhD Forum (IPDPSW), 2012 IEEE 26th International},
  pages     = {1858--1865},
  publisher = {IEEE},
  month     = may,
  year      = {2012},
  doi       = {10.1109/IPDPSW.2012.229},
  isbn      = {978-1-4673-0974-5},
  link      = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6270864},
  keyword   = {Algorithmic Skeletons,GPU Computing,GPU Programming,Multi-GPU,OpenCL,SkelCL,Systems}
}
@article{steuwer2013,
  author    = {Steuwer, Michel and Gorlatch, Sergei},
  title     = {High-level Programming for Medical Imaging on Multi-{GPU} Systems Using the {SkelCL} Library},
  journal   = {Procedia Computer Science},
  volume    = {18},
  pages     = {749--758},
  publisher = {Elsevier B.V.},
  month     = jan,
  year      = {2013},
  doi       = {10.1016/j.procs.2013.05.239},
  issn      = {18770509},
  link      = {http://linkinghub.elsevier.com/retrieve/pii/S1877050913003827},
  keyword   = {Algorithmic Skeletons,Image Reconstruction,LM OSEM Algorithm,Multi-GPU Computing,SkelCL}
}
% steuwer2013a: chapter in a proceedings volume (book DOI, volume 7979), so
% it is typed as inproceedings with the volume title in booktitle.
% NOTE(review): the series name is assumed from the Springer volume number --
% confirm.
@inproceedings{steuwer2013a,
  author    = {Steuwer, Michel and Gorlatch, Sergei},
  title     = {{SkelCL}: Enhancing {OpenCL} for High-Level Programming of Multi-{GPU} Systems},
  booktitle = {Parallel Computing Technologies},
  series    = {Lecture Notes in Computer Science},
  volume    = {7979},
  pages     = {258--272},
  publisher = {Springer Berlin Heidelberg},
  year      = {2013},
  doi       = {10.1007/978-3-642-39958-9\_24},
  link      = {http://dx.doi.org/10.1007/978-3-642-39958-9\_24}
}
@article{steuwer2014,
  author  = {Steuwer, Michel and Friese, Malte and Albers, Sebastian and Gorlatch, Sergei},
  title   = {Introducing and implementing the allpairs skeleton for programming multi-{GPU} Systems},
  journal = {International Journal of Parallel Programming},
  volume  = {42},
  pages   = {601--618},
  year    = {2014},
  doi     = {10.1007/s10766-013-0265-6},
  issn    = {08857458},
  keyword = {Algorithmic skeletons,Allpairs computation,GPU computing,High-level programming models,SkelCL}
}
% steuwer2015: eprint and arxivid now hold the bare arXiv identifier, without
% the "arXiv:" prefix and version suffix, so eprint-aware styles can build
% the link. The journal field is kept because the article type requires it.
@article{steuwer2015,
  author        = {Steuwer, Michel and Fensch, Christian and Dubach, Christophe},
  title         = {Patterns and Rewrite Rules for Systematic Code Generation From High-Level Functional Patterns to High-Performance {OpenCL} Code},
  journal       = {arXiv preprint arXiv:1502.02389},
  year          = {2015},
  archiveprefix = {arXiv},
  arxivid       = {1502.02389},
  eprint        = {1502.02389},
  keyword       = {algorithmic patterns,code generation,gpu,opencl,performance,portability,rewrite rules}
}
@inproceedings{tesauro2005,
  author    = {Tesauro, Gerald},
  title     = {Online Resource Allocation Using Decompositional Reinforcement Learning},
  booktitle = {AAAI},
  pages     = {886--891},
  year      = {2005}
}
@inproceedings{wang2010,
  author    = {Wang, Zheng and O'Boyle, Michael F. P.},
  title     = {Partitioning Streaming Parallelism for Multi-cores: A Machine Learning Based Approach},
  booktitle = {Proceedings of the 19th international conference on Parallel architectures and compilation techniques},
  pages     = {307--318},
  publisher = {ACM},
  year      = {2010},
  isbn      = {9781450301787},
  keyword   = {Compiler Optimization,Machine Learning,Partitioning Streaming Parallelism}
}
% NOTE(review) zhang2013a: the former doi (10.1109/TPDS.2012.160) and issn
% (10459219) appear to belong to a later IEEE TPDS journal article, not to
% this CGO 2012 proceedings paper, so they were removed. Add the proceedings
% DOI once confirmed.
@inproceedings{zhang2013a,
  author    = {Zhang, Yongpeng and Mueller, Frank},
  title     = {Auto-generation and Auto-tuning of {3D} Stencil Codes on {GPU} clusters},
  booktitle = {Proceedings of the Tenth International Symposium on Code Generation and Optimization},
  pages     = {155--164},
  year      = {2012},
  isbn      = {9781450312066},
  keyword   = {Accelerators,GPGPU programming,GPU clusters,stencil codes}
}