-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdevice.cl
54 lines (45 loc) · 1.36 KB
/
device.cl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
// Per-partition scan state. The kernels in this file index an array of these
// with get_global_id(0), i.e. one entry per work-item / partition.
typedef struct {
// Sum of the partition's own BATCH_SIZE input elements (stored by the
// aggregate kernel).
float aggregate;
// Sum of all input elements up to and including this partition. Holds NaN
// until a value has been stored; the reduce kernel tests it with isnan().
float inclusivePrefix;
// NOTE(review): the two disabled fields below look like an earlier explicit
// status flag that the NaN marker replaced - confirm before deleting them.
//int status; // 0 - no info, 1 - aggregate available, 2 - inclusivePrefix available
//int dummy;
} PARTITION;
/*
 * Step 3 of the scan (steps 1 & 2 are done on the host): per-partition sum.
 *
 * Work-item g sums the BATCH_SIZE consecutive elements of src that start at
 * index g * BATCH_SIZE and stores the result in p[g].aggregate.
 *
 * src - input values; elements [g * BATCH_SIZE, (g + 1) * BATCH_SIZE) are read.
 * dst - not referenced by this kernel.
 * p   - per-partition state; p[g] is written.
 *
 * p[g].inclusivePrefix is left as NaN for every partition except partition 0,
 * whose inclusive prefix equals its own sum.
 * No bounds check is performed here; the buffers are assumed to cover every
 * index touched (TODO confirm against the host code).
 */
kernel void aggregate(const global float* src, global float* dst, global PARTITION* p) {
    const size_t gid = get_global_id(0);
    const size_t first = gid * BATCH_SIZE;
    const size_t last = first + BATCH_SIZE;

    // Reset both fields to NaN before doing any work.
    p[gid].aggregate = NAN;
    p[gid].inclusivePrefix = NAN;

    // Sequential left-to-right accumulation over this partition's slice.
    float total = 0.0f;
    size_t k = first;
    while (k < last) {
        total += src[k];
        ++k;
    }

    p[gid].aggregate = total;

    // Partition 0 has no predecessors, so its prefix is already known.
    if (gid == 0) {
        p[gid].inclusivePrefix = total;
    }
}
/*
 * Steps 4 & 5 of the scan: look back over preceding partitions, then write
 * the inclusive prefix sums for this partition.
 *
 * Work-item g walks partitions g-1, g-2, ... and accumulates their sums until
 * it meets one whose inclusivePrefix is not NaN; that prefix closes the walk.
 * The accumulated value is the sum of everything before partition g. It is
 * added to a running sum over the partition's own elements to produce dst.
 *
 * src - input values; elements [g * BATCH_SIZE, (g + 1) * BATCH_SIZE) are read.
 * dst - output; the same index range receives the inclusive prefix sums.
 * p   - per-partition state. p[g].aggregate is read; p[g].inclusivePrefix is
 *       written when the walk finds a predecessor with a non-NaN prefix.
 *
 * NOTE(review): other work-items may read p[g].inclusivePrefix while this one
 * stores it, and no barrier or atomic is visible in this kernel - confirm that
 * this unsynchronised access is intended on the target devices.
 */
kernel void reduce(const global float* src, global float* dst, global PARTITION* p) {
    const size_t gid = get_global_id(0);
    const size_t first = gid * BATCH_SIZE;
    const size_t last = first + BATCH_SIZE;

    // Sum of all elements that precede this partition.
    float carry = 0.0f;

    size_t k = gid;
    while (k > 0) {
        global PARTITION* prev = &p[k - 1];

        if (isnan(prev->inclusivePrefix)) {
            // Predecessor's prefix is not known: take its own sum and keep walking.
            carry += prev->aggregate;
            --k;
            continue;
        }

        // Predecessor's prefix already covers everything before it: stop here
        // and publish this partition's own inclusive prefix.
        carry += prev->inclusivePrefix;
        p[gid].inclusivePrefix = carry + p[gid].aggregate;
        break;
    }

    // Local inclusive scan of this partition, offset by the carried-in sum.
    float running = 0.0f;
    for (size_t j = first; j < last; ++j) {
        running += src[j];
        dst[j] = running + carry;
    }
}