ae2f_docs
Mlp.cl.c
1#pragma OPENCL EXTENSION cl-fast-relaxed-math : enable
2#pragma OPENCL EXTENSION pod-pushconstant : enable
3
4#if __ae2f_MACRO_GENERATED
5#define ae2fVK_clspv_IS_OPENCL 1
6#endif
7
8#define ae2f_NEED_CLASS 0
9
10
11
12
13
14#include "./Mlp.auto.h"
15
16#ifndef ACT
17#define ACT(layer_idx, r, y, i, c) *(r) = (y)[i]
18#endif
19
20#ifndef ACT_DERIV
21#define ACT_DERIV(layer_idx, r, y, i, c) *(r) = 1
22#endif
23
24#ifndef LOSS_DERIV
25#define LOSS_DERIV(r, y, y_desired, i, c) *(r) = (y)[i] - (y_desired)[i]
26#endif
27
28#define pgsz sz
29
30#define weightsz weightsz
31#define pgsz_sqr weightsz
32
33/** length of `p_layerszlist` */
34#define lsz lsz
35
36/** count of layer. */
37#define llsz (lsz - 1)
38
39/** @brief lsz * sizeof(uint32_t) */
40#define p_layerszlist CAST(__global uint* restrict, glob)
41/** @brief sizeof(ae2f_float_t) * lsz * pgsz */
42#define p_outstream (CAST(__global host_float_t* restrict, p_layerszlist + lsz))
43#define p_inp p_outstream
44
45/** @brief sizeof(ae2f_float_t) * pgsz_sqr * llsz */
46#define p_weight (p_outstream + lsz * pgsz)
47
48/** @brief sizeof(ae2f_float_t) * pgsz * llsz */
49#define p_bias (p_weight + pgsz_sqr * llsz)
50
51/** @brief sizeof(ae2f_float_t) * pgsz * llsz */
52#define p_deltastream (p_bias + llsz * pgsz)
53
54/** @brief sizeof(ae2f_float_t) * pgsz */
55#define p_goal (p_deltastream + llsz * pgsz)
56
57#define _r_inp(lidx) (p_outstream + pgsz * (lidx))
58#define _r_out(lidx) (p_outstream + pgsz * ((lidx) + 1))
59#define _r_weight(lidx) (p_weight + pgsz_sqr * (lidx))
60#define _r_bias(lidx) (p_bias + pgsz * (lidx))
61#define _r_delta(lidx) (p_deltastream + pgsz * (lidx))
62#define _r_isz(lidx) ((p_layerszlist)[lidx])
63#define _r_osz(lidx) ((p_layerszlist)[(lidx) + 1])
64
65#define r_inp _r_inp(lidx)
66#define r_out _r_out(lidx)
67#define r_weight _r_weight(lidx)
68#define r_bias _r_bias(lidx)
69#define r_delta _r_delta(lidx)
70#define r_isz _r_isz(lidx)
71#define r_osz _r_osz(lidx)
72
73#define r_inp_then _r_inp(lidx-1)
74#define r_out_then _r_out(lidx-1)
75#define r_weight_then _r_weight(lidx-1)
76#define r_bias_then _r_bias(lidx-1)
77#define r_delta_then _r_delta(lidx-1)
78#define r_isz_then _r_isz(lidx-1)
79#define r_osz_then _r_osz(lidx-1)
80
81#define loc CAST(__local ae2f_float_t*, _loc)
82#define l_inp(O_R) (((loc) + pgsz * ((lidx) O_R)))
83#define l_out(O_R) (((loc) + pgsz * ((lidx + 1) O_R)))
84
85#define lp_deltastream ((loc) + pgsz * ((2/* lsz */)))
86#define l_delta (lp_deltastream + pgsz * ((lidx) & 1))
87#define l_delta_then (lp_deltastream + pgsz * (!((lidx) & 1)))
88#define l_tmpoutc (lp_deltastream + (pgsz << 1))
89
90/** For every runners */
91#define ACT_RUN(r, y, i, c) ACT(lidx, r, y, i, c)
92#define ACT_DERIV_RUN(r, y, i, c) ACT_DERIV(lidx, r, y, i, c)
93
94#define ACT_RUN_THEN(r, y, i, c) ACT((lidx - 1), r, y, i, c)
95#define ACT_DERIV_RUN_THEN(r, y, i, c) ACT_DERIV((lidx - 1), r, y, i, c)
96
97typedef const struct sz2_t {
98 uint32_t m_lsz;
99 uint32_t m_wsz;
100} sz2_t;
101
102#undef lsz
103#undef weightsz
104#define lsz push.m_lsz
105#define weightsz push.m_wsz
106
107/**
108 * @brief loc
109 * ae2f_float_t[Page]: inp
110 * ae2f_float_t[Page]: out
111 * */
112__kernel void kPredict(__global void* restrict glob, __local uint* restrict _loc, sz2_t push) {
113 const size_t
114 oidx = get_global_id(0)
115 , iidx = get_global_id(1)
116 , sz = get_global_size(0);
117
118 size_t lidx = 0;
119 _clAtomAddF_t(__local, ae2f_float_t) v_predict;
120 ae2f_float_t r_predict;
121
122 _clSlpPredict(__local, v_predict, r_predict, l_out(&1), r_inp, r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
123
124 while(++lidx < llsz - 1) {
125 if(iidx < r_isz && !oidx) l_inp(&1)[iidx] = r_predict;
126 _clSlpPredict(__local, v_predict, r_predict, l_out(&1), l_inp(&1), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
127 }
128
129 if(iidx < r_isz && !oidx) l_inp(&1)[iidx] = r_predict;
130 _clSlpPredict(__local, v_predict, r_out[oidx], l_out(&1), l_inp(&1), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
131}
132
133
134/**
135 * @brief loc
136 * ae2f_float_t[Page]: inp
137 * ae2f_float_t[Page]: out
138 * */
139__kernel void kPredictStream(__global void* restrict glob, __local uint* restrict _loc, const sz2_t push) {
140 const size_t
141 oidx = get_global_id(0)
142 , iidx = get_global_id(1)
143 , sz = get_global_size(0);
144
145 size_t lidx = 0;
146 ae2f_float_t v_predict;
147 _clAtomAddF_t(__local, ae2f_float_t) slppredict;
148
149 _clSlpPredict(__local, slppredict, v_predict, l_out(&1), r_inp, r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
150
151
152 while(++lidx < llsz - 1) {
153 if(iidx < r_isz && !oidx) l_inp(&1)[iidx] = v_predict;
154 _clSlpPredict(__local, slppredict, v_predict, l_out(&1), l_inp(&1), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
155
156 if(oidx < r_osz && !iidx)
157 r_out[oidx] = v_predict;
158 }
159
160
161 if(iidx < r_isz && !oidx) l_inp(&1)[iidx] = v_predict;
162 _clSlpPredict(__local, slppredict, r_out[oidx], l_out(&1), l_inp(&1), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
163}
164
165ae2f_structdef(union, lrlszel_t) {
166 host_float_t m_f;
167 uint m_u;
168};
169
170const ae2f_structdef(struct, lrlsz_t)
171{
172 lrlszel_t m_lsz, m_weight, m_bias, m_wsz;
173};
174
175typedef char STATIC_ASSERT_LRLSZEL_SZ[
176 sizeof(lrlszel_t) == (sizeof(uint) > sizeof(host_float_t) ? sizeof(uint) : sizeof(host_float_t))
177 ? 1 : -1
178];
179
180typedef char STATIC_ASSERT_LRLSZ_SZ[sizeof(lrlsz_t) == sizeof(lrlszel_t) * 4 ? 1 : -1];
181
182#undef lsz
183#undef m_weight
184#undef m_bias
185#undef weightsz
186#define lsz lr.m_lsz.m_u
187#define m_weight m_weight.m_f
188#define m_bias m_bias.m_f
189#define weightsz lr.m_wsz.m_u
190
191/**
192 * @brief loc
193 * ae2f_float_t[lsz - 1][Page]: OutStream
194 * ae2f_float_t[lsz - 1][Page]: DeltaStream
195 * */
196__kernel void kFollow(__global void* restrict glob, __local uint* restrict _loc, lrlsz_t lr) {
197 size_t lidx = llsz - 1;
198 _clMlpGetHD1_t(__local, ae2f_float_t) gethd;
199 ae2f_float_t v_tmp;
200
201 const size_t
202 oidx = get_global_id(0)
203 , iidx = get_global_id(1)
204 , sz = get_global_size(0);
205
206 if(lsz < 3) {
207 /** ASSERT */
208 return;
209 }
210
211 if(oidx < r_osz && iidx < r_isz) {
213 r_inp[iidx]
214 , r_delta[oidx]
215 , r_weight
216 , lr.m_weight
217 , r_isz, iidx
218 , r_osz, oidx
219 );
220
221 if(iidx == 0) {
223 r_bias[oidx]
224 , r_delta[oidx]
225 , lr.m_bias
226 );
227 }
228 }
229
231 __local
232 , gethd
235 , r_delta
236 , iidx, r_isz
237 , oidx, r_osz
238 );
239
241 v_tmp
243 , oidx, iidx
244 , r_isz, ACT_DERIV_RUN_THEN
246 );
247
248 /** Needs to be procedural */
249 /* barrier(CLK_ALL_MEM_FENCE); */
250
251
252 while(--lidx) {
253 if(oidx < r_osz && iidx < r_isz) {
255 l_inp()[iidx]
256 , l_delta[oidx]
257 , r_weight
258 , lr.m_weight
259 , r_isz, iidx
260 , r_osz, oidx
261 );
262
263 if(iidx == 0) {
265 r_bias[oidx]
266 , l_delta[oidx]
267 , lr.m_bias
268 );
269 }
270 }
271
273 __local
274 , gethd
277 , l_delta
278 , iidx, r_isz
279 , oidx, r_osz
280 );
281
283 v_tmp
285 , oidx, iidx
286 , r_isz, ACT_DERIV_RUN_THEN
288 );
289
290 /** Needs to be procedural */
291 /** barrier(CLK_LOCAL_MEM_FENCE); */
292 }
293
294 if(oidx < r_osz && iidx < r_isz) {
296 r_inp[iidx]
297 , l_delta[oidx]
298 , r_weight
299 , lr.m_weight
300 , r_isz, iidx
301 , r_osz, oidx
302 );
303
304 if(iidx == 0) {
306 r_bias[oidx]
307 , l_delta[oidx]
308 , lr.m_bias
309 );
310 }
311 }
312#undef v_tmp
313}
314
315/**
316 * @brief loc
317 * ae2f_float_t[lsz - 1][Page]: OutStream
318 * ae2f_float_t[lsz - 1][Page]: DeltaStream
319 * */
320__kernel void kTrainAuto(__global void* restrict glob, __local uint* restrict _loc, lrlsz_t lr) {
321 size_t lidx = 0;
322 const size_t
323 oidx = get_global_id(0)
324 , iidx = get_global_id(1)
325 , sz = get_global_size(0);
326
327 ae2f_float_t tmp0, tmp2;
328
329 _clMlpGetHD1_t(__local, ae2f_float_t) gethd;
330#define tmp1 gethd.m_atom[0].m_f
331
332
333 if(lsz < 3) {
334 return;
335 }
336
337 if(iidx < r_isz && oidx == 0)
338 l_inp()[iidx] = r_inp[iidx];
339
340 for(; lidx < llsz - 1; lidx++) {
342 __local, gethd
343 , l_out()[oidx], l_out()
344 , l_inp(), r_weight, r_bias
345 , iidx, r_isz, oidx, r_osz
346 , ACT_RUN
347 );
348 }
349
350 /** lidx == llsz - 1 */
352 __local, gethd
353 , tmp2, l_out(), l_inp()
355 , iidx, r_isz, oidx, r_osz
356 , ACT_RUN
357 );
358
359 if(oidx < r_osz && iidx == 0) {
360 r_out[oidx] = tmp2;
361 l_out()[oidx] = tmp2;
362
364 tmp0
365 , &tmp2, &tmp1
366 , l_out()
367 , p_goal
368 , oidx, r_osz
369 , ACT_DERIV_RUN
370 , LOSS_DERIV
371 );
372 }
373
374 /** lidx == llsz */
375 ++lidx;
376
377 /** lidx == llsz - 1 */
378 while(--lidx) {
379 if(oidx < r_osz && iidx < r_isz) {
381 l_inp()[iidx]
382 , tmp0
383 , r_weight
384 , lr.m_weight
385 , r_isz, iidx
386 , r_osz, oidx
387 );
388 unless(iidx) {
390 r_bias[oidx]
391 , tmp0
392 , lr.m_bias
393 );
394 }
395 }
396
398 __local
399 , gethd
402 , l_delta
403 , iidx, r_isz
404 , oidx, r_osz
405 );
406
408 tmp0
410 , oidx, iidx
411 , r_isz, ACT_DERIV_RUN_THEN
413 );
414 tmp0 = l_delta_then[iidx];
415 /** Needs to be procedural */
416 }
417
418 /** lidx == 0 */
419 if(oidx < r_osz && iidx < r_isz) {
421 l_inp()[iidx]
422 , tmp0
423 , r_weight
424 , lr.m_weight
425 , r_isz, iidx
426 , r_osz, oidx
427 );
428
429 unless(iidx) {
431 r_bias[oidx]
432 , tmp0
433 , lr.m_bias
434 );
435 }
436 }
437}
#define ae2f_structdef(key, name)
Definition Cast.h:110
#define unless(...)
Invokes when condition is false.
Definition Cast.h:103
#define _r_isz(lidx)
Definition Mlp.cl.c:62
#define m_weight
Definition Mlp.cl.c:187
#define _r_osz(lidx)
Definition Mlp.cl.c:63
#define _r_weight(lidx)
Definition Mlp.cl.c:59
#define pgsz
Definition Mlp.cl.c:28
#define ACT(layer_idx, r, y, i, c)
Definition Mlp.cl.c:17
#define tmp1
#define p_outstream
Definition Mlp.cl.c:42
#define r_out
Definition Mlp.cl.c:66
#define pgsz_sqr
Definition Mlp.cl.c:31
#define llsz
Definition Mlp.cl.c:37
#define ACT_DERIV(layer_idx, r, y, i, c)
Definition Mlp.cl.c:21
#define m_bias
Definition Mlp.cl.c:188
#define _r_bias(lidx)
Definition Mlp.cl.c:60
#define l_inp(O_R)
Definition Mlp.cl.c:82
#define weightsz
Definition Mlp.cl.c:30
#define l_delta
Definition Mlp.cl.c:86
#define r_inp
Definition Mlp.cl.c:65
#define l_out(O_R)
Definition Mlp.cl.c:83
#define l_delta_then
Definition Mlp.cl.c:87
#define r_weight_then
Definition Mlp.cl.c:75
#define loc
Definition Mlp.cl.c:81
#define _r_out(lidx)
Definition Mlp.cl.c:58
#define p_weight
Definition Mlp.cl.c:46
#define p_deltastream
Definition Mlp.cl.c:52
#define p_goal
Definition Mlp.cl.c:55
#define p_layerszlist
Definition Mlp.cl.c:40
#define _r_inp(lidx)
Definition Mlp.cl.c:57
#define r_delta
Definition Mlp.cl.c:69
#define p_bias
Definition Mlp.cl.c:49
#define _r_delta(lidx)
Definition Mlp.cl.c:61
#define lp_deltastream
Definition Mlp.cl.c:85
#define r_weight
Definition Mlp.cl.c:67
#define r_isz
Definition Mlp.cl.c:70
#define lsz
Definition Mlp.cl.c:34
#define r_osz
Definition Mlp.cl.c:71
#define r_bias
Definition Mlp.cl.c:68
#define __global
Definition addrspec.h:8
#define __local
Definition addrspec.h:10
#define __kernel
Definition addrspec.h:7
#define __ae2f_AnnSlpFetchDeltaOne_imp(rret, ptr_tmp0, ptr_tmp1, prm_out, prm_out_desired, prm_oidx, prm_osz, fn_actderiv, fn_lossderiv)
Definition Slp.auto.h:528
#define __ae2f_AnnSlpFollowOneW_imp(inp, delta, weight, learningrate, inp_sz, inp_idx, out_sz, out_idx)
Definition Slp.auto.h:381
#define __ae2f_AnnSlpFollowOneB_imp(rret_bias, prm_delta, prm_learningrate_bias)
Definition Slp.auto.h:400
#define size_t
Definition mac.h:20
#define CAST(t, x)
Definition mac.h:16
#define host_float_t
Definition mac.h:9
#define uint
Definition sclr.h:11
#define _clMlpGetHD1_t
Definition Mlp.auto.h:43
#define _clMlpGetHD1(__global, v_mem, r_delta_then, i_weight, i_delta, i_iidx, i_isz, i_oidx, i_osz)
Definition Mlp.auto.h:49
#define _clMlpRvrse(v_tmp, r_delta_then, i_oidx, i_iidx, i_isz, i_actderiv_then, i_inp, i_deltaseed)
Definition Mlp.auto.h:21
#define _clAtomAddF_t(__global, host_float_t)
Definition Slp.auto.h:24
#define _clSlpPredict(__global, v_mem, ret, loc, p_inp, p_weight, p_bias, iidx, isz, oidx, osz, ACT)
Definition Slp.auto.h:81
Definition Mlp.cl.c:97