ae2f_docs
Mlp.cl.c
Go to the documentation of this file.
1#define ae2f_NEED_CLASS 0
2
3#if __ae2f_MACRO_GENERATED
4#define ae2fVK_clspv_IS_OPENCL 1
5#endif
6
7#include "./Mlp.auto.h"
8
9#ifndef ACT
10#define ACT(layer_idx, r, x)
11#endif
12
13#ifndef ACT_DERIV
14#define ACT_DERIV(layer_idx, r, x)
15#endif
16
17#ifndef LOSS_DERIV
18#define LOSS_DERIV(r, y, y_desired, i, c)
19#endif
20
21#define pgsz sz
22#define pgsz_sqr (pgsz * pgsz)
23
24/** length of `p_layerszlist` */
25#define lsz lsz
26
27/** count of layer. */
28#define llsz (lsz - 1)
29
30/** @brief lsz * sizeof(uint32_t) */
31#define p_layerszlist CAST(__global uint32_t*, glob)
32/** @brief sizeof(ae2f_float_t) * lsz * pgsz */
33#define p_outstream (CAST(__global ae2f_float_t*, p_layerszlist + lsz))
34#define p_inp p_outstream
35
36/** @brief sizeof(ae2f_float_t) * pgsz_sqr * llsz */
37#define p_weight (p_outstream + lsz * pgsz)
38
39/** @brief sizeof(ae2f_float_t) * pgsz * llsz */
40#define p_bias (p_weight + pgsz_sqr * llsz)
41
42/** @brief sizeof(ae2f_float_t) * pgsz * llsz */
43#define p_deltastream (p_bias + llsz * pgsz)
44
45/** @brief sizeof(ae2f_float_t) * pgsz */
46#define p_goal (p_deltastream + llsz * pgsz)
47
48
49#define _r_inp(lidx) (p_outstream + pgsz * (lidx))
50#define _r_out(lidx) (p_outstream + pgsz * ((lidx) + 1))
51#define _r_weight(lidx) (p_weight + pgsz_sqr * (lidx))
52#define _r_bias(lidx) (p_bias + pgsz * (lidx))
53#define _r_delta(lidx) (p_deltastream + pgsz * (lidx))
54#define _r_isz(lidx) ((p_layerszlist)[lidx])
55#define _r_osz(lidx) ((p_layerszlist)[(lidx) + 1])
56
57#define r_inp _r_inp(lidx)
58#define r_out _r_out(lidx)
59#define r_weight _r_weight(lidx)
60#define r_bias _r_bias(lidx)
61#define r_delta _r_delta(lidx)
62#define r_isz _r_isz(lidx)
63#define r_osz _r_osz(lidx)
64
65#define r_inp_then _r_inp(lidx-1)
66#define r_out_then _r_out(lidx-1)
67#define r_weight_then _r_weight(lidx-1)
68#define r_bias_then _r_bias(lidx-1)
69#define r_delta_then _r_delta(lidx-1)
70#define r_isz_then _r_isz(lidx-1)
71#define r_osz_then _r_osz(lidx-1)
72
73#define l_inp(O_R) (((loc) + pgsz * ((lidx) O_R)))
74#define l_out(O_R) (((loc) + pgsz * ((lidx + 1) O_R)))
75
76#define lp_deltastream ((loc) + pgsz * ((lsz)))
77#define l_delta (lp_deltastream + pgsz * ((lidx) & 1))
78#define l_delta_then (lp_deltastream + pgsz * (!((lidx) & 1)))
79
80/** For every runners */
81#define ACT_RUN(r, x) ACT(lidx, r, x)
82#define ACT_DERIV_RUN(r, x) ACT_DERIV(lidx, r, x)
83
84#define ACT_RUN_THEN(r, x) ACT((lidx - 1), r, x)
85#define ACT_DERIV_RUN_THEN(r, x) ACT_DERIV((lidx - 1), r, x)
86
87
88/**
89 * @brief loc
90 * ae2f_float_t[Page]: inp
91 * ae2f_float_t[Page]: out
92 * */
93__kernel void kPredict(__global void* glob, __local ae2f_float_t* loc, const uint32_t lsz) {
94 const size_t
95 oidx = get_global_id(0)
96 , iidx = get_global_id(1)
97 , sz = get_global_size(0);
98
99 size_t lidx = 0;
100 clSlpPredict_t v_predict;
101
102 clSlpPredict(v_predict, l_out(&1), r_inp, r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
104
105 while(++lidx < llsz - 1) {
106 clSlpPredict(v_predict, l_out(&1), l_inp(&1), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
108 }
109 clSlpPredict(v_predict, r_out, l_inp(&1), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
110}
111
112
113/**
114 * @brief loc
115 * ae2f_float_t[Page]: inp
116 * ae2f_float_t[Page]: out
117 * */
119 const size_t
120 oidx = get_global_id(0)
121 , iidx = get_global_id(1)
122 , sz = get_global_size(0);
123
124 size_t lidx = 0;
125 clSlpPredict_t v_predict;
126
127 clSlpPredict(v_predict, l_out(&1), r_inp, r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
129
130 while(++lidx < llsz - 1) {
131 clSlpPredict(v_predict, l_out(&1), l_inp(&1), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
132 if(oidx < r_osz && iidx == 0)
133 r_out[oidx] = l_out(&1)[oidx];
135 }
136
137 clSlpPredict(v_predict, r_out, l_inp(&1), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
138}
139
140#pragma pack(push, 1)
141
142ae2f_structdef(union, lrlszel_t) {
143 ae2f_float_t m_f;
144 uint32_t m_u;
145};
146
147const ae2f_structdef(struct, lrlsz_t)
148{
149 lrlszel_t m_lsz, m_weight, m_bias;
150};
151
153 sizeof(lrlszel_t) == (sizeof(uint32_t) > sizeof(ae2f_float_t) ? sizeof(uint32_t) : sizeof(ae2f_float_t))
154 ? 1 : -1
155];
156
157typedef char STATIC_ASSERT_LRLSZ_SZ[sizeof(lrlsz_t) == sizeof(lrlszel_t) * 3 ? 1 : -1];
158
159#pragma pack(pop)
160
161/**
162 * @brief loc
163 * ae2f_float_t[lsz - 1][Page]: OutStream
164 * ae2f_float_t[lsz - 1][Page]: DeltaStream
165 * */
166__kernel void kFollow(__global void* glob, __local ae2f_float_t* loc, lrlsz_t lr) {
167#undef lsz
168#undef m_weight
169#undef m_bias
170#define lsz lr.m_lsz.m_u
171#define m_weight m_weight.m_f
172#define m_bias m_bias.m_f
173 if(lsz < 3) {
174 /** ASSERT */
175 return;
176 }
177
178 size_t lidx = llsz - 1;
179 ae2f_float_t v_tmp;
180
181 const size_t
182 oidx = get_global_id(0)
183 , iidx = get_global_id(1)
184 , sz = get_global_size(0);
185
186 if(oidx < r_osz && iidx < r_isz) {
188 r_inp[iidx]
189 , r_delta[oidx]
190 , r_weight
191 , lr.m_weight
192 , r_isz, iidx
193 , r_osz, oidx
194 );
195
196 if(iidx == 0) {
198 r_bias[oidx]
199 , r_delta[oidx]
200 , lr.m_bias
201 );
202 }
203 }
204
208 , r_delta
209 , iidx, r_isz
210 , oidx, r_osz
211 );
212
214 v_tmp
216 , oidx, iidx
217 , r_isz, ACT_DERIV_RUN_THEN
219 );
220
221 /** Needs to be procedural */
223
224
225 while(--lidx) {
226 if(oidx < r_osz && iidx < r_isz) {
228 l_inp()[iidx]
229 , l_delta[oidx]
230 , r_weight
231 , lr.m_weight
232 , r_isz, iidx
233 , r_osz, oidx
234 );
235
236 if(iidx == 0) {
238 r_bias[oidx]
239 , l_delta[oidx]
240 , lr.m_bias
241 );
242 }
243 }
244
248 , l_delta
249 , iidx, r_isz
250 , oidx, r_osz
251 );
252
254 v_tmp
256 , oidx, iidx
257 , r_isz, ACT_DERIV_RUN_THEN
259 );
260
261 /** Needs to be procedural */
263 }
264
265 if(oidx < r_osz && iidx < r_isz) {
267 r_inp[iidx]
268 , l_delta[oidx]
269 , r_weight
270 , lr.m_weight
271 , r_isz, iidx
272 , r_osz, oidx
273 );
274
275 if(iidx == 0) {
277 r_bias[oidx]
278 , l_delta[oidx]
279 , lr.m_bias
280 );
281 }
282 }
283}
284
285/**
286 * @brief loc
287 * ae2f_float_t[lsz - 1][Page]: OutStream
288 * ae2f_float_t[lsz - 1][Page]: DeltaStream
289 * */
290__kernel void kTrainAuto(__global void* glob, __local ae2f_float_t* loc, lrlsz_t lr) {
291
292#undef lsz
293#undef m_weight
294#undef m_bias
295#define lsz lr.m_lsz.m_u
296#define m_weight m_weight.m_f
297#define m_bias m_bias.m_f
298 if(lsz < 3) {
299 /** ASSERT */
300 return;
301 }
302
303 size_t lidx = 0;
304 ae2f_float_t v_tmp, v_tmp1;
305
306 const size_t
307 oidx = get_global_id(0)
308 , iidx = get_global_id(1)
309 , sz = get_global_size(0);
310
311 clSlpPredict_t v_predict;
312
313 if(iidx < r_isz && oidx == 0)
314 l_inp()[iidx] = r_inp[iidx];
315
317
318 for(; lidx < llsz - 1; lidx++) {
319 clSlpPredict(v_predict, l_out(), l_inp(), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
321 }
322
323 /** lidx == llsz - 1 */
324 clSlpPredict(v_predict, l_out(), l_inp(), r_weight, r_bias, iidx, r_isz, oidx, r_osz, ACT_RUN);
326
327 if(oidx < r_osz && iidx == 0) {
328 r_out[oidx] = l_out()[oidx];
330 v_tmp
331 , v_tmp1
332 , l_out()
333 , p_goal
334 , ACT_DERIV_RUN
335 , LOSS_DERIV
336 , l_delta[oidx]
337 , oidx
338 , r_osz
339 );
340 }
341
343
344 /** lidx == llsz */
345 ++lidx;
346
347 /** lidx == llsz - 1 */
348 while(--lidx) {
349 if(oidx < r_osz && iidx < r_isz) {
351 l_inp()[iidx]
352 , l_delta[oidx]
353 , r_weight
354 , lr.m_weight
355 , r_isz, iidx
356 , r_osz, oidx
357 );
358
359 if(iidx == 0) {
361 r_bias[oidx]
362 , l_delta[oidx]
363 , lr.m_bias
364 );
365 }
366
370 , l_delta
371 , iidx, r_isz
372 , oidx, r_osz
373 );
374
376 v_tmp
378 , oidx, iidx
379 , r_isz, ACT_DERIV_RUN_THEN
381 );
382 }
383
384 /** Needs to be procedural */
386 }
387
388 /** lidx == 0 */
389 if(oidx < r_osz && iidx < r_isz) {
391 l_inp()[iidx]
392 , l_delta[oidx]
393 , r_weight
394 , lr.m_weight
395 , r_isz, iidx
396 , r_osz, oidx
397 );
398
399 if(iidx == 0) {
401 r_bias[oidx]
402 , l_delta[oidx]
403 , lr.m_bias
404 );
405 }
406 }
407
408#undef lsz
409}
#define ae2f_structdef(key, name)
Definition Cast.h:110
ae2f_float ae2f_float_t
Definition Float.h:38
#define _r_isz(lidx)
Definition Mlp.cl.c:54
#define m_weight
#define _r_osz(lidx)
Definition Mlp.cl.c:55
__kernel void kTrainAuto(__global void *glob, __local ae2f_float_t *loc, lrlsz_t lr)
loc ae2f_float_t[lsz - 1][Page]: OutStream ae2f_float_t[lsz - 1][Page]: DeltaStream
Definition Mlp.cl.c:290
#define p_weight
sizeof(ae2f_float_t) * pgsz_sqr * llsz
Definition Mlp.cl.c:37
#define _r_weight(lidx)
Definition Mlp.cl.c:51
#define ACT(layer_idx, r, x)
Definition Mlp.cl.c:10
#define pgsz
Definition Mlp.cl.c:21
#define ACT_DERIV(layer_idx, r, x)
Definition Mlp.cl.c:14
#define p_outstream
sizeof(ae2f_float_t) * lsz * pgsz
Definition Mlp.cl.c:33
#define r_out
Definition Mlp.cl.c:58
#define pgsz_sqr
Definition Mlp.cl.c:22
__kernel void kPredictStream(__global void *glob, __local ae2f_float_t *loc, const uint32_t lsz)
loc ae2f_float_t[Page]: inp ae2f_float_t[Page]: out
Definition Mlp.cl.c:118
#define llsz
Definition Mlp.cl.c:28
#define m_bias
#define _r_bias(lidx)
Definition Mlp.cl.c:52
#define l_inp(O_R)
Definition Mlp.cl.c:73
#define l_delta
Definition Mlp.cl.c:77
#define r_inp
Definition Mlp.cl.c:57
#define l_out(O_R)
Definition Mlp.cl.c:74
#define l_delta_then
Definition Mlp.cl.c:78
#define r_weight_then
Definition Mlp.cl.c:67
#define _r_out(lidx)
Definition Mlp.cl.c:50
#define p_deltastream
sizeof(ae2f_float_t) * pgsz * llsz
Definition Mlp.cl.c:43
#define p_goal
sizeof(ae2f_float_t) * pgsz
Definition Mlp.cl.c:46
#define p_bias
sizeof(ae2f_float_t) * pgsz * llsz
Definition Mlp.cl.c:40
__kernel void kPredict(__global void *glob, __local ae2f_float_t *loc, const uint32_t lsz)
loc ae2f_float_t[Page]: inp ae2f_float_t[Page]: out
Definition Mlp.cl.c:93
char STATIC_ASSERT_LRLSZEL_SZ[sizeof(lrlszel_t)==(sizeof(uint32_t) > sizeof(ae2f_float_t) ? sizeof(uint32_t) :sizeof(ae2f_float_t)) ? 1 :-1]
Definition Mlp.cl.c:155
#define p_layerszlist
lsz * sizeof(uint32_t)
Definition Mlp.cl.c:31
#define _r_inp(lidx)
Definition Mlp.cl.c:49
#define r_delta
Definition Mlp.cl.c:61
#define _r_delta(lidx)
Definition Mlp.cl.c:53
#define lp_deltastream
Definition Mlp.cl.c:76
#define r_weight
Definition Mlp.cl.c:59
#define r_isz
Definition Mlp.cl.c:62
#define lsz
Definition Mlp.cl.c:25
char STATIC_ASSERT_LRLSZ_SZ[sizeof(lrlsz_t)==sizeof(lrlszel_t) *3 ? 1 :-1]
Definition Mlp.cl.c:157
#define r_osz
Definition Mlp.cl.c:63
#define r_bias
Definition Mlp.cl.c:60
__kernel void kFollow(__global void *glob, __local ae2f_float_t *loc, lrlsz_t lr)
loc ae2f_float_t[lsz - 1][Page]: OutStream ae2f_float_t[lsz - 1][Page]: DeltaStream
Definition Mlp.cl.c:166
#define __global
#define uint32_t
#define __local
#define size_t
#define CLK_ALL_MEM_FENCE
Contains both LOCAL and GLOBAL.
#define __kernel
size_t get_global_id(uint dimindx)
@ CLK_LOCAL_MEM_FENCE
size_t get_global_size(uint dimindx)
void barrier(cl_mem_fence_flags flags)
#define __ae2f_AnnSlpFollowOneW_imp(inp, delta, weight, learningrate, inp_sz, inp_idx, out_sz, out_idx)
Weights.
Definition Slp.auto.h:366
#define __ae2f_AnnSlpFollowOneB_imp(r_bias, delta, learningrate_bias)
Definition Slp.auto.h:385
#define __ae2f_AnnSlpFetchDeltaOne_imp(v_fetchdelta_0, v_fetchdelta_1, out, out_desired, actderiv_opt, lossderiv, retdelta, oidx, osz)
Definition Slp.auto.h:513
#define CAST(t, x)
Definition mac.h:5
#define _clMlpRvrse(v_tmp, r_delta_then, i_oidx, i_iidx, i_isz, i_actderiv_then, i_inp, i_deltaseed)
delta to delta Propagate
Definition Mlp.auto.h:30
#define clMlpGetHD(...)
GetHidDelta Need no structure.
Definition Mlp.auto.h:120
#define clSlpPredict
Definition Slp.auto.h:88
#define clSlpPredict_t
Definition Slp.auto.h:89