ae2f_docs
Slp.auto.h
1#undef __ae2f_MACRO_GENERATED
2#define __ae2f_MACRO_GENERATED 1
3#ifndef Slp_h
4#define Slp_h
5
6#define ae2f_NEED_CLASS 0
7
8#include "mac.h"
9#undef __ae2f_MACRO_GENERATED
10#define __ae2f_MACRO_GENERATED 1
11#include <ae2f/Ann/Slp.core.h>
12#undef __ae2f_MACRO_GENERATED
13#define __ae2f_MACRO_GENERATED 1
14#include <ae2f/Ann/Slp.auto.h>
15#undef __ae2f_MACRO_GENERATED
16#define __ae2f_MACRO_GENERATED 1
17
18
/*
 * Forwarding variants of the atomic-add macros.
 *
 * _clAtomAddF_tmpl drops its first three arguments and forwards the last two
 * to clAtomAddF_tmpl; _clAtomAddF drops its first argument and forwards the
 * remaining three to clAtomAddF.
 *
 * NOTE(review): the conditional that selects these definitions (closed by the
 * #endif that follows) is not visible in this listing - confirm which
 * configuration enables them.
 */
20#define _clAtomAddF_tmpl(a, b, c, d, e) clAtomAddF_tmpl(d, e)
21#define _clAtomAddF(a, b, c, d) clAtomAddF(b, c, d)
22#endif
23
/*
 * _clAtomAddF_t(__global, host_float_t): expands to an anonymous struct type
 * used as scratch storage by _clAtomAddF and _clSlpPredict.
 *
 * Macro parameters:
 *   __global      token placed in front of the pointer members of m_U0.
 *   host_float_t  element type of the value being accumulated.
 *
 * Members:
 *   m_atom[2]  two scratch slots; each one is a union that overlays
 *              a single element (m_f), an array of
 *              MAX(1, 4 / sizeof(host_float_t)) elements (m_fa), and an array
 *              of MAX(1, sizeof(host_float_t) >> 2) uint words (m_u).
 *   m_count    uint loop counter (the macros below count it down).
 *   m_U0       union overlaying a uint pointer (m_pchg), an element pointer
 *              (m_fp) and an intptr_t (m_ip), so one address can be viewed
 *              through any of the three.
 */
24#define _clAtomAddF_t(__global, host_float_t) struct {
25 union {
26 host_float_t m_f;
27 host_float_t m_fa[MAX(1, 4 / sizeof(host_float_t))];
28 uint m_u[(MAX(1, sizeof(host_float_t) >> 2))];
29 } m_atom[2];
30 uint m_count;
31 union {
32 __global volatile uint* m_pchg;
33 __global volatile host_float_t* m_fp;
34 intptr_t m_ip;
35 } m_U0; \
36}
37
/* Concrete scratch type: _clAtomAddF_t instantiated with __global and host_float_t. */
39typedef _clAtomAddF_t(__global, host_float_t) clAtomAddF_t;
40#endif
41
/*
 * _clAtomAddF(__global, v_mem, prm_dst, prm_val)
 *
 * Reads the value at prm_dst into the scratch object v_mem, adds prm_val to
 * scratch slot 1, and writes slot 1 back to prm_dst one 32-bit word at a time
 * through atom_xchg_u.
 *
 * Parameters:
 *   __global  template token; not referenced in the body shown here.
 *   v_mem     scratch object with the layout produced by _clAtomAddF_t;
 *             m_atom, m_count and m_U0 are all overwritten.
 *   prm_dst   pointer to the destination element(s).
 *   prm_val   value added to slot 1.
 *
 * Arguments are expanded several times, so they should be free of side
 * effects.
 *
 * NOTE(review): when the element is narrower than 4 bytes only m_atom[0] is
 * loaded from prm_dst; m_atom[1], which receives the addition and is the
 * slot written back, is not assigned from prm_dst in this macro - confirm
 * that this is intended.
 * NOTE(review): the value returned by atom_xchg_u is discarded and there is
 * no retry loop visible here; whether concurrent updates to the same
 * destination can be lost depends on atom_xchg_u and on the callers - confirm.
 */
42#define _clAtomAddF(
43 /** tparam */
44 __global,
45
46 /** param */
47 /* , clAtomAddF_t */ v_mem,
48 /* __global volatile host_float_t* */ prm_dst,
49 /* ae2f_float_t */ prm_val \
50)\
51{
/* Element narrower than 4 bytes: copy 4 / sizeof(element) elements from
 * prm_dst into slot 0, highest index first. */
52 if(sizeof((v_mem).m_atom[0].m_f) < 4) {
53 (v_mem).m_count = 4 / sizeof((v_mem).m_atom[0].m_f);
54 while((v_mem).m_count--) {
55 (v_mem).m_atom[0].m_fa[(v_mem).m_count]
56 = (prm_dst)[(v_mem).m_count]
57 ;
58 }
59 } else {
/* Otherwise load the current destination value into both slots. */
60 ((v_mem).m_atom)[0].m_f = ((v_mem).m_atom)[1].m_f = *(prm_dst);
61 }
62
/* Slot 1 becomes the updated value. */
63 ((v_mem).m_atom)[1].m_f += (prm_val);
64
/* Write slot 1 back word by word (highest word first). The destination
 * pointer is stored as an element pointer and then advanced through the
 * uint view of the same union so that it addresses word m_count. */
65 (v_mem).m_count = MAX(1, (sizeof(((v_mem).m_atom[0].m_f)) >> 2));
66 while((v_mem).m_count--) {
67 (v_mem).m_U0.m_fp = (prm_dst);
68 (v_mem).m_U0.m_pchg += (v_mem).m_count;
69 atom_xchg_u(
70 (v_mem).m_U0.m_pchg
71 , (v_mem).m_atom[1].m_u[(v_mem).m_count]
72 );
73 } \
74}
75
/*
 * The prediction macro uses the same scratch layout as the atomic add:
 * _clSlpPredict_t is an alias of the _clAtomAddF_t type macro and
 * clSlpPredict_t is an alias of clAtomAddF_t.
 */
77#define _clSlpPredict_t _clAtomAddF_t
78typedef clAtomAddF_t clSlpPredict_t;
79#endif
80
/*
 * _clSlpPredict(__global, v_mem, ret, loc, p_inp, p_weight, p_bias,
 *               iidx, isz, oidx, osz, ACT)
 *
 * Contribution of one (iidx, oidx) pair to a single-layer forward pass:
 * accumulates p_weight[oidx * isz + iidx] * p_inp[iidx] into loc[oidx], and,
 * for iidx == 0, adds the bias and invokes the activation.
 *
 * Parameters:
 *   __global  template token forwarded to _clAtomAddF and used in the casts.
 *   v_mem     scratch object (clSlpPredict_t layout).
 *   ret       lvalue whose address is handed to ACT.
 *   loc       accumulator array indexed by oidx.
 *   p_inp     input vector, indexed by iidx.
 *   p_weight  weight matrix, indexed as [oidx * isz + iidx].
 *   p_bias    bias vector, indexed by oidx.
 *   iidx/isz  input index and input count.
 *   oidx/osz  output index and output count.
 *   ACT       activation, invoked as ACT(&ret, loc, oidx, osz).
 *
 * Nothing is done unless oidx < osz and iidx < isz. Arguments are expanded
 * several times, so they should be free of side effects.
 *
 * NOTE(review): no synchronization is visible in this macro between the
 * zeroing of loc[oidx], the accumulation, and the bias/activation step; if
 * several work-items with different iidx share one loc[oidx], the ordering
 * must come from the caller - confirm.
 * NOTE(review): the narrow-element path always addresses word 0 of loc
 * (independent of oidx), casts loc to a __global uint pointer although the
 * parameter hint says __local, and loads m_atom[0] while accumulating in
 * m_atom[1] - confirm that all of this is intended.
 */
81#define _clSlpPredict(
82 /** tparam */
83 __global,
84
85 /** param */
86 /* , clSlpPredict_t */ v_mem,
87 /* ae2f_float_t */ ret,
88 /* __local ae2f_float_t* const */ loc,
89 /* const __global ae2f_float_t* const */ p_inp,
90 /* const __global ae2f_float_t* const */ p_weight,
91 /* const __global ae2f_float_t* const */ p_bias,
92 /* const size_t */ iidx,
93 /* const size_t */ isz,
94 /* const size_t */ oidx,
95 /* const size_t */ osz,
96 /* ae2f_AnnActFFN_t */ ACT \
97)\
98{
99 if((oidx) < (osz) && (iidx) < (isz)) {
/* unless(x) runs its statement when x is false: the iidx == 0 item clears
 * the accumulator. */
100 unless((iidx)) (loc)[oidx] = 0;
101
/* Elements of at least 4 bytes: add the weighted input through _clAtomAddF. */
102 if(sizeof((v_mem).m_atom[0].m_f) >= 4) {
103 _clAtomAddF(__global
104 , v_mem, &(loc)[oidx]
105 , (p_weight)[(oidx) * (isz) + (iidx)] * (p_inp)[iidx]
106 );
107 } else {
/* Narrower elements: only even oidx does work, handling output oidx and,
 * when it exists, oidx + 1 inside one 32-bit word. */
108 unless((oidx) & 1) {
109 (v_mem).m_atom[0].m_u[0] =
110 CAST(__global uint*, loc)[0];
111
112 (v_mem).m_atom[1].m_fa[0]
113 = (v_mem).m_atom[1].m_fa[0]
114 + (p_weight)[(oidx) * (isz) + (iidx)] * (p_inp)[iidx]
115 ;
116
117 if((oidx) + 1 < (osz))
118 (v_mem).m_atom[1].m_fa[1]
119 = (v_mem).m_atom[1].m_fa[1]
120 + (p_weight)[((oidx) + 1) * (isz) + (iidx)] * (p_inp)[iidx]
121 ;
122
123 atom_xchg_u(CAST(__global uint*, loc), (v_mem).m_atom[1].m_u[0]);
124 }
125 }
126
/* The iidx == 0 item finishes the output: add the bias, then activate. */
127 unless(iidx) {
128 (loc)[oidx] += (p_bias)[oidx];
129 ACT(&(ret), (loc), oidx, osz);
130 }
131 }
132 \
133}
134
135
136#endif
137
138#undef __ae2f_MACRO_GENERATED
139
140#define __ae2f_MACRO_GENERATED 0
#define ae2f_structdef(key, name)
Definition Cast.h:110
#define unless(...)
Invokes when condition is false.
Definition Cast.h:103
#define __ae2f_MACRO_GENERATED
Definition Conv.auto.h:2
#define _r_isz(lidx)
Definition Mlp.cl.c:62
#define m_weight
Definition Mlp.cl.c:187
#define _r_osz(lidx)
Definition Mlp.cl.c:63
#define _r_weight(lidx)
Definition Mlp.cl.c:59
#define pgsz
Definition Mlp.cl.c:28
#define ACT(layer_idx, r, y, i, c)
Definition Mlp.cl.c:17
#define tmp1
#define p_outstream
Definition Mlp.cl.c:42
#define r_out
Definition Mlp.cl.c:66
#define pgsz_sqr
Definition Mlp.cl.c:31
#define llsz
Definition Mlp.cl.c:37
#define ACT_DERIV(layer_idx, r, y, i, c)
Definition Mlp.cl.c:21
#define m_bias
Definition Mlp.cl.c:188
#define _r_bias(lidx)
Definition Mlp.cl.c:60
#define l_inp(O_R)
Definition Mlp.cl.c:82
#define weightsz
Definition Mlp.cl.c:30
#define l_delta
Definition Mlp.cl.c:86
#define r_inp
Definition Mlp.cl.c:65
#define l_out(O_R)
Definition Mlp.cl.c:83
#define l_delta_then
Definition Mlp.cl.c:87
#define r_weight_then
Definition Mlp.cl.c:75
#define loc
Definition Mlp.cl.c:81
#define _r_out(lidx)
Definition Mlp.cl.c:58
#define p_weight
Definition Mlp.cl.c:46
#define p_deltastream
Definition Mlp.cl.c:52
#define p_goal
Definition Mlp.cl.c:55
#define p_layerszlist
Definition Mlp.cl.c:40
#define _r_inp(lidx)
Definition Mlp.cl.c:57
#define r_delta
Definition Mlp.cl.c:69
#define p_bias
Definition Mlp.cl.c:49
#define _r_delta(lidx)
Definition Mlp.cl.c:61
#define lp_deltastream
Definition Mlp.cl.c:85
#define r_weight
Definition Mlp.cl.c:67
#define r_isz
Definition Mlp.cl.c:70
#define lsz
Definition Mlp.cl.c:34
#define r_osz
Definition Mlp.cl.c:71
#define r_bias
Definition Mlp.cl.c:68
#define __global
Definition addrspec.h:8
#define __local
Definition addrspec.h:10
#define __kernel
Definition addrspec.h:7
#define __ae2f_AnnSlpFetchDeltaOne_imp(rret, ptr_tmp0, ptr_tmp1, prm_out, prm_out_desired, prm_oidx, prm_osz, fn_actderiv, fn_lossderiv)
Definition Slp.auto.h:528
#define __ae2f_AnnSlpFollowOneW_imp(inp, delta, weight, learningrate, inp_sz, inp_idx, out_sz, out_idx)
Definition Slp.auto.h:381
#define __ae2f_AnnSlpFollowOneB_imp(rret_bias, prm_delta, prm_learningrate_bias)
Definition Slp.auto.h:400
#define size_t
Definition mac.h:20
#define CAST(t, x)
Definition mac.h:16
#define host_float_t
Definition mac.h:9
#define MAX(a, b)
Definition mac.h:18
#define uint
Definition sclr.h:11
#define _clMlpGetHD1_t
Definition Mlp.auto.h:43
#define _clMlpGetHD1(__global, v_mem, r_delta_then, i_weight, i_delta, i_iidx, i_isz, i_oidx, i_osz)
Definition Mlp.auto.h:49
#define _clMlpRvrse(v_tmp, r_delta_then, i_oidx, i_iidx, i_isz, i_actderiv_then, i_inp, i_deltaseed)
Definition Mlp.auto.h:21
#define _clAtomAddF(__global, v_mem, prm_dst, prm_val)
Definition Slp.auto.h:42
#define _clAtomAddF_t(__global, host_float_t)
Definition Slp.auto.h:24
#define _clSlpPredict(__global, v_mem, ret, loc, p_inp, p_weight, p_bias, iidx, isz, oidx, osz, ACT)
Definition Slp.auto.h:81
Definition Mlp.cl.c:97