4#define ae2f_NEED_CLASS 0
7#include <ae2f/Ann/Slp.core.h>
8#include <ae2f/Ann/Slp.auto.h>
/* Adapter macro: takes five arguments and forwards only the last two (d, e)
 * to clAtomAddF_tmpl; the first three (a, b, c) are discarded. */
12#define _clAtomAddF_tmpl(a, b, c, d, e) clAtomAddF_tmpl(d, e)
/* Adapter macro: takes four arguments and forwards the last three (b, c, d)
 * to clAtomAddF; the first argument (a) is discarded. */
13#define _clAtomAddF(a, b, c, d) clAtomAddF(b, c, d)
/*
 * _clAtomAddF_t(__global, host_float_t): expands to an anonymous struct type.
 *
 * Members visible in this listing:
 *   m_fa   - array of host_float_t holding MAX(1, 4 / sizeof(host_float_t))
 *            elements.
 *   m_u    - array of uint holding MAX(1, sizeof(host_float_t) >> 2) elements.
 *   m_pchg - pointer to volatile uint, qualified by the macro's first
 *            parameter (named __global).
 *   m_fp   - pointer to volatile host_float_t, qualified the same way.
 *
 * NOTE(review): the listing numbers jump (16, 19, 20, 24, 25, 28), so part of
 * this definition is not shown here. Code further down accesses m_atom, m_U0,
 * m_f and m_count; presumably those are declared on the missing lines and
 * wrap the members above -- confirm against the original file.
 */
16#define _clAtomAddF_t(__global, host_float_t) struct {
19 host_float_t m_fa[MAX(1
, 4
/ sizeof(host_float_t))];
20 uint m_u[(MAX(1
, sizeof(host_float_t) >> 2
))];
24 __global volatile uint* m_pchg;
25 __global volatile host_float_t* m_fp;
28}
36 if(
sizeof((v_mem).m_atom[0].m_f) < 4) {
37 (v_mem).m_count = 4 /
sizeof((v_mem).m_atom[0].m_f);
38 while((v_mem).m_count--) {
39 (v_mem).m_atom[0].m_fa[(v_mem).m_count]
40 = (prm_dst)[(v_mem).m_count]
44 ((v_mem).m_atom)[0].m_f = ((v_mem).m_atom)[1].m_f = *(prm_dst);
47 ((v_mem).m_atom)[1].m_f += (prm_val);
49 (v_mem).m_count =
MAX(1, (
sizeof(((v_mem).m_atom[0].m_f)) >> 2));
50 while((v_mem).m_count--) {
51 (v_mem).m_U0.m_fp = (prm_dst);
52 (v_mem).m_U0.m_pchg += (v_mem).m_count;
55 , (v_mem).m_atom[1].m_u[(v_mem).m_count]
/* clSlpPredict_t is a plain alias of clAtomAddF_t; no new type is introduced. */
62typedef clAtomAddF_t clSlpPredict_t;
68 __local ae2f_float_t*
const loc,
69 const __global ae2f_float_t*
const p_inp,
70 const __global ae2f_float_t*
const p_weight,
71 const __global ae2f_float_t*
const p_bias,
77 if((oidx) < (osz) && (iidx) < (isz)) {
78 unless((iidx)) (loc)[oidx] = 0;
80 if(
sizeof((v_mem).m_atom[0].m_f) >= 4) {
83 , (p_weight)[(oidx) * (isz) + (iidx)] * (p_inp)[iidx]
87 (v_mem).m_atom[0].m_u[0] =
90 (v_mem).m_atom[1].m_fa[0]
91 = (v_mem).m_atom[1].m_fa[0]
92 + (p_weight)[(oidx) * (isz) + (iidx)] * (p_inp)[iidx]
95 if((oidx) + 1 < (osz))
96 (v_mem).m_atom[1].m_fa[1]
97 = (v_mem).m_atom[1].m_fa[1]
98 + (p_weight)[((oidx) + 1) * (isz) + (iidx)] * (p_inp)[iidx]
106 (loc)[oidx] += (p_bias)[oidx];
107 ACT(&(ret), (loc), oidx, osz);
#define unless(...)
Runs the statement that follows only when the condition is false.
#define __ae2f_MACRO_GENERATED
#define _clAtomAddF(__global, v_mem, prm_dst, prm_val)
#define _clAtomAddF_t(__global, host_float_t)