xref: /aosp_15_r20/external/clpeak/src/kernels/compute_hp_kernels.cl (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1*1cd03ba3SJeremy KempMSTRINGIFY(
2*1cd03ba3SJeremy Kemp
3*1cd03ba3SJeremy Kemp// Stringifying requires a new line after hash defines
4*1cd03ba3SJeremy Kemp
5*1cd03ba3SJeremy Kemp\n#if defined(cl_khr_fp16)
6*1cd03ba3SJeremy Kemp\n  #pragma OPENCL EXTENSION cl_khr_fp16 : enable
7*1cd03ba3SJeremy Kemp\n  #define HALF_AVAILABLE
8*1cd03ba3SJeremy Kemp\n#endif
9*1cd03ba3SJeremy Kemp
10*1cd03ba3SJeremy Kemp\n#undef MAD_4
11*1cd03ba3SJeremy Kemp\n#undef MAD_16
12*1cd03ba3SJeremy Kemp\n#undef MAD_64
13*1cd03ba3SJeremy Kemp\n
14*1cd03ba3SJeremy Kemp\n#define MAD_4(x, y)     x = mad(y, x, y);   y = mad(x, y, x);   x = mad(y, x, y);   y = mad(x, y, x);
15*1cd03ba3SJeremy Kemp\n#define MAD_16(x, y)    MAD_4(x, y);        MAD_4(x, y);        MAD_4(x, y);        MAD_4(x, y);
16*1cd03ba3SJeremy Kemp\n#define MAD_64(x, y)    MAD_16(x, y);       MAD_16(x, y);       MAD_16(x, y);       MAD_16(x, y);
17*1cd03ba3SJeremy Kemp\n
18*1cd03ba3SJeremy Kemp
19*1cd03ba3SJeremy Kemp\n
20*1cd03ba3SJeremy Kemp\n#ifdef HALF_AVAILABLE
21*1cd03ba3SJeremy Kemp\n
22*1cd03ba3SJeremy Kemp
23*1cd03ba3SJeremy Kemp
24*1cd03ba3SJeremy Kemp__kernel void compute_hp_v1(__global half *ptr, float _B)
25*1cd03ba3SJeremy Kemp{
26*1cd03ba3SJeremy Kemp    half _A = (half)_B;
27*1cd03ba3SJeremy Kemp    half x = _A;
28*1cd03ba3SJeremy Kemp    half y = (half)get_local_id(0);
29*1cd03ba3SJeremy Kemp
30*1cd03ba3SJeremy Kemp    for(int i=0; i<128; i++)
31*1cd03ba3SJeremy Kemp    {
32*1cd03ba3SJeremy Kemp        MAD_16(x, y);
33*1cd03ba3SJeremy Kemp    }
34*1cd03ba3SJeremy Kemp
35*1cd03ba3SJeremy Kemp    ptr[get_global_id(0)] = y;
36*1cd03ba3SJeremy Kemp}
37*1cd03ba3SJeremy Kemp
38*1cd03ba3SJeremy Kemp
39*1cd03ba3SJeremy Kemp__kernel void compute_hp_v2(__global half *ptr, float _B)
40*1cd03ba3SJeremy Kemp{
41*1cd03ba3SJeremy Kemp    half _A = (half)_B;
42*1cd03ba3SJeremy Kemp    half2 x = (half2)(_A, (_A+1));
43*1cd03ba3SJeremy Kemp    half2 y = (half2)get_local_id(0);
44*1cd03ba3SJeremy Kemp
45*1cd03ba3SJeremy Kemp    for(int i=0; i<64; i++)
46*1cd03ba3SJeremy Kemp    {
47*1cd03ba3SJeremy Kemp        MAD_16(x, y);
48*1cd03ba3SJeremy Kemp    }
49*1cd03ba3SJeremy Kemp
50*1cd03ba3SJeremy Kemp    ptr[get_global_id(0)] = (y.S0) + (y.S1);
51*1cd03ba3SJeremy Kemp}
52*1cd03ba3SJeremy Kemp
53*1cd03ba3SJeremy Kemp__kernel void compute_hp_v4(__global half *ptr, float _B)
54*1cd03ba3SJeremy Kemp{
55*1cd03ba3SJeremy Kemp    half _A = (half)_B;
56*1cd03ba3SJeremy Kemp    half4 x = (half4)(_A, (_A+1), (_A+2), (_A+3));
57*1cd03ba3SJeremy Kemp    half4 y = (half4)get_local_id(0);
58*1cd03ba3SJeremy Kemp
59*1cd03ba3SJeremy Kemp    for(int i=0; i<32; i++)
60*1cd03ba3SJeremy Kemp    {
61*1cd03ba3SJeremy Kemp        MAD_16(x, y);
62*1cd03ba3SJeremy Kemp    }
63*1cd03ba3SJeremy Kemp
64*1cd03ba3SJeremy Kemp    ptr[get_global_id(0)] = (y.S0) + (y.S1) + (y.S2) + (y.S3);
65*1cd03ba3SJeremy Kemp}
66*1cd03ba3SJeremy Kemp
67*1cd03ba3SJeremy Kemp
68*1cd03ba3SJeremy Kemp__kernel void compute_hp_v8(__global half *ptr, float _B)
69*1cd03ba3SJeremy Kemp{
70*1cd03ba3SJeremy Kemp    half _A = (half)_B;
71*1cd03ba3SJeremy Kemp    half8 x = (half8)(_A, (_A+1), (_A+2), (_A+3), (_A+4), (_A+5), (_A+6), (_A+7));
72*1cd03ba3SJeremy Kemp    half8 y = (half8)get_local_id(0);
73*1cd03ba3SJeremy Kemp
74*1cd03ba3SJeremy Kemp    for(int i=0; i<16; i++)
75*1cd03ba3SJeremy Kemp    {
76*1cd03ba3SJeremy Kemp        MAD_16(x, y);
77*1cd03ba3SJeremy Kemp    }
78*1cd03ba3SJeremy Kemp
79*1cd03ba3SJeremy Kemp    ptr[get_global_id(0)] = (y.S0) + (y.S1) + (y.S2) + (y.S3) + (y.S4) + (y.S5) + (y.S6) + (y.S7);
80*1cd03ba3SJeremy Kemp}
81*1cd03ba3SJeremy Kemp
82*1cd03ba3SJeremy Kemp__kernel void compute_hp_v16(__global half *ptr, float _B)
83*1cd03ba3SJeremy Kemp{
84*1cd03ba3SJeremy Kemp    half _A = (half)_B;
85*1cd03ba3SJeremy Kemp    half16 x = (half16)(_A, (_A+1), (_A+2), (_A+3), (_A+4), (_A+5), (_A+6), (_A+7),
86*1cd03ba3SJeremy Kemp                    (_A+8), (_A+9), (_A+10), (_A+11), (_A+12), (_A+13), (_A+14), (_A+15));
87*1cd03ba3SJeremy Kemp    half16 y = (half16)get_local_id(0);
88*1cd03ba3SJeremy Kemp
89*1cd03ba3SJeremy Kemp    for(int i=0; i<8; i++)
90*1cd03ba3SJeremy Kemp    {
91*1cd03ba3SJeremy Kemp        MAD_16(x, y);
92*1cd03ba3SJeremy Kemp    }
93*1cd03ba3SJeremy Kemp
94*1cd03ba3SJeremy Kemp    half2 t = (y.S01) + (y.S23) + (y.S45) + (y.S67) + (y.S89) + (y.SAB) + (y.SCD) + (y.SEF);
95*1cd03ba3SJeremy Kemp    ptr[get_global_id(0)] = t.S0 + t.S1;
96*1cd03ba3SJeremy Kemp}
97*1cd03ba3SJeremy Kemp
98*1cd03ba3SJeremy Kemp\n
99*1cd03ba3SJeremy Kemp\n#endif      // half_AVAILABLE
100*1cd03ba3SJeremy Kemp\n
101*1cd03ba3SJeremy Kemp
102*1cd03ba3SJeremy Kemp)
103