xref: /aosp_15_r20/external/clpeak/src/kernels/compute_hp_kernels.cl (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1MSTRINGIFY(
2
3// Stringifying requires a new line after hash defines
4
5\n#if defined(cl_khr_fp16)
6\n  #pragma OPENCL EXTENSION cl_khr_fp16 : enable
7\n  #define HALF_AVAILABLE
8\n#endif
9
10\n#undef MAD_4
11\n#undef MAD_16
12\n#undef MAD_64
13\n
14\n#define MAD_4(x, y)     x = mad(y, x, y);   y = mad(x, y, x);   x = mad(y, x, y);   y = mad(x, y, x);
15\n#define MAD_16(x, y)    MAD_4(x, y);        MAD_4(x, y);        MAD_4(x, y);        MAD_4(x, y);
16\n#define MAD_64(x, y)    MAD_16(x, y);       MAD_16(x, y);       MAD_16(x, y);       MAD_16(x, y);
17\n
18
19\n
20\n#ifdef HALF_AVAILABLE
21\n
22
23
24__kernel void compute_hp_v1(__global half *ptr, float _B)
25{
26    half _A = (half)_B;
27    half x = _A;
28    half y = (half)get_local_id(0);
29
30    for(int i=0; i<128; i++)
31    {
32        MAD_16(x, y);
33    }
34
35    ptr[get_global_id(0)] = y;
36}
37
38
39__kernel void compute_hp_v2(__global half *ptr, float _B)
40{
41    half _A = (half)_B;
42    half2 x = (half2)(_A, (_A+1));
43    half2 y = (half2)get_local_id(0);
44
45    for(int i=0; i<64; i++)
46    {
47        MAD_16(x, y);
48    }
49
50    ptr[get_global_id(0)] = (y.S0) + (y.S1);
51}
52
53__kernel void compute_hp_v4(__global half *ptr, float _B)
54{
55    half _A = (half)_B;
56    half4 x = (half4)(_A, (_A+1), (_A+2), (_A+3));
57    half4 y = (half4)get_local_id(0);
58
59    for(int i=0; i<32; i++)
60    {
61        MAD_16(x, y);
62    }
63
64    ptr[get_global_id(0)] = (y.S0) + (y.S1) + (y.S2) + (y.S3);
65}
66
67
68__kernel void compute_hp_v8(__global half *ptr, float _B)
69{
70    half _A = (half)_B;
71    half8 x = (half8)(_A, (_A+1), (_A+2), (_A+3), (_A+4), (_A+5), (_A+6), (_A+7));
72    half8 y = (half8)get_local_id(0);
73
74    for(int i=0; i<16; i++)
75    {
76        MAD_16(x, y);
77    }
78
79    ptr[get_global_id(0)] = (y.S0) + (y.S1) + (y.S2) + (y.S3) + (y.S4) + (y.S5) + (y.S6) + (y.S7);
80}
81
82__kernel void compute_hp_v16(__global half *ptr, float _B)
83{
84    half _A = (half)_B;
85    half16 x = (half16)(_A, (_A+1), (_A+2), (_A+3), (_A+4), (_A+5), (_A+6), (_A+7),
86                    (_A+8), (_A+9), (_A+10), (_A+11), (_A+12), (_A+13), (_A+14), (_A+15));
87    half16 y = (half16)get_local_id(0);
88
89    for(int i=0; i<8; i++)
90    {
91        MAD_16(x, y);
92    }
93
94    half2 t = (y.S01) + (y.S23) + (y.S45) + (y.S67) + (y.S89) + (y.SAB) + (y.SCD) + (y.SEF);
95    ptr[get_global_id(0)] = t.S0 + t.S1;
96}
97
98\n
99\n#endif      // half_AVAILABLE
100\n
101
102)
103