xref: /aosp_15_r20/external/clpeak/src/kernels/compute_dp_kernels.cl (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1MSTRINGIFY(
2
3// Stringifying requires a new line after hash defines
4
// Select a double precision extension: cl_khr_fp64 is tried first, cl_amd_fp64 second.
// DOUBLE_AVAILABLE is defined only when one of the two extension macros is present;
// the kernel definitions further down are compiled only under that macro.
5\n#if defined(cl_khr_fp64)
6\n  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
7\n  #define DOUBLE_AVAILABLE
8\n#elif defined(cl_amd_fp64)
9\n  #pragma OPENCL EXTENSION cl_amd_fp64 : enable
10\n  #define DOUBLE_AVAILABLE
11\n#endif
12
// Remove any earlier definitions of the MAD helpers before redefining them here.
13\n#undef MAD_4
14\n#undef MAD_16
15\n#undef MAD_64
16\n
// MAD_4 performs four mad calls that alternately overwrite x and y; each call reads the
// value written by the call before it, so the four operations form one dependent chain.
// MAD_16 is four MAD_4 expansions and MAD_64 is four MAD_16 expansions.
// Each expansion already ends in a semicolon, so a call site that appends its own
// semicolon yields an extra empty statement.
17\n#define MAD_4(x, y)     x = mad(y, x, y);   y = mad(x, y, x);   x = mad(y, x, y);   y = mad(x, y, x);
18\n#define MAD_16(x, y)    MAD_4(x, y);        MAD_4(x, y);        MAD_4(x, y);        MAD_4(x, y);
19\n#define MAD_64(x, y)    MAD_16(x, y);       MAD_16(x, y);       MAD_16(x, y);       MAD_16(x, y);
20\n
21
22\n
23\n#ifdef DOUBLE_AVAILABLE
24\n
25
26
/*
 * Scalar double precision variant.
 *
 * ptr : output buffer; this work-item writes the element at index get_global_id(0)
 * _A  : starting value of the first operand
 *
 * The second operand starts from the local id of the work-item. The loop runs
 * 128 passes of MAD_16, which is 2048 chained scalar mad calls per work-item.
 */
__kernel void compute_dp_v1(__global double *ptr, double _A)
{
    const int passes = 128;

    double a = _A;
    double b = (double)get_local_id(0);

    for (int pass = 0; pass < passes; ++pass)
    {
        MAD_16(a, b);
    }

    /* Write the last operand out; presumably this keeps the chain from being optimised away. */
    const size_t gid = get_global_id(0);
    ptr[gid] = b;
}
39
40
/*
 * double2 variant.
 *
 * ptr : output buffer; this work-item writes the element at index get_global_id(0)
 * _A  : base value; the first operand starts as the lanes _A and _A + 1
 *
 * Every lane of the second operand starts from the local id of the work-item.
 * The loop runs 64 passes of MAD_16, which is 1024 chained double2 mad calls.
 */
__kernel void compute_dp_v2(__global double *ptr, double _A)
{
    const int passes = 64;
    const double lid = (double)get_local_id(0);

    double2 a = (double2)(_A, _A + 1);
    double2 b = (double2)(lid);

    for (int pass = 0; pass < passes; ++pass)
    {
        MAD_16(a, b);
    }

    /* Collapse both lanes into the single double stored for this work-item. */
    const size_t gid = get_global_id(0);
    ptr[gid] = b.s0 + b.s1;
}
53
/*
 * double4 variant.
 *
 * ptr : output buffer; this work-item writes the element at index get_global_id(0)
 * _A  : base value; the first operand starts as the lanes _A through _A + 3
 *
 * Every lane of the second operand starts from the local id of the work-item.
 * The loop runs 32 passes of MAD_16, which is 512 chained double4 mad calls.
 */
__kernel void compute_dp_v4(__global double *ptr, double _A)
{
    const int passes = 32;
    const double lid = (double)get_local_id(0);

    double4 a = (double4)(_A, _A + 1, _A + 2, _A + 3);
    double4 b = (double4)(lid);

    for (int pass = 0; pass < passes; ++pass)
    {
        MAD_16(a, b);
    }

    /* Collapse the four lanes, summed left to right, into the stored double. */
    const size_t gid = get_global_id(0);
    ptr[gid] = b.s0 + b.s1 + b.s2 + b.s3;
}
66
67
/*
 * double8 variant.
 *
 * ptr : output buffer; this work-item writes the element at index get_global_id(0)
 * _A  : base value; the first operand starts as the lanes _A through _A + 7
 *
 * Every lane of the second operand starts from the local id of the work-item.
 * The loop runs 16 passes of MAD_16, which is 256 chained double8 mad calls.
 */
__kernel void compute_dp_v8(__global double *ptr, double _A)
{
    const int passes = 16;
    const double lid = (double)get_local_id(0);

    double8 a = (double8)(_A, _A + 1, _A + 2, _A + 3,
                          _A + 4, _A + 5, _A + 6, _A + 7);
    double8 b = (double8)(lid);

    for (int pass = 0; pass < passes; ++pass)
    {
        MAD_16(a, b);
    }

    /* Collapse the eight lanes, summed left to right, into the stored double. */
    const size_t gid = get_global_id(0);
    ptr[gid] = b.s0 + b.s1 + b.s2 + b.s3 + b.s4 + b.s5 + b.s6 + b.s7;
}
80
/*
 * double16 variant.
 *
 * ptr : output buffer; this work-item writes the element at index get_global_id(0)
 * _A  : base value; the first operand starts as the lanes _A through _A + 15
 *
 * Every lane of the second operand starts from the local id of the work-item.
 * The loop runs 8 passes of MAD_16, which is 128 chained double16 mad calls.
 */
__kernel void compute_dp_v16(__global double *ptr, double _A)
{
    const int passes = 8;
    const double lid = (double)get_local_id(0);

    double16 a = (double16)(_A,      _A + 1,  _A + 2,  _A + 3,
                            _A + 4,  _A + 5,  _A + 6,  _A + 7,
                            _A + 8,  _A + 9,  _A + 10, _A + 11,
                            _A + 12, _A + 13, _A + 14, _A + 15);
    double16 b = (double16)(lid);

    for (int pass = 0; pass < passes; ++pass)
    {
        MAD_16(a, b);
    }

    /* Reduce in two steps: add the eight lane pairs as double2, then add the two remaining lanes. */
    double2 pairs = b.s01 + b.s23 + b.s45 + b.s67 + b.s89 + b.sab + b.scd + b.sef;
    const size_t gid = get_global_id(0);
    ptr[gid] = pairs.s0 + pairs.s1;
}
95
96\n
97\n#endif      // DOUBLE_AVAILABLE
98\n
99
100)
101