xref: /aosp_15_r20/external/clpeak/src/kernels/compute_int24_kernels.cl (revision 1cd03ba3888297bc945f2c84574e105e3ced3e34)
1MSTRINGIFY(
2
// Avoid auto-vectorization by using dependent code that is locked to a specific vector width
4
// Remove any earlier definitions of the helper macros before redefining them.
// NOTE(review): the leading \n on each directive presumably becomes a real newline
// once MSTRINGIFY turns this text into a string, so that each directive starts on
// its own line in the OpenCL source; confirm against the MSTRINGIFY definition.
\n#undef MAD_4INT
\n#undef MAD_16INT
\n#undef MAD_64INT
\n
// MAD_4INT  : four chained mad24 calls that alternately overwrite a and b, so each
//             call consumes the result of the previous one.
// MAD_16INT : four MAD_4INT expansions, i.e. 16 chained mad24 calls.
// MAD_64INT : four MAD_16INT expansions, i.e. 64 chained mad24 calls.
// Both arguments must be modifiable lvalues because the macros assign to them.
\n#define MAD_4INT(a, b)  a = mad24(b, a, b); b = mad24(a, b, a); a = mad24(b, a, b); b = mad24(a, b, a);
\n#define MAD_16INT(a, b) MAD_4INT(a, b); MAD_4INT(a, b); MAD_4INT(a, b); MAD_4INT(a, b);
\n#define MAD_64INT(a, b) MAD_16INT(a, b); MAD_16INT(a, b); MAD_16INT(a, b); MAD_16INT(a, b);
\n
13
// Scalar int variant of the mad24 compute kernel.
//   ptr : global output buffer; each work-item stores one int at index get_global_id(0)
//   _A  : seed for the first operand of the dependency chain
// Each work-item runs 64 passes of MAD_16INT, i.e. 1024 chained scalar mad24 calls.
__kernel void compute_intfast_v1(__global int *ptr, int _A)
{
    const size_t gid = get_global_id(0);

    int va = _A;
    int vb = (int)get_local_id(0);

    for(int pass = 64; pass > 0; --pass)
    {
        MAD_16INT(va, vb);
    }

    // Store the end of the chain (presumably so the compiler cannot discard the work).
    ptr[gid] = vb;
}
26
27
// int2 variant of the mad24 compute kernel.
//   ptr : global output buffer; each work-item stores one int at index get_global_id(0)
//   _A  : seed; the two lanes of the first operand start at _A and _A + 1
// Each work-item runs 32 passes of MAD_16INT on int2 operands, i.e. 512 chained
// vector mad24 calls covering 1024 lane operations.
__kernel void compute_intfast_v2(__global int *ptr, int _A)
{
    const size_t gid = get_global_id(0);

    int2 va = (int2)(_A, _A + 1);
    // Scalar-to-vector cast: every lane starts at the local id.
    int2 vb = (int2)get_local_id(0);

    for(int pass = 32; pass > 0; --pass)
    {
        MAD_16INT(va, vb);
    }

    // Fold both lanes into the single int that is written out.
    ptr[gid] = vb.s0 + vb.s1;
}
40
// int4 variant of the mad24 compute kernel.
//   ptr : global output buffer; each work-item stores one int at index get_global_id(0)
//   _A  : seed; the four lanes of the first operand start at _A .. _A + 3
// Each work-item runs 16 passes of MAD_16INT on int4 operands, i.e. 256 chained
// vector mad24 calls covering 1024 lane operations.
__kernel void compute_intfast_v4(__global int *ptr, int _A)
{
    const size_t gid = get_global_id(0);

    int4 va = (int4)(_A, _A + 1, _A + 2, _A + 3);
    // Scalar-to-vector cast: every lane starts at the local id.
    int4 vb = (int4)get_local_id(0);

    for(int pass = 16; pass > 0; --pass)
    {
        MAD_16INT(va, vb);
    }

    // Fold the lanes left to right into the single int that is written out.
    int total = vb.s0;
    total += vb.s1;
    total += vb.s2;
    total += vb.s3;
    ptr[gid] = total;
}
53
54
// int8 variant of the mad24 compute kernel.
//   ptr : global output buffer; each work-item stores one int at index get_global_id(0)
//   _A  : seed; the eight lanes of the first operand start at _A .. _A + 7
// Each work-item runs 8 passes of MAD_16INT on int8 operands, i.e. 128 chained
// vector mad24 calls covering 1024 lane operations.
__kernel void compute_intfast_v8(__global int *ptr, int _A)
{
    const size_t gid = get_global_id(0);

    int8 va = (int8)(_A, _A + 1, _A + 2, _A + 3, _A + 4, _A + 5, _A + 6, _A + 7);
    // Scalar-to-vector cast: every lane starts at the local id.
    int8 vb = (int8)get_local_id(0);

    for(int pass = 8; pass > 0; --pass)
    {
        MAD_16INT(va, vb);
    }

    // Fold the lanes left to right into the single int that is written out.
    int total = vb.s0;
    total += vb.s1;
    total += vb.s2;
    total += vb.s3;
    total += vb.s4;
    total += vb.s5;
    total += vb.s6;
    total += vb.s7;
    ptr[gid] = total;
}
67
// int16 variant of the mad24 compute kernel.
//   ptr : global output buffer; each work-item stores one int at index get_global_id(0)
//   _A  : seed; the sixteen lanes of the first operand start at _A .. _A + 15
// Each work-item runs 4 passes of MAD_16INT on int16 operands, i.e. 64 chained
// vector mad24 calls covering 1024 lane operations.
__kernel void compute_intfast_v16(__global int *ptr, int _A)
{
    const size_t gid = get_global_id(0);

    int16 va = (int16)(_A,      _A + 1,  _A + 2,  _A + 3,
                       _A + 4,  _A + 5,  _A + 6,  _A + 7,
                       _A + 8,  _A + 9,  _A + 10, _A + 11,
                       _A + 12, _A + 13, _A + 14, _A + 15);
    // Scalar-to-vector cast: every lane starts at the local id.
    int16 vb = (int16)get_local_id(0);

    for(int pass = 4; pass > 0; --pass)
    {
        MAD_16INT(va, vb);
    }

    // Reduce in two steps: add the eight adjacent lane pairs as int2 values
    // from left to right and then add the two remaining lanes.
    int2 pair_sum = vb.s01;
    pair_sum += vb.s23;
    pair_sum += vb.s45;
    pair_sum += vb.s67;
    pair_sum += vb.s89;
    pair_sum += vb.sab;
    pair_sum += vb.scd;
    pair_sum += vb.sef;
    ptr[gid] = pair_sum.s0 + pair_sum.s1;
}
82
83
84)
85