1MSTRINGIFY( 2 3// Stringifying requires a new line after hash defines 4 5\n#if defined(cl_khr_fp16) 6\n #pragma OPENCL EXTENSION cl_khr_fp16 : enable 7\n #define HALF_AVAILABLE 8\n#endif 9 10\n#undef MAD_4 11\n#undef MAD_16 12\n#undef MAD_64 13\n 14\n#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x); 15\n#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); 16\n#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); 17\n 18 19\n 20\n#ifdef HALF_AVAILABLE 21\n 22 23 24__kernel void compute_hp_v1(__global half *ptr, float _B) 25{ 26 half _A = (half)_B; 27 half x = _A; 28 half y = (half)get_local_id(0); 29 30 for(int i=0; i<128; i++) 31 { 32 MAD_16(x, y); 33 } 34 35 ptr[get_global_id(0)] = y; 36} 37 38 39__kernel void compute_hp_v2(__global half *ptr, float _B) 40{ 41 half _A = (half)_B; 42 half2 x = (half2)(_A, (_A+1)); 43 half2 y = (half2)get_local_id(0); 44 45 for(int i=0; i<64; i++) 46 { 47 MAD_16(x, y); 48 } 49 50 ptr[get_global_id(0)] = (y.S0) + (y.S1); 51} 52 53__kernel void compute_hp_v4(__global half *ptr, float _B) 54{ 55 half _A = (half)_B; 56 half4 x = (half4)(_A, (_A+1), (_A+2), (_A+3)); 57 half4 y = (half4)get_local_id(0); 58 59 for(int i=0; i<32; i++) 60 { 61 MAD_16(x, y); 62 } 63 64 ptr[get_global_id(0)] = (y.S0) + (y.S1) + (y.S2) + (y.S3); 65} 66 67 68__kernel void compute_hp_v8(__global half *ptr, float _B) 69{ 70 half _A = (half)_B; 71 half8 x = (half8)(_A, (_A+1), (_A+2), (_A+3), (_A+4), (_A+5), (_A+6), (_A+7)); 72 half8 y = (half8)get_local_id(0); 73 74 for(int i=0; i<16; i++) 75 { 76 MAD_16(x, y); 77 } 78 79 ptr[get_global_id(0)] = (y.S0) + (y.S1) + (y.S2) + (y.S3) + (y.S4) + (y.S5) + (y.S6) + (y.S7); 80} 81 82__kernel void compute_hp_v16(__global half *ptr, float _B) 83{ 84 half _A = (half)_B; 85 half16 x = (half16)(_A, (_A+1), (_A+2), (_A+3), (_A+4), (_A+5), (_A+6), (_A+7), 86 (_A+8), (_A+9), (_A+10), (_A+11), (_A+12), (_A+13), (_A+14), (_A+15)); 87 half16 y = (half16)get_local_id(0); 88 89 for(int i=0; i<8; i++) 90 { 91 MAD_16(x, y); 92 } 93 94 half2 t = (y.S01) + (y.S23) + (y.S45) + (y.S67) + (y.S89) + (y.SAB) + (y.SCD) + (y.SEF); 95 ptr[get_global_id(0)] = t.S0 + t.S1; 96} 97 98\n 99\n#endif // half_AVAILABLE 100\n 101 102) 103