MSTRINGIFY(

// Double-precision mad() throughput kernels.
// Stringifying requires a new line after hash defines, so every
// preprocessor directive below is introduced by an explicit newline escape.

// Enable whichever fp64 extension the device advertises and record that
// doubles can be used.
\n#if defined(cl_khr_fp64)
\n  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
\n  #define DOUBLE_AVAILABLE
\n#elif defined(cl_amd_fp64)
\n  #pragma OPENCL EXTENSION cl_amd_fp64 : enable
\n  #define DOUBLE_AVAILABLE
\n#endif

// Drop any earlier definitions of the helper macros before redefining them.
\n#undef MAD_4
\n#undef MAD_16
\n#undef MAD_64
\n
// MAD_4 issues four mad() calls, each one consuming the result of the
// previous call. MAD_16 and MAD_64 repeat that chain 4 and 16 times.
\n#define MAD_4(a, b)     a = mad(b, a, b);      b = mad(a, b, a);      a = mad(b, a, b);      b = mad(a, b, a);
\n#define MAD_16(a, b)    MAD_4(a, b);        MAD_4(a, b);        MAD_4(a, b);        MAD_4(a, b);
\n#define MAD_64(a, b)    MAD_16(a, b);       MAD_16(a, b);       MAD_16(a, b);       MAD_16(a, b);
\n

\n
\n#ifdef DOUBLE_AVAILABLE
\n

// Scalar variant: 128 rounds of MAD_16 on a pair of doubles.
// ptr receives one double per work-item, indexed by the global id.
// _A    seeds the first operand; the second starts at the local id.
__kernel void compute_dp_v1(__global double *ptr, double _A)
{
    double va = _A;
    double vb = (double)get_local_id(0);

    for (int round = 0; round < 128; ++round)
    {
        MAD_16(va, vb);
    }

    ptr[get_global_id(0)] = vb;
}


// double2 variant: 64 rounds of MAD_16; the two lanes of the second operand
// are summed into the output element of this work-item.
__kernel void compute_dp_v2(__global double *ptr, double _A)
{
    const double lid = (double)get_local_id(0);

    double2 va = (double2)(_A, _A + 1);
    double2 vb = (double2)(lid);

    for (int round = 0; round < 64; ++round)
    {
        MAD_16(va, vb);
    }

    ptr[get_global_id(0)] = vb.s0 + vb.s1;
}

// double4 variant: 32 rounds of MAD_16; the four lanes of the second operand
// are summed into the output element of this work-item.
__kernel void compute_dp_v4(__global double *ptr, double _A)
{
    const double lid = (double)get_local_id(0);

    double4 va = (double4)(_A, _A + 1, _A + 2, _A + 3);
    double4 vb = (double4)(lid);

    for (int round = 0; round < 32; ++round)
    {
        MAD_16(va, vb);
    }

    ptr[get_global_id(0)] = vb.s0 + vb.s1 + vb.s2 + vb.s3;
}


// double8 variant: 16 rounds of MAD_16; the eight lanes of the second operand
// are summed into the output element of this work-item.
__kernel void compute_dp_v8(__global double *ptr, double _A)
{
    const double lid = (double)get_local_id(0);

    double8 va = (double8)(_A,     _A + 1, _A + 2, _A + 3,
                           _A + 4, _A + 5, _A + 6, _A + 7);
    double8 vb = (double8)(lid);

    for (int round = 0; round < 16; ++round)
    {
        MAD_16(va, vb);
    }

    ptr[get_global_id(0)] = vb.s0 + vb.s1 + vb.s2 + vb.s3 + vb.s4 + vb.s5 + vb.s6 + vb.s7;
}

// double16 variant: 8 rounds of MAD_16; the sixteen lanes of the second
// operand are first added pairwise as double2 values, then the two remaining
// lanes are summed into the output element of this work-item.
__kernel void compute_dp_v16(__global double *ptr, double _A)
{
    const double lid = (double)get_local_id(0);

    double16 va = (double16)(_A,      _A + 1,  _A + 2,  _A + 3,
                             _A + 4,  _A + 5,  _A + 6,  _A + 7,
                             _A + 8,  _A + 9,  _A + 10, _A + 11,
                             _A + 12, _A + 13, _A + 14, _A + 15);
    double16 vb = (double16)(lid);

    for (int round = 0; round < 8; ++round)
    {
        MAD_16(va, vb);
    }

    double2 pair = vb.s01 + vb.s23 + vb.s45 + vb.s67 + vb.s89 + vb.sab + vb.scd + vb.sef;
    ptr[get_global_id(0)] = pair.s0 + pair.s1;
}

\n
\n#endif      // DOUBLE_AVAILABLE
\n

)