1*6467f958SSadaf Ebrahimi //
2*6467f958SSadaf Ebrahimi // Copyright (c) 2017 The Khronos Group Inc.
3*6467f958SSadaf Ebrahimi //
4*6467f958SSadaf Ebrahimi // Licensed under the Apache License, Version 2.0 (the "License");
5*6467f958SSadaf Ebrahimi // you may not use this file except in compliance with the License.
6*6467f958SSadaf Ebrahimi // You may obtain a copy of the License at
7*6467f958SSadaf Ebrahimi //
8*6467f958SSadaf Ebrahimi // http://www.apache.org/licenses/LICENSE-2.0
9*6467f958SSadaf Ebrahimi //
10*6467f958SSadaf Ebrahimi // Unless required by applicable law or agreed to in writing, software
11*6467f958SSadaf Ebrahimi // distributed under the License is distributed on an "AS IS" BASIS,
12*6467f958SSadaf Ebrahimi // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*6467f958SSadaf Ebrahimi // See the License for the specific language governing permissions and
14*6467f958SSadaf Ebrahimi // limitations under the License.
15*6467f958SSadaf Ebrahimi //
16*6467f958SSadaf Ebrahimi #include "harness/compat.h"
17*6467f958SSadaf Ebrahimi
18*6467f958SSadaf Ebrahimi #include <string.h>
19*6467f958SSadaf Ebrahimi #include <stdio.h>
20*6467f958SSadaf Ebrahimi
21*6467f958SSadaf Ebrahimi #if !defined(_WIN32)
22*6467f958SSadaf Ebrahimi #include <libgen.h>
23*6467f958SSadaf Ebrahimi #include <sys/param.h>
24*6467f958SSadaf Ebrahimi #endif
25*6467f958SSadaf Ebrahimi
26*6467f958SSadaf Ebrahimi #include "mingw_compat.h"
27*6467f958SSadaf Ebrahimi #if defined (__MINGW32__)
28*6467f958SSadaf Ebrahimi #include <sys/param.h>
29*6467f958SSadaf Ebrahimi #endif
30*6467f958SSadaf Ebrahimi
31*6467f958SSadaf Ebrahimi #include <time.h>
32*6467f958SSadaf Ebrahimi #include "errorHelpers.h"
33*6467f958SSadaf Ebrahimi #include "harness/compat.h"
34*6467f958SSadaf Ebrahimi #include "harness/mt19937.h"
35*6467f958SSadaf Ebrahimi #include "harness/kernelHelpers.h"
36*6467f958SSadaf Ebrahimi #include "harness/rounding_mode.h"
37*6467f958SSadaf Ebrahimi #include "harness/fpcontrol.h"
38*6467f958SSadaf Ebrahimi #include "harness/testHarness.h"
39*6467f958SSadaf Ebrahimi #include "harness/parseParameters.h"
40*6467f958SSadaf Ebrahimi #if defined( __APPLE__ )
41*6467f958SSadaf Ebrahimi #include <sys/sysctl.h>
42*6467f958SSadaf Ebrahimi #endif
43*6467f958SSadaf Ebrahimi #if defined( __linux__ )
44*6467f958SSadaf Ebrahimi #include <unistd.h>
45*6467f958SSadaf Ebrahimi #include <sys/syscall.h>
46*6467f958SSadaf Ebrahimi #include <linux/sysctl.h>
47*6467f958SSadaf Ebrahimi #endif
48*6467f958SSadaf Ebrahimi
49*6467f958SSadaf Ebrahimi #if defined (_WIN32)
50*6467f958SSadaf Ebrahimi #include <string.h>
51*6467f958SSadaf Ebrahimi #endif
52*6467f958SSadaf Ebrahimi
53*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
54*6467f958SSadaf Ebrahimi #include <emmintrin.h>
55*6467f958SSadaf Ebrahimi #endif
56*6467f958SSadaf Ebrahimi
57*6467f958SSadaf Ebrahimi #if defined(__PPC__)
58*6467f958SSadaf Ebrahimi // Global varaiable used to hold the FPU control register state. The FPSCR register can not
59*6467f958SSadaf Ebrahimi // be used because not all Power implementations retain or observed the NI (non-IEEE
60*6467f958SSadaf Ebrahimi // mode) bit.
61*6467f958SSadaf Ebrahimi __thread fpu_control_t fpu_control = 0;
62*6467f958SSadaf Ebrahimi #endif
63*6467f958SSadaf Ebrahimi
64*6467f958SSadaf Ebrahimi #ifndef MAXPATHLEN
65*6467f958SSadaf Ebrahimi #define MAXPATHLEN 2048
66*6467f958SSadaf Ebrahimi #endif
67*6467f958SSadaf Ebrahimi
68*6467f958SSadaf Ebrahimi char appName[ MAXPATHLEN ] = "";
69*6467f958SSadaf Ebrahimi cl_context gContext = NULL;
70*6467f958SSadaf Ebrahimi cl_command_queue gQueue = NULL;
71*6467f958SSadaf Ebrahimi cl_program gProgram[5] = { NULL, NULL, NULL, NULL, NULL };
72*6467f958SSadaf Ebrahimi cl_program gProgram_double[5] = { NULL, NULL, NULL, NULL, NULL };
73*6467f958SSadaf Ebrahimi int gForceFTZ = 0;
74*6467f958SSadaf Ebrahimi int gSeed = 0;
75*6467f958SSadaf Ebrahimi int gSeedSpecified = 0;
76*6467f958SSadaf Ebrahimi int gHasDouble = 0;
77*6467f958SSadaf Ebrahimi MTdata gMTdata = NULL;
78*6467f958SSadaf Ebrahimi int gSkipNanInf = 0;
79*6467f958SSadaf Ebrahimi int gIgnoreZeroSign = 0;
80*6467f958SSadaf Ebrahimi
81*6467f958SSadaf Ebrahimi cl_mem bufA = NULL;
82*6467f958SSadaf Ebrahimi cl_mem bufB = NULL;
83*6467f958SSadaf Ebrahimi cl_mem bufC = NULL;
84*6467f958SSadaf Ebrahimi cl_mem bufD = NULL;
85*6467f958SSadaf Ebrahimi cl_mem bufE = NULL;
86*6467f958SSadaf Ebrahimi cl_mem bufC_double = NULL;
87*6467f958SSadaf Ebrahimi cl_mem bufD_double = NULL;
88*6467f958SSadaf Ebrahimi float *buf1, *buf2, *buf3, *buf4, *buf5, *buf6;
89*6467f958SSadaf Ebrahimi float *correct[8];
90*6467f958SSadaf Ebrahimi int *skipTest[8];
91*6467f958SSadaf Ebrahimi
92*6467f958SSadaf Ebrahimi double *buf3_double, *buf4_double, *buf5_double, *buf6_double;
93*6467f958SSadaf Ebrahimi double *correct_double[8];
94*6467f958SSadaf Ebrahimi
95*6467f958SSadaf Ebrahimi static const char **gArgList;
96*6467f958SSadaf Ebrahimi static size_t gArgCount;
97*6467f958SSadaf Ebrahimi
98*6467f958SSadaf Ebrahimi #define BUFFER_SIZE (1024*1024)
99*6467f958SSadaf Ebrahimi
100*6467f958SSadaf Ebrahimi
101*6467f958SSadaf Ebrahimi static int ParseArgs( int argc, const char **argv );
102*6467f958SSadaf Ebrahimi static void PrintUsage( void );
103*6467f958SSadaf Ebrahimi test_status InitCL( cl_device_id device );
104*6467f958SSadaf Ebrahimi static void ReleaseCL( void );
105*6467f958SSadaf Ebrahimi static int RunTest( int testNumber );
106*6467f958SSadaf Ebrahimi static int RunTest_Double( int testNumber );
107*6467f958SSadaf Ebrahimi
108*6467f958SSadaf Ebrahimi #if defined(__ANDROID__)
109*6467f958SSadaf Ebrahimi #define nanf( X ) strtof( "NAN", ( char ** ) NULL )
110*6467f958SSadaf Ebrahimi #define nan( X ) strtod( "NAN", ( char ** ) NULL )
111*6467f958SSadaf Ebrahimi #endif
112*6467f958SSadaf Ebrahimi
113*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
114*6467f958SSadaf Ebrahimi // defeat x87 on MSVC
sse_add(float x,float y)115*6467f958SSadaf Ebrahimi float sse_add(float x, float y)
116*6467f958SSadaf Ebrahimi {
117*6467f958SSadaf Ebrahimi volatile float a = x;
118*6467f958SSadaf Ebrahimi volatile float b = y;
119*6467f958SSadaf Ebrahimi
120*6467f958SSadaf Ebrahimi // defeat x87
121*6467f958SSadaf Ebrahimi __m128 va = _mm_set_ss( (float) a );
122*6467f958SSadaf Ebrahimi __m128 vb = _mm_set_ss( (float) b );
123*6467f958SSadaf Ebrahimi va = _mm_add_ss( va, vb );
124*6467f958SSadaf Ebrahimi _mm_store_ss( (float*) &a, va );
125*6467f958SSadaf Ebrahimi return a;
126*6467f958SSadaf Ebrahimi }
127*6467f958SSadaf Ebrahimi
sse_add_sd(double x,double y)128*6467f958SSadaf Ebrahimi double sse_add_sd(double x, double y)
129*6467f958SSadaf Ebrahimi {
130*6467f958SSadaf Ebrahimi volatile double a = x;
131*6467f958SSadaf Ebrahimi volatile double b = y;
132*6467f958SSadaf Ebrahimi
133*6467f958SSadaf Ebrahimi // defeat x87
134*6467f958SSadaf Ebrahimi __m128d va = _mm_set_sd( (double) a );
135*6467f958SSadaf Ebrahimi __m128d vb = _mm_set_sd( (double) b );
136*6467f958SSadaf Ebrahimi va = _mm_add_sd( va, vb );
137*6467f958SSadaf Ebrahimi _mm_store_sd( (double*) &a, va );
138*6467f958SSadaf Ebrahimi return a;
139*6467f958SSadaf Ebrahimi }
140*6467f958SSadaf Ebrahimi
sse_sub(float x,float y)141*6467f958SSadaf Ebrahimi float sse_sub(float x, float y)
142*6467f958SSadaf Ebrahimi {
143*6467f958SSadaf Ebrahimi volatile float a = x;
144*6467f958SSadaf Ebrahimi volatile float b = y;
145*6467f958SSadaf Ebrahimi
146*6467f958SSadaf Ebrahimi // defeat x87
147*6467f958SSadaf Ebrahimi __m128 va = _mm_set_ss( (float) a );
148*6467f958SSadaf Ebrahimi __m128 vb = _mm_set_ss( (float) b );
149*6467f958SSadaf Ebrahimi va = _mm_sub_ss( va, vb );
150*6467f958SSadaf Ebrahimi _mm_store_ss( (float*) &a, va );
151*6467f958SSadaf Ebrahimi return a;
152*6467f958SSadaf Ebrahimi }
153*6467f958SSadaf Ebrahimi
sse_sub_sd(double x,double y)154*6467f958SSadaf Ebrahimi double sse_sub_sd(double x, double y)
155*6467f958SSadaf Ebrahimi {
156*6467f958SSadaf Ebrahimi volatile double a = x;
157*6467f958SSadaf Ebrahimi volatile double b = y;
158*6467f958SSadaf Ebrahimi
159*6467f958SSadaf Ebrahimi // defeat x87
160*6467f958SSadaf Ebrahimi __m128d va = _mm_set_sd( (double) a );
161*6467f958SSadaf Ebrahimi __m128d vb = _mm_set_sd( (double) b );
162*6467f958SSadaf Ebrahimi va = _mm_sub_sd( va, vb );
163*6467f958SSadaf Ebrahimi _mm_store_sd( (double*) &a, va );
164*6467f958SSadaf Ebrahimi return a;
165*6467f958SSadaf Ebrahimi }
166*6467f958SSadaf Ebrahimi
sse_mul(float x,float y)167*6467f958SSadaf Ebrahimi float sse_mul(float x, float y)
168*6467f958SSadaf Ebrahimi {
169*6467f958SSadaf Ebrahimi volatile float a = x;
170*6467f958SSadaf Ebrahimi volatile float b = y;
171*6467f958SSadaf Ebrahimi
172*6467f958SSadaf Ebrahimi // defeat x87
173*6467f958SSadaf Ebrahimi __m128 va = _mm_set_ss( (float) a );
174*6467f958SSadaf Ebrahimi __m128 vb = _mm_set_ss( (float) b );
175*6467f958SSadaf Ebrahimi va = _mm_mul_ss( va, vb );
176*6467f958SSadaf Ebrahimi _mm_store_ss( (float*) &a, va );
177*6467f958SSadaf Ebrahimi return a;
178*6467f958SSadaf Ebrahimi }
179*6467f958SSadaf Ebrahimi
sse_mul_sd(double x,double y)180*6467f958SSadaf Ebrahimi double sse_mul_sd(double x, double y)
181*6467f958SSadaf Ebrahimi {
182*6467f958SSadaf Ebrahimi volatile double a = x;
183*6467f958SSadaf Ebrahimi volatile double b = y;
184*6467f958SSadaf Ebrahimi
185*6467f958SSadaf Ebrahimi // defeat x87
186*6467f958SSadaf Ebrahimi __m128d va = _mm_set_sd( (double) a );
187*6467f958SSadaf Ebrahimi __m128d vb = _mm_set_sd( (double) b );
188*6467f958SSadaf Ebrahimi va = _mm_mul_sd( va, vb );
189*6467f958SSadaf Ebrahimi _mm_store_sd( (double*) &a, va );
190*6467f958SSadaf Ebrahimi return a;
191*6467f958SSadaf Ebrahimi }
192*6467f958SSadaf Ebrahimi #endif
193*6467f958SSadaf Ebrahimi
194*6467f958SSadaf Ebrahimi #ifdef __PPC__
ppc_mul(float a,float b)195*6467f958SSadaf Ebrahimi float ppc_mul(float a, float b)
196*6467f958SSadaf Ebrahimi {
197*6467f958SSadaf Ebrahimi float p;
198*6467f958SSadaf Ebrahimi
199*6467f958SSadaf Ebrahimi if (gForceFTZ) {
200*6467f958SSadaf Ebrahimi // Flush input a to zero if it is sub-normal
201*6467f958SSadaf Ebrahimi if (fabsf(a) < FLT_MIN) {
202*6467f958SSadaf Ebrahimi a = copysignf(0.0, a);
203*6467f958SSadaf Ebrahimi }
204*6467f958SSadaf Ebrahimi // Flush input b to zero if it is sub-normal
205*6467f958SSadaf Ebrahimi if (fabsf(b) < FLT_MIN) {
206*6467f958SSadaf Ebrahimi b = copysignf(0.0, b);
207*6467f958SSadaf Ebrahimi }
208*6467f958SSadaf Ebrahimi // Perform multiply
209*6467f958SSadaf Ebrahimi p = a * b;
210*6467f958SSadaf Ebrahimi // Flush the product if it is a sub-normal
211*6467f958SSadaf Ebrahimi if (fabs((double)a * (double)b) < FLT_MIN) {
212*6467f958SSadaf Ebrahimi p = copysignf(0.0, p);
213*6467f958SSadaf Ebrahimi }
214*6467f958SSadaf Ebrahimi } else {
215*6467f958SSadaf Ebrahimi p = a * b;
216*6467f958SSadaf Ebrahimi }
217*6467f958SSadaf Ebrahimi return p;
218*6467f958SSadaf Ebrahimi }
219*6467f958SSadaf Ebrahimi #endif
220*6467f958SSadaf Ebrahimi
test_contractions_float_0(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)221*6467f958SSadaf Ebrahimi int test_contractions_float_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
222*6467f958SSadaf Ebrahimi {
223*6467f958SSadaf Ebrahimi return RunTest(0);
224*6467f958SSadaf Ebrahimi }
225*6467f958SSadaf Ebrahimi
test_contractions_float_1(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)226*6467f958SSadaf Ebrahimi int test_contractions_float_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
227*6467f958SSadaf Ebrahimi {
228*6467f958SSadaf Ebrahimi return RunTest(1);
229*6467f958SSadaf Ebrahimi }
230*6467f958SSadaf Ebrahimi
test_contractions_float_2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)231*6467f958SSadaf Ebrahimi int test_contractions_float_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
232*6467f958SSadaf Ebrahimi {
233*6467f958SSadaf Ebrahimi return RunTest(2);
234*6467f958SSadaf Ebrahimi }
235*6467f958SSadaf Ebrahimi
test_contractions_float_3(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)236*6467f958SSadaf Ebrahimi int test_contractions_float_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
237*6467f958SSadaf Ebrahimi {
238*6467f958SSadaf Ebrahimi return RunTest(3);
239*6467f958SSadaf Ebrahimi }
240*6467f958SSadaf Ebrahimi
test_contractions_float_4(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)241*6467f958SSadaf Ebrahimi int test_contractions_float_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
242*6467f958SSadaf Ebrahimi {
243*6467f958SSadaf Ebrahimi return RunTest(4);
244*6467f958SSadaf Ebrahimi }
245*6467f958SSadaf Ebrahimi
test_contractions_float_5(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)246*6467f958SSadaf Ebrahimi int test_contractions_float_5(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
247*6467f958SSadaf Ebrahimi {
248*6467f958SSadaf Ebrahimi return RunTest(5);
249*6467f958SSadaf Ebrahimi }
250*6467f958SSadaf Ebrahimi
test_contractions_float_6(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)251*6467f958SSadaf Ebrahimi int test_contractions_float_6(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
252*6467f958SSadaf Ebrahimi {
253*6467f958SSadaf Ebrahimi return RunTest(6);
254*6467f958SSadaf Ebrahimi }
255*6467f958SSadaf Ebrahimi
test_contractions_float_7(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)256*6467f958SSadaf Ebrahimi int test_contractions_float_7(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
257*6467f958SSadaf Ebrahimi {
258*6467f958SSadaf Ebrahimi return RunTest(7);
259*6467f958SSadaf Ebrahimi }
260*6467f958SSadaf Ebrahimi
test_contractions_double_0(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)261*6467f958SSadaf Ebrahimi int test_contractions_double_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
262*6467f958SSadaf Ebrahimi {
263*6467f958SSadaf Ebrahimi return RunTest_Double(0);
264*6467f958SSadaf Ebrahimi }
265*6467f958SSadaf Ebrahimi
test_contractions_double_1(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)266*6467f958SSadaf Ebrahimi int test_contractions_double_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
267*6467f958SSadaf Ebrahimi {
268*6467f958SSadaf Ebrahimi return RunTest_Double(1);
269*6467f958SSadaf Ebrahimi }
270*6467f958SSadaf Ebrahimi
test_contractions_double_2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)271*6467f958SSadaf Ebrahimi int test_contractions_double_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
272*6467f958SSadaf Ebrahimi {
273*6467f958SSadaf Ebrahimi return RunTest_Double(2);
274*6467f958SSadaf Ebrahimi }
275*6467f958SSadaf Ebrahimi
test_contractions_double_3(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)276*6467f958SSadaf Ebrahimi int test_contractions_double_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
277*6467f958SSadaf Ebrahimi {
278*6467f958SSadaf Ebrahimi return RunTest_Double(3);
279*6467f958SSadaf Ebrahimi }
280*6467f958SSadaf Ebrahimi
test_contractions_double_4(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)281*6467f958SSadaf Ebrahimi int test_contractions_double_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
282*6467f958SSadaf Ebrahimi {
283*6467f958SSadaf Ebrahimi return RunTest_Double(4);
284*6467f958SSadaf Ebrahimi }
285*6467f958SSadaf Ebrahimi
test_contractions_double_5(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)286*6467f958SSadaf Ebrahimi int test_contractions_double_5(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
287*6467f958SSadaf Ebrahimi {
288*6467f958SSadaf Ebrahimi return RunTest_Double(5);
289*6467f958SSadaf Ebrahimi }
290*6467f958SSadaf Ebrahimi
test_contractions_double_6(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)291*6467f958SSadaf Ebrahimi int test_contractions_double_6(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
292*6467f958SSadaf Ebrahimi {
293*6467f958SSadaf Ebrahimi return RunTest_Double(6);
294*6467f958SSadaf Ebrahimi }
295*6467f958SSadaf Ebrahimi
test_contractions_double_7(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)296*6467f958SSadaf Ebrahimi int test_contractions_double_7(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
297*6467f958SSadaf Ebrahimi {
298*6467f958SSadaf Ebrahimi return RunTest_Double(7);
299*6467f958SSadaf Ebrahimi }
300*6467f958SSadaf Ebrahimi
301*6467f958SSadaf Ebrahimi test_definition test_list[] = {
302*6467f958SSadaf Ebrahimi ADD_TEST( contractions_float_0 ),
303*6467f958SSadaf Ebrahimi ADD_TEST( contractions_float_1 ),
304*6467f958SSadaf Ebrahimi ADD_TEST( contractions_float_2 ),
305*6467f958SSadaf Ebrahimi ADD_TEST( contractions_float_3 ),
306*6467f958SSadaf Ebrahimi ADD_TEST( contractions_float_4 ),
307*6467f958SSadaf Ebrahimi ADD_TEST( contractions_float_5 ),
308*6467f958SSadaf Ebrahimi ADD_TEST( contractions_float_6 ),
309*6467f958SSadaf Ebrahimi ADD_TEST( contractions_float_7 ),
310*6467f958SSadaf Ebrahimi ADD_TEST( contractions_double_0 ),
311*6467f958SSadaf Ebrahimi ADD_TEST( contractions_double_1 ),
312*6467f958SSadaf Ebrahimi ADD_TEST( contractions_double_2 ),
313*6467f958SSadaf Ebrahimi ADD_TEST( contractions_double_3 ),
314*6467f958SSadaf Ebrahimi ADD_TEST( contractions_double_4 ),
315*6467f958SSadaf Ebrahimi ADD_TEST( contractions_double_5 ),
316*6467f958SSadaf Ebrahimi ADD_TEST( contractions_double_6 ),
317*6467f958SSadaf Ebrahimi ADD_TEST( contractions_double_7 ),
318*6467f958SSadaf Ebrahimi };
319*6467f958SSadaf Ebrahimi
320*6467f958SSadaf Ebrahimi const int test_num = ARRAY_SIZE( test_list );
321*6467f958SSadaf Ebrahimi
main(int argc,const char ** argv)322*6467f958SSadaf Ebrahimi int main( int argc, const char **argv )
323*6467f958SSadaf Ebrahimi {
324*6467f958SSadaf Ebrahimi argc = parseCustomParam(argc, argv);
325*6467f958SSadaf Ebrahimi if (argc == -1)
326*6467f958SSadaf Ebrahimi {
327*6467f958SSadaf Ebrahimi return -1;
328*6467f958SSadaf Ebrahimi }
329*6467f958SSadaf Ebrahimi
330*6467f958SSadaf Ebrahimi int error = ParseArgs( argc, argv );
331*6467f958SSadaf Ebrahimi
332*6467f958SSadaf Ebrahimi if( !error )
333*6467f958SSadaf Ebrahimi {
334*6467f958SSadaf Ebrahimi error = runTestHarnessWithCheck( gArgCount, gArgList, test_num, test_list, true, 0, InitCL );
335*6467f958SSadaf Ebrahimi }
336*6467f958SSadaf Ebrahimi
337*6467f958SSadaf Ebrahimi if( gQueue )
338*6467f958SSadaf Ebrahimi {
339*6467f958SSadaf Ebrahimi int flush_error = clFinish( gQueue );
340*6467f958SSadaf Ebrahimi if( flush_error )
341*6467f958SSadaf Ebrahimi log_error( "clFinish failed: %d\n", flush_error );
342*6467f958SSadaf Ebrahimi }
343*6467f958SSadaf Ebrahimi
344*6467f958SSadaf Ebrahimi ReleaseCL();
345*6467f958SSadaf Ebrahimi free( gArgList );
346*6467f958SSadaf Ebrahimi
347*6467f958SSadaf Ebrahimi return error;
348*6467f958SSadaf Ebrahimi }
349*6467f958SSadaf Ebrahimi
350*6467f958SSadaf Ebrahimi
351*6467f958SSadaf Ebrahimi
ParseArgs(int argc,const char ** argv)352*6467f958SSadaf Ebrahimi static int ParseArgs( int argc, const char **argv )
353*6467f958SSadaf Ebrahimi {
354*6467f958SSadaf Ebrahimi gArgList = (const char **)calloc( argc, sizeof( char*) );
355*6467f958SSadaf Ebrahimi
356*6467f958SSadaf Ebrahimi if( NULL == gArgList )
357*6467f958SSadaf Ebrahimi {
358*6467f958SSadaf Ebrahimi vlog_error( "Failed to allocate memory for argList\n" );
359*6467f958SSadaf Ebrahimi return 1;
360*6467f958SSadaf Ebrahimi }
361*6467f958SSadaf Ebrahimi
362*6467f958SSadaf Ebrahimi gArgList[0] = argv[0];
363*6467f958SSadaf Ebrahimi gArgCount = 1;
364*6467f958SSadaf Ebrahimi
365*6467f958SSadaf Ebrahimi int length_of_seed = 0;
366*6467f958SSadaf Ebrahimi
367*6467f958SSadaf Ebrahimi { // Extract the app name
368*6467f958SSadaf Ebrahimi strncpy( appName, argv[0], MAXPATHLEN );
369*6467f958SSadaf Ebrahimi
370*6467f958SSadaf Ebrahimi #if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__))
371*6467f958SSadaf Ebrahimi char baseName[MAXPATHLEN];
372*6467f958SSadaf Ebrahimi char *base = NULL;
373*6467f958SSadaf Ebrahimi strncpy( baseName, argv[0], MAXPATHLEN );
374*6467f958SSadaf Ebrahimi base = basename( baseName );
375*6467f958SSadaf Ebrahimi if( NULL != base )
376*6467f958SSadaf Ebrahimi {
377*6467f958SSadaf Ebrahimi strncpy( appName, base, sizeof( appName ) );
378*6467f958SSadaf Ebrahimi appName[ sizeof( appName ) -1 ] = '\0';
379*6467f958SSadaf Ebrahimi }
380*6467f958SSadaf Ebrahimi #elif defined (_WIN32)
381*6467f958SSadaf Ebrahimi char fname[_MAX_FNAME + _MAX_EXT + 1];
382*6467f958SSadaf Ebrahimi char ext[_MAX_EXT];
383*6467f958SSadaf Ebrahimi
384*6467f958SSadaf Ebrahimi errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0,
385*6467f958SSadaf Ebrahimi fname, _MAX_FNAME, ext, _MAX_EXT );
386*6467f958SSadaf Ebrahimi if (err == 0) { // no error
387*6467f958SSadaf Ebrahimi strcat (fname, ext); //just cat them, size of frame can keep both
388*6467f958SSadaf Ebrahimi strncpy (appName, fname, sizeof(appName));
389*6467f958SSadaf Ebrahimi appName[ sizeof( appName ) -1 ] = '\0';
390*6467f958SSadaf Ebrahimi }
391*6467f958SSadaf Ebrahimi #endif
392*6467f958SSadaf Ebrahimi }
393*6467f958SSadaf Ebrahimi
394*6467f958SSadaf Ebrahimi for( int i = 1; i < argc; i++ )
395*6467f958SSadaf Ebrahimi {
396*6467f958SSadaf Ebrahimi const char *arg = argv[i];
397*6467f958SSadaf Ebrahimi if( NULL == arg )
398*6467f958SSadaf Ebrahimi break;
399*6467f958SSadaf Ebrahimi
400*6467f958SSadaf Ebrahimi if( arg[0] == '-' )
401*6467f958SSadaf Ebrahimi {
402*6467f958SSadaf Ebrahimi while( arg[1] != '\0' )
403*6467f958SSadaf Ebrahimi {
404*6467f958SSadaf Ebrahimi arg++;
405*6467f958SSadaf Ebrahimi switch( *arg )
406*6467f958SSadaf Ebrahimi {
407*6467f958SSadaf Ebrahimi case 'h':
408*6467f958SSadaf Ebrahimi PrintUsage();
409*6467f958SSadaf Ebrahimi return -1;
410*6467f958SSadaf Ebrahimi
411*6467f958SSadaf Ebrahimi case 's':
412*6467f958SSadaf Ebrahimi arg++;
413*6467f958SSadaf Ebrahimi gSeed = atoi( arg );
414*6467f958SSadaf Ebrahimi while (arg[length_of_seed] >='0' && arg[length_of_seed]<='9')
415*6467f958SSadaf Ebrahimi length_of_seed++;
416*6467f958SSadaf Ebrahimi gSeedSpecified = 1;
417*6467f958SSadaf Ebrahimi arg+=length_of_seed-1;
418*6467f958SSadaf Ebrahimi break;
419*6467f958SSadaf Ebrahimi
420*6467f958SSadaf Ebrahimi case 'z':
421*6467f958SSadaf Ebrahimi gForceFTZ ^= 1;
422*6467f958SSadaf Ebrahimi break;
423*6467f958SSadaf Ebrahimi
424*6467f958SSadaf Ebrahimi default:
425*6467f958SSadaf Ebrahimi vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
426*6467f958SSadaf Ebrahimi PrintUsage();
427*6467f958SSadaf Ebrahimi return -1;
428*6467f958SSadaf Ebrahimi }
429*6467f958SSadaf Ebrahimi }
430*6467f958SSadaf Ebrahimi }
431*6467f958SSadaf Ebrahimi else
432*6467f958SSadaf Ebrahimi {
433*6467f958SSadaf Ebrahimi gArgList[gArgCount] = arg;
434*6467f958SSadaf Ebrahimi gArgCount++;
435*6467f958SSadaf Ebrahimi }
436*6467f958SSadaf Ebrahimi }
437*6467f958SSadaf Ebrahimi
438*6467f958SSadaf Ebrahimi PrintArch();
439*6467f958SSadaf Ebrahimi
440*6467f958SSadaf Ebrahimi return 0;
441*6467f958SSadaf Ebrahimi }
442*6467f958SSadaf Ebrahimi
PrintUsage(void)443*6467f958SSadaf Ebrahimi static void PrintUsage( void )
444*6467f958SSadaf Ebrahimi {
445*6467f958SSadaf Ebrahimi vlog( "%s [-z]: <optional: test names>\n", appName );
446*6467f958SSadaf Ebrahimi vlog( "\tOptions:\n" );
447*6467f958SSadaf Ebrahimi vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" );
448*6467f958SSadaf Ebrahimi vlog( "\t\t-sNUMBER set random seed.\n");
449*6467f958SSadaf Ebrahimi vlog( "\n" );
450*6467f958SSadaf Ebrahimi vlog( "\tTest names:\n" );
451*6467f958SSadaf Ebrahimi for( int i = 0; i < test_num; i++ )
452*6467f958SSadaf Ebrahimi {
453*6467f958SSadaf Ebrahimi vlog( "\t\t%s\n", test_list[i].name );
454*6467f958SSadaf Ebrahimi }
455*6467f958SSadaf Ebrahimi }
456*6467f958SSadaf Ebrahimi
457*6467f958SSadaf Ebrahimi const char *sizeNames[] = { "float", "float2", "float4", "float8", "float16" };
458*6467f958SSadaf Ebrahimi const char *sizeNames_double[] = { "double", "double2", "double4", "double8", "double16" };
459*6467f958SSadaf Ebrahimi
InitCL(cl_device_id device)460*6467f958SSadaf Ebrahimi test_status InitCL( cl_device_id device )
461*6467f958SSadaf Ebrahimi {
462*6467f958SSadaf Ebrahimi int error;
463*6467f958SSadaf Ebrahimi uint32_t i, j;
464*6467f958SSadaf Ebrahimi int *bufSkip = NULL;
465*6467f958SSadaf Ebrahimi int isRTZ = 0;
466*6467f958SSadaf Ebrahimi RoundingMode oldRoundMode = kDefaultRoundingMode;
467*6467f958SSadaf Ebrahimi
468*6467f958SSadaf Ebrahimi cl_device_fp_config floatCapabilities = 0;
469*6467f958SSadaf Ebrahimi if( (error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL)))
470*6467f958SSadaf Ebrahimi floatCapabilities = 0;
471*6467f958SSadaf Ebrahimi if(0 == (CL_FP_DENORM & floatCapabilities) )
472*6467f958SSadaf Ebrahimi gForceFTZ ^= 1;
473*6467f958SSadaf Ebrahimi
474*6467f958SSadaf Ebrahimi // check for cl_khr_fp64
475*6467f958SSadaf Ebrahimi gHasDouble = is_extension_available(device, "cl_khr_fp64" );
476*6467f958SSadaf Ebrahimi
477*6467f958SSadaf Ebrahimi if(0 == (CL_FP_INF_NAN & floatCapabilities) )
478*6467f958SSadaf Ebrahimi gSkipNanInf = 1;
479*6467f958SSadaf Ebrahimi
480*6467f958SSadaf Ebrahimi // Embedded devices that flush to zero are allowed to have an undefined sign.
481*6467f958SSadaf Ebrahimi if (gIsEmbedded && gForceFTZ)
482*6467f958SSadaf Ebrahimi gIgnoreZeroSign = 1;
483*6467f958SSadaf Ebrahimi
484*6467f958SSadaf Ebrahimi gContext = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
485*6467f958SSadaf Ebrahimi if( NULL == gContext || error )
486*6467f958SSadaf Ebrahimi {
487*6467f958SSadaf Ebrahimi vlog_error( "clCreateDeviceGroup failed. %d\n", error );
488*6467f958SSadaf Ebrahimi return TEST_FAIL;
489*6467f958SSadaf Ebrahimi }
490*6467f958SSadaf Ebrahimi
491*6467f958SSadaf Ebrahimi gQueue = clCreateCommandQueue( gContext, device, 0, &error );
492*6467f958SSadaf Ebrahimi if( NULL == gQueue || error )
493*6467f958SSadaf Ebrahimi {
494*6467f958SSadaf Ebrahimi vlog_error( "clCreateContext failed. %d\n", error );
495*6467f958SSadaf Ebrahimi return TEST_FAIL;
496*6467f958SSadaf Ebrahimi }
497*6467f958SSadaf Ebrahimi
498*6467f958SSadaf Ebrahimi // setup input buffers
499*6467f958SSadaf Ebrahimi bufA = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
500*6467f958SSadaf Ebrahimi bufB = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
501*6467f958SSadaf Ebrahimi bufC = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
502*6467f958SSadaf Ebrahimi bufD = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
503*6467f958SSadaf Ebrahimi bufE = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
504*6467f958SSadaf Ebrahimi
505*6467f958SSadaf Ebrahimi if( bufA == NULL ||
506*6467f958SSadaf Ebrahimi bufB == NULL ||
507*6467f958SSadaf Ebrahimi bufC == NULL ||
508*6467f958SSadaf Ebrahimi bufD == NULL ||
509*6467f958SSadaf Ebrahimi bufE == NULL )
510*6467f958SSadaf Ebrahimi {
511*6467f958SSadaf Ebrahimi vlog_error( "clCreateArray failed for input\n" );
512*6467f958SSadaf Ebrahimi return TEST_FAIL;
513*6467f958SSadaf Ebrahimi }
514*6467f958SSadaf Ebrahimi
515*6467f958SSadaf Ebrahimi if( gHasDouble )
516*6467f958SSadaf Ebrahimi {
517*6467f958SSadaf Ebrahimi bufC_double = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
518*6467f958SSadaf Ebrahimi bufD_double = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
519*6467f958SSadaf Ebrahimi if( bufC_double == NULL ||
520*6467f958SSadaf Ebrahimi bufD_double == NULL )
521*6467f958SSadaf Ebrahimi {
522*6467f958SSadaf Ebrahimi vlog_error( "clCreateArray failed for input DP\n" );
523*6467f958SSadaf Ebrahimi return TEST_FAIL;
524*6467f958SSadaf Ebrahimi }
525*6467f958SSadaf Ebrahimi }
526*6467f958SSadaf Ebrahimi
527*6467f958SSadaf Ebrahimi const char *kernels[] = {
528*6467f958SSadaf Ebrahimi "", "#pragma OPENCL FP_CONTRACT OFF\n"
529*6467f958SSadaf Ebrahimi "__kernel void kernel1( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
530*6467f958SSadaf Ebrahimi "{\n"
531*6467f958SSadaf Ebrahimi " int i = get_global_id(0);\n"
532*6467f958SSadaf Ebrahimi " out[i] = a[i] * b[i] + c[i];\n"
533*6467f958SSadaf Ebrahimi "}\n"
534*6467f958SSadaf Ebrahimi "\n"
535*6467f958SSadaf Ebrahimi "__kernel void kernel2( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
536*6467f958SSadaf Ebrahimi "{\n"
537*6467f958SSadaf Ebrahimi " int i = get_global_id(0);\n"
538*6467f958SSadaf Ebrahimi " out[i] = a[i] * b[i] - c[i];\n"
539*6467f958SSadaf Ebrahimi "}\n"
540*6467f958SSadaf Ebrahimi "\n"
541*6467f958SSadaf Ebrahimi "__kernel void kernel3( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
542*6467f958SSadaf Ebrahimi "{\n"
543*6467f958SSadaf Ebrahimi " int i = get_global_id(0);\n"
544*6467f958SSadaf Ebrahimi " out[i] = c[i] + a[i] * b[i];\n"
545*6467f958SSadaf Ebrahimi "}\n"
546*6467f958SSadaf Ebrahimi "\n"
547*6467f958SSadaf Ebrahimi "__kernel void kernel4( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
548*6467f958SSadaf Ebrahimi "{\n"
549*6467f958SSadaf Ebrahimi " int i = get_global_id(0);\n"
550*6467f958SSadaf Ebrahimi " out[i] = c[i] - a[i] * b[i];\n"
551*6467f958SSadaf Ebrahimi "}\n"
552*6467f958SSadaf Ebrahimi "\n"
553*6467f958SSadaf Ebrahimi "__kernel void kernel5( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
554*6467f958SSadaf Ebrahimi "{\n"
555*6467f958SSadaf Ebrahimi " int i = get_global_id(0);\n"
556*6467f958SSadaf Ebrahimi " out[i] = -(a[i] * b[i] + c[i]);\n"
557*6467f958SSadaf Ebrahimi "}\n"
558*6467f958SSadaf Ebrahimi "\n"
559*6467f958SSadaf Ebrahimi "__kernel void kernel6( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
560*6467f958SSadaf Ebrahimi "{\n"
561*6467f958SSadaf Ebrahimi " int i = get_global_id(0);\n"
562*6467f958SSadaf Ebrahimi " out[i] = -(a[i] * b[i] - c[i]);\n"
563*6467f958SSadaf Ebrahimi "}\n"
564*6467f958SSadaf Ebrahimi "\n"
565*6467f958SSadaf Ebrahimi "__kernel void kernel7( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
566*6467f958SSadaf Ebrahimi "{\n"
567*6467f958SSadaf Ebrahimi " int i = get_global_id(0);\n"
568*6467f958SSadaf Ebrahimi " out[i] = -(c[i] + a[i] * b[i]);\n"
569*6467f958SSadaf Ebrahimi "}\n"
570*6467f958SSadaf Ebrahimi "\n"
571*6467f958SSadaf Ebrahimi "__kernel void kernel8( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
572*6467f958SSadaf Ebrahimi "{\n"
573*6467f958SSadaf Ebrahimi " int i = get_global_id(0);\n"
574*6467f958SSadaf Ebrahimi " out[i] = -(c[i] - a[i] * b[i]);\n"
575*6467f958SSadaf Ebrahimi "}\n"
576*6467f958SSadaf Ebrahimi "\n" };
577*6467f958SSadaf Ebrahimi
578*6467f958SSadaf Ebrahimi for (i = 0; i < sizeof(sizeNames) / sizeof(sizeNames[0]); i++)
579*6467f958SSadaf Ebrahimi {
580*6467f958SSadaf Ebrahimi size_t strCount = sizeof(kernels) / sizeof(kernels[0]);
581*6467f958SSadaf Ebrahimi kernels[0] = "";
582*6467f958SSadaf Ebrahimi
583*6467f958SSadaf Ebrahimi for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames[i];
584*6467f958SSadaf Ebrahimi error = create_single_kernel_helper(gContext, &gProgram[i], nullptr,
585*6467f958SSadaf Ebrahimi strCount, kernels, nullptr);
586*6467f958SSadaf Ebrahimi if (CL_SUCCESS != error || nullptr == gProgram[i])
587*6467f958SSadaf Ebrahimi {
588*6467f958SSadaf Ebrahimi log_error("Error: Unable to create test program! (%s) (in %s:%d)\n",
589*6467f958SSadaf Ebrahimi IGetErrorString(error), __FILE__, __LINE__);
590*6467f958SSadaf Ebrahimi return TEST_FAIL;
591*6467f958SSadaf Ebrahimi }
592*6467f958SSadaf Ebrahimi }
593*6467f958SSadaf Ebrahimi
594*6467f958SSadaf Ebrahimi if (gHasDouble)
595*6467f958SSadaf Ebrahimi {
596*6467f958SSadaf Ebrahimi kernels[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
597*6467f958SSadaf Ebrahimi for (i = 0; i < sizeof(sizeNames_double) / sizeof(sizeNames_double[0]);
598*6467f958SSadaf Ebrahimi i++)
599*6467f958SSadaf Ebrahimi {
600*6467f958SSadaf Ebrahimi size_t strCount = sizeof(kernels) / sizeof(kernels[0]);
601*6467f958SSadaf Ebrahimi
602*6467f958SSadaf Ebrahimi for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames_double[i];
603*6467f958SSadaf Ebrahimi error = create_single_kernel_helper(gContext, &gProgram_double[i],
604*6467f958SSadaf Ebrahimi nullptr, strCount, kernels,
605*6467f958SSadaf Ebrahimi nullptr);
606*6467f958SSadaf Ebrahimi if (CL_SUCCESS != error || nullptr == gProgram_double[i])
607*6467f958SSadaf Ebrahimi {
608*6467f958SSadaf Ebrahimi log_error(
609*6467f958SSadaf Ebrahimi "Error: Unable to create test program! (%s) (in %s:%d)\n",
610*6467f958SSadaf Ebrahimi IGetErrorString(error), __FILE__, __LINE__);
611*6467f958SSadaf Ebrahimi return TEST_FAIL;
612*6467f958SSadaf Ebrahimi }
613*6467f958SSadaf Ebrahimi }
614*6467f958SSadaf Ebrahimi }
615*6467f958SSadaf Ebrahimi
616*6467f958SSadaf Ebrahimi if( 0 == gSeedSpecified )
617*6467f958SSadaf Ebrahimi {
618*6467f958SSadaf Ebrahimi time_t currentTime = time( NULL );
619*6467f958SSadaf Ebrahimi struct tm *t = localtime(¤tTime);
620*6467f958SSadaf Ebrahimi gSeed = t->tm_sec + 60 * ( t->tm_min + 60 * (t->tm_hour + 24 * (t->tm_yday + 365 * t->tm_year)));
621*6467f958SSadaf Ebrahimi gSeed = (uint32_t) (((uint64_t) gSeed * (uint64_t) gSeed ) >> 16);
622*6467f958SSadaf Ebrahimi }
623*6467f958SSadaf Ebrahimi gMTdata = init_genrand( gSeed );
624*6467f958SSadaf Ebrahimi
625*6467f958SSadaf Ebrahimi
626*6467f958SSadaf Ebrahimi // Init bufA and bufB
627*6467f958SSadaf Ebrahimi {
628*6467f958SSadaf Ebrahimi buf1 = (float *)malloc( BUFFER_SIZE );
629*6467f958SSadaf Ebrahimi buf2 = (float *)malloc( BUFFER_SIZE );
630*6467f958SSadaf Ebrahimi buf3 = (float *)malloc( BUFFER_SIZE );
631*6467f958SSadaf Ebrahimi buf4 = (float *)malloc( BUFFER_SIZE );
632*6467f958SSadaf Ebrahimi buf5 = (float *)malloc( BUFFER_SIZE );
633*6467f958SSadaf Ebrahimi buf6 = (float *)malloc( BUFFER_SIZE );
634*6467f958SSadaf Ebrahimi
635*6467f958SSadaf Ebrahimi bufSkip = (int *)malloc( BUFFER_SIZE );
636*6467f958SSadaf Ebrahimi
637*6467f958SSadaf Ebrahimi if( NULL == buf1 || NULL == buf2 || NULL == buf3 || NULL == buf4 || NULL == buf5 || NULL == buf6 || NULL == bufSkip)
638*6467f958SSadaf Ebrahimi {
639*6467f958SSadaf Ebrahimi vlog_error( "Out of memory initializing buffers\n" );
640*6467f958SSadaf Ebrahimi return TEST_FAIL;
641*6467f958SSadaf Ebrahimi }
642*6467f958SSadaf Ebrahimi for( i = 0; i < sizeof( correct ) / sizeof( correct[0] ); i++ )
643*6467f958SSadaf Ebrahimi {
644*6467f958SSadaf Ebrahimi correct[i] = (float *)malloc( BUFFER_SIZE );
645*6467f958SSadaf Ebrahimi skipTest[i] = (int *)malloc( BUFFER_SIZE );
646*6467f958SSadaf Ebrahimi if(( NULL == correct[i] ) || ( NULL == skipTest[i]))
647*6467f958SSadaf Ebrahimi {
648*6467f958SSadaf Ebrahimi vlog_error( "Out of memory initializing buffers 2\n" );
649*6467f958SSadaf Ebrahimi return TEST_FAIL;
650*6467f958SSadaf Ebrahimi }
651*6467f958SSadaf Ebrahimi }
652*6467f958SSadaf Ebrahimi
653*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ )
654*6467f958SSadaf Ebrahimi ((uint32_t*) buf1)[i] = genrand_int32( gMTdata );
655*6467f958SSadaf Ebrahimi
656*6467f958SSadaf Ebrahimi if( (error = clEnqueueWriteBuffer(gQueue, bufA, CL_FALSE, 0, BUFFER_SIZE, buf1, 0, NULL, NULL) ))
657*6467f958SSadaf Ebrahimi {
658*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clEnqueueWriteBuffer1\n", error );
659*6467f958SSadaf Ebrahimi return TEST_FAIL;
660*6467f958SSadaf Ebrahimi }
661*6467f958SSadaf Ebrahimi
662*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ )
663*6467f958SSadaf Ebrahimi ((uint32_t*) buf2)[i] = genrand_int32( gMTdata );
664*6467f958SSadaf Ebrahimi
665*6467f958SSadaf Ebrahimi if( (error = clEnqueueWriteBuffer(gQueue, bufB, CL_FALSE, 0, BUFFER_SIZE, buf2, 0, NULL, NULL) ))
666*6467f958SSadaf Ebrahimi {
667*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clEnqueueWriteBuffer2\n", error );
668*6467f958SSadaf Ebrahimi return TEST_FAIL;
669*6467f958SSadaf Ebrahimi }
670*6467f958SSadaf Ebrahimi
671*6467f958SSadaf Ebrahimi void *ftzInfo = NULL;
672*6467f958SSadaf Ebrahimi if( gForceFTZ )
673*6467f958SSadaf Ebrahimi ftzInfo = FlushToZero();
674*6467f958SSadaf Ebrahimi if ((CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device)) && gIsEmbedded) {
675*6467f958SSadaf Ebrahimi oldRoundMode = set_round(kRoundTowardZero, kfloat);
676*6467f958SSadaf Ebrahimi isRTZ = 1;
677*6467f958SSadaf Ebrahimi }
678*6467f958SSadaf Ebrahimi float *f = (float*) buf1;
679*6467f958SSadaf Ebrahimi float *f2 = (float*) buf2;
680*6467f958SSadaf Ebrahimi float *f3 = (float*) buf3;
681*6467f958SSadaf Ebrahimi float *f4 = (float*) buf4;
682*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ )
683*6467f958SSadaf Ebrahimi {
684*6467f958SSadaf Ebrahimi float q = f[i];
685*6467f958SSadaf Ebrahimi float q2 = f2[i];
686*6467f958SSadaf Ebrahimi
687*6467f958SSadaf Ebrahimi feclearexcept(FE_OVERFLOW);
688*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
689*6467f958SSadaf Ebrahimi // VS2005 might use x87 for straight multiplies, and we can't
690*6467f958SSadaf Ebrahimi // turn that off
691*6467f958SSadaf Ebrahimi f3[i] = sse_mul(q, q2);
692*6467f958SSadaf Ebrahimi f4[i] = sse_mul(-q, q2);
693*6467f958SSadaf Ebrahimi #elif defined(__PPC__)
694*6467f958SSadaf Ebrahimi // None of the current generation PPC processors support HW
695*6467f958SSadaf Ebrahimi // FTZ, emulate it in sw.
696*6467f958SSadaf Ebrahimi f3[i] = ppc_mul(q, q2);
697*6467f958SSadaf Ebrahimi f4[i] = ppc_mul(-q, q2);
698*6467f958SSadaf Ebrahimi #else
699*6467f958SSadaf Ebrahimi f3[i] = q * q2;
700*6467f958SSadaf Ebrahimi f4[i] = -q * q2;
701*6467f958SSadaf Ebrahimi #endif
702*6467f958SSadaf Ebrahimi // Skip test if the device doesn't support infinities and NaN AND the result overflows
703*6467f958SSadaf Ebrahimi // or either input is an infinity of NaN
704*6467f958SSadaf Ebrahimi bufSkip[i] = (gSkipNanInf && ((FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))) ||
705*6467f958SSadaf Ebrahimi (fabsf(q) == FLT_MAX) || (q != q) ||
706*6467f958SSadaf Ebrahimi (fabsf(q2) == FLT_MAX) || (q2 != q2)));
707*6467f958SSadaf Ebrahimi }
708*6467f958SSadaf Ebrahimi
709*6467f958SSadaf Ebrahimi if( gForceFTZ )
710*6467f958SSadaf Ebrahimi UnFlushToZero(ftzInfo);
711*6467f958SSadaf Ebrahimi
712*6467f958SSadaf Ebrahimi if (isRTZ)
713*6467f958SSadaf Ebrahimi (void)set_round(oldRoundMode, kfloat);
714*6467f958SSadaf Ebrahimi
715*6467f958SSadaf Ebrahimi
716*6467f958SSadaf Ebrahimi if( (error = clEnqueueWriteBuffer(gQueue, bufC, CL_FALSE, 0, BUFFER_SIZE, buf3, 0, NULL, NULL) ))
717*6467f958SSadaf Ebrahimi {
718*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clEnqueueWriteBuffer3\n", error );
719*6467f958SSadaf Ebrahimi return TEST_FAIL;
720*6467f958SSadaf Ebrahimi }
721*6467f958SSadaf Ebrahimi if( (error = clEnqueueWriteBuffer(gQueue, bufD, CL_FALSE, 0, BUFFER_SIZE, buf4, 0, NULL, NULL) ))
722*6467f958SSadaf Ebrahimi {
723*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clEnqueueWriteBuffer4\n", error );
724*6467f958SSadaf Ebrahimi return TEST_FAIL;
725*6467f958SSadaf Ebrahimi }
726*6467f958SSadaf Ebrahimi
727*6467f958SSadaf Ebrahimi // Fill the buffers with NaN
728*6467f958SSadaf Ebrahimi float *f5 = (float*) buf5;
729*6467f958SSadaf Ebrahimi float nan_val = nanf("");
730*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ )
731*6467f958SSadaf Ebrahimi f5[i] = nan_val;
732*6467f958SSadaf Ebrahimi
733*6467f958SSadaf Ebrahimi // calculate reference results
734*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ )
735*6467f958SSadaf Ebrahimi {
736*6467f958SSadaf Ebrahimi for ( j=0; j<8; j++)
737*6467f958SSadaf Ebrahimi {
738*6467f958SSadaf Ebrahimi feclearexcept(FE_OVERFLOW);
739*6467f958SSadaf Ebrahimi switch (j)
740*6467f958SSadaf Ebrahimi {
741*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
742*6467f958SSadaf Ebrahimi // VS2005 might use x87 for straight add/sub, and we can't
743*6467f958SSadaf Ebrahimi // turn that off
744*6467f958SSadaf Ebrahimi case 0:
745*6467f958SSadaf Ebrahimi correct[0][i] = sse_add(buf3[i],buf4[i]); break;
746*6467f958SSadaf Ebrahimi case 1:
747*6467f958SSadaf Ebrahimi correct[1][i] = sse_sub(buf3[i],buf3[i]); break;
748*6467f958SSadaf Ebrahimi case 2:
749*6467f958SSadaf Ebrahimi correct[2][i] = sse_add(buf4[i],buf3[i]); break;
750*6467f958SSadaf Ebrahimi case 3:
751*6467f958SSadaf Ebrahimi correct[3][i] = sse_sub(buf3[i],buf3[i]); break;
752*6467f958SSadaf Ebrahimi case 4:
753*6467f958SSadaf Ebrahimi correct[4][i] = -sse_add(buf3[i],buf4[i]); break;
754*6467f958SSadaf Ebrahimi case 5:
755*6467f958SSadaf Ebrahimi correct[5][i] = -sse_sub(buf3[i],buf3[i]); break;
756*6467f958SSadaf Ebrahimi case 6:
757*6467f958SSadaf Ebrahimi correct[6][i] = -sse_add(buf4[i],buf3[i]); break;
758*6467f958SSadaf Ebrahimi case 7:
759*6467f958SSadaf Ebrahimi correct[7][i] = -sse_sub(buf3[i],buf3[i]); break;
760*6467f958SSadaf Ebrahimi #else
761*6467f958SSadaf Ebrahimi case 0:
762*6467f958SSadaf Ebrahimi correct[0][i] = buf3[i] + buf4[i]; break;
763*6467f958SSadaf Ebrahimi case 1:
764*6467f958SSadaf Ebrahimi correct[1][i] = buf3[i] - buf3[i]; break;
765*6467f958SSadaf Ebrahimi case 2:
766*6467f958SSadaf Ebrahimi correct[2][i] = buf4[i] + buf3[i]; break;
767*6467f958SSadaf Ebrahimi case 3:
768*6467f958SSadaf Ebrahimi correct[3][i] = buf3[i] - buf3[i]; break;
769*6467f958SSadaf Ebrahimi case 4:
770*6467f958SSadaf Ebrahimi correct[4][i] = -(buf3[i] + buf4[i]); break;
771*6467f958SSadaf Ebrahimi case 5:
772*6467f958SSadaf Ebrahimi correct[5][i] = -(buf3[i] - buf3[i]); break;
773*6467f958SSadaf Ebrahimi case 6:
774*6467f958SSadaf Ebrahimi correct[6][i] = -(buf4[i] + buf3[i]); break;
775*6467f958SSadaf Ebrahimi case 7:
776*6467f958SSadaf Ebrahimi correct[7][i] = -(buf3[i] - buf3[i]); break;
777*6467f958SSadaf Ebrahimi #endif
778*6467f958SSadaf Ebrahimi }
779*6467f958SSadaf Ebrahimi // Further skip test inputs if the device doesn support infinities AND NaNs
780*6467f958SSadaf Ebrahimi // resulting sum overflows
781*6467f958SSadaf Ebrahimi skipTest[j][i] = (bufSkip[i] ||
782*6467f958SSadaf Ebrahimi (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
783*6467f958SSadaf Ebrahimi
784*6467f958SSadaf Ebrahimi #if defined(__PPC__)
785*6467f958SSadaf Ebrahimi // Since the current Power processors don't emulate flush to zero in HW,
786*6467f958SSadaf Ebrahimi // it must be emulated in SW instead.
787*6467f958SSadaf Ebrahimi if (gForceFTZ)
788*6467f958SSadaf Ebrahimi {
789*6467f958SSadaf Ebrahimi if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
790*6467f958SSadaf Ebrahimi correct[j][i] = copysignf(0.0f, correct[j][i]);
791*6467f958SSadaf Ebrahimi }
792*6467f958SSadaf Ebrahimi #endif
793*6467f958SSadaf Ebrahimi }
794*6467f958SSadaf Ebrahimi }
795*6467f958SSadaf Ebrahimi if( gHasDouble )
796*6467f958SSadaf Ebrahimi {
797*6467f958SSadaf Ebrahimi // Spec requires correct non-flushed results
798*6467f958SSadaf Ebrahimi // for doubles. We disable FTZ if this is default on
799*6467f958SSadaf Ebrahimi // the platform (like ARM) for reference result computation
800*6467f958SSadaf Ebrahimi // It is no-op if platform default is not FTZ (e.g. x86)
801*6467f958SSadaf Ebrahimi FPU_mode_type oldMode;
802*6467f958SSadaf Ebrahimi DisableFTZ( &oldMode );
803*6467f958SSadaf Ebrahimi
804*6467f958SSadaf Ebrahimi buf3_double = (double *)malloc( BUFFER_SIZE );
805*6467f958SSadaf Ebrahimi buf4_double = (double *)malloc( BUFFER_SIZE );
806*6467f958SSadaf Ebrahimi buf5_double = (double *)malloc( BUFFER_SIZE );
807*6467f958SSadaf Ebrahimi buf6_double = (double *)malloc( BUFFER_SIZE );
808*6467f958SSadaf Ebrahimi if( NULL == buf3_double || NULL == buf4_double || NULL == buf5_double || NULL == buf6_double )
809*6467f958SSadaf Ebrahimi {
810*6467f958SSadaf Ebrahimi vlog_error( "Out of memory initializing DP buffers\n" );
811*6467f958SSadaf Ebrahimi return TEST_FAIL;
812*6467f958SSadaf Ebrahimi }
813*6467f958SSadaf Ebrahimi for( i = 0; i < sizeof( correct_double ) / sizeof( correct_double[0] ); i++ )
814*6467f958SSadaf Ebrahimi {
815*6467f958SSadaf Ebrahimi correct_double[i] = (double *)malloc( BUFFER_SIZE );
816*6467f958SSadaf Ebrahimi if( NULL == correct_double[i] )
817*6467f958SSadaf Ebrahimi {
818*6467f958SSadaf Ebrahimi vlog_error( "Out of memory initializing DP buffers 2\n" );
819*6467f958SSadaf Ebrahimi return TEST_FAIL;
820*6467f958SSadaf Ebrahimi }
821*6467f958SSadaf Ebrahimi }
822*6467f958SSadaf Ebrahimi
823*6467f958SSadaf Ebrahimi
824*6467f958SSadaf Ebrahimi double *f = (double*) buf1;
825*6467f958SSadaf Ebrahimi double *f2 = (double*) buf2;
826*6467f958SSadaf Ebrahimi double *f3 = (double*) buf3_double;
827*6467f958SSadaf Ebrahimi double *f4 = (double*) buf4_double;
828*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof(double); i++ )
829*6467f958SSadaf Ebrahimi {
830*6467f958SSadaf Ebrahimi double q = f[i];
831*6467f958SSadaf Ebrahimi double q2 = f2[i];
832*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
833*6467f958SSadaf Ebrahimi // VS2005 might use x87 for straight multiplies, and we can't
834*6467f958SSadaf Ebrahimi // turn that off
835*6467f958SSadaf Ebrahimi f3[i] = sse_mul_sd(q, q2);
836*6467f958SSadaf Ebrahimi f4[i] = sse_mul_sd(-q, q2);
837*6467f958SSadaf Ebrahimi #else
838*6467f958SSadaf Ebrahimi f3[i] = q * q2;
839*6467f958SSadaf Ebrahimi f4[i] = -q * q2;
840*6467f958SSadaf Ebrahimi #endif
841*6467f958SSadaf Ebrahimi }
842*6467f958SSadaf Ebrahimi
843*6467f958SSadaf Ebrahimi if( (error = clEnqueueWriteBuffer(gQueue, bufC_double, CL_FALSE, 0, BUFFER_SIZE, buf3_double, 0, NULL, NULL) ))
844*6467f958SSadaf Ebrahimi {
845*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clEnqueueWriteBuffer3\n", error );
846*6467f958SSadaf Ebrahimi return TEST_FAIL;
847*6467f958SSadaf Ebrahimi }
848*6467f958SSadaf Ebrahimi if( (error = clEnqueueWriteBuffer(gQueue, bufD_double, CL_FALSE, 0, BUFFER_SIZE, buf4_double, 0, NULL, NULL) ))
849*6467f958SSadaf Ebrahimi {
850*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clEnqueueWriteBuffer4\n", error );
851*6467f958SSadaf Ebrahimi return TEST_FAIL;
852*6467f958SSadaf Ebrahimi }
853*6467f958SSadaf Ebrahimi
854*6467f958SSadaf Ebrahimi // Fill the buffers with NaN
855*6467f958SSadaf Ebrahimi double *f5 = (double*) buf5_double;
856*6467f958SSadaf Ebrahimi double nan_val = nanf("");
857*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ )
858*6467f958SSadaf Ebrahimi f5[i] = nan_val;
859*6467f958SSadaf Ebrahimi
860*6467f958SSadaf Ebrahimi // calculate reference results
861*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ )
862*6467f958SSadaf Ebrahimi {
863*6467f958SSadaf Ebrahimi #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
864*6467f958SSadaf Ebrahimi // VS2005 might use x87 for straight add/sub, and we can't
865*6467f958SSadaf Ebrahimi // turn that off
866*6467f958SSadaf Ebrahimi correct_double[0][i] = sse_add_sd(buf3_double[i],buf4_double[i]);
867*6467f958SSadaf Ebrahimi correct_double[1][i] = sse_sub_sd(buf3_double[i],buf3_double[i]);
868*6467f958SSadaf Ebrahimi correct_double[2][i] = sse_add_sd(buf4_double[i],buf3_double[i]);
869*6467f958SSadaf Ebrahimi correct_double[3][i] = sse_sub_sd(buf3_double[i],buf3_double[i]);
870*6467f958SSadaf Ebrahimi correct_double[4][i] = -sse_add_sd(buf3_double[i],buf4_double[i]);
871*6467f958SSadaf Ebrahimi correct_double[5][i] = -sse_sub_sd(buf3_double[i],buf3_double[i]);
872*6467f958SSadaf Ebrahimi correct_double[6][i] = -sse_add_sd(buf4_double[i],buf3_double[i]);
873*6467f958SSadaf Ebrahimi correct_double[7][i] = -sse_sub_sd(buf3_double[i],buf3_double[i]);
874*6467f958SSadaf Ebrahimi #else
875*6467f958SSadaf Ebrahimi correct_double[0][i] = buf3_double[i] + buf4_double[i];
876*6467f958SSadaf Ebrahimi correct_double[1][i] = buf3_double[i] - buf3_double[i];
877*6467f958SSadaf Ebrahimi correct_double[2][i] = buf4_double[i] + buf3_double[i];
878*6467f958SSadaf Ebrahimi correct_double[3][i] = buf3_double[i] - buf3_double[i];
879*6467f958SSadaf Ebrahimi correct_double[4][i] = -(buf3_double[i] + buf4_double[i]);
880*6467f958SSadaf Ebrahimi correct_double[5][i] = -(buf3_double[i] - buf3_double[i]);
881*6467f958SSadaf Ebrahimi correct_double[6][i] = -(buf4_double[i] + buf3_double[i]);
882*6467f958SSadaf Ebrahimi correct_double[7][i] = -(buf3_double[i] - buf3_double[i]);
883*6467f958SSadaf Ebrahimi #endif
884*6467f958SSadaf Ebrahimi }
885*6467f958SSadaf Ebrahimi
886*6467f958SSadaf Ebrahimi // Restore previous FP state since we modified it for
887*6467f958SSadaf Ebrahimi // reference result computation (see DisableFTZ call above)
888*6467f958SSadaf Ebrahimi RestoreFPState(&oldMode);
889*6467f958SSadaf Ebrahimi }
890*6467f958SSadaf Ebrahimi }
891*6467f958SSadaf Ebrahimi
892*6467f958SSadaf Ebrahimi char c[1000];
893*6467f958SSadaf Ebrahimi static const char *no_yes[] = { "NO", "YES" };
894*6467f958SSadaf Ebrahimi vlog( "\nCompute Device info:\n" );
895*6467f958SSadaf Ebrahimi clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof(c), (void *)&c, NULL);
896*6467f958SSadaf Ebrahimi vlog( "\tDevice Name: %s\n", c );
897*6467f958SSadaf Ebrahimi clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof(c), (void *)&c, NULL);
898*6467f958SSadaf Ebrahimi vlog( "\tVendor: %s\n", c );
899*6467f958SSadaf Ebrahimi clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof(c), (void *)&c, NULL);
900*6467f958SSadaf Ebrahimi vlog( "\tDevice Version: %s\n", c );
901*6467f958SSadaf Ebrahimi clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
902*6467f958SSadaf Ebrahimi vlog( "\tCL C Version: %s\n", c );
903*6467f958SSadaf Ebrahimi clGetDeviceInfo( device, CL_DRIVER_VERSION, sizeof(c), (void *)&c, NULL);
904*6467f958SSadaf Ebrahimi vlog( "\tDriver Version: %s\n", c );
905*6467f958SSadaf Ebrahimi vlog( "\tSubnormal values supported? %s\n", no_yes[0 != (CL_FP_DENORM & floatCapabilities)] );
906*6467f958SSadaf Ebrahimi vlog( "\tTesting with FTZ mode ON? %s\n", no_yes[0 != gForceFTZ] );
907*6467f958SSadaf Ebrahimi vlog( "\tTesting Doubles? %s\n", no_yes[0 != gHasDouble] );
908*6467f958SSadaf Ebrahimi vlog( "\tRandom Number seed: 0x%8.8x\n", gSeed );
909*6467f958SSadaf Ebrahimi vlog( "\n\n" );
910*6467f958SSadaf Ebrahimi
911*6467f958SSadaf Ebrahimi return TEST_PASS;
912*6467f958SSadaf Ebrahimi }
913*6467f958SSadaf Ebrahimi
ReleaseCL(void)914*6467f958SSadaf Ebrahimi static void ReleaseCL( void )
915*6467f958SSadaf Ebrahimi {
916*6467f958SSadaf Ebrahimi clReleaseMemObject(bufA);
917*6467f958SSadaf Ebrahimi clReleaseMemObject(bufB);
918*6467f958SSadaf Ebrahimi clReleaseMemObject(bufC);
919*6467f958SSadaf Ebrahimi clReleaseMemObject(bufD);
920*6467f958SSadaf Ebrahimi clReleaseMemObject(bufE);
921*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram[0]);
922*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram[1]);
923*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram[2]);
924*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram[3]);
925*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram[4]);
926*6467f958SSadaf Ebrahimi if( gHasDouble )
927*6467f958SSadaf Ebrahimi {
928*6467f958SSadaf Ebrahimi clReleaseMemObject(bufC_double);
929*6467f958SSadaf Ebrahimi clReleaseMemObject(bufD_double);
930*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram_double[0]);
931*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram_double[1]);
932*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram_double[2]);
933*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram_double[3]);
934*6467f958SSadaf Ebrahimi clReleaseProgram(gProgram_double[4]);
935*6467f958SSadaf Ebrahimi }
936*6467f958SSadaf Ebrahimi clReleaseCommandQueue(gQueue);
937*6467f958SSadaf Ebrahimi clReleaseContext(gContext);
938*6467f958SSadaf Ebrahimi }
939*6467f958SSadaf Ebrahimi
940*6467f958SSadaf Ebrahimi
RunTest(int testNumber)941*6467f958SSadaf Ebrahimi static int RunTest( int testNumber )
942*6467f958SSadaf Ebrahimi {
943*6467f958SSadaf Ebrahimi size_t i;
944*6467f958SSadaf Ebrahimi int error = 0;
945*6467f958SSadaf Ebrahimi cl_mem args[4];
946*6467f958SSadaf Ebrahimi float *c;
947*6467f958SSadaf Ebrahimi const char *kernelName[] = { "kernel1", "kernel2", "kernel3", "kernel4",
948*6467f958SSadaf Ebrahimi "kernel5", "kernel6", "kernel7", "kernel8" };
949*6467f958SSadaf Ebrahimi switch( testNumber )
950*6467f958SSadaf Ebrahimi {
951*6467f958SSadaf Ebrahimi case 0: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; // a * b + c
952*6467f958SSadaf Ebrahimi case 1: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break;
953*6467f958SSadaf Ebrahimi case 2: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break;
954*6467f958SSadaf Ebrahimi case 3: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break;
955*6467f958SSadaf Ebrahimi case 4: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break;
956*6467f958SSadaf Ebrahimi case 5: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break;
957*6467f958SSadaf Ebrahimi case 6: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break;
958*6467f958SSadaf Ebrahimi case 7: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break;
959*6467f958SSadaf Ebrahimi default:
960*6467f958SSadaf Ebrahimi vlog_error( "Unknown test case %d passed to RunTest\n", testNumber );
961*6467f958SSadaf Ebrahimi return -1;
962*6467f958SSadaf Ebrahimi }
963*6467f958SSadaf Ebrahimi
964*6467f958SSadaf Ebrahimi
965*6467f958SSadaf Ebrahimi int vectorSize;
966*6467f958SSadaf Ebrahimi for( vectorSize = 0; vectorSize < 5; vectorSize++ )
967*6467f958SSadaf Ebrahimi {
968*6467f958SSadaf Ebrahimi cl_kernel k = clCreateKernel( gProgram[ vectorSize ], kernelName[ testNumber ], &error );
969*6467f958SSadaf Ebrahimi if( NULL == k || error )
970*6467f958SSadaf Ebrahimi {
971*6467f958SSadaf Ebrahimi vlog_error( "%d) Unable to find kernel \"%s\" for vector size: %d\n", error, kernelName[ testNumber ], 1 << vectorSize );
972*6467f958SSadaf Ebrahimi return -2;
973*6467f958SSadaf Ebrahimi }
974*6467f958SSadaf Ebrahimi
975*6467f958SSadaf Ebrahimi // set the kernel args
976*6467f958SSadaf Ebrahimi for( i = 0; i < sizeof(args ) / sizeof( args[0]); i++ )
977*6467f958SSadaf Ebrahimi if( (error = clSetKernelArg(k, i, sizeof( cl_mem ), args + i) ))
978*6467f958SSadaf Ebrahimi {
979*6467f958SSadaf Ebrahimi vlog_error( "Error %d setting kernel arg # %ld\n", error, i );
980*6467f958SSadaf Ebrahimi return error;
981*6467f958SSadaf Ebrahimi }
982*6467f958SSadaf Ebrahimi
983*6467f958SSadaf Ebrahimi // write NaNs to the result array
984*6467f958SSadaf Ebrahimi if( (error = clEnqueueWriteBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf5, 0, NULL, NULL) ))
985*6467f958SSadaf Ebrahimi {
986*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clWriteArray %d\n", error, testNumber );
987*6467f958SSadaf Ebrahimi return error;
988*6467f958SSadaf Ebrahimi }
989*6467f958SSadaf Ebrahimi
990*6467f958SSadaf Ebrahimi // execute the kernel
991*6467f958SSadaf Ebrahimi size_t gDim[3] = { BUFFER_SIZE / (sizeof( cl_float ) * (1<<vectorSize)), 0, 0 };
992*6467f958SSadaf Ebrahimi if( ((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, gDim, NULL, 0, NULL, NULL) )))
993*6467f958SSadaf Ebrahimi {
994*6467f958SSadaf Ebrahimi vlog_error( "Got Error # %d trying to execture kernel\n", error );
995*6467f958SSadaf Ebrahimi return error;
996*6467f958SSadaf Ebrahimi }
997*6467f958SSadaf Ebrahimi
998*6467f958SSadaf Ebrahimi // read the data back
999*6467f958SSadaf Ebrahimi if( (error = clEnqueueReadBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf6, 0, NULL, NULL ) ))
1000*6467f958SSadaf Ebrahimi {
1001*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clReadArray %d\n", error, testNumber );
1002*6467f958SSadaf Ebrahimi return error;
1003*6467f958SSadaf Ebrahimi }
1004*6467f958SSadaf Ebrahimi
1005*6467f958SSadaf Ebrahimi // verify results
1006*6467f958SSadaf Ebrahimi float *test = (float*) buf6;
1007*6467f958SSadaf Ebrahimi float *a = (float*) buf1;
1008*6467f958SSadaf Ebrahimi float *b = (float*) buf2;
1009*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ )
1010*6467f958SSadaf Ebrahimi {
1011*6467f958SSadaf Ebrahimi if( isnan(test[i]) && isnan(correct[testNumber][i] ) )
1012*6467f958SSadaf Ebrahimi continue;
1013*6467f958SSadaf Ebrahimi
1014*6467f958SSadaf Ebrahimi if( skipTest[testNumber][i] )
1015*6467f958SSadaf Ebrahimi continue;
1016*6467f958SSadaf Ebrahimi
1017*6467f958SSadaf Ebrahimi // sign of zero must be correct
1018*6467f958SSadaf Ebrahimi if(( ((uint32_t*) test)[i] != ((uint32_t*) correct[testNumber])[i] ) &&
1019*6467f958SSadaf Ebrahimi !(gIgnoreZeroSign && (test[i] == 0.0f) && (correct[testNumber][i] == 0.0f)) )
1020*6467f958SSadaf Ebrahimi {
1021*6467f958SSadaf Ebrahimi switch( testNumber )
1022*6467f958SSadaf Ebrahimi {
1023*6467f958SSadaf Ebrahimi // Zeros for these should be positive
1024*6467f958SSadaf Ebrahimi case 0: vlog_error( "%ld) Error for %s %s: %a * %a + %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1025*6467f958SSadaf Ebrahimi a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1026*6467f958SSadaf Ebrahimi case 1: vlog_error( "%ld) Error for %s %s: %a * %a - %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1027*6467f958SSadaf Ebrahimi a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1028*6467f958SSadaf Ebrahimi case 2: vlog_error( "%ld) Error for %s %s: %a + %a * %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1029*6467f958SSadaf Ebrahimi c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1030*6467f958SSadaf Ebrahimi case 3: vlog_error( "%ld) Error for %s %s: %a - %a * %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1031*6467f958SSadaf Ebrahimi c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1032*6467f958SSadaf Ebrahimi
1033*6467f958SSadaf Ebrahimi // Zeros for these should be negative
1034*6467f958SSadaf Ebrahimi case 4: vlog_error( "%ld) Error for %s %s: -(%a * %a + %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1035*6467f958SSadaf Ebrahimi a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1036*6467f958SSadaf Ebrahimi case 5: vlog_error( "%ld) Error for %s %s: -(%a * %a - %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1037*6467f958SSadaf Ebrahimi a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1038*6467f958SSadaf Ebrahimi case 6: vlog_error( "%ld) Error for %s %s: -(%a + %a * %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1039*6467f958SSadaf Ebrahimi c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1040*6467f958SSadaf Ebrahimi case 7: vlog_error( "%ld) Error for %s %s: -(%a - %a * %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1041*6467f958SSadaf Ebrahimi c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1042*6467f958SSadaf Ebrahimi default:
1043*6467f958SSadaf Ebrahimi vlog_error( "error: Unknown test number!\n" );
1044*6467f958SSadaf Ebrahimi clReleaseKernel(k);
1045*6467f958SSadaf Ebrahimi return -2;
1046*6467f958SSadaf Ebrahimi }
1047*6467f958SSadaf Ebrahimi }
1048*6467f958SSadaf Ebrahimi }
1049*6467f958SSadaf Ebrahimi
1050*6467f958SSadaf Ebrahimi clReleaseKernel(k);
1051*6467f958SSadaf Ebrahimi }
1052*6467f958SSadaf Ebrahimi
1053*6467f958SSadaf Ebrahimi return error;
1054*6467f958SSadaf Ebrahimi }
1055*6467f958SSadaf Ebrahimi
RunTest_Double(int testNumber)1056*6467f958SSadaf Ebrahimi static int RunTest_Double( int testNumber )
1057*6467f958SSadaf Ebrahimi {
1058*6467f958SSadaf Ebrahimi if( !gHasDouble )
1059*6467f958SSadaf Ebrahimi {
1060*6467f958SSadaf Ebrahimi vlog("Double is not supported, test not run.\n");
1061*6467f958SSadaf Ebrahimi return 0;
1062*6467f958SSadaf Ebrahimi }
1063*6467f958SSadaf Ebrahimi
1064*6467f958SSadaf Ebrahimi size_t i;
1065*6467f958SSadaf Ebrahimi int error = 0;
1066*6467f958SSadaf Ebrahimi cl_mem args[4];
1067*6467f958SSadaf Ebrahimi double *c;
1068*6467f958SSadaf Ebrahimi const char *kernelName[] = { "kernel1", "kernel2", "kernel3", "kernel4",
1069*6467f958SSadaf Ebrahimi "kernel5", "kernel6", "kernel7", "kernel8" };
1070*6467f958SSadaf Ebrahimi
1071*6467f958SSadaf Ebrahimi switch( testNumber )
1072*6467f958SSadaf Ebrahimi {
1073*6467f958SSadaf Ebrahimi case 0: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break; // a * b + c
1074*6467f958SSadaf Ebrahimi case 1: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break;
1075*6467f958SSadaf Ebrahimi case 2: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break;
1076*6467f958SSadaf Ebrahimi case 3: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break;
1077*6467f958SSadaf Ebrahimi case 4: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break;
1078*6467f958SSadaf Ebrahimi case 5: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break;
1079*6467f958SSadaf Ebrahimi case 6: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break;
1080*6467f958SSadaf Ebrahimi case 7: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break;
1081*6467f958SSadaf Ebrahimi default:
1082*6467f958SSadaf Ebrahimi vlog_error( "Unknown test case %d passed to RunTest\n", testNumber );
1083*6467f958SSadaf Ebrahimi return -1;
1084*6467f958SSadaf Ebrahimi }
1085*6467f958SSadaf Ebrahimi
1086*6467f958SSadaf Ebrahimi int vectorSize;
1087*6467f958SSadaf Ebrahimi for( vectorSize = 0; vectorSize < 5; vectorSize++ )
1088*6467f958SSadaf Ebrahimi {
1089*6467f958SSadaf Ebrahimi cl_kernel k = clCreateKernel( gProgram_double[ vectorSize ], kernelName[ testNumber ], &error );
1090*6467f958SSadaf Ebrahimi if( NULL == k || error )
1091*6467f958SSadaf Ebrahimi {
1092*6467f958SSadaf Ebrahimi vlog_error( "%d) Unable to find kernel \"%s\" for vector size: %d\n", error, kernelName[ testNumber ], 1 << vectorSize );
1093*6467f958SSadaf Ebrahimi return -2;
1094*6467f958SSadaf Ebrahimi }
1095*6467f958SSadaf Ebrahimi
1096*6467f958SSadaf Ebrahimi // set the kernel args
1097*6467f958SSadaf Ebrahimi for( i = 0; i < sizeof(args ) / sizeof( args[0]); i++ )
1098*6467f958SSadaf Ebrahimi if( (error = clSetKernelArg(k, i, sizeof( cl_mem ), args + i) ))
1099*6467f958SSadaf Ebrahimi {
1100*6467f958SSadaf Ebrahimi vlog_error( "Error %d setting kernel arg # %ld\n", error, i );
1101*6467f958SSadaf Ebrahimi return error;
1102*6467f958SSadaf Ebrahimi }
1103*6467f958SSadaf Ebrahimi
1104*6467f958SSadaf Ebrahimi // write NaNs to the result array
1105*6467f958SSadaf Ebrahimi if( (error = clEnqueueWriteBuffer(gQueue, bufE, CL_FALSE, 0, BUFFER_SIZE, buf5_double, 0, NULL, NULL) ))
1106*6467f958SSadaf Ebrahimi {
1107*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clWriteArray %d\n", error, testNumber );
1108*6467f958SSadaf Ebrahimi return error;
1109*6467f958SSadaf Ebrahimi }
1110*6467f958SSadaf Ebrahimi
1111*6467f958SSadaf Ebrahimi // execute the kernel
1112*6467f958SSadaf Ebrahimi size_t gDim[3] = { BUFFER_SIZE / (sizeof( cl_double ) * (1<<vectorSize)), 0, 0 };
1113*6467f958SSadaf Ebrahimi if( ((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, gDim, NULL, 0, NULL, NULL) )))
1114*6467f958SSadaf Ebrahimi {
1115*6467f958SSadaf Ebrahimi vlog_error( "Got Error # %d trying to execture kernel\n", error );
1116*6467f958SSadaf Ebrahimi return error;
1117*6467f958SSadaf Ebrahimi }
1118*6467f958SSadaf Ebrahimi
1119*6467f958SSadaf Ebrahimi // read the data back
1120*6467f958SSadaf Ebrahimi if( (error = clEnqueueReadBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf6_double, 0, NULL, NULL ) ))
1121*6467f958SSadaf Ebrahimi {
1122*6467f958SSadaf Ebrahimi vlog_error( "Failure %d at clReadArray %d\n", error, testNumber );
1123*6467f958SSadaf Ebrahimi return error;
1124*6467f958SSadaf Ebrahimi }
1125*6467f958SSadaf Ebrahimi
1126*6467f958SSadaf Ebrahimi // verify results
1127*6467f958SSadaf Ebrahimi double *test = (double*) buf6_double;
1128*6467f958SSadaf Ebrahimi double *a = (double*) buf1;
1129*6467f958SSadaf Ebrahimi double *b = (double*) buf2;
1130*6467f958SSadaf Ebrahimi for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ )
1131*6467f958SSadaf Ebrahimi {
1132*6467f958SSadaf Ebrahimi if( isnan(test[i]) && isnan(correct_double[testNumber][i] ) )
1133*6467f958SSadaf Ebrahimi continue;
1134*6467f958SSadaf Ebrahimi
1135*6467f958SSadaf Ebrahimi // sign of zero must be correct
1136*6467f958SSadaf Ebrahimi if( ((uint64_t*) test)[i] != ((uint64_t*) correct_double[testNumber])[i] )
1137*6467f958SSadaf Ebrahimi {
1138*6467f958SSadaf Ebrahimi switch( testNumber )
1139*6467f958SSadaf Ebrahimi {
1140*6467f958SSadaf Ebrahimi // Zeros for these should be positive
1141*6467f958SSadaf Ebrahimi case 0: vlog_error( "%ld) Error for %s %s: %a * %a + %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1142*6467f958SSadaf Ebrahimi a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1;
1143*6467f958SSadaf Ebrahimi case 1: vlog_error( "%ld) Error for %s %s: %a * %a - %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1144*6467f958SSadaf Ebrahimi a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1;
1145*6467f958SSadaf Ebrahimi case 2: vlog_error( "%ld) Error for %s %s: %a + %a * %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1146*6467f958SSadaf Ebrahimi c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1;
1147*6467f958SSadaf Ebrahimi case 3: vlog_error( "%ld) Error for %s %s: %a - %a * %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1148*6467f958SSadaf Ebrahimi c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1;
1149*6467f958SSadaf Ebrahimi
1150*6467f958SSadaf Ebrahimi // Zeros for these should be negative
1151*6467f958SSadaf Ebrahimi case 4: vlog_error( "%ld) Error for %s %s: -(%a * %a + %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1152*6467f958SSadaf Ebrahimi a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1;
1153*6467f958SSadaf Ebrahimi case 5: vlog_error( "%ld) Error for %s %s: -(%a * %a - %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1154*6467f958SSadaf Ebrahimi a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1;
1155*6467f958SSadaf Ebrahimi case 6: vlog_error( "%ld) Error for %s %s: -(%a + %a * %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1156*6467f958SSadaf Ebrahimi c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1;
1157*6467f958SSadaf Ebrahimi case 7: vlog_error( "%ld) Error for %s %s: -(%a - %a * %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1158*6467f958SSadaf Ebrahimi c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1;
1159*6467f958SSadaf Ebrahimi default:
1160*6467f958SSadaf Ebrahimi vlog_error( "error: Unknown test number!\n" );
1161*6467f958SSadaf Ebrahimi return -2;
1162*6467f958SSadaf Ebrahimi }
1163*6467f958SSadaf Ebrahimi }
1164*6467f958SSadaf Ebrahimi }
1165*6467f958SSadaf Ebrahimi
1166*6467f958SSadaf Ebrahimi clReleaseKernel(k);
1167*6467f958SSadaf Ebrahimi }
1168*6467f958SSadaf Ebrahimi
1169*6467f958SSadaf Ebrahimi return error;
1170*6467f958SSadaf Ebrahimi }
1171