1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/testHarness.h"
17 #include "harness/compat.h"
18 #include "harness/ThreadPool.h"
19
20 #if defined(__APPLE__)
21 #include <sys/sysctl.h>
22 #include <mach/mach_time.h>
23 #endif
24
25 #if defined(__linux__)
26 #include <unistd.h>
27 #include <sys/syscall.h>
28 #include <linux/sysctl.h>
29 #endif
30 #if defined(__linux__)
31 #include <sys/param.h>
32 #include <libgen.h>
33 #endif
34
35 #if defined(__MINGW32__)
36 #include <sys/param.h>
37 #endif
38
39 #include <sstream>
40 #include <stdarg.h>
41 #if !defined(_WIN32)
42 #include <libgen.h>
43 #include <sys/mman.h>
44 #endif
45 #include <time.h>
46
47 #include <algorithm>
48
49 #include <vector>
50 #include <type_traits>
51
52 #include "basic_test_conversions.h"
53
54 #if defined(_WIN32)
55 #include <mmintrin.h>
56 #include <emmintrin.h>
57 #else // !_WIN32
58 #if defined(__SSE__)
59 #include <xmmintrin.h>
60 #endif
61 #if defined(__SSE2__)
62 #include <emmintrin.h>
63 #endif
64 #endif // _WIN32
65
66 cl_context gContext = NULL;
67 cl_command_queue gQueue = NULL;
68 int gStartTestNumber = -1;
69 int gEndTestNumber = 0;
70 #if defined(__APPLE__)
71 int gTimeResults = 1;
72 #else
73 int gTimeResults = 0;
74 #endif
75 int gReportAverageTimes = 0;
76 void *gIn = NULL;
77 void *gRef = NULL;
78 void *gAllowZ = NULL;
79 void *gOut[kCallStyleCount] = { NULL };
80 cl_mem gInBuffer;
81 cl_mem gOutBuffers[kCallStyleCount];
82 size_t gComputeDevices = 0;
83 uint32_t gDeviceFrequency = 0;
84 int gWimpyMode = 0;
85 int gWimpyReductionFactor = 128;
86 int gSkipTesting = 0;
87 int gForceFTZ = 0;
88 int gIsRTZ = 0;
89 uint32_t gSimdSize = 1;
90 int gHasDouble = 0;
91 int gTestDouble = 1;
92 const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
93 int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
94 int gMinVectorSize = 0;
95 int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]);
96 MTdata gMTdata;
97 const char **argList = NULL;
98 int argCount = 0;
99
100
101 double SubtractTime(uint64_t endTime, uint64_t startTime);
102
103
104 // clang-format off
105 // for readability sake keep this section unformatted
106
107 std::vector<unsigned int> DataInitInfo::specialValuesUInt = {
108 uint32_t(INT_MIN), uint32_t(INT_MIN + 1), uint32_t(INT_MIN + 2),
109 uint32_t(-(1 << 30) - 3), uint32_t(-(1 << 30) - 2), uint32_t(-(1 << 30) - 1), uint32_t(-(1 << 30)),
110 uint32_t(-(1 << 30) + 1), uint32_t(-(1 << 30) + 2), uint32_t(-(1 << 30) + 3),
111 uint32_t(-(1 << 24) - 3), uint32_t(-(1 << 24) - 2),uint32_t(-(1 << 24) - 1),
112 uint32_t(-(1 << 24)), uint32_t(-(1 << 24) + 1), uint32_t(-(1 << 24) + 2), uint32_t(-(1 << 24) + 3),
113 uint32_t(-(1 << 23) - 3), uint32_t(-(1 << 23) - 2),uint32_t(-(1 << 23) - 1),
114 uint32_t(-(1 << 23)), uint32_t(-(1 << 23) + 1), uint32_t(-(1 << 23) + 2), uint32_t(-(1 << 23) + 3),
115 uint32_t(-(1 << 22) - 3), uint32_t(-(1 << 22) - 2),uint32_t(-(1 << 22) - 1),
116 uint32_t(-(1 << 22)), uint32_t(-(1 << 22) + 1), uint32_t(-(1 << 22) + 2), uint32_t(-(1 << 22) + 3),
117 uint32_t(-(1 << 21) - 3), uint32_t(-(1 << 21) - 2),uint32_t(-(1 << 21) - 1),
118 uint32_t(-(1 << 21)), uint32_t(-(1 << 21) + 1), uint32_t(-(1 << 21) + 2), uint32_t(-(1 << 21) + 3),
119 uint32_t(-(1 << 16) - 3), uint32_t(-(1 << 16) - 2),uint32_t(-(1 << 16) - 1),
120 uint32_t(-(1 << 16)), uint32_t(-(1 << 16) + 1), uint32_t(-(1 << 16) + 2), uint32_t(-(1 << 16) + 3),
121 uint32_t(-(1 << 15) - 3), uint32_t(-(1 << 15) - 2),uint32_t(-(1 << 15) - 1),
122 uint32_t(-(1 << 15)), uint32_t(-(1 << 15) + 1), uint32_t(-(1 << 15) + 2), uint32_t(-(1 << 15) + 3),
123 uint32_t(-(1 << 8) - 3), uint32_t(-(1 << 8) - 2),uint32_t(-(1 << 8) - 1),
124 uint32_t(-(1 << 8)), uint32_t(-(1 << 8) + 1), uint32_t(-(1 << 8) + 2), uint32_t(-(1 << 8) + 3),
125 uint32_t(-(1 << 7) - 3), uint32_t(-(1 << 7) - 2),uint32_t(-(1 << 7) - 1),
126 uint32_t(-(1 << 7)), uint32_t(-(1 << 7) + 1), uint32_t(-(1 << 7) + 2), uint32_t(-(1 << 7) + 3),
127 uint32_t(-4), uint32_t(-3), uint32_t(-2), uint32_t(-1), 0, 1, 2, 3, 4,
128 (1 << 7) - 3,(1 << 7) - 2,(1 << 7) - 1, (1 << 7), (1 << 7) + 1, (1 << 7) + 2, (1 << 7) + 3,
129 (1 << 8) - 3,(1 << 8) - 2,(1 << 8) - 1, (1 << 8), (1 << 8) + 1, (1 << 8) + 2, (1 << 8) + 3,
130 (1 << 15) - 3,(1 << 15) - 2,(1 << 15) - 1, (1 << 15), (1 << 15) + 1, (1 << 15) + 2, (1 << 15) + 3,
131 (1 << 16) - 3,(1 << 16) - 2,(1 << 16) - 1, (1 << 16), (1 << 16) + 1, (1 << 16) + 2, (1 << 16) + 3,
132 (1 << 21) - 3,(1 << 21) - 2,(1 << 21) - 1, (1 << 21), (1 << 21) + 1, (1 << 21) + 2, (1 << 21) + 3,
133 (1 << 22) - 3,(1 << 22) - 2,(1 << 22) - 1, (1 << 22), (1 << 22) + 1, (1 << 22) + 2, (1 << 22) + 3,
134 (1 << 23) - 3,(1 << 23) - 2,(1 << 23) - 1, (1 << 23), (1 << 23) + 1, (1 << 23) + 2, (1 << 23) + 3,
135 (1 << 24) - 3,(1 << 24) - 2,(1 << 24) - 1, (1 << 24), (1 << 24) + 1, (1 << 24) + 2, (1 << 24) + 3,
136 (1 << 30) - 3,(1 << 30) - 2,(1 << 30) - 1, (1 << 30), (1 << 30) + 1, (1 << 30) + 2, (1 << 30) + 3,
137 INT_MAX - 3, INT_MAX - 2, INT_MAX - 1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above
138 UINT_MAX - 3, UINT_MAX - 2, UINT_MAX - 1, UINT_MAX
139 };
140
141 std::vector<float> DataInitInfo::specialValuesFloat = {
142 -NAN, -INFINITY, -FLT_MAX,
143 MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
144 MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
145 MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
146 MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
147 -1000.f, -100.f, -4.0f, -3.5f, -3.0f,
148 MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f,
149 MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f,
150 MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f,
151 MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f,
152 MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f,
153 MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f,
154 MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN,
155 MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
156 MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
157 MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
158 MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
159 MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
160 MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, +NAN, +INFINITY, +FLT_MAX,
161 MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
162 MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
163 MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
164 MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
165 +1000.f, +100.f, +4.0f, +3.5f, +3.0f,
166 MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), +2.0f,
167 MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
168 MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
169 MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
170 MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
171 MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
172 MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
173 MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
174 MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
175 MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
176 MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
177 };
178
179 // A table of more difficult cases to get right
180 std::vector<double> DataInitInfo::specialValuesDouble = {
181 -NAN, -INFINITY, -DBL_MAX,
182 MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
183 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8),
184 MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12),
185 MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59),
186 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
187 MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
188 MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28),
189 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
190 MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
191 MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27),
192 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
193 MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
194 -1000., -100., -4.0, -3.5, -3.0,
195 MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5,
196 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0,
197 MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5,
198 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0,
199 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5,
200 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25,
201 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
202 -DBL_MIN,
203 MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
204 MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
205 MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
206 MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
207 MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
208 MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
209 MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
210 -0.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
211 MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59),
212 MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
213 MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
214 MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28),
215 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
216 MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
217 MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27),
218 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
219 MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
220 +1000., +100., +4.0, +3.5, +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5,
221 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
222 +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
223 +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
224 +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
225 +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
226 +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
227 MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
228 MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
229 MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
230 MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
231 MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
232 MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
233 MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10),
234 MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
235 MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10),
236 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1),
237 MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1),
238 MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1),
239 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0),
240 MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0),
241 MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0),
242 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1),
243 MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1),
244 MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1),
245 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0),
246 MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52),
247 MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0),
248 MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1),
249 MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53),
250 MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1),
251 MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2),
252 MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54),
253 MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2),
254 MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10),
255 MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10),
256 MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
257 MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
258 };
259 // clang-format on
260
261
262 // Windows (since long double got deprecated) sets the x87 to 53-bit precision
263 // (that's x87 default state). This causes problems with the tests that
264 // convert long and ulong to float and double or otherwise deal with values
265 // that need more precision than 53-bit. So, set the x87 to 64-bit precision.
Force64BitFPUPrecision(void)266 static inline void Force64BitFPUPrecision(void)
267 {
268 #if __MINGW32__
269 // The usual method is to use _controlfp as follows:
270 // #include <float.h>
271 // _controlfp(_PC_64, _MCW_PC);
272 //
273 // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
274 // divergent code just use inline assembly which works for both.
275 unsigned short int orig_cw = 0;
276 unsigned short int new_cw = 0;
277 __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
278 new_cw = orig_cw | 0x0300; // set precision to 64-bit
279 __asm__ __volatile__("fldcw %0" ::"m"(new_cw));
280 #else
281 /* Implement for other platforms if needed */
282 #endif
283 }
284
285
286 template <typename InType, typename OutType>
check_result(void * test,uint32_t count,int vectorSize)287 int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count,
288 int vectorSize)
289 {
290 const cl_uchar *a = (const cl_uchar *)gAllowZ;
291
292 if (std::is_integral<OutType>::value)
293 { // char/uchar/short/ushort/int/uint/long/ulong
294 const OutType *t = (const OutType *)test;
295 const OutType *c = (const OutType *)gRef;
296 for (uint32_t i = 0; i < count; i++)
297 if (t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (OutType)0))
298 {
299 size_t s = sizeof(OutType) * 2;
300 std::stringstream sstr;
301 sstr << "\nError for vector size %d found at 0x%8.8x: *0x%"
302 << s << "." << s << "x vs 0x%" << s << "." << s << "x\n";
303 vlog(sstr.str().c_str(), vectorSize, i, c[i], t[i]);
304 return i + 1;
305 }
306 }
307 else if (std::is_same<OutType, cl_float>::value)
308 {
309 // cast to integral - from original test
310 const cl_uint *t = (const cl_uint *)test;
311 const cl_uint *c = (const cl_uint *)gRef;
312
313 for (uint32_t i = 0; i < count; i++)
314 if (t[i] != c[i] &&
315 // Allow nan's to be binary different
316 !((t[i] & 0x7fffffffU) > 0x7f800000U
317 && (c[i] & 0x7fffffffU) > 0x7f800000U)
318 && !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x80000000U)))
319 {
320 vlog(
321 "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
322 vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]);
323 return i + 1;
324 }
325 }
326 else
327 {
328 const cl_ulong *t = (const cl_ulong *)test;
329 const cl_ulong *c = (const cl_ulong *)gRef;
330
331 for (uint32_t i = 0; i < count; i++)
332 if (t[i] != c[i] &&
333 // Allow nan's to be binary different
334 !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL
335 && (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL)
336 && !(a[i] != (cl_uchar)0
337 && t[i] == (c[i] & 0x8000000000000000ULL)))
338 {
339 vlog(
340 "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
341 vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]);
342 return i + 1;
343 }
344 }
345
346 return 0;
347 }
348
349
RoundUpToNextPowerOfTwo(cl_uint x)350 cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
351 {
352 if (0 == (x & (x - 1))) return x;
353
354 while (x & (x - 1)) x &= x - 1;
355
356 return x + x;
357 }
358
359
Run()360 cl_int CustomConversionsTest::Run()
361 {
362 int startMinVectorSize = gMinVectorSize;
363 Type inType, outType;
364 RoundingMode round;
365 SaturationMode sat;
366
367 for (int i = 0; i < argCount; i++)
368 {
369 if (conv_test::GetTestCase(argList[i], &outType, &inType, &sat, &round))
370 {
371 vlog_error("\n\t\t**** ERROR: Unable to parse function name "
372 "%s. Skipping.... *****\n\n",
373 argList[i]);
374 continue;
375 }
376
377 // skip double if we don't have it
378 if (!gTestDouble && (inType == kdouble || outType == kdouble))
379 {
380 if (gHasDouble)
381 {
382 vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
383 gTypeNames[outType], gSaturationNames[sat],
384 gRoundingModeNames[round], gTypeNames[inType]);
385 vlog("\t\tcl_khr_fp64 enabled, but double testing turned "
386 "off.\n");
387 }
388 continue;
389 }
390
391 // skip longs on embedded
392 if (!gHasLong
393 && (inType == klong || outType == klong || inType == kulong
394 || outType == kulong))
395 {
396 continue;
397 }
398
399 // Skip the implicit converts if the rounding mode is not default or
400 // test is saturated
401 if (0 == startMinVectorSize)
402 {
403 if (sat || round != kDefaultRoundingMode)
404 gMinVectorSize = 1;
405 else
406 gMinVectorSize = 0;
407 }
408
409 IterOverSelectedTypes iter(typeIterator, *this, inType, outType, round,
410 sat);
411
412 iter.Run();
413
414 if (gFailCount)
415 {
416 vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
417 gTypeNames[outType], gSaturationNames[sat],
418 gRoundingModeNames[round], gTypeNames[inType]);
419 }
420 }
421
422 return gFailCount;
423 }
424
425
ConversionsTest(cl_device_id device,cl_context context,cl_command_queue queue)426 ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
427 cl_command_queue queue)
428 : context(context), device(device), queue(queue), num_elements(0),
429 typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
430 cl_uint(0), cl_int(0), cl_float(0), cl_double(0),
431 cl_ulong(0), cl_long(0) })
432 {}
433
434
Run()435 cl_int ConversionsTest::Run()
436 {
437 IterOverTypes iter(typeIterator, *this);
438
439 iter.Run();
440
441 return gFailCount;
442 }
443
444
SetUp(int elements)445 cl_int ConversionsTest::SetUp(int elements)
446 {
447 num_elements = elements;
448 return CL_SUCCESS;
449 }
450
451
452 template <typename InType, typename OutType>
TestTypesConversion(const Type & inType,const Type & outType,int & testNumber,int startMinVectorSize)453 void ConversionsTest::TestTypesConversion(const Type &inType,
454 const Type &outType, int &testNumber,
455 int startMinVectorSize)
456 {
457 SaturationMode sat;
458 RoundingMode round;
459 int error;
460
461 // skip longs on embedded
462 if (!gHasLong
463 && (inType == klong || outType == klong || inType == kulong
464 || outType == kulong))
465 {
466 return;
467 }
468
469 for (sat = (SaturationMode)0; sat < kSaturationModeCount;
470 sat = (SaturationMode)(sat + 1))
471 {
472 // skip illegal saturated conversions to float type
473 if (kSaturated == sat && (outType == kfloat || outType == kdouble))
474 {
475 continue;
476 }
477
478 for (round = (RoundingMode)0; round < kRoundingModeCount;
479 round = (RoundingMode)(round + 1))
480 {
481 if (++testNumber < gStartTestNumber)
482 {
483 continue;
484 }
485 else
486 {
487 if (gEndTestNumber > 0 && testNumber >= gEndTestNumber) return;
488 }
489
490 vlog("%d) Testing convert_%sn%s%s( %sn ):\n", testNumber,
491 gTypeNames[outType], gSaturationNames[sat],
492 gRoundingModeNames[round], gTypeNames[inType]);
493
494 // skip double if we don't have it
495 if (!gTestDouble && (inType == kdouble || outType == kdouble))
496 {
497 if (gHasDouble)
498 {
499 vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
500 "FAILED ** \n",
501 testNumber, gTypeNames[outType],
502 gSaturationNames[sat], gRoundingModeNames[round],
503 gTypeNames[inType]);
504 vlog("\t\tcl_khr_fp64 enabled, but double "
505 "testing turned off.\n");
506 }
507 continue;
508 }
509
510 // Skip the implicit converts if the rounding mode is
511 // not default or test is saturated
512 if (0 == startMinVectorSize)
513 {
514 if (sat || round != kDefaultRoundingMode)
515 gMinVectorSize = 1;
516 else
517 gMinVectorSize = 0;
518 }
519
520 if ((error = DoTest<InType, OutType>(outType, inType, sat, round)))
521 {
522 vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
523 "FAILED ** \n",
524 testNumber, gTypeNames[outType],
525 gSaturationNames[sat], gRoundingModeNames[round],
526 gTypeNames[inType]);
527 }
528 }
529 }
530 }
531
532
533 template <typename InType, typename OutType>
DoTest(Type outType,Type inType,SaturationMode sat,RoundingMode round)534 int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
535 RoundingMode round)
536 {
537 #ifdef __APPLE__
538 cl_ulong wall_start = mach_absolute_time();
539 #endif
540
541 cl_uint threads = GetThreadCount();
542
543 DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
544 DataInfoSpec<InType, OutType> init_info(info);
545 WriteInputBufferInfo writeInputBufferInfo;
546 int vectorSize;
547 int error = 0;
548 uint64_t i;
549
550 gTestCount++;
551 size_t blockCount =
552 BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]);
553 size_t step = blockCount;
554
555 for (i = 0; i < threads; i++)
556 {
557 init_info.mdv.emplace_back(MTdataHolder(gRandomSeed));
558 }
559
560 writeInputBufferInfo.outType = outType;
561 writeInputBufferInfo.inType = inType;
562
563 writeInputBufferInfo.calcInfo.resize(gMaxVectorSize);
564 for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
565 {
566 writeInputBufferInfo.calcInfo[vectorSize].reset(
567 new CalcRefValsPat<InType, OutType>());
568 writeInputBufferInfo.calcInfo[vectorSize]->program =
569 conv_test::MakeProgram(
570 outType, inType, sat, round, vectorSize,
571 &writeInputBufferInfo.calcInfo[vectorSize]->kernel);
572 if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->program)
573 {
574 gFailCount++;
575 return -1;
576 }
577 if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->kernel)
578 {
579 gFailCount++;
580 vlog_error("\t\tFAILED -- Failed to create kernel.\n");
581 return -2;
582 }
583
584 writeInputBufferInfo.calcInfo[vectorSize]->parent =
585 &writeInputBufferInfo;
586 writeInputBufferInfo.calcInfo[vectorSize]->vectorSize = vectorSize;
587 writeInputBufferInfo.calcInfo[vectorSize]->result = -1;
588 }
589
590 if (gSkipTesting) return error;
591
592 // Patch up rounding mode if default is RTZ
593 // We leave the part above in default rounding mode so that the right kernel
594 // is compiled.
595 if (std::is_same<OutType, cl_float>::value)
596 {
597 if (round == kDefaultRoundingMode && gIsRTZ)
598 init_info.round = round = kRoundTowardZero;
599 }
600
601 // Figure out how many elements are in a work block
602 // we handle 64-bit types a bit differently.
603 uint64_t lastCase = (8 * gTypeSizes[inType] > 32)
604 ? 0x100000000ULL
605 : 1ULL << (8 * gTypeSizes[inType]);
606
607 if (!gWimpyMode && gIsEmbedded)
608 step = blockCount * EMBEDDED_REDUCTION_FACTOR;
609
610 if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
611 vlog("Testing... ");
612 fflush(stdout);
613 for (i = 0; i < (uint64_t)lastCase; i += step)
614 {
615
616 if (0 == (i & ((lastCase >> 3) - 1)))
617 {
618 vlog(".");
619 fflush(stdout);
620 }
621
622 cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
623 writeInputBufferInfo.count = count;
624
625 // Crate a user event to represent the status of the reference value
626 // computation completion
627 writeInputBufferInfo.calcReferenceValues =
628 clCreateUserEvent(gContext, &error);
629 if (error || NULL == writeInputBufferInfo.calcReferenceValues)
630 {
631 vlog_error("ERROR: Unable to create user event. (%d)\n", error);
632 gFailCount++;
633 return error;
634 }
635
636 // retain for consumption by MapOutputBufferComplete
637 for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
638 vectorSize++)
639 {
640 if ((error =
641 clRetainEvent(writeInputBufferInfo.calcReferenceValues)))
642 {
643 vlog_error("ERROR: Unable to retain user event. (%d)\n", error);
644 gFailCount++;
645 return error;
646 }
647 }
648
649 // Crate a user event to represent when the callbacks are done verifying
650 // correctness
651 writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error);
652 if (error || NULL == writeInputBufferInfo.doneBarrier)
653 {
654 vlog_error("ERROR: Unable to create user event for barrier. (%d)\n",
655 error);
656 gFailCount++;
657 return error;
658 }
659
660 // retain for use by the callback that calls this
661 if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier)))
662 {
663 vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n",
664 error);
665 gFailCount++;
666 return error;
667 }
668
669 // Call this in a multithreaded manner
670 cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2;
671 init_info.start = i;
672 init_info.size = count / chunks;
673 if (init_info.size < 16384)
674 {
675 chunks = RoundUpToNextPowerOfTwo(threads);
676 init_info.size = count / chunks;
677 if (init_info.size < 16384)
678 {
679 init_info.size = count;
680 chunks = 1;
681 }
682 }
683
684 ThreadPool_Do(conv_test::InitData, chunks, &init_info);
685
686 // Copy the results to the device
687 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0,
688 count * gTypeSizes[inType], gIn, 0,
689 NULL, NULL)))
690 {
691 vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error);
692 gFailCount++;
693 return error;
694 }
695
696 // Call completion callback for the write, which will enqueue the rest
697 // of the work.
698 conv_test::WriteInputBufferComplete((void *)&writeInputBufferInfo);
699
700 // Make sure the work is actually running, so we don't deadlock
701 if ((error = clFlush(gQueue)))
702 {
703 vlog_error("clFlush failed with error %d\n", error);
704 gFailCount++;
705 return error;
706 }
707
708 ThreadPool_Do(conv_test::PrepareReference, chunks, &init_info);
709
710 // signal we are done calculating the reference results
711 if ((error = clSetUserEventStatus(
712 writeInputBufferInfo.calcReferenceValues, CL_COMPLETE)))
713 {
714 vlog_error(
715 "Error: Failed to set user event status to CL_COMPLETE: %d\n",
716 error);
717 gFailCount++;
718 return error;
719 }
720
721 // Wait for the event callbacks to finish verifying correctness.
722 if ((error = clWaitForEvents(
723 1, (cl_event *)&writeInputBufferInfo.doneBarrier)))
724 {
725 vlog_error("Error: Failed to wait for barrier: %d\n", error);
726 gFailCount++;
727 return error;
728 }
729
730 if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues)))
731 {
732 vlog_error("Error: Failed to release calcReferenceValues: %d\n",
733 error);
734 gFailCount++;
735 return error;
736 }
737
738 if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier)))
739 {
740 vlog_error("Error: Failed to release done barrier: %d\n", error);
741 gFailCount++;
742 return error;
743 }
744
745 for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
746 vectorSize++)
747 {
748 if ((error = writeInputBufferInfo.calcInfo[vectorSize]->result))
749 {
750 switch (inType)
751 {
752 case kuchar:
753 case kchar:
754 vlog("Input value: 0x%2.2x ",
755 ((unsigned char *)gIn)[error - 1]);
756 break;
757 case kushort:
758 case kshort:
759 vlog("Input value: 0x%4.4x ",
760 ((unsigned short *)gIn)[error - 1]);
761 break;
762 case kuint:
763 case kint:
764 vlog("Input value: 0x%8.8x ",
765 ((unsigned int *)gIn)[error - 1]);
766 break;
767 case kfloat:
768 vlog("Input value: %a ", ((float *)gIn)[error - 1]);
769 break;
770 case kulong:
771 case klong:
772 vlog("Input value: 0x%16.16llx ",
773 ((unsigned long long *)gIn)[error - 1]);
774 break;
775 case kdouble:
776 vlog("Input value: %a ", ((double *)gIn)[error - 1]);
777 break;
778 default:
779 vlog_error("Internal error at %s: %d\n", __FILE__,
780 __LINE__);
781 abort();
782 break;
783 }
784
785 // tell the user which conversion it was.
786 if (0 == vectorSize)
787 vlog(" (implicit scalar conversion from %s to %s)\n",
788 gTypeNames[inType], gTypeNames[outType]);
789 else
790 vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType],
791 sizeNames[vectorSize], gSaturationNames[sat],
792 gRoundingModeNames[round], gTypeNames[inType],
793 sizeNames[vectorSize]);
794
795 gFailCount++;
796 return error;
797 }
798 }
799 }
800
801 log_info("done.\n");
802
803 if (gTimeResults)
804 {
805 // Kick off tests for the various vector lengths
806 for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
807 vectorSize++)
808 {
809 size_t workItemCount = blockCount / vectorSizes[vectorSize];
810 if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4)
811 workItemCount /=
812 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]);
813
814 double sum = 0.0;
815 double bestTime = INFINITY;
816 cl_uint k;
817 for (k = 0; k < PERF_LOOP_COUNT; k++)
818 {
819 uint64_t startTime = conv_test::GetTime();
820 if ((error = conv_test::RunKernel(
821 writeInputBufferInfo.calcInfo[vectorSize]->kernel,
822 gInBuffer, gOutBuffers[vectorSize], workItemCount)))
823 {
824 gFailCount++;
825 return error;
826 }
827
828 // Make sure OpenCL is done
829 if ((error = clFinish(gQueue)))
830 {
831 vlog_error("Error %d at clFinish\n", error);
832 return error;
833 }
834
835 uint64_t endTime = conv_test::GetTime();
836 double time = SubtractTime(endTime, startTime);
837 sum += time;
838 if (time < bestTime) bestTime = time;
839 }
840
841 if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
842 double clocksPerOp = bestTime * (double)gDeviceFrequency
843 * gComputeDevices * gSimdSize * 1e6
844 / (workItemCount * vectorSizes[vectorSize]);
845 if (0 == vectorSize)
846 vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
847 "implicit convert %s -> %s", gTypeNames[inType],
848 gTypeNames[outType]);
849 else
850 vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
851 "convert_%s%s%s%s( %s%s )", gTypeNames[outType],
852 sizeNames[vectorSize], gSaturationNames[sat],
853 gRoundingModeNames[round], gTypeNames[inType],
854 sizeNames[vectorSize]);
855 }
856 }
857
858 if (gWimpyMode)
859 vlog("\tWimp pass");
860 else
861 vlog("\tpassed");
862
863 #ifdef __APPLE__
864 // record the run time
865 vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start));
866 #endif
867 vlog("\n\n");
868 fflush(stdout);
869
870 return error;
871 }
872
873 #if !defined(__APPLE__)
874 void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
875 #endif
876
877 #if defined(_MSC_VER)
878 /* function is defined in "compat.h" */
879 #else
SubtractTime(uint64_t endTime,uint64_t startTime)880 double SubtractTime(uint64_t endTime, uint64_t startTime)
881 {
882 uint64_t diff = endTime - startTime;
883 static double conversion = 0.0;
884
885 if (0.0 == conversion)
886 {
887 #if defined(__APPLE__)
888 mach_timebase_info_data_t info = { 0, 0 };
889 kern_return_t err = mach_timebase_info(&info);
890 if (0 == err)
891 conversion = 1e-9 * (double)info.numer / (double)info.denom;
892 #else
893 // This function consumes output from GetTime() above, and converts the
894 // time to secionds.
895 #warning need accurate ticks to seconds conversion factor here. Times are invalid.
896 #endif
897 }
898
899 // strictly speaking we should also be subtracting out timer latency here
900 return conversion * (double)diff;
901 }
902 #endif
903
904 ////////////////////////////////////////////////////////////////////////////////
905
setAllowZ(uint8_t * allow,uint32_t * x,cl_uint count)906 static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
907 {
908 cl_uint i;
909 for (i = 0; i < count; ++i)
910 allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
911 }
912
913
914 void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
915
916 void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
917 void *data);
918
919 // Note: May be called reentrantly
MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> & info)920 void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
921 {
922 cl_int status;
923 // CalcRefValsBase *info = (CalcRefValsBase *)data;
924 cl_event calcReferenceValues = info->parent->calcReferenceValues;
925
926 // we know that the map is done, wait for the main thread to finish
927 // calculating the reference values
928 if ((status =
929 clSetEventCallback(calcReferenceValues, CL_COMPLETE,
930 CalcReferenceValuesComplete, (void *)&info)))
931 {
932 vlog_error("ERROR: clSetEventCallback failed in "
933 "MapResultValuesComplete with status: %d\n",
934 status);
935 gFailCount++; // not thread safe -- being lazy here
936 }
937
938 // this thread no longer needs its reference to info->calcReferenceValues,
939 // so release it
940 if ((status = clReleaseEvent(calcReferenceValues)))
941 {
942 vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed "
943 "with status: %d\n",
944 status);
945 gFailCount++; // not thread safe -- being lazy here
946 }
947
948 // no need to flush since we didn't enqueue anything
949
950 // e was already released by WriteInputBufferComplete. It should be
951 // destroyed automatically soon after we exit.
952 }
953
954
CalcReferenceValuesComplete(cl_event e,cl_int status,void * data)955 void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
956 void *data)
957 {
958 std::unique_ptr<CalcRefValsBase> &info =
959 *(std::unique_ptr<CalcRefValsBase> *)data;
960
961 cl_uint vectorSize = info->vectorSize;
962 cl_uint count = info->parent->count;
963 Type outType =
964 info->parent->outType; // the data type of the conversion result
965 Type inType = info->parent->inType; // the data type of the conversion input
966 size_t j;
967 cl_int error;
968 cl_event doneBarrier = info->parent->doneBarrier;
969
970 // report spurious error condition
971 if (CL_SUCCESS != status)
972 {
973 vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n",
974 status);
975 gFailCount++; // lazy about thread safety here
976 return;
977 }
978
979 // Now we know that both results have been mapped back from the device, and
980 // the main thread is done calculating the reference results. It is now time
981 // to check the results.
982
983 // verify results
984 void *mapped = info->p;
985
986 // Patch up NaNs conversions to integer to zero -- these can be converted to
987 // any integer
988 if (outType != kfloat && outType != kdouble)
989 {
990 if (inType == kfloat)
991 {
992 float *inp = (float *)gIn;
993 for (j = 0; j < count; j++)
994 {
995 if (isnan(inp[j]))
996 memset((char *)mapped + j * gTypeSizes[outType], 0,
997 gTypeSizes[outType]);
998 }
999 }
1000 if (inType == kdouble)
1001 {
1002 double *inp = (double *)gIn;
1003 for (j = 0; j < count; j++)
1004 {
1005 if (isnan(inp[j]))
1006 memset((char *)mapped + j * gTypeSizes[outType], 0,
1007 gTypeSizes[outType]);
1008 }
1009 }
1010 }
1011 else if (inType == kfloat || inType == kdouble)
1012 { // outtype and intype is float or double. NaN conversions for float <->
1013 // double can be any NaN
1014 if (inType == kfloat && outType == kdouble)
1015 {
1016 float *inp = (float *)gIn;
1017 double *outp = (double *)mapped;
1018 for (j = 0; j < count; j++)
1019 {
1020 if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
1021 }
1022 }
1023 if (inType == kdouble && outType == kfloat)
1024 {
1025 double *inp = (double *)gIn;
1026 float *outp = (float *)mapped;
1027 for (j = 0; j < count; j++)
1028 {
1029 if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
1030 }
1031 }
1032 }
1033
1034 if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
1035 info->result =
1036 info->check_result(mapped, count, vectorSizes[vectorSize]);
1037 else
1038 info->result = 0;
1039
1040 // Fill the output buffer with junk and release it
1041 {
1042 cl_uint pattern = 0xffffdead;
1043 memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]);
1044 if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize],
1045 mapped, 0, NULL, NULL)))
1046 {
1047 vlog_error("ERROR: clEnqueueUnmapMemObject failed in "
1048 "CalcReferenceValuesComplete (%d)\n",
1049 error);
1050 gFailCount++;
1051 }
1052 }
1053
1054 if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1))
1055 {
1056 if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE)))
1057 {
1058 vlog_error("ERROR: clSetUserEventStatus failed in "
1059 "CalcReferenceValuesComplete (err: %d). We're probably "
1060 "going to deadlock.\n",
1061 status);
1062 gFailCount++;
1063 return;
1064 }
1065
1066 if ((status = clReleaseEvent(doneBarrier)))
1067 {
1068 vlog_error("ERROR: clReleaseEvent failed in "
1069 "CalcReferenceValuesComplete (err: %d).\n",
1070 status);
1071 gFailCount++;
1072 return;
1073 }
1074 }
1075 // e was already released by WriteInputBufferComplete. It should be
1076 // destroyed automatically soon after all the calls to
1077 // CalcReferenceValuesComplete exit.
1078 }
1079
1080 //
1081
1082 namespace conv_test {
1083
1084 ////////////////////////////////////////////////////////////////////////////////
1085
InitData(cl_uint job_id,cl_uint thread_id,void * p)1086 cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
1087 {
1088 DataInitBase *info = (DataInitBase *)p;
1089
1090 info->init(job_id, thread_id);
1091
1092 return CL_SUCCESS;
1093 }
1094
1095 ////////////////////////////////////////////////////////////////////////////////
1096
PrepareReference(cl_uint job_id,cl_uint thread_id,void * p)1097 cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
1098 {
1099 DataInitBase *info = (DataInitBase *)p;
1100
1101 cl_uint count = info->size;
1102 Type inType = info->inType;
1103 Type outType = info->outType;
1104 RoundingMode round = info->round;
1105 size_t j;
1106
1107 Force64BitFPUPrecision();
1108
1109 void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType];
1110 void *a = (cl_uchar *)gAllowZ + job_id * count;
1111 void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
1112
1113
1114 if (outType != inType)
1115 {
1116 // create the reference while we wait
1117 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
1118 /* ARM VFP doesn't have hardware instruction for converting from 64-bit
1119 * integer to float types, hence GCC ARM uses the floating-point
1120 * emulation code despite which -mfloat-abi setting it is. But the
1121 * emulation code in libgcc.a has only one rounding mode (round to
1122 * nearest even in this case) and ignores the user rounding mode setting
1123 * in hardware. As a result setting rounding modes in hardware won't
1124 * give correct rounding results for type covert from 64-bit integer to
1125 * float using GCC for ARM compiler so for testing different rounding
1126 * modes, we need to use alternative reference function. ARM64 does have
1127 * an instruction, however we cannot guarantee the compiler will use it.
1128 * On all ARM architechures use emulation to calculate reference.*/
1129 switch (round)
1130 {
1131 /* conversions to floating-point type use the current rounding mode.
1132 * The only default floating-point rounding mode supported is round
1133 * to nearest even i.e the current rounding mode will be _rte for
1134 * floating-point types. */
1135 case kDefaultRoundingMode: qcom_rm = qcomRTE; break;
1136 case kRoundToNearestEven: qcom_rm = qcomRTE; break;
1137 case kRoundUp: qcom_rm = qcomRTP; break;
1138 case kRoundDown: qcom_rm = qcomRTN; break;
1139 case kRoundTowardZero: qcom_rm = qcomRTZ; break;
1140 default:
1141 vlog_error("ERROR: undefined rounding mode %d\n", round);
1142 break;
1143 }
1144 qcom_sat = info->sat;
1145 #endif
1146
1147 RoundingMode oldRound = set_round(round, outType);
1148
1149 if (info->sat)
1150 info->conv_array_sat(d, s, count);
1151 else
1152 info->conv_array(d, s, count);
1153
1154 set_round(oldRound, outType);
1155
1156 // Decide if we allow a zero result in addition to the correctly rounded
1157 // one
1158 memset(a, 0, count);
1159 if (gForceFTZ)
1160 {
1161 if (inType == kfloat || outType == kfloat)
1162 setAllowZ((uint8_t *)a, (uint32_t *)s, count);
1163 }
1164 }
1165 else
1166 {
1167 // Copy the input to the reference
1168 memcpy(d, s, info->size * gTypeSizes[inType]);
1169 }
1170
1171 // Patch up NaNs conversions to integer to zero -- these can be converted to
1172 // any integer
1173 if (info->outType != kfloat && info->outType != kdouble)
1174 {
1175 if (inType == kfloat)
1176 {
1177 float *inp = (float *)s;
1178 for (j = 0; j < count; j++)
1179 {
1180 if (isnan(inp[j]))
1181 memset((char *)d + j * gTypeSizes[outType], 0,
1182 gTypeSizes[outType]);
1183 }
1184 }
1185 if (inType == kdouble)
1186 {
1187 double *inp = (double *)s;
1188 for (j = 0; j < count; j++)
1189 {
1190 if (isnan(inp[j]))
1191 memset((char *)d + j * gTypeSizes[outType], 0,
1192 gTypeSizes[outType]);
1193 }
1194 }
1195 }
1196 else if (inType == kfloat || inType == kdouble)
1197 { // outtype and intype is float or double. NaN conversions for float <->
1198 // double can be any NaN
1199 if (inType == kfloat && outType == kdouble)
1200 {
1201 float *inp = (float *)s;
1202 for (j = 0; j < count; j++)
1203 {
1204 if (isnan(inp[j])) ((double *)d)[j] = NAN;
1205 }
1206 }
1207 if (inType == kdouble && outType == kfloat)
1208 {
1209 double *inp = (double *)s;
1210 for (j = 0; j < count; j++)
1211 {
1212 if (isnan(inp[j])) ((float *)d)[j] = NAN;
1213 }
1214 }
1215 }
1216
1217 return CL_SUCCESS;
1218 }
1219
1220 ////////////////////////////////////////////////////////////////////////////////
1221
GetTime(void)1222 uint64_t GetTime(void)
1223 {
1224 #if defined(__APPLE__)
1225 return mach_absolute_time();
1226 #elif defined(_MSC_VER)
1227 return ReadTime();
1228 #else
1229 // mach_absolute_time is a high precision timer with precision < 1
1230 // microsecond.
1231 #warning need accurate clock here. Times are invalid.
1232 return 0;
1233 #endif
1234 }
1235
1236 ////////////////////////////////////////////////////////////////////////////////
1237
1238 // Note: not called reentrantly
WriteInputBufferComplete(void * data)1239 void WriteInputBufferComplete(void *data)
1240 {
1241 cl_int status;
1242 WriteInputBufferInfo *info = (WriteInputBufferInfo *)data;
1243 cl_uint count = info->count;
1244 int vectorSize;
1245
1246 info->barrierCount = gMaxVectorSize - gMinVectorSize;
1247
1248 // now that we know that the write buffer is complete, enqueue callbacks to
1249 // wait for the main thread to finish calculating the reference results.
1250 for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
1251 {
1252 size_t workItemCount =
1253 (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]);
1254
1255 if ((status = conv_test::RunKernel(info->calcInfo[vectorSize]->kernel,
1256 gInBuffer, gOutBuffers[vectorSize],
1257 workItemCount)))
1258 {
1259 gFailCount++;
1260 return;
1261 }
1262
1263 info->calcInfo[vectorSize]->p = clEnqueueMapBuffer(
1264 gQueue, gOutBuffers[vectorSize], CL_TRUE,
1265 CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0,
1266 NULL, NULL, &status);
1267 {
1268 if (status)
1269 {
1270 vlog_error("ERROR: WriteInputBufferComplete calback failed "
1271 "with status: %d\n",
1272 status);
1273 gFailCount++;
1274 return;
1275 }
1276 }
1277 }
1278
1279 for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
1280 {
1281 MapResultValuesComplete(info->calcInfo[vectorSize]);
1282 }
1283
1284 // Make sure the work starts moving -- otherwise we may deadlock
1285 if ((status = clFlush(gQueue)))
1286 {
1287 vlog_error(
1288 "ERROR: WriteInputBufferComplete calback failed with status: %d\n",
1289 status);
1290 gFailCount++;
1291 return;
1292 }
1293
1294 // e was already released by the main thread. It should be destroyed
1295 // automatically soon after we exit.
1296 }
1297
1298 ////////////////////////////////////////////////////////////////////////////////
1299
MakeProgram(Type outType,Type inType,SaturationMode sat,RoundingMode round,int vectorSize,cl_kernel * outKernel)1300 cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
1301 RoundingMode round, int vectorSize, cl_kernel *outKernel)
1302 {
1303 cl_program program;
1304 char testName[256];
1305 int error = 0;
1306
1307 std::ostringstream source;
1308 if (outType == kdouble || inType == kdouble)
1309 source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
1310
1311 // Create the program. This is a bit complicated because we are trying to
1312 // avoid byte and short stores.
1313 if (0 == vectorSize)
1314 {
1315 // Create the type names.
1316 char inName[32];
1317 char outName[32];
1318 strncpy(inName, gTypeNames[inType], sizeof(inName));
1319 strncpy(outName, gTypeNames[outType], sizeof(outName));
1320 sprintf(testName, "test_implicit_%s_%s", outName, inName);
1321
1322 source << "__kernel void " << testName << "( __global " << inName
1323 << " *src, __global " << outName << " *dest )\n";
1324 source << "{\n";
1325 source << " size_t i = get_global_id(0);\n";
1326 source << " dest[i] = src[i];\n";
1327 source << "}\n";
1328
1329 vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType],
1330 gTypeNames[outType]);
1331 fflush(stdout);
1332 }
1333 else
1334 {
1335 int vectorSizetmp = vectorSizes[vectorSize];
1336
1337 // Create the type names.
1338 char convertString[128];
1339 char inName[32];
1340 char outName[32];
1341 switch (vectorSizetmp)
1342 {
1343 case 1:
1344 strncpy(inName, gTypeNames[inType], sizeof(inName));
1345 strncpy(outName, gTypeNames[outType], sizeof(outName));
1346 snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
1347 outName, gSaturationNames[sat],
1348 gRoundingModeNames[round]);
1349 snprintf(testName, 256, "test_%s_%s", convertString, inName);
1350 vlog("Building %s( %s ) test\n", convertString, inName);
1351 break;
1352 case 3:
1353 strncpy(inName, gTypeNames[inType], sizeof(inName));
1354 strncpy(outName, gTypeNames[outType], sizeof(outName));
1355 snprintf(convertString, sizeof(convertString),
1356 "convert_%s3%s%s", outName, gSaturationNames[sat],
1357 gRoundingModeNames[round]);
1358 snprintf(testName, 256, "test_%s_%s3", convertString, inName);
1359 vlog("Building %s( %s3 ) test\n", convertString, inName);
1360 break;
1361 default:
1362 snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType],
1363 vectorSizetmp);
1364 snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType],
1365 vectorSizetmp);
1366 snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
1367 outName, gSaturationNames[sat],
1368 gRoundingModeNames[round]);
1369 snprintf(testName, 256, "test_%s_%s", convertString, inName);
1370 vlog("Building %s( %s ) test\n", convertString, inName);
1371 break;
1372 }
1373 fflush(stdout);
1374
1375 if (vectorSizetmp == 3)
1376 {
1377 source << "__kernel void " << testName << "( __global " << inName
1378 << " *src, __global " << outName << " *dest )\n";
1379 source << "{\n";
1380 source << " size_t i = get_global_id(0);\n";
1381 source << " if( i + 1 < get_global_size(0))\n";
1382 source << " vstore3( " << convertString
1383 << "( vload3( i, src)), i, dest );\n";
1384 source << " else\n";
1385 source << " {\n";
1386 source << " " << inName << "3 in;\n";
1387 source << " " << outName << "3 out;\n";
1388 source << " if( 0 == (i & 1) )\n";
1389 source << " in.y = src[3*i+1];\n";
1390 source << " in.x = src[3*i];\n";
1391 source << " out = " << convertString << "( in ); \n";
1392 source << " dest[3*i] = out.x;\n";
1393 source << " if( 0 == (i & 1) )\n";
1394 source << " dest[3*i+1] = out.y;\n";
1395 source << " }\n";
1396 source << "}\n";
1397 }
1398 else
1399 {
1400 source << "__kernel void " << testName << "( __global " << inName
1401 << " *src, __global " << outName << " *dest )\n";
1402 source << "{\n";
1403 source << " size_t i = get_global_id(0);\n";
1404 source << " dest[i] = " << convertString << "( src[i] );\n";
1405 source << "}\n";
1406 }
1407 }
1408 *outKernel = NULL;
1409
1410 const char *flags = NULL;
1411 if (gForceFTZ) flags = "-cl-denorms-are-zero";
1412
1413 // build it
1414 std::string sourceString = source.str();
1415 const char *programSource = sourceString.c_str();
1416 error = create_single_kernel_helper(gContext, &program, outKernel, 1,
1417 &programSource, testName, flags);
1418 if (error)
1419 {
1420 vlog_error("Failed to build kernel/program (err = %d).\n", error);
1421 return NULL;
1422 }
1423
1424 return program;
1425 }
1426
1427 //
1428
RunKernel(cl_kernel kernel,void * inBuf,void * outBuf,size_t blockCount)1429 int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount)
1430 {
1431 // The global dimensions are just the blockCount to execute since we haven't
1432 // set up multiple queues for multiple devices.
1433 int error;
1434
1435 error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf);
1436 error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf);
1437
1438 if (error)
1439 {
1440 vlog_error("FAILED -- could not set kernel args (%d)\n", error);
1441 return error;
1442 }
1443
1444 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount,
1445 NULL, 0, NULL, NULL)))
1446 {
1447 vlog_error("FAILED -- could not execute kernel (%d)\n", error);
1448 return error;
1449 }
1450
1451 return 0;
1452 }
1453
1454
GetTestCase(const char * name,Type * outType,Type * inType,SaturationMode * sat,RoundingMode * round)1455 int GetTestCase(const char *name, Type *outType, Type *inType,
1456 SaturationMode *sat, RoundingMode *round)
1457 {
1458 int i;
1459
1460 // Find the return type
1461 for (i = 0; i < kTypeCount; i++)
1462 if (name == strstr(name, gTypeNames[i]))
1463 {
1464 *outType = (Type)i;
1465 name += strlen(gTypeNames[i]);
1466
1467 break;
1468 }
1469
1470 if (i == kTypeCount) return -1;
1471
1472 // Check to see if _sat appears next
1473 *sat = (SaturationMode)0;
1474 for (i = 1; i < kSaturationModeCount; i++)
1475 if (name == strstr(name, gSaturationNames[i]))
1476 {
1477 *sat = (SaturationMode)i;
1478 name += strlen(gSaturationNames[i]);
1479 break;
1480 }
1481
1482 *round = (RoundingMode)0;
1483 for (i = 1; i < kRoundingModeCount; i++)
1484 if (name == strstr(name, gRoundingModeNames[i]))
1485 {
1486 *round = (RoundingMode)i;
1487 name += strlen(gRoundingModeNames[i]);
1488 break;
1489 }
1490
1491 if (*name != '_') return -2;
1492 name++;
1493
1494 for (i = 0; i < kTypeCount; i++)
1495 if (name == strstr(name, gTypeNames[i]))
1496 {
1497 *inType = (Type)i;
1498 name += strlen(gTypeNames[i]);
1499
1500 break;
1501 }
1502
1503 if (i == kTypeCount) return -3;
1504
1505 if (*name != '\0') return -4;
1506
1507 return 0;
1508 }
1509
1510 } // namespace conv_test
1511