1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <cmath>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdexcept>
28 #include <vector>
29
30 #include <unknwn.h>
31 #include <directx/d3d12.h>
32 #include <dxgi1_4.h>
33 #include <gtest/gtest.h>
34 #include <wrl.h>
35 #include <dxguids/dxguids.h>
36
37 #include "compute_test.h"
38
39 using std::vector;
40
TEST_F(ComputeTest,runtime_memcpy)41 TEST_F(ComputeTest, runtime_memcpy)
42 {
43 struct shift { uint8_t val; uint8_t shift; uint16_t ret; };
44 const char *kernel_source =
45 "struct shift { uchar val; uchar shift; ushort ret; };\n\
46 __kernel void main_test(__global struct shift *inout)\n\
47 {\n\
48 uint id = get_global_id(0);\n\
49 uint id2 = id + get_global_id(1);\n\
50 struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\
51 lc[id] = inout[id];\n\
52 inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\
53 }\n";
54
55 auto inout = ShaderArg<struct shift>({
56 { 0x10, 1, 0xffff },
57 { 0x20, 2, 0xffff },
58 { 0x30, 3, 0xffff },
59 { 0x40, 4, 0xffff },
60 },
61 SHADER_ARG_INOUT);
62 const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };
63 run_shader(kernel_source, inout.size(), 1, 1, inout);
64 for (int i = 0; i < inout.size(); ++i)
65 EXPECT_EQ(inout[i].ret, expected[i]);
66 }
67
TEST_F(ComputeTest,two_global_arrays)68 TEST_F(ComputeTest, two_global_arrays)
69 {
70 const char *kernel_source =
71 "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
72 {\n\
73 uint idx = get_global_id(0);\n\
74 g1[idx] -= g2[idx];\n\
75 }\n";
76 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
77 auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
78 const uint32_t expected[] = {
79 9, 18, 27, 36
80 };
81
82 run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
83 for (int i = 0; i < g1.size(); ++i)
84 EXPECT_EQ(g1[i], expected[i]);
85 }
86
TEST_F(ComputeTest,nested_arrays)87 TEST_F(ComputeTest, nested_arrays)
88 {
89 const char *kernel_source = R"(
90 float4 DoMagic(float4 inValue)
91 {
92 const float testArr[3][3] = {
93 {0.1f, 0.2f, 0.3f},
94 {0.4f, 0.5f, 0.6f},
95 {0.7f, 0.8f, 0.9f}};
96 float4 outValue = inValue;
97 outValue.x = inValue.x * testArr[0][0] + inValue.y * testArr[0][1] + inValue.z * testArr[0][2];
98 outValue.y = inValue.x * testArr[1][0] + inValue.y * testArr[1][1] + inValue.z * testArr[1][2];
99 outValue.z = inValue.x * testArr[2][0] + inValue.y * testArr[2][1] + inValue.z * testArr[2][2];
100 return outValue;
101 }
102 __kernel void main_test(__global float4 *g1, __global float4 *g2)
103 {
104 uint idx = get_global_id(0);
105 g1[idx] = DoMagic(g2[idx]);
106 })";
107 auto g1 = ShaderArg<float>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
108 auto g2 = ShaderArg<float>({ 0.2f, 0.4f, 0.6f, 1.0f }, SHADER_ARG_INPUT);
109 const float expected[] = {
110 0.28f, 0.64f, 1.0f, 1.0f
111 };
112
113 run_shader(kernel_source, 1, 1, 1, g1, g2);
114 for (int i = 0; i < g1.size(); ++i)
115 EXPECT_FLOAT_EQ(g1[i], expected[i]);
116 }
117
118 /* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
TEST_F(ComputeTest,DISABLED_i64tof32)119 TEST_F(ComputeTest, DISABLED_i64tof32)
120 {
121 const char *kernel_source =
122 "__kernel void main_test(__global long *out, __constant long *in)\n\
123 {\n\
124 __local float tmp[12];\n\
125 uint idx = get_global_id(0);\n\
126 tmp[idx] = in[idx];\n\
127 barrier(CLK_LOCAL_MEM_FENCE);\n\
128 out[idx] = tmp[idx + get_global_id(1)];\n\
129 }\n";
130 auto in = ShaderArg<int64_t>({ 0x100000000LL,
131 -0x100000000LL,
132 0x7fffffffffffffffLL,
133 0x4000004000000000LL,
134 0x4000003fffffffffLL,
135 0x4000004000000001LL,
136 -1,
137 -0x4000004000000000LL,
138 -0x4000003fffffffffLL,
139 -0x4000004000000001LL,
140 0,
141 INT64_MIN },
142 SHADER_ARG_INPUT);
143 auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
144 const int64_t expected[] = {
145 0x100000000LL,
146 -0x100000000LL,
147 0x7fffffffffffffffLL,
148 0x4000000000000000LL,
149 0x4000000000000000LL,
150 0x4000008000000000LL,
151 -1,
152 -0x4000000000000000LL,
153 -0x4000000000000000LL,
154 -0x4000008000000000LL,
155 0,
156 INT64_MIN,
157 };
158
159 run_shader(kernel_source, out.size(), 1, 1, out, in);
160 for (int i = 0; i < out.size(); ++i) {
161 EXPECT_EQ((int64_t)out[i], expected[i]);
162 }
163 }
TEST_F(ComputeTest,two_constant_arrays)164 TEST_F(ComputeTest, two_constant_arrays)
165 {
166 const char *kernel_source =
167 "__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\
168 {\n\
169 uint idx = get_global_id(0);\n\
170 g1[idx] -= c1[idx] + c2[idx];\n\
171 }\n";
172 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
173 auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
174 auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);
175 const uint32_t expected[] = {
176 4, 13, 22, 31
177 };
178
179 run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);
180 for (int i = 0; i < g1.size(); ++i)
181 EXPECT_EQ(g1[i], expected[i]);
182 }
183
TEST_F(ComputeTest,null_constant_ptr)184 TEST_F(ComputeTest, null_constant_ptr)
185 {
186 const char *kernel_source =
187 "__kernel void main_test(__global uint *g1, __constant uint *c1)\n\
188 {\n\
189 __constant uint fallback[] = {2, 3, 4, 5};\n\
190 __constant uint *c = c1 ? c1 : fallback;\n\
191 uint idx = get_global_id(0);\n\
192 g1[idx] -= c[idx];\n\
193 }\n";
194 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
195 auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
196 const uint32_t expected1[] = {
197 9, 18, 27, 36
198 };
199
200 run_shader(kernel_source, g1.size(), 1, 1, g1, c1);
201 for (int i = 0; i < g1.size(); ++i)
202 EXPECT_EQ(g1[i], expected1[i]);
203
204 const uint32_t expected2[] = {
205 8, 17, 26, 35
206 };
207
208 g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
209 auto c2 = NullShaderArg();
210 run_shader(kernel_source, g1.size(), 1, 1, g1, c2);
211 for (int i = 0; i < g1.size(); ++i)
212 EXPECT_EQ(g1[i], expected2[i]);
213 }
214
TEST_F(ComputeTest,null_global_ptr)215 TEST_F(ComputeTest, null_global_ptr)
216 {
217 const char *kernel_source =
218 "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
219 {\n\
220 __constant uint fallback[] = {2, 3, 4, 5};\n\
221 uint idx = get_global_id(0);\n\
222 g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\
223 }\n";
224 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
225 auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
226 const uint32_t expected1[] = {
227 9, 18, 27, 36
228 };
229
230 run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
231 for (int i = 0; i < g1.size(); ++i)
232 EXPECT_EQ(g1[i], expected1[i]);
233
234 const uint32_t expected2[] = {
235 8, 17, 26, 35
236 };
237
238 g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
239 auto g2null = NullShaderArg();
240 run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);
241 for (int i = 0; i < g1.size(); ++i)
242 EXPECT_EQ(g1[i], expected2[i]);
243 }
244
TEST_F(ComputeTest,ret_constant_ptr)245 TEST_F(ComputeTest, ret_constant_ptr)
246 {
247 struct s { uint64_t ptr; uint32_t val; };
248 const char *kernel_source =
249 "struct s { __constant uint *ptr; uint val; };\n\
250 __kernel void main_test(__global struct s *out, __constant uint *in)\n\
251 {\n\
252 __constant uint foo[] = { 1, 2 };\n\
253 uint idx = get_global_id(0);\n\
254 if (idx == 0)\n\
255 out[idx].ptr = foo;\n\
256 else\n\
257 out[idx].ptr = in;\n\
258 out[idx].val = out[idx].ptr[idx];\n\
259 }\n";
260 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
261 auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
262 const uint32_t expected_val[] = {
263 1, 4
264 };
265 const uint64_t expected_ptr[] = {
266 2ull << 32, 1ull << 32
267 };
268
269 run_shader(kernel_source, out.size(), 1, 1, out, in);
270 for (int i = 0; i < out.size(); ++i) {
271 EXPECT_EQ(out[i].val, expected_val[i]);
272 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
273 }
274 }
275
TEST_F(ComputeTest,ret_global_ptr)276 TEST_F(ComputeTest, ret_global_ptr)
277 {
278 struct s { uint64_t ptr; uint32_t val; };
279 const char *kernel_source =
280 "struct s { __global uint *ptr; uint val; };\n\
281 __kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\
282 {\n\
283 uint idx = get_global_id(0);\n\
284 out[idx].ptr = idx ? in2 : in1;\n\
285 out[idx].val = out[idx].ptr[idx];\n\
286 }\n";
287 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
288 auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);
289 auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
290 const uint32_t expected_val[] = {
291 1, 4
292 };
293 const uint64_t expected_ptr[] = {
294 1ull << 32, 2ull << 32
295 };
296
297 run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);
298 for (int i = 0; i < out.size(); ++i) {
299 EXPECT_EQ(out[i].val, expected_val[i]);
300 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
301 }
302 }
303
TEST_F(ComputeTest,ret_local_ptr)304 TEST_F(ComputeTest, ret_local_ptr)
305 {
306 struct s { uint64_t ptr; };
307 const char *kernel_source =
308 "struct s { __local uint *ptr; };\n\
309 __kernel void main_test(__global struct s *out)\n\
310 {\n\
311 __local uint tmp[2];\n\
312 uint idx = get_global_id(0);\n\
313 tmp[idx] = idx;\n\
314 out[idx].ptr = &tmp[idx];\n\
315 }\n";
316 auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
317 const uint64_t expected_ptr[] = {
318 0, 4,
319 };
320
321 run_shader(kernel_source, out.size(), 1, 1, out);
322 for (int i = 0; i < out.size(); ++i) {
323 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
324 }
325 }
326
TEST_F(ComputeTest,ret_private_ptr)327 TEST_F(ComputeTest, ret_private_ptr)
328 {
329 struct s { uint64_t ptr; uint32_t value; };
330 const char *kernel_source =
331 "struct s { __private uint *ptr; uint value; };\n\
332 __kernel void main_test(__global struct s *out)\n\
333 {\n\
334 uint tmp[2] = {1, 2};\n\
335 uint idx = get_global_id(0);\n\
336 out[idx].ptr = &tmp[idx];\n\
337 out[idx].value = *out[idx].ptr;\n\
338 }\n";
339 auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
340 const uint64_t expected_ptr[] = {
341 0, 4,
342 };
343 const uint32_t expected_value[] = {
344 1, 2
345 };
346
347 run_shader(kernel_source, out.size(), 1, 1, out);
348 for (int i = 0; i < out.size(); ++i) {
349 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
350 }
351 }
352
TEST_F(ComputeTest,globals_8bit)353 TEST_F(ComputeTest, globals_8bit)
354 {
355 const char *kernel_source =
356 "__kernel void main_test(__global unsigned char *inout)\n\
357 {\n\
358 uint idx = get_global_id(0);\n\
359 inout[idx] = inout[idx] + 1;\n\
360 }\n";
361 auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);
362 const uint8_t expected[] = {
363 101, 111, 121, 131
364 };
365 run_shader(kernel_source, inout.size(), 1, 1, inout);
366 for (int i = 0; i < inout.size(); ++i)
367 EXPECT_EQ(inout[i], expected[i]);
368 }
369
TEST_F(ComputeTest,globals_16bit)370 TEST_F(ComputeTest, globals_16bit)
371 {
372 const char *kernel_source =
373 "__kernel void main_test(__global unsigned short *inout)\n\
374 {\n\
375 uint idx = get_global_id(0);\n\
376 inout[idx] = inout[idx] + 1;\n\
377 }\n";
378 auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);
379 const uint16_t expected[] = {
380 10001, 10011, 10021, 10031
381 };
382 run_shader(kernel_source, inout.size(), 1, 1, inout);
383 for (int i = 0; i < inout.size(); ++i)
384 EXPECT_EQ(inout[i], expected[i]);
385 }
386
TEST_F(ComputeTest,globals_64bit)387 TEST_F(ComputeTest, globals_64bit)
388 {
389 const char *kernel_source =
390 "__kernel void main_test(__global unsigned long *inout)\n\
391 {\n\
392 uint idx = get_global_id(0);\n\
393 inout[idx] = inout[idx] + 1;\n\
394 }\n";
395 uint64_t base = 1ull << 50;
396 auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },
397 SHADER_ARG_INOUT);
398 const uint64_t expected[] = {
399 base + 1, base + 11, base + 21, base + 31
400 };
401 run_shader(kernel_source, inout.size(), 1, 1, inout);
402 for (int i = 0; i < inout.size(); ++i)
403 EXPECT_EQ(inout[i], expected[i]);
404 }
405
TEST_F(ComputeTest,built_ins_global_id)406 TEST_F(ComputeTest, built_ins_global_id)
407 {
408 const char *kernel_source =
409 "__kernel void main_test(__global uint *output)\n\
410 {\n\
411 output[get_global_id(0)] = get_global_id(0);\n\
412 }\n";
413 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
414 SHADER_ARG_OUTPUT);
415 const uint32_t expected[] = {
416 0, 1, 2, 3
417 };
418
419 run_shader(kernel_source, output.size(), 1, 1, output);
420 for (int i = 0; i < output.size(); ++i)
421 EXPECT_EQ(output[i], expected[i]);
422 }
423
TEST_F(ComputeTest,built_ins_global_id_rmw)424 TEST_F(ComputeTest, built_ins_global_id_rmw)
425 {
426 const char *kernel_source =
427 "__kernel void main_test(__global uint *output)\n\
428 {\n\
429 uint id = get_global_id(0);\n\
430 output[id] = output[id] * (id + 1);\n\
431 }\n";
432 auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},
433 SHADER_ARG_INOUT);
434 const uint32_t expected[] = {
435 0x00000001, 0x20000002, 0x00060006, 0x1004080c
436 };
437 run_shader(kernel_source, inout.size(), 1, 1, inout);
438 for (int i = 0; i < inout.size(); ++i)
439 EXPECT_EQ(inout[i], expected[i]);
440 }
441
TEST_F(ComputeTest,types_float_basics)442 TEST_F(ComputeTest, types_float_basics)
443 {
444 const char *kernel_source =
445 "__kernel void main_test(__global uint *output)\n\
446 {\n\
447 output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\
448 }\n";
449 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
450 SHADER_ARG_OUTPUT);
451 const uint32_t expected[] = {
452 1, 2, 3, 4
453 };
454 run_shader(kernel_source, output.size(), 1, 1, output);
455 for (int i = 0; i < output.size(); ++i)
456 EXPECT_EQ(output[i], expected[i]);
457 }
458
TEST_F(ComputeTest,DISABLED_types_double_basics)459 TEST_F(ComputeTest, DISABLED_types_double_basics)
460 {
461 /* Disabled because doubles are unsupported */
462 const char *kernel_source =
463 "__kernel void main_test(__global uint *output)\n\
464 {\n\
465 output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\
466 }\n";
467 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
468 SHADER_ARG_OUTPUT);
469 const uint32_t expected[] = {
470 1, 2, 3, 4
471 };
472 run_shader(kernel_source, output.size(), 1, 1, output);
473 for (int i = 0; i < output.size(); ++i)
474 EXPECT_EQ(output[i], expected[i]);
475 }
476
TEST_F(ComputeTest,types_short_basics)477 TEST_F(ComputeTest, types_short_basics)
478 {
479 const char *kernel_source =
480 "__kernel void main_test(__global uint *output)\n\
481 {\n\
482 output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\
483 }\n";
484 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
485 SHADER_ARG_OUTPUT);
486 const uint32_t expected[] = {
487 1, 2, 3, 4
488 };
489 run_shader(kernel_source, output.size(), 1, 1, output);
490 for (int i = 0; i < output.size(); ++i)
491 EXPECT_EQ(output[i], expected[i]);
492 }
493
TEST_F(ComputeTest,types_char_basics)494 TEST_F(ComputeTest, types_char_basics)
495 {
496 const char *kernel_source =
497 "__kernel void main_test(__global uint *output)\n\
498 {\n\
499 output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\
500 }\n";
501 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
502 SHADER_ARG_OUTPUT);
503 const uint32_t expected[] = {
504 1, 2, 3, 4
505 };
506 run_shader(kernel_source, output.size(), 1, 1, output);
507 for (int i = 0; i < output.size(); ++i)
508 EXPECT_EQ(output[i], expected[i]);
509 }
510
TEST_F(ComputeTest,types_if_statement)511 TEST_F(ComputeTest, types_if_statement)
512 {
513 const char *kernel_source =
514 "__kernel void main_test(__global uint *output)\n\
515 {\n\
516 int idx = get_global_id(0);\n\
517 if (idx > 0)\n\
518 output[idx] = ~idx;\n\
519 else\n\
520 output[0] = 0xff;\n\
521 }\n";
522 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
523 SHADER_ARG_OUTPUT);
524 const uint32_t expected[] = {
525 0xff, ~1u, ~2u, ~3u
526 };
527 run_shader(kernel_source, output.size(), 1, 1, output);
528 for (int i = 0; i < output.size(); ++i)
529 EXPECT_EQ(output[i], expected[i]);
530 }
531
TEST_F(ComputeTest,types_do_while_loop)532 TEST_F(ComputeTest, types_do_while_loop)
533 {
534 const char *kernel_source =
535 "__kernel void main_test(__global uint *output)\n\
536 {\n\
537 int value = 1;\n\
538 int i = 1, n = get_global_id(0);\n\
539 do {\n\
540 value *= i++;\n\
541 } while (i <= n);\n\
542 output[n] = value;\n\
543 }\n";
544 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
545 SHADER_ARG_OUTPUT);
546 const uint32_t expected[] = {
547 1, 1, 1*2, 1*2*3, 1*2*3*4
548 };
549 run_shader(kernel_source, output.size(), 1, 1, output);
550 for (int i = 0; i < output.size(); ++i)
551 EXPECT_EQ(output[i], expected[i]);
552 }
553
TEST_F(ComputeTest,types_for_loop)554 TEST_F(ComputeTest, types_for_loop)
555 {
556 const char *kernel_source =
557 "__kernel void main_test(__global uint *output)\n\
558 {\n\
559 int value = 1;\n\
560 int n = get_global_id(0);\n\
561 for (int i = 1; i <= n; ++i)\n\
562 value *= i;\n\
563 output[n] = value;\n\
564 }\n";
565 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
566 SHADER_ARG_OUTPUT);
567 const uint32_t expected[] = {
568 1, 1, 1*2, 1*2*3, 1*2*3*4
569 };
570 run_shader(kernel_source, output.size(), 1, 1, output);
571 for (int i = 0; i < output.size(); ++i)
572 EXPECT_EQ(output[i], expected[i]);
573 }
574
TEST_F(ComputeTest,complex_types_local_array_long)575 TEST_F(ComputeTest, complex_types_local_array_long)
576 {
577 const char *kernel_source =
578 "__kernel void main_test(__global ulong *inout)\n\
579 {\n\
580 ulong tmp[] = {\n\
581 get_global_id(1) + 0x00000000,\n\
582 get_global_id(1) + 0x10000001,\n\
583 get_global_id(1) + 0x20000020,\n\
584 get_global_id(1) + 0x30000300,\n\
585 };\n\
586 uint idx = get_global_id(0);\n\
587 inout[idx] = tmp[idx];\n\
588 }\n";
589 auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
590 const uint64_t expected[] = {
591 0x00000000, 0x10000001, 0x20000020, 0x30000300,
592 };
593 run_shader(kernel_source, inout.size(), 1, 1, inout);
594 for (int i = 0; i < inout.size(); ++i)
595 EXPECT_EQ(inout[i], expected[i]);
596 }
597
TEST_F(ComputeTest,complex_types_local_array_short)598 TEST_F(ComputeTest, complex_types_local_array_short)
599 {
600 const char *kernel_source =
601 "__kernel void main_test(__global ushort *inout)\n\
602 {\n\
603 ushort tmp[] = {\n\
604 get_global_id(1) + 0x00,\n\
605 get_global_id(1) + 0x10,\n\
606 get_global_id(1) + 0x20,\n\
607 get_global_id(1) + 0x30,\n\
608 };\n\
609 uint idx = get_global_id(0);\n\
610 inout[idx] = tmp[idx];\n\
611 }\n";
612 auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
613 const uint16_t expected[] = {
614 0x00, 0x10, 0x20, 0x30,
615 };
616 run_shader(kernel_source, inout.size(), 1, 1, inout);
617 for (int i = 0; i < inout.size(); ++i)
618 EXPECT_EQ(inout[i], expected[i]);
619 }
620
TEST_F(ComputeTest,complex_types_local_array_struct_vec_float_misaligned)621 TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)
622 {
623 const char *kernel_source =
624 "struct has_vecs { uchar c; ushort s; float2 f; };\n\
625 __kernel void main_test(__global uint *inout)\n\
626 {\n\
627 struct has_vecs tmp[] = {\n\
628 { 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\
629 { 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\
630 { 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\
631 { 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\
632 };\n\
633 uint idx = get_global_id(0);\n\
634 uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\
635 inout[idx] = mul + trunc(tmp[idx].f[1]);\n\
636 }\n";
637 auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
638 const uint16_t expected[] = { 101, 404, 909, 1616 };
639 run_shader(kernel_source, inout.size(), 1, 1, inout);
640 for (int i = 0; i < inout.size(); ++i)
641 EXPECT_EQ(inout[i], expected[i]);
642 }
643
TEST_F(ComputeTest,complex_types_local_array)644 TEST_F(ComputeTest, complex_types_local_array)
645 {
646 const char *kernel_source =
647 "__kernel void main_test(__global uint *inout)\n\
648 {\n\
649 uint tmp[] = {\n\
650 get_global_id(1) + 0x00,\n\
651 get_global_id(1) + 0x10,\n\
652 get_global_id(1) + 0x20,\n\
653 get_global_id(1) + 0x30,\n\
654 };\n\
655 uint idx = get_global_id(0);\n\
656 inout[idx] = tmp[idx];\n\
657 }\n";
658 auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
659 const uint32_t expected[] = {
660 0x00, 0x10, 0x20, 0x30,
661 };
662 run_shader(kernel_source, inout.size(), 1, 1, inout);
663 for (int i = 0; i < inout.size(); ++i)
664 EXPECT_EQ(inout[i], expected[i]);
665 }
666
TEST_F(ComputeTest,complex_types_global_struct_array)667 TEST_F(ComputeTest, complex_types_global_struct_array)
668 {
669 struct two_vals { uint32_t add; uint32_t mul; };
670 const char *kernel_source =
671 "struct two_vals { uint add; uint mul; };\n\
672 __kernel void main_test(__global struct two_vals *in_out)\n\
673 {\n\
674 uint id = get_global_id(0);\n\
675 in_out[id].add = in_out[id].add + id;\n\
676 in_out[id].mul = in_out[id].mul * id;\n\
677 }\n";
678 auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
679 SHADER_ARG_INOUT);
680 const struct two_vals expected[] = {
681 { 8 + 0, 8 * 0 },
682 { 16 + 1, 16 * 1 },
683 { 64 + 2, 64 * 2 },
684 { 65536 + 3, 65536 * 3 }
685 };
686 run_shader(kernel_source, inout.size(), 1, 1, inout);
687 for (int i = 0; i < inout.size(); ++i) {
688 EXPECT_EQ(inout[i].add, expected[i].add);
689 EXPECT_EQ(inout[i].mul, expected[i].mul);
690 }
691 }
692
TEST_F(ComputeTest,complex_types_global_uint2)693 TEST_F(ComputeTest, complex_types_global_uint2)
694 {
695 struct uint2 { uint32_t x; uint32_t y; };
696 const char *kernel_source =
697 "__kernel void main_test(__global uint2 *inout)\n\
698 {\n\
699 uint id = get_global_id(0);\n\
700 inout[id].x = inout[id].x + id;\n\
701 inout[id].y = inout[id].y * id;\n\
702 }\n";
703 auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
704 SHADER_ARG_INOUT);
705 const struct uint2 expected[] = {
706 { 8 + 0, 8 * 0 },
707 { 16 + 1, 16 * 1 },
708 { 64 + 2, 64 * 2 },
709 { 65536 + 3, 65536 * 3 }
710 };
711 run_shader(kernel_source, inout.size(), 1, 1, inout);
712 for (int i = 0; i < inout.size(); ++i) {
713 EXPECT_EQ(inout[i].x, expected[i].x);
714 EXPECT_EQ(inout[i].y, expected[i].y);
715 }
716 }
717
TEST_F(ComputeTest,complex_types_global_ushort2)718 TEST_F(ComputeTest, complex_types_global_ushort2)
719 {
720 struct ushort2 { uint16_t x; uint16_t y; };
721 const char *kernel_source =
722 "__kernel void main_test(__global ushort2 *inout)\n\
723 {\n\
724 uint id = get_global_id(0);\n\
725 inout[id].x = inout[id].x + id;\n\
726 inout[id].y = inout[id].y * id;\n\
727 }\n";
728 auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },
729 { (uint16_t)65536, (uint16_t)65536 } },
730 SHADER_ARG_INOUT);
731 const struct ushort2 expected[] = {
732 { 8 + 0, 8 * 0 },
733 { 16 + 1, 16 * 1 },
734 { 64 + 2, 64 * 2 },
735 { (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }
736 };
737 run_shader(kernel_source, inout.size(), 1, 1, inout);
738 for (int i = 0; i < inout.size(); ++i) {
739 EXPECT_EQ(inout[i].x, expected[i].x);
740 EXPECT_EQ(inout[i].y, expected[i].y);
741 }
742 }
743
TEST_F(ComputeTest,complex_types_global_uchar3)744 TEST_F(ComputeTest, complex_types_global_uchar3)
745 {
746 struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
747 const char *kernel_source =
748 "__kernel void main_test(__global uchar3 *inout)\n\
749 {\n\
750 uint id = get_global_id(0);\n\
751 inout[id].x = inout[id].x + id;\n\
752 inout[id].y = inout[id].y * id;\n\
753 inout[id].z = inout[id].y + inout[id].x;\n\
754 }\n";
755 auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
756 SHADER_ARG_INOUT);
757 const struct uchar3 expected[] = {
758 { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
759 { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
760 { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
761 { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
762 };
763 run_shader(kernel_source, inout.size(), 1, 1, inout);
764 for (int i = 0; i < inout.size(); ++i) {
765 EXPECT_EQ(inout[i].x, expected[i].x);
766 EXPECT_EQ(inout[i].y, expected[i].y);
767 EXPECT_EQ(inout[i].z, expected[i].z);
768 }
769 }
770
TEST_F(ComputeTest,complex_types_constant_uchar3)771 TEST_F(ComputeTest, complex_types_constant_uchar3)
772 {
773 struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
774 const char *kernel_source =
775 "__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\
776 {\n\
777 uint id = get_global_id(0);\n\
778 out[id].x = in[id].x + id;\n\
779 out[id].y = in[id].y * id;\n\
780 out[id].z = out[id].y + out[id].x;\n\
781 }\n";
782 auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
783 SHADER_ARG_INPUT);
784 auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),
785 SHADER_ARG_OUTPUT);
786 const struct uchar3 expected[] = {
787 { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
788 { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
789 { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
790 { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
791 };
792 run_shader(kernel_source, out.size(), 1, 1, out, in);
793 for (int i = 0; i < out.size(); ++i) {
794 EXPECT_EQ(out[i].x, expected[i].x);
795 EXPECT_EQ(out[i].y, expected[i].y);
796 EXPECT_EQ(out[i].z, expected[i].z);
797 }
798 }
799
TEST_F(ComputeTest,complex_types_global_uint8)800 TEST_F(ComputeTest, complex_types_global_uint8)
801 {
802 struct uint8 {
803 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
804 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
805 };
806 const char *kernel_source =
807 "__kernel void main_test(__global uint8 *inout)\n\
808 {\n\
809 uint id = get_global_id(0);\n\
810 inout[id].s01234567 = inout[id].s01234567 * 2;\n\
811 }\n";
812 auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
813 SHADER_ARG_INOUT);
814 const struct uint8 expected[] = {
815 { 2, 4, 6, 8, 10, 12, 14, 16 }
816 };
817 run_shader(kernel_source, inout.size(), 1, 1, inout);
818 for (int i = 0; i < inout.size(); ++i) {
819 EXPECT_EQ(inout[i].s0, expected[i].s0);
820 EXPECT_EQ(inout[i].s1, expected[i].s1);
821 EXPECT_EQ(inout[i].s2, expected[i].s2);
822 EXPECT_EQ(inout[i].s3, expected[i].s3);
823 EXPECT_EQ(inout[i].s4, expected[i].s4);
824 EXPECT_EQ(inout[i].s5, expected[i].s5);
825 EXPECT_EQ(inout[i].s6, expected[i].s6);
826 EXPECT_EQ(inout[i].s7, expected[i].s7);
827 }
828 }
829
TEST_F(ComputeTest,complex_types_local_ulong16)830 TEST_F(ComputeTest, complex_types_local_ulong16)
831 {
832 struct ulong16 {
833 uint64_t values[16];
834 };
835 const char *kernel_source =
836 R"(__kernel void main_test(__global ulong16 *inout)
837 {
838 __local ulong16 local_array[2];
839 uint id = get_global_id(0);
840 local_array[id] = inout[id];
841 barrier(CLK_LOCAL_MEM_FENCE);
842 inout[id] = local_array[0] * 2;
843 })";
844 auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
845 SHADER_ARG_INOUT);
846 const struct ulong16 expected[] = {
847 { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
848 };
849 run_shader(kernel_source, inout.size(), 1, 1, inout);
850 for (int i = 0; i < inout.size(); ++i) {
851 for (int j = 0; j < 16; ++j) {
852 EXPECT_EQ(inout[i].values[j], expected[i].values[j]);
853 }
854 }
855 }
856
TEST_F(ComputeTest,complex_types_constant_uint8)857 TEST_F(ComputeTest, complex_types_constant_uint8)
858 {
859 struct uint8 {
860 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
861 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
862 };
863 const char *kernel_source =
864 "__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\
865 {\n\
866 uint id = get_global_id(0);\n\
867 out[id].s01234567 = in[id].s01234567 * 2;\n\
868 }\n";
869 auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
870 SHADER_ARG_INPUT);
871 auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
872 SHADER_ARG_INOUT);
873 const struct uint8 expected[] = {
874 { 2, 4, 6, 8, 10, 12, 14, 16 }
875 };
876 run_shader(kernel_source, out.size(), 1, 1, out, in);
877 for (int i = 0; i < out.size(); ++i) {
878 EXPECT_EQ(out[i].s0, expected[i].s0);
879 EXPECT_EQ(out[i].s1, expected[i].s1);
880 EXPECT_EQ(out[i].s2, expected[i].s2);
881 EXPECT_EQ(out[i].s3, expected[i].s3);
882 EXPECT_EQ(out[i].s4, expected[i].s4);
883 EXPECT_EQ(out[i].s5, expected[i].s5);
884 EXPECT_EQ(out[i].s6, expected[i].s6);
885 EXPECT_EQ(out[i].s7, expected[i].s7);
886 }
887 }
888
TEST_F(ComputeTest,complex_types_const_array)889 TEST_F(ComputeTest, complex_types_const_array)
890 {
891 const char *kernel_source =
892 "__kernel void main_test(__global uint *output)\n\
893 {\n\
894 const uint foo[] = { 100, 101, 102, 103 };\n\
895 output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\
896 }\n";
897 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
898 SHADER_ARG_OUTPUT);
899 const uint32_t expected[] = {
900 100, 101, 102, 103
901 };
902 run_shader(kernel_source, output.size(), 1, 1, output);
903 for (int i = 0; i < output.size(); ++i)
904 EXPECT_EQ(output[i], expected[i]);
905 }
906
TEST_F(ComputeTest,mem_access_load_store_ordering)907 TEST_F(ComputeTest, mem_access_load_store_ordering)
908 {
909 const char *kernel_source =
910 "__kernel void main_test(__global uint *output)\n\
911 {\n\
912 uint foo[4];\n\
913 foo[0] = 0x11111111;\n\
914 foo[1] = 0x22222222;\n\
915 foo[2] = 0x44444444;\n\
916 foo[3] = 0x88888888;\n\
917 foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\
918 foo[0] += get_global_id(0); // foo[0] = tid\n\
919 foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\
920 output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\
921 }\n";
922 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
923 SHADER_ARG_OUTPUT);
924 const uint16_t expected[] = {
925 0, 1, 2, 3
926 };
927 run_shader(kernel_source, output.size(), 1, 1, output);
928 for (int i = 0; i < output.size(); ++i)
929 EXPECT_EQ(output[i], expected[i]);
930 }
931
TEST_F(ComputeTest,two_const_arrays)932 TEST_F(ComputeTest, two_const_arrays)
933 {
934 const char *kernel_source =
935 "__kernel void main_test(__global uint *output)\n\
936 {\n\
937 uint id = get_global_id(0);\n\
938 uint foo[4] = {100, 101, 102, 103};\n\
939 uint bar[4] = {1, 2, 3, 4};\n\
940 output[id] = foo[id] * bar[id];\n\
941 }\n";
942 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
943 SHADER_ARG_OUTPUT);
944 const uint32_t expected[] = {
945 100, 202, 306, 412
946 };
947 run_shader(kernel_source, output.size(), 1, 1, output);
948 for (int i = 0; i < output.size(); ++i)
949 EXPECT_EQ(output[i], expected[i]);
950 }
951
TEST_F(ComputeTest,imod_pos)952 TEST_F(ComputeTest, imod_pos)
953 {
954 const char *kernel_source =
955 "__kernel void main_test(__global int *inout)\n\
956 {\n\
957 inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\
958 }\n";
959 auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
960 SHADER_ARG_INOUT);
961 const int32_t expected[] = {
962 -1, 0, -2, -1, 0, 1, 2, 0, 1
963 };
964 run_shader(kernel_source, inout.size(), 1, 1, inout);
965 for (int i = 0; i < inout.size(); ++i)
966 EXPECT_EQ(inout[i], expected[i]);
967 }
968
TEST_F(ComputeTest,imod_neg)969 TEST_F(ComputeTest, imod_neg)
970 {
971 const char *kernel_source =
972 "__kernel void main_test(__global int *inout)\n\
973 {\n\
974 inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\
975 }\n";
976 auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
977 SHADER_ARG_INOUT);
978 const int32_t expected[] = {
979 -1, 0, -2, -1, 0, 1, 2, 0, 1
980 };
981 run_shader(kernel_source, inout.size(), 1, 1, inout);
982 for (int i = 0; i < inout.size(); ++i)
983 EXPECT_EQ(inout[i], expected[i]);
984 }
985
TEST_F(ComputeTest,umod)986 TEST_F(ComputeTest, umod)
987 {
988 const char *kernel_source =
989 "__kernel void main_test(__global uint *inout)\n\
990 {\n\
991 inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\
992 }\n";
993 auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },
994 SHADER_ARG_INOUT);
995 const uint32_t expected[] = {
996 0xfffffffa, 0xfffffffb, 0, 1, 2
997 };
998 run_shader(kernel_source, inout.size(), 1, 1, inout);
999 for (int i = 0; i < inout.size(); ++i)
1000 EXPECT_EQ(inout[i], expected[i]);
1001 }
1002
TEST_F(ComputeTest,rotate)1003 TEST_F(ComputeTest, rotate)
1004 {
1005 const char *kernel_source =
1006 "__kernel void main_test(__global uint *inout)\n\
1007 {\n\
1008 inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\
1009 }\n";
1010 auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1011 SHADER_ARG_INOUT);
1012 const uint32_t expected[] = {
1013 0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea
1014 };
1015 run_shader(kernel_source, inout.size(), 1, 1, inout);
1016 for (int i = 0; i < inout.size(); ++i)
1017 EXPECT_EQ(inout[i], expected[i]);
1018 }
1019
TEST_F(ComputeTest,popcount)1020 TEST_F(ComputeTest, popcount)
1021 {
1022 const char *kernel_source =
1023 "__kernel void main_test(__global uint *inout)\n\
1024 {\n\
1025 inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\
1026 }\n";
1027 auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },
1028 SHADER_ARG_INOUT);
1029 const uint32_t expected[] = {
1030 0, 1, 2, 2, 4, 32
1031 };
1032 run_shader(kernel_source, inout.size(), 1, 1, inout);
1033 for (int i = 0; i < inout.size(); ++i)
1034 EXPECT_EQ(inout[i], expected[i]);
1035 }
1036
TEST_F(ComputeTest,hadd)1037 TEST_F(ComputeTest, hadd)
1038 {
1039 const char *kernel_source =
1040 "__kernel void main_test(__global uint *inout)\n\
1041 {\n\
1042 inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\
1043 }\n";
1044 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1045 SHADER_ARG_INOUT);
1046 const uint32_t expected[] = {
1047 (1u << 31) >> 1,
1048 ((1u << 31) + 1) >> 1,
1049 ((1u << 31) + 2) >> 1,
1050 ((1u << 31) + 3) >> 1,
1051 ((1ull << 31) + 0xfffffffc) >> 1,
1052 ((1ull << 31) + 0xfffffffd) >> 1,
1053 ((1ull << 31) + 0xfffffffe) >> 1,
1054 ((1ull << 31) + 0xffffffff) >> 1,
1055 };
1056 run_shader(kernel_source, inout.size(), 1, 1, inout);
1057 for (int i = 0; i < inout.size(); ++i)
1058 EXPECT_EQ(inout[i], expected[i]);
1059 }
1060
TEST_F(ComputeTest,rhadd)1061 TEST_F(ComputeTest, rhadd)
1062 {
1063 const char *kernel_source =
1064 "__kernel void main_test(__global uint *inout)\n\
1065 {\n\
1066 inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\
1067 }\n";
1068 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1069 SHADER_ARG_INOUT);
1070 const uint32_t expected[] = {
1071 ((1u << 31) + 1) >> 1,
1072 ((1u << 31) + 2) >> 1,
1073 ((1u << 31) + 3) >> 1,
1074 ((1u << 31) + 4) >> 1,
1075 ((1ull << 31) + 0xfffffffd) >> 1,
1076 ((1ull << 31) + 0xfffffffe) >> 1,
1077 ((1ull << 31) + 0xffffffff) >> 1,
1078 ((1ull << 31) + (1ull << 32)) >> 1,
1079 };
1080 run_shader(kernel_source, inout.size(), 1, 1, inout);
1081 for (int i = 0; i < inout.size(); ++i)
1082 EXPECT_EQ(inout[i], expected[i]);
1083 }
1084
TEST_F(ComputeTest,add_sat)1085 TEST_F(ComputeTest, add_sat)
1086 {
1087 const char *kernel_source =
1088 "__kernel void main_test(__global uint *inout)\n\
1089 {\n\
1090 inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\
1091 }\n";
1092 auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },
1093 SHADER_ARG_INOUT);
1094 const uint32_t expected[] = {
1095 0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff
1096 };
1097 run_shader(kernel_source, inout.size(), 1, 1, inout);
1098 for (int i = 0; i < inout.size(); ++i)
1099 EXPECT_EQ(inout[i], expected[i]);
1100 }
1101
TEST_F(ComputeTest,sub_sat)1102 TEST_F(ComputeTest, sub_sat)
1103 {
1104 const char *kernel_source =
1105 "__kernel void main_test(__global uint *inout)\n\
1106 {\n\
1107 inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\
1108 }\n";
1109 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);
1110 const uint32_t expected[] = {
1111 0, 0, 0, 1
1112 };
1113 run_shader(kernel_source, inout.size(), 1, 1, inout);
1114 for (int i = 0; i < inout.size(); ++i)
1115 EXPECT_EQ(inout[i], expected[i]);
1116 }
1117
TEST_F(ComputeTest,mul_hi)1118 TEST_F(ComputeTest, mul_hi)
1119 {
1120 const char *kernel_source =
1121 "__kernel void main_test(__global uint *inout)\n\
1122 {\n\
1123 inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\
1124 }\n";
1125 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);
1126 const uint32_t expected[] = {
1127 0, 0, 1, 1, (1u << 30)
1128 };
1129 run_shader(kernel_source, inout.size(), 1, 1, inout);
1130 for (int i = 0; i < inout.size(); ++i)
1131 EXPECT_EQ(inout[i], expected[i]);
1132 }
1133
TEST_F(ComputeTest,ldexp_x)1134 TEST_F(ComputeTest, ldexp_x)
1135 {
1136 const char *kernel_source =
1137 "__kernel void main_test(__global float *inout)\n\
1138 {\n\
1139 inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\
1140 }\n";
1141 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);
1142 const float expected[] = {
1143 ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)
1144 };
1145 run_shader(kernel_source, inout.size(), 1, 1, inout);
1146 for (int i = 0; i < inout.size(); ++i)
1147 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1148 }
1149
TEST_F(ComputeTest,ldexp_y)1150 TEST_F(ComputeTest, ldexp_y)
1151 {
1152 const char *kernel_source =
1153 "__kernel void main_test(__global float *inout)\n\
1154 {\n\
1155 inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\
1156 }\n";
1157 auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);
1158 const float expected[] = {
1159 ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)
1160 };
1161 run_shader(kernel_source, inout.size(), 1, 1, inout);
1162 for (int i = 0; i < inout.size(); ++i)
1163 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1164 }
1165
TEST_F(ComputeTest,frexp_ret)1166 TEST_F(ComputeTest, frexp_ret)
1167 {
1168 const char *kernel_source =
1169 "__kernel void main_test(__global float *inout)\n\
1170 {\n\
1171 int exp;\n\
1172 inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\
1173 }\n";
1174 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1175 const float expected[] = {
1176 0.0f, 0.5f, 0.5f, 0.75f
1177 };
1178 run_shader(kernel_source, inout.size(), 1, 1, inout);
1179 for (int i = 0; i < inout.size(); ++i)
1180 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1181 }
1182
TEST_F(ComputeTest,frexp_exp)1183 TEST_F(ComputeTest, frexp_exp)
1184 {
1185 const char *kernel_source =
1186 "__kernel void main_test(__global float *inout)\n\
1187 {\n\
1188 int exp;\n\
1189 frexp(inout[get_global_id(0)], &exp);\n\
1190 inout[get_global_id(0)] = (float)exp;\n\
1191 }\n";
1192 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1193 const float expected[] = {
1194 0.0f, 0.0f, 1.0f, 2.0f
1195 };
1196 run_shader(kernel_source, inout.size(), 1, 1, inout);
1197 for (int i = 0; i < inout.size(); ++i)
1198 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1199 }
1200
TEST_F(ComputeTest,clz)1201 TEST_F(ComputeTest, clz)
1202 {
1203 const char *kernel_source =
1204 "__kernel void main_test(__global uint *inout)\n\
1205 {\n\
1206 inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\
1207 }\n";
1208 auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff, (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);
1209 const uint32_t expected[] = {
1210 32, 31, 16, 1, 0
1211 };
1212 run_shader(kernel_source, inout.size(), 1, 1, inout);
1213 for (int i = 0; i < inout.size(); ++i)
1214 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1215 }
1216
TEST_F(ComputeTest,sin)1217 TEST_F(ComputeTest, sin)
1218 {
1219 struct sin_vals { float in; float clc; float native; };
1220 const char *kernel_source =
1221 "struct sin_vals { float in; float clc; float native; };\n\
1222 __kernel void main_test(__global struct sin_vals *inout)\n\
1223 {\n\
1224 inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\
1225 inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\
1226 }\n";
1227 const vector<sin_vals> input = {
1228 { 0.0f, 0.0f, 0.0f },
1229 { 1.0f, 0.0f, 0.0f },
1230 { 2.0f, 0.0f, 0.0f },
1231 { 3.0f, 0.0f, 0.0f },
1232 };
1233 auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);
1234 const struct sin_vals expected[] = {
1235 { 0.0f, 0.0f, 0.0f },
1236 { 1.0f, sin(1.0f), sin(1.0f) },
1237 { 2.0f, sin(2.0f), sin(2.0f) },
1238 { 3.0f, sin(3.0f), sin(3.0f) },
1239 };
1240 run_shader(kernel_source, inout.size(), 1, 1, inout);
1241 for (int i = 0; i < inout.size(); ++i) {
1242 EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);
1243 EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);
1244 EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec
1245 }
1246 }
1247
TEST_F(ComputeTest,cosh)1248 TEST_F(ComputeTest, cosh)
1249 {
1250 const char *kernel_source =
1251 "__kernel void main_test(__global float *inout)\n\
1252 {\n\
1253 inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\
1254 }\n";
1255 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1256 const float expected[] = {
1257 cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)
1258 };
1259 run_shader(kernel_source, inout.size(), 1, 1, inout);
1260 for (int i = 0; i < inout.size(); ++i)
1261 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1262 }
1263
TEST_F(ComputeTest,exp)1264 TEST_F(ComputeTest, exp)
1265 {
1266 const char *kernel_source =
1267 "__kernel void main_test(__global float *inout)\n\
1268 {\n\
1269 inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\
1270 }\n";
1271 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1272 const float expected[] = {
1273 exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)
1274 };
1275 run_shader(kernel_source, inout.size(), 1, 1, inout);
1276 for (int i = 0; i < inout.size(); ++i)
1277 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1278 }
1279
TEST_F(ComputeTest,exp10)1280 TEST_F(ComputeTest, exp10)
1281 {
1282 const char *kernel_source =
1283 "__kernel void main_test(__global float *inout)\n\
1284 {\n\
1285 inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\
1286 }\n";
1287 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1288 const float expected[] = {
1289 pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)
1290 };
1291 run_shader(kernel_source, inout.size(), 1, 1, inout);
1292 for (int i = 0; i < inout.size(); ++i)
1293 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1294 }
1295
TEST_F(ComputeTest,exp2)1296 TEST_F(ComputeTest, exp2)
1297 {
1298 const char *kernel_source =
1299 "__kernel void main_test(__global float *inout)\n\
1300 {\n\
1301 inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\
1302 }\n";
1303 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1304 const float expected[] = {
1305 pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)
1306 };
1307 run_shader(kernel_source, inout.size(), 1, 1, inout);
1308 for (int i = 0; i < inout.size(); ++i)
1309 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1310 }
1311
TEST_F(ComputeTest,log)1312 TEST_F(ComputeTest, log)
1313 {
1314 const char *kernel_source =
1315 "__kernel void main_test(__global float *inout)\n\
1316 {\n\
1317 inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\
1318 }\n";
1319 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1320 const float expected[] = {
1321 log(0.0f), log(1.0f), log(2.0f), log(3.0f)
1322 };
1323 run_shader(kernel_source, inout.size(), 1, 1, inout);
1324 for (int i = 0; i < inout.size(); ++i)
1325 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1326 }
1327
TEST_F(ComputeTest,log10)1328 TEST_F(ComputeTest, log10)
1329 {
1330 const char *kernel_source =
1331 "__kernel void main_test(__global float *inout)\n\
1332 {\n\
1333 inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\
1334 }\n";
1335 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1336 const float expected[] = {
1337 log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)
1338 };
1339 run_shader(kernel_source, inout.size(), 1, 1, inout);
1340 for (int i = 0; i < inout.size(); ++i)
1341 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1342 }
1343
TEST_F(ComputeTest,log2)1344 TEST_F(ComputeTest, log2)
1345 {
1346 const char *kernel_source =
1347 "__kernel void main_test(__global float *inout)\n\
1348 {\n\
1349 inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\
1350 }\n";
1351 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1352 const float expected[] = {
1353 log(0.0f) / log(2.0f), log(1.0f) / log(2.0f), log(2.0f) / log(2.0f), log(3.0f) / log(2.0f)
1354 };
1355 run_shader(kernel_source, inout.size(), 1, 1, inout);
1356 for (int i = 0; i < inout.size(); ++i)
1357 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1358 }
1359
TEST_F(ComputeTest,rint)1360 TEST_F(ComputeTest, rint)
1361 {
1362 const char *kernel_source =
1363 "__kernel void main_test(__global float *inout)\n\
1364 {\n\
1365 inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\
1366 }\n";
1367
1368 auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);
1369 const float expected[] = {
1370 0.0f, 2.0f, 0.0f, -2.0f, 1.0f,
1371 };
1372 run_shader(kernel_source, inout.size(), 1, 1, inout);
1373 for (int i = 0; i < inout.size(); ++i)
1374 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1375 }
1376
TEST_F(ComputeTest,round)1377 TEST_F(ComputeTest, round)
1378 {
1379 const char *kernel_source =
1380 "__kernel void main_test(__global float *inout)\n\
1381 {\n\
1382 inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\
1383 }\n";
1384 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1385 SHADER_ARG_INOUT);
1386 const float expected[] = {
1387 0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f
1388 };
1389 run_shader(kernel_source, inout.size(), 1, 1, inout);
1390 for (int i = 0; i < inout.size(); ++i)
1391 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1392 }
1393
TEST_F(ComputeTest,arg_by_val)1394 TEST_F(ComputeTest, arg_by_val)
1395 {
1396 const char *kernel_source =
1397 "__kernel void main_test(__global float *inout, float mul)\n\
1398 {\n\
1399 inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1400 }\n";
1401 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1402 SHADER_ARG_INOUT);
1403 auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1404 const float expected[] = {
1405 0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1406 };
1407 run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1408 for (int i = 0; i < inout.size(); ++i)
1409 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1410 }
1411
TEST_F(ComputeTest,uint8_by_val)1412 TEST_F(ComputeTest, uint8_by_val)
1413 {
1414 struct uint8 {
1415 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
1416 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
1417 };
1418 const char *kernel_source =
1419 "__kernel void main_test(__global uint *out, uint8 val)\n\
1420 {\n\
1421 out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\
1422 val.s4 + val.s5 + val.s6 + val.s7;\n\
1423 }\n";
1424 auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);
1425 auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);
1426 const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };
1427 run_shader(kernel_source, out.size(), 1, 1, out, val);
1428 for (int i = 0; i < out.size(); ++i)
1429 EXPECT_EQ(out[i], expected[i]);
1430 }
1431
TEST_F(ComputeTest,link)1432 TEST_F(ComputeTest, link)
1433 {
1434 const char *foo_src =
1435 "float foo(float in)\n\
1436 {\n\
1437 return in * in;\n\
1438 }\n";
1439 const char *kernel_source =
1440 "float foo(float in);\n\
1441 __kernel void main_test(__global float *inout)\n\
1442 {\n\
1443 inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1444 }\n";
1445 std::vector<const char *> srcs = { foo_src, kernel_source };
1446 auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1447 const float expected[] = {
1448 4.0f,
1449 };
1450 run_shader(srcs, inout.size(), 1, 1, inout);
1451 for (int i = 0; i < inout.size(); ++i)
1452 EXPECT_EQ(inout[i], expected[i]);
1453 }
1454
TEST_F(ComputeTest,link_library)1455 TEST_F(ComputeTest, link_library)
1456 {
1457 const char *bar_src =
1458 "float bar(float in)\n\
1459 {\n\
1460 return in * 5;\n\
1461 }\n";
1462 const char *foo_src =
1463 "float bar(float in);\n\
1464 float foo(float in)\n\
1465 {\n\
1466 return in * bar(in);\n\
1467 }\n";
1468 const char *kernel_source =
1469 "float foo(float in);\n\
1470 __kernel void main_test(__global float *inout)\n\
1471 {\n\
1472 inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1473 }\n";
1474 std::vector<Shader> libraries = {
1475 compile({ bar_src, kernel_source }, {}, true),
1476 compile({ foo_src }, {}, true)
1477 };
1478 Shader exe = link(libraries);
1479 auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1480 const float expected[] = {
1481 20.0f,
1482 };
1483 run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);
1484 for (int i = 0; i < inout.size(); ++i)
1485 EXPECT_EQ(inout[i], expected[i]);
1486 }
1487
TEST_F(ComputeTest,localvar)1488 TEST_F(ComputeTest, localvar)
1489 {
1490 const char *kernel_source =
1491 "__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\
1492 void main_test(__global float *inout)\n\
1493 {\n\
1494 __local float2 tmp[2];\n\
1495 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1496 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1497 barrier(CLK_LOCAL_MEM_FENCE);\n\
1498 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1499 }\n";
1500
1501 auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);
1502 const float expected[] = {
1503 9.0f, 5.0f
1504 };
1505 run_shader(kernel_source, inout.size(), 1, 1, inout);
1506 for (int i = 0; i < inout.size(); ++i)
1507 EXPECT_EQ(inout[i], expected[i]);
1508 }
1509
TEST_F(ComputeTest,localvar_uchar2)1510 TEST_F(ComputeTest, localvar_uchar2)
1511 {
1512 const char *kernel_source =
1513 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1514 __kernel void main_test(__global uchar *inout)\n\
1515 {\n\
1516 __local uchar2 tmp[2];\n\
1517 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1518 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1519 barrier(CLK_LOCAL_MEM_FENCE);\n\
1520 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1521 }\n";
1522
1523 auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);
1524 const uint8_t expected[] = { 9, 5 };
1525 run_shader(kernel_source, inout.size(), 1, 1, inout);
1526 for (int i = 0; i < inout.size(); ++i)
1527 EXPECT_EQ(inout[i], expected[i]);
1528 }
1529
TEST_F(ComputeTest,work_group_size_hint)1530 TEST_F(ComputeTest, work_group_size_hint)
1531 {
1532 const char *kernel_source =
1533 "__attribute__((work_group_size_hint(2, 1, 1)))\n\
1534 __kernel void main_test(__global uint *output)\n\
1535 {\n\
1536 output[get_global_id(0)] = get_local_id(0);\n\
1537 }\n";
1538 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1539 SHADER_ARG_OUTPUT);
1540 const uint32_t expected[] = {
1541 0, 1, 2, 3
1542 };
1543 run_shader(kernel_source, output.size(), 1, 1, output);
1544 for (int i = 0; i < output.size(); ++i)
1545 EXPECT_EQ(output[i], expected[i]);
1546 }
1547
TEST_F(ComputeTest,reqd_work_group_size)1548 TEST_F(ComputeTest, reqd_work_group_size)
1549 {
1550 const char *kernel_source =
1551 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1552 __kernel void main_test(__global uint *output)\n\
1553 {\n\
1554 output[get_global_id(0)] = get_local_id(0);\n\
1555 }\n";
1556 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1557 SHADER_ARG_OUTPUT);
1558 const uint32_t expected[] = {
1559 0, 1, 0, 1
1560 };
1561 run_shader(kernel_source, output.size(), 1, 1, output);
1562 for (int i = 0; i < output.size(); ++i)
1563 EXPECT_EQ(output[i], expected[i]);
1564 }
1565
TEST_F(ComputeTest,image)1566 TEST_F(ComputeTest, image)
1567 {
1568 const char* kernel_source =
1569 "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1570 {\n\
1571 int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\
1572 write_imagef(output, coords, read_imagef(input, coords));\n\
1573 }\n";
1574 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1575 validate(shader);
1576 }
1577
TEST_F(ComputeTest,image_two_reads)1578 TEST_F(ComputeTest, image_two_reads)
1579 {
1580 // Note: unnecessary control flow is present so that nir_opt_dead_cf kicks in, causing
1581 // nir_rematerialize_derefs_in_use_blocks to run. The duplicated uses ensure that the
1582 // per-var-deref processing works correctly.
1583 const char* kernel_source =
1584 R"(__kernel void main_test(image2d_t image, int is_float, __global float* output)
1585 {
1586 int x = get_global_id(0);
1587 if (is_float)
1588 x = get_global_id(0);
1589 if (is_float)
1590 output[x] = read_imagef(image, (int2)(0, 0)).x;
1591 else
1592 output[x] = (float)read_imagei(image, (int2)(0, 0)).x;
1593 if (is_float)
1594 output[x] = read_imagef(image, (int2)(0, 0)).x;
1595 else
1596 output[x] = (float)read_imagei(image, (int2)(0, 0)).x;
1597 })";
1598 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1599 validate(shader);
1600 }
1601
TEST_F(ComputeTest,image_unused)1602 TEST_F(ComputeTest, image_unused)
1603 {
1604 const char* kernel_source =
1605 "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1606 {\n\
1607 }\n";
1608 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1609 validate(shader);
1610 }
1611
TEST_F(ComputeTest,image_read_write)1612 TEST_F(ComputeTest, image_read_write)
1613 {
1614 const char *kernel_source =
1615 R"(__kernel void main_test(read_write image2d_t image)
1616 {
1617 int2 coords = (int2)(get_global_id(0), get_global_id(1));
1618 write_imagef(image, coords, read_imagef(image, coords) + (float4)(1.0f, 1.0f, 1.0f, 1.0f));
1619 })";
1620 Shader shader = compile(std::vector<const char*>({ kernel_source }), { "-cl-std=cl3.0" });
1621 validate(shader);
1622 }
1623
TEST_F(ComputeTest,sampler)1624 TEST_F(ComputeTest, sampler)
1625 {
1626 const char* kernel_source =
1627 "__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\
1628 {\n\
1629 output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\
1630 }\n";
1631 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1632 validate(shader);
1633 }
1634
TEST_F(ComputeTest,image_dims)1635 TEST_F(ComputeTest, image_dims)
1636 {
1637 const char* kernel_source =
1638 "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1639 {\n\
1640 output[get_global_id(0)] = get_image_width(roimage);\n\
1641 output[get_global_id(0) + 1] = get_image_width(woimage);\n\
1642 }\n";
1643 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1644 validate(shader);
1645 }
1646
TEST_F(ComputeTest,image_format)1647 TEST_F(ComputeTest, image_format)
1648 {
1649 const char* kernel_source =
1650 "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1651 {\n\
1652 output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\
1653 output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\
1654 }\n";
1655 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1656 validate(shader);
1657 }
1658
TEST_F(ComputeTest,image1d_buffer_t)1659 TEST_F(ComputeTest, image1d_buffer_t)
1660 {
1661 const char* kernel_source =
1662 "__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\
1663 {\n\
1664 write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\
1665 }\n";
1666 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1667 validate(shader);
1668 }
1669
TEST_F(ComputeTest,local_ptr)1670 TEST_F(ComputeTest, local_ptr)
1671 {
1672 struct uint2 { uint32_t x, y; };
1673 const char *kernel_source =
1674 "__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\
1675 {\n\
1676 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1677 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1678 barrier(CLK_LOCAL_MEM_FENCE);\n\
1679 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1680 }\n";
1681 auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1682 auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);
1683 const uint8_t expected[] = { 9, 5 };
1684 run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);
1685 for (int i = 0; i < inout.size(); ++i)
1686 EXPECT_EQ(inout[i], expected[i]);
1687 }
1688
TEST_F(ComputeTest,two_local_ptrs)1689 TEST_F(ComputeTest, two_local_ptrs)
1690 {
1691 struct uint2 { uint32_t x, y; };
1692 const char *kernel_source =
1693 "__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\
1694 {\n\
1695 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1696 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1697 tmp2[get_local_id(0)] = get_global_id(0);\n\
1698 barrier(CLK_LOCAL_MEM_FENCE);\n\
1699 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\
1700 }\n";
1701 auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1702 auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);
1703 auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);
1704 const uint8_t expected[] = { 9, 6 };
1705 run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);
1706 for (int i = 0; i < inout.size(); ++i)
1707 EXPECT_EQ(inout[i], expected[i]);
1708 }
1709
TEST_F(ComputeTest,int8_to_float)1710 TEST_F(ComputeTest, int8_to_float)
1711 {
1712 const char *kernel_source =
1713 "__kernel void main_test(__global char* in, __global float* out)\n\
1714 {\n\
1715 uint pos = get_global_id(0);\n\
1716 out[pos] = in[pos] / 100.0f;\n\
1717 }";
1718 auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);
1719 auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);
1720 const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };
1721 run_shader(kernel_source, in.size(), 1, 1, in, out);
1722 for (int i = 0; i < in.size(); ++i)
1723 EXPECT_FLOAT_EQ(out[i], expected[i]);
1724 }
1725
TEST_F(ComputeTest,vec_hint_float4)1726 TEST_F(ComputeTest, vec_hint_float4)
1727 {
1728 const char *kernel_source =
1729 "__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\
1730 {\n\
1731 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1732 }";
1733 Shader shader = compile({ kernel_source });
1734 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4);
1735 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
1736 }
1737
TEST_F(ComputeTest,vec_hint_uchar2)1738 TEST_F(ComputeTest, vec_hint_uchar2)
1739 {
1740 const char *kernel_source =
1741 "__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\
1742 {\n\
1743 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1744 }";
1745 Shader shader = compile({ kernel_source });
1746 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2);
1747 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
1748 }
1749
TEST_F(ComputeTest,vec_hint_none)1750 TEST_F(ComputeTest, vec_hint_none)
1751 {
1752 const char *kernel_source =
1753 "__kernel void main_test(__global float *inout)\n\
1754 {\n\
1755 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1756 }";
1757 Shader shader = compile({ kernel_source });
1758 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0);
1759 }
1760
TEST_F(ComputeTest,DISABLED_debug_layer_failure)1761 TEST_F(ComputeTest, DISABLED_debug_layer_failure)
1762 {
1763 /* This is a negative test case, it intentionally triggers a failure to validate the mechanism
1764 * is in place, so other tests will fail if they produce debug messages
1765 */
1766 const char *kernel_source =
1767 "__kernel void main_test(__global float *inout, float mul)\n\
1768 {\n\
1769 inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1770 }\n";
1771 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1772 SHADER_ARG_INOUT);
1773 auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1774 const float expected[] = {
1775 0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1776 };
1777 ComPtr<ID3D12InfoQueue> info_queue;
1778 dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
1779 if (!info_queue) {
1780 GTEST_SKIP() << "No info queue";
1781 return;
1782 }
1783
1784 info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");
1785 run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1786 for (int i = 0; i < inout.size(); ++i)
1787 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1788 }
1789
TEST_F(ComputeTest,compiler_defines)1790 TEST_F(ComputeTest, compiler_defines)
1791 {
1792 const char *kernel_source =
1793 "__kernel void main_test(__global int* out)\n\
1794 {\n\
1795 out[0] = OUT_VAL0;\n\
1796 out[1] = __OPENCL_C_VERSION__;\n\
1797 }";
1798 auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);
1799 CompileArgs compile_args = { 1, 1, 1 };
1800 compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };
1801 std::vector<RawShaderArg *> raw_args = { &out };
1802 run_shader({ kernel_source }, compile_args, out);
1803 EXPECT_EQ(out[0], 5);
1804 EXPECT_EQ(out[1], 100);
1805 }
1806
TEST_F(ComputeTest,global_atomic_add)1807 TEST_F(ComputeTest, global_atomic_add)
1808 {
1809 const char *kernel_source =
1810 "__kernel void main_test(__global int *inout, __global int *old)\n\
1811 {\n\
1812 old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\
1813 }\n";
1814 auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1815 auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);
1816 const int32_t expected_inout[] = { 5, 7 };
1817 const int32_t expected_old[] = { 2, 4 };
1818 run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1819 for (int i = 0; i < inout.size(); ++i) {
1820 EXPECT_EQ(inout[i], expected_inout[i]);
1821 EXPECT_EQ(old[i], expected_old[i]);
1822 }
1823 }
1824
TEST_F(ComputeTest,global_atomic_imin)1825 TEST_F(ComputeTest, global_atomic_imin)
1826 {
1827 const char *kernel_source =
1828 "__kernel void main_test(__global int *inout, __global int *old)\n\
1829 {\n\
1830 old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\
1831 }\n";
1832 auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);
1833 auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);
1834 const int32_t expected_inout[] = { 0, 1, -1 };
1835 const int32_t expected_old[] = { 0, 2, -1 };
1836 run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1837 for (int i = 0; i < inout.size(); ++i) {
1838 EXPECT_EQ(inout[i], expected_inout[i]);
1839 EXPECT_EQ(old[i], expected_old[i]);
1840 }
1841 }
1842
TEST_F(ComputeTest,global_atomic_and_or)1843 TEST_F(ComputeTest, global_atomic_and_or)
1844 {
1845 const char *kernel_source =
1846 "__attribute__((reqd_work_group_size(3, 1, 1)))\n\
1847 __kernel void main_test(__global int *inout)\n\
1848 {\n\
1849 atomic_and(inout, ~(1 << get_global_id(0)));\n\
1850 atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\
1851 }\n";
1852 auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);
1853 const int32_t expected[] = { 0x78 };
1854 run_shader(kernel_source, 3, 1, 1, inout);
1855 for (int i = 0; i < inout.size(); ++i)
1856 EXPECT_EQ(inout[i], expected[i]);
1857 }
1858
TEST_F(ComputeTest,global_atomic_cmpxchg)1859 TEST_F(ComputeTest, global_atomic_cmpxchg)
1860 {
1861 const char *kernel_source =
1862 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1863 __kernel void main_test(__global int *inout)\n\
1864 {\n\
1865 while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1866 ;\n\
1867 }\n";
1868 auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);
1869 const int32_t expected_inout[] = { 2 };
1870 run_shader(kernel_source, 2, 1, 1, inout);
1871 for (int i = 0; i < inout.size(); ++i)
1872 EXPECT_EQ(inout[i], expected_inout[i]);
1873 }
1874
TEST_F(ComputeTest,local_atomic_and_or)1875 TEST_F(ComputeTest, local_atomic_and_or)
1876 {
1877 const char *kernel_source =
1878 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1879 __kernel void main_test(__global ushort *inout)\n\
1880 {\n\
1881 __local ushort tmp;\n\
1882 atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\
1883 atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\
1884 barrier(CLK_LOCAL_MEM_FENCE);\n\
1885 inout[get_global_id(0)] = tmp;\n\
1886 }\n";
1887 auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);
1888 const uint16_t expected[] = { 0x402, 0x402 };
1889 run_shader(kernel_source, inout.size(), 1, 1, inout);
1890 for (int i = 0; i < inout.size(); ++i)
1891 EXPECT_EQ(inout[i], expected[i]);
1892 }
1893
TEST_F(ComputeTest,local_atomic_cmpxchg)1894 TEST_F(ComputeTest, local_atomic_cmpxchg)
1895 {
1896 const char *kernel_source =
1897 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1898 __kernel void main_test(__global int *out)\n\
1899 {\n\
1900 __local uint tmp;\n\
1901 tmp = 0;\n\
1902 barrier(CLK_LOCAL_MEM_FENCE);\n\
1903 while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1904 ;\n\
1905 barrier(CLK_LOCAL_MEM_FENCE);\n\
1906 out[0] = tmp;\n\
1907 }\n";
1908
1909 auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);
1910 const uint16_t expected[] = { 2 };
1911 run_shader(kernel_source, 2, 1, 1, out);
1912 for (int i = 0; i < out.size(); ++i)
1913 EXPECT_EQ(out[i], expected[i]);
1914 }
1915
TEST_F(ComputeTest,constant_sampler)1916 TEST_F(ComputeTest, constant_sampler)
1917 {
1918 const char* kernel_source =
1919 "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\
1920 __kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1921 {\n\
1922 int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\
1923 float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\
1924 write_imagef(output, coordsi, \n\
1925 read_imagef(input, sampler, coordsf) + \n\
1926 read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\
1927 }\n";
1928 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1929 validate(shader);
1930 EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);
1931 }
1932
TEST_F(ComputeTest,hi)1933 TEST_F(ComputeTest, hi)
1934 {
1935 const char *kernel_source = R"(
1936 __kernel void main_test(__global char3 *srcA, __global char2 *dst)
1937 {
1938 int tid = get_global_id(0);
1939
1940 char2 tmp = srcA[tid].hi;
1941 dst[tid] = tmp;
1942 })";
1943 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1944 validate(shader);
1945 }
1946
TEST_F(ComputeTest,system_values)1947 TEST_F(ComputeTest, system_values)
1948 {
1949 const char *kernel_source =
1950 "__kernel void main_test(__global uint* outputs)\n\
1951 {\n\
1952 outputs[0] = get_work_dim();\n\
1953 outputs[1] = get_global_size(0);\n\
1954 outputs[2] = get_local_size(0);\n\
1955 outputs[3] = get_num_groups(0);\n\
1956 outputs[4] = get_group_id(0);\n\
1957 outputs[5] = get_global_offset(0);\n\
1958 outputs[6] = get_global_id(0);\n\
1959 }\n";
1960 auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);
1961 const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };
1962 CompileArgs args = { 1, 1, 1 };
1963 Shader shader = compile({ kernel_source });
1964 run_shader(shader, args, out);
1965 for (int i = 0; i < out.size(); ++i)
1966 EXPECT_EQ(out[i], expected[i]);
1967
1968 args.work_props.work_dim = 2;
1969 args.work_props.global_offset_x = 100;
1970 args.work_props.group_id_offset_x = 2;
1971 args.work_props.group_count_total_x = 5;
1972 const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };
1973 run_shader(shader, args, out);
1974 for (int i = 0; i < out.size(); ++i)
1975 EXPECT_EQ(out[i], expected_withoffsets[i]);
1976 }
1977
TEST_F(ComputeTest,convert_round_sat)1978 TEST_F(ComputeTest, convert_round_sat)
1979 {
1980 const char *kernel_source =
1981 "__kernel void main_test(__global float *f, __global uchar *u)\n\
1982 {\n\
1983 uint idx = get_global_id(0);\n\
1984 u[idx] = convert_uchar_sat_rtp(f[idx]);\n\
1985 }\n";
1986 auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);
1987 auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);
1988 const uint8_t expected[] = {
1989 0, 2, 20, 255
1990 };
1991
1992 run_shader(kernel_source, f.size(), 1, 1, f, u);
1993 for (int i = 0; i < u.size(); ++i)
1994 EXPECT_EQ(u[i], expected[i]);
1995 }
1996
TEST_F(ComputeTest,convert_round_sat_vec)1997 TEST_F(ComputeTest, convert_round_sat_vec)
1998 {
1999 const char *kernel_source =
2000 "__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\
2001 {\n\
2002 uint idx = get_global_id(0);\n\
2003 u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\
2004 }\n";
2005 auto f = ShaderArg<float>({
2006 -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
2007 -0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
2008 0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
2009 -0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
2010 }, SHADER_ARG_INPUT);
2011 auto u = ShaderArg<uint8_t>({
2012 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
2013 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
2014 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
2015 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
2016 }, SHADER_ARG_OUTPUT);
2017 const uint8_t expected[] = {
2018 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
2019 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
2020 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
2021 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
2022 };
2023
2024 run_shader(kernel_source, 4, 1, 1, f, u);
2025 for (int i = 0; i < u.size(); ++i)
2026 EXPECT_EQ(u[i], expected[i]);
2027 }
2028
TEST_F(ComputeTest,convert_char2_uchar2)2029 TEST_F(ComputeTest, convert_char2_uchar2)
2030 {
2031 const char *kernel_source =
2032 "__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\
2033 {\n\
2034 size_t i = get_global_id(0);\n\
2035 dest[i] = convert_uchar2_sat( src[i] );\n\
2036 }\n";
2037
2038 auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);
2039 auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);
2040 const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };
2041 run_shader(kernel_source, 4, 1, 1, c, u);
2042 for (int i = 0; i < u.size(); i++)
2043 EXPECT_EQ(u[i], expected[i]);
2044 }
2045
TEST_F(ComputeTest,async_copy)2046 TEST_F(ComputeTest, async_copy)
2047 {
2048 const char *kernel_source = R"(
2049 __kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )
2050 {
2051 int i;
2052 for(i=0; i<copiesPerWorkItem; i++)
2053 localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;
2054 barrier( CLK_LOCAL_MEM_FENCE );
2055 event_t event;
2056 event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );
2057 wait_group_events( 1, &event );
2058 for(i=0; i<copiesPerWorkItem; i++)
2059 dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];
2060 })";
2061 Shader shader = compile({ kernel_source });
2062 validate(shader);
2063 }
2064
TEST_F(ComputeTest,packed_struct_global)2065 TEST_F(ComputeTest, packed_struct_global)
2066 {
2067 #pragma pack(push, 1)
2068 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2069 #pragma pack(pop)
2070
2071 const char *kernel_source =
2072 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2073 __kernel void main_test(__global struct s *inout, global uint *size)\n\
2074 {\n\
2075 uint idx = get_global_id(0);\n\
2076 inout[idx].uc = idx + 1;\n\
2077 inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\
2078 inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\
2079 *size = sizeof(struct s);\n\
2080 }\n";
2081 auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2082 auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);
2083 const struct s expected[] = {
2084 { 1, 0xfbfcfdff12345678, 0xa112 }
2085 };
2086
2087 run_shader(kernel_source, inout.size(), 1, 1, inout, size);
2088 for (int i = 0; i < inout.size(); ++i) {
2089 EXPECT_EQ(inout[i].uc, expected[i].uc);
2090 EXPECT_EQ(inout[i].ul, expected[i].ul);
2091 EXPECT_EQ(inout[i].us, expected[i].us);
2092 }
2093 EXPECT_EQ(size, sizeof(struct s));
2094 }
2095
TEST_F(ComputeTest,packed_struct_arg)2096 TEST_F(ComputeTest, packed_struct_arg)
2097 {
2098 #pragma pack(push, 1)
2099 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2100 #pragma pack(pop)
2101
2102 const char *kernel_source =
2103 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2104 __kernel void main_test(__global struct s *out, struct s in)\n\
2105 {\n\
2106 uint idx = get_global_id(0);\n\
2107 out[idx].uc = in.uc + 0x12;\n\
2108 out[idx].ul = in.ul + 0x123456789abcdef;\n\
2109 out[idx].us = in.us + 0x1234;\n\
2110 }\n";
2111 auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2112 auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2113 const struct s expected[] = {
2114 { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }
2115 };
2116
2117 run_shader(kernel_source, out.size(), 1, 1, out, in);
2118 for (int i = 0; i < out.size(); ++i) {
2119 EXPECT_EQ(out[i].uc, expected[i].uc);
2120 EXPECT_EQ(out[i].ul, expected[i].ul);
2121 EXPECT_EQ(out[i].us, expected[i].us);
2122 }
2123 }
2124
TEST_F(ComputeTest,packed_struct_local)2125 TEST_F(ComputeTest, packed_struct_local)
2126 {
2127 #pragma pack(push, 1)
2128 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2129 #pragma pack(pop)
2130
2131 const char *kernel_source =
2132 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2133 __kernel void main_test(__global struct s *out, __constant struct s *in)\n\
2134 {\n\
2135 uint idx = get_global_id(0);\n\
2136 __local struct s tmp[2];\n\
2137 tmp[get_local_id(0)] = in[idx];\n\
2138 barrier(CLK_LOCAL_MEM_FENCE);\n\
2139 out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\
2140 }\n";
2141 auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);
2142 auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);
2143 const struct s expected[] = {
2144 { 0x12, 0x123456789abcdef, 0x1234 },
2145 { 1, 2, 3 },
2146 };
2147
2148 run_shader(kernel_source, out.size(), 1, 1, out, in);
2149 for (int i = 0; i < out.size(); ++i) {
2150 EXPECT_EQ(out[i].uc, expected[i].uc);
2151 EXPECT_EQ(out[i].ul, expected[i].ul);
2152 EXPECT_EQ(out[i].us, expected[i].us);
2153 }
2154 }
2155
TEST_F(ComputeTest,packed_struct_const)2156 TEST_F(ComputeTest, packed_struct_const)
2157 {
2158 #pragma pack(push, 1)
2159 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2160 #pragma pack(pop)
2161
2162 const char *kernel_source =
2163 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2164 __kernel void main_test(__global struct s *out, struct s in)\n\
2165 {\n\
2166 __constant struct s base[] = {\n\
2167 {0x12, 0x123456789abcdef, 0x1234},\n\
2168 {0x11, 0x123456789abcdee, 0x1233},\n\
2169 };\n\
2170 uint idx = get_global_id(0);\n\
2171 out[idx].uc = base[idx % 2].uc + in.uc;\n\
2172 out[idx].ul = base[idx % 2].ul + in.ul;\n\
2173 out[idx].us = base[idx % 2].us + in.us;\n\
2174 }\n";
2175 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);
2176 auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2177 const struct s expected[] = {
2178 { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },
2179 { 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },
2180 };
2181
2182 run_shader(kernel_source, out.size(), 1, 1, out, in);
2183 for (int i = 0; i < out.size(); ++i) {
2184 EXPECT_EQ(out[i].uc, expected[i].uc);
2185 EXPECT_EQ(out[i].ul, expected[i].ul);
2186 EXPECT_EQ(out[i].us, expected[i].us);
2187 }
2188 }
2189
TEST_F(ComputeTest,printf)2190 TEST_F(ComputeTest, printf)
2191 {
2192 const char *kernel_source = R"(
2193 __kernel void main_test(__global float *src, __global uint *dest)
2194 {
2195 *dest = printf("%s: %f", "Test", src[0]);
2196 })";
2197
2198 auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2199 auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);
2200 run_shader(kernel_source, 1, 1, 1, src, dest);
2201 EXPECT_EQ(dest[0], 0);
2202 }
2203
TEST_F(ComputeTest,vload_half)2204 TEST_F(ComputeTest, vload_half)
2205 {
2206 const char *kernel_source = R"(
2207 __kernel void main_test(__global half *src, __global float4 *dest)
2208 {
2209 int offset = get_global_id(0);
2210 dest[offset] = vload_half4(offset, src);
2211 })";
2212 auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,
2213 0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);
2214 auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
2215 FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);
2216 run_shader(kernel_source, 2, 1, 1, src, dest);
2217 for (unsigned i = 0; i < 8; ++i)
2218 EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));
2219 }
2220
TEST_F(ComputeTest,vstore_half)2221 TEST_F(ComputeTest, vstore_half)
2222 {
2223 const char *kernel_source = R"(
2224 __kernel void main_test(__global half *dst, __global float4 *src)
2225 {
2226 int offset = get_global_id(0);
2227 vstore_half4(src[offset], offset, dst);
2228 })";
2229 auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,
2230 0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);
2231 auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,
2232 5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);
2233 run_shader(kernel_source, 2, 1, 1, dest, src);
2234 const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,
2235 0x4500, 0x4600, 0x4700, 0x4800 };
2236 for (unsigned i = 0; i < 8; ++i)
2237 EXPECT_EQ(dest[i], expected[i]);
2238 }
2239
TEST_F(ComputeTest,inline_function)2240 TEST_F(ComputeTest, inline_function)
2241 {
2242 const char *kernel_source = R"(
2243 inline float helper(float foo)
2244 {
2245 return foo * 2;
2246 }
2247
2248 __kernel void main_test(__global float *dst, __global float *src)
2249 {
2250 *dst = helper(*src);
2251 })";
2252 auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);
2253 auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2254 run_shader(kernel_source, 1, 1, 1, dest, src);
2255 EXPECT_EQ(dest[0], 2.0f);
2256 }
2257
TEST_F(ComputeTest,unused_arg)2258 TEST_F(ComputeTest, unused_arg)
2259 {
2260 const char *kernel_source = R"(
2261 __kernel void main_test(__global int *dst, __global int *unused, __global int *src)
2262 {
2263 int i = get_global_id(0);
2264 dst[i] = src[i];
2265 })";
2266 auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);
2267 auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
2268 auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);
2269 run_shader(kernel_source, 4, 1, 1, dest, unused, src);
2270 for (int i = 0; i < 4; ++i)
2271 EXPECT_EQ(dest[i], i + 1);
2272 }
2273
TEST_F(ComputeTest,spec_constant)2274 TEST_F(ComputeTest, spec_constant)
2275 {
2276 const char *spirv_asm = R"(
2277 OpCapability Addresses
2278 OpCapability Kernel
2279 OpCapability Int64
2280 %1 = OpExtInstImport "OpenCL.std"
2281 OpMemoryModel Physical64 OpenCL
2282 OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId
2283 %4 = OpString "kernel_arg_type.main_test.uint*,"
2284 OpSource OpenCL_C 102000
2285 OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
2286 OpName %output "output"
2287 OpName %entry "entry"
2288 OpName %output_addr "output.addr"
2289 OpName %id "id"
2290 OpName %call "call"
2291 OpName %conv "conv"
2292 OpName %idxprom "idxprom"
2293 OpName %arrayidx "arrayidx"
2294 OpName %add "add"
2295 OpName %mul "mul"
2296 OpName %idxprom1 "idxprom1"
2297 OpName %arrayidx2 "arrayidx2"
2298 OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
2299 OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
2300 OpDecorate %id Alignment 4
2301 OpDecorate %output_addr Alignment 8
2302 OpDecorate %uint_1 SpecId 1
2303 %ulong = OpTypeInt 64 0
2304 %uint = OpTypeInt 32 0
2305 %uint_1 = OpSpecConstant %uint 1
2306 %v3ulong = OpTypeVector %ulong 3
2307 %_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
2308 %void = OpTypeVoid
2309 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
2310 %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
2311 %_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
2312 %_ptr_Function_uint = OpTypePointer Function %uint
2313 %__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
2314 %2 = OpFunction %void DontInline %24
2315 %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint
2316 %entry = OpLabel
2317 %output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function
2318 %id = OpVariable %_ptr_Function_uint Function
2319 OpStore %output_addr %output Aligned 8
2320 %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32
2321 %call = OpCompositeExtract %ulong %27 0
2322 %conv = OpUConvert %uint %call
2323 OpStore %id %conv Aligned 4
2324 %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2325 %29 = OpLoad %uint %id Aligned 4
2326 %idxprom = OpUConvert %ulong %29
2327 %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom
2328 %30 = OpLoad %uint %arrayidx Aligned 4
2329 %31 = OpLoad %uint %id Aligned 4
2330 %add = OpIAdd %uint %31 %uint_1
2331 %mul = OpIMul %uint %30 %add
2332 %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2333 %33 = OpLoad %uint %id Aligned 4
2334 %idxprom1 = OpUConvert %ulong %33
2335 %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1
2336 OpStore %arrayidx2 %mul Aligned 4
2337 OpReturn
2338 OpFunctionEnd)";
2339 Shader shader = assemble(spirv_asm);
2340 Shader spec_shader = specialize(shader, 1, 5);
2341
2342 auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 },
2343 SHADER_ARG_INOUT);
2344 const uint32_t expected[] = {
2345 0x00000005, 0x60000006, 0x000e000e, 0x20081018
2346 };
2347 CompileArgs args = { (unsigned)inout.size(), 1, 1 };
2348 run_shader(spec_shader, args, inout);
2349 for (int i = 0; i < inout.size(); ++i)
2350 EXPECT_EQ(inout[i], expected[i]);
2351 }
2352
TEST_F(ComputeTest,arg_metadata)2353 TEST_F(ComputeTest, arg_metadata)
2354 {
2355 const char *kernel_source = R"(
2356 __kernel void main_test(
2357 __global int *undec_ptr,
2358 __global volatile int *vol_ptr,
2359 __global const int *const_ptr,
2360 __global int *restrict restr_ptr,
2361 __global const int *restrict const_restr_ptr,
2362 __constant int *const_ptr2)
2363 {
2364 })";
2365 Shader shader = compile({ kernel_source });
2366 EXPECT_EQ(shader.metadata->kernels[0].args[0].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2367 EXPECT_EQ(shader.metadata->kernels[0].args[0].type_qualifier, 0);
2368 EXPECT_EQ(shader.metadata->kernels[0].args[1].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2369 EXPECT_EQ(shader.metadata->kernels[0].args[1].type_qualifier, CLC_KERNEL_ARG_TYPE_VOLATILE);
2370 EXPECT_EQ(shader.metadata->kernels[0].args[2].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2371 EXPECT_EQ(shader.metadata->kernels[0].args[2].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
2372 EXPECT_EQ(shader.metadata->kernels[0].args[3].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2373 EXPECT_EQ(shader.metadata->kernels[0].args[3].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT);
2374 EXPECT_EQ(shader.metadata->kernels[0].args[4].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2375 EXPECT_EQ(shader.metadata->kernels[0].args[4].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT | CLC_KERNEL_ARG_TYPE_CONST);
2376 EXPECT_EQ(shader.metadata->kernels[0].args[5].address_qualifier, CLC_KERNEL_ARG_ADDRESS_CONSTANT);
2377 EXPECT_EQ(shader.metadata->kernels[0].args[5].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
2378 }
2379