xref: /aosp_15_r20/external/mesa3d/src/microsoft/clc/clc_compiler_test.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <cmath>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdexcept>
28 #include <vector>
29 
30 #include <unknwn.h>
31 #include <directx/d3d12.h>
32 #include <dxgi1_4.h>
33 #include <gtest/gtest.h>
34 #include <wrl.h>
35 #include <dxguids/dxguids.h>
36 
37 #include "compute_test.h"
38 
39 using std::vector;
40 
TEST_F(ComputeTest,runtime_memcpy)41 TEST_F(ComputeTest, runtime_memcpy)
42 {
43    struct shift { uint8_t val; uint8_t shift; uint16_t ret; };
44    const char *kernel_source =
45    "struct shift { uchar val; uchar shift; ushort ret; };\n\
46    __kernel void main_test(__global struct shift *inout)\n\
47    {\n\
48       uint id = get_global_id(0);\n\
49       uint id2 = id + get_global_id(1);\n\
50       struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\
51       lc[id] = inout[id];\n\
52       inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\
53    }\n";
54 
55    auto inout = ShaderArg<struct shift>({
56          { 0x10, 1, 0xffff },
57          { 0x20, 2, 0xffff },
58          { 0x30, 3, 0xffff },
59          { 0x40, 4, 0xffff },
60       },
61       SHADER_ARG_INOUT);
62    const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };
63    run_shader(kernel_source, inout.size(), 1, 1, inout);
64    for (int i = 0; i < inout.size(); ++i)
65       EXPECT_EQ(inout[i].ret, expected[i]);
66 }
67 
TEST_F(ComputeTest,two_global_arrays)68 TEST_F(ComputeTest, two_global_arrays)
69 {
70    const char *kernel_source =
71    "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
72    {\n\
73        uint idx = get_global_id(0);\n\
74        g1[idx] -= g2[idx];\n\
75    }\n";
76    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
77    auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
78    const uint32_t expected[] = {
79       9, 18, 27, 36
80    };
81 
82    run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
83    for (int i = 0; i < g1.size(); ++i)
84       EXPECT_EQ(g1[i], expected[i]);
85 }
86 
TEST_F(ComputeTest,nested_arrays)87 TEST_F(ComputeTest, nested_arrays)
88 {
89    const char *kernel_source = R"(
90 float4 DoMagic(float4 inValue)
91 {
92     const float testArr[3][3] = {
93         {0.1f, 0.2f, 0.3f},
94         {0.4f, 0.5f, 0.6f},
95         {0.7f, 0.8f, 0.9f}};
96     float4 outValue = inValue;
97     outValue.x = inValue.x * testArr[0][0] + inValue.y * testArr[0][1] + inValue.z * testArr[0][2];
98     outValue.y = inValue.x * testArr[1][0] + inValue.y * testArr[1][1] + inValue.z * testArr[1][2];
99     outValue.z = inValue.x * testArr[2][0] + inValue.y * testArr[2][1] + inValue.z * testArr[2][2];
100     return outValue;
101 }
102 __kernel void main_test(__global float4 *g1, __global float4 *g2)
103 {
104    uint idx = get_global_id(0);
105    g1[idx] = DoMagic(g2[idx]);
106 })";
107    auto g1 = ShaderArg<float>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
108    auto g2 = ShaderArg<float>({ 0.2f, 0.4f, 0.6f, 1.0f }, SHADER_ARG_INPUT);
109    const float expected[] = {
110       0.28f, 0.64f, 1.0f, 1.0f
111    };
112 
113    run_shader(kernel_source, 1, 1, 1, g1, g2);
114    for (int i = 0; i < g1.size(); ++i)
115       EXPECT_FLOAT_EQ(g1[i], expected[i]);
116 }
117 
118 /* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
TEST_F(ComputeTest,DISABLED_i64tof32)119 TEST_F(ComputeTest, DISABLED_i64tof32)
120 {
121    const char *kernel_source =
122    "__kernel void main_test(__global long *out, __constant long *in)\n\
123    {\n\
124        __local float tmp[12];\n\
125        uint idx = get_global_id(0);\n\
126        tmp[idx] = in[idx];\n\
127        barrier(CLK_LOCAL_MEM_FENCE);\n\
128        out[idx] = tmp[idx + get_global_id(1)];\n\
129    }\n";
130    auto in = ShaderArg<int64_t>({ 0x100000000LL,
131                                   -0x100000000LL,
132                                   0x7fffffffffffffffLL,
133                                   0x4000004000000000LL,
134                                   0x4000003fffffffffLL,
135                                   0x4000004000000001LL,
136                                   -1,
137                                   -0x4000004000000000LL,
138                                   -0x4000003fffffffffLL,
139                                   -0x4000004000000001LL,
140                                   0,
141                                   INT64_MIN },
142                                 SHADER_ARG_INPUT);
143    auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
144    const int64_t expected[] = {
145       0x100000000LL,
146       -0x100000000LL,
147       0x7fffffffffffffffLL,
148       0x4000000000000000LL,
149       0x4000000000000000LL,
150       0x4000008000000000LL,
151       -1,
152       -0x4000000000000000LL,
153       -0x4000000000000000LL,
154       -0x4000008000000000LL,
155       0,
156       INT64_MIN,
157    };
158 
159    run_shader(kernel_source, out.size(), 1, 1, out, in);
160    for (int i = 0; i < out.size(); ++i) {
161       EXPECT_EQ((int64_t)out[i], expected[i]);
162    }
163 }
TEST_F(ComputeTest,two_constant_arrays)164 TEST_F(ComputeTest, two_constant_arrays)
165 {
166    const char *kernel_source =
167    "__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\
168    {\n\
169        uint idx = get_global_id(0);\n\
170        g1[idx] -= c1[idx] + c2[idx];\n\
171    }\n";
172    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
173    auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
174    auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);
175    const uint32_t expected[] = {
176       4, 13, 22, 31
177    };
178 
179    run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);
180    for (int i = 0; i < g1.size(); ++i)
181       EXPECT_EQ(g1[i], expected[i]);
182 }
183 
TEST_F(ComputeTest,null_constant_ptr)184 TEST_F(ComputeTest, null_constant_ptr)
185 {
186    const char *kernel_source =
187    "__kernel void main_test(__global uint *g1, __constant uint *c1)\n\
188    {\n\
189        __constant uint fallback[] = {2, 3, 4, 5};\n\
190        __constant uint *c = c1 ? c1 : fallback;\n\
191        uint idx = get_global_id(0);\n\
192        g1[idx] -= c[idx];\n\
193    }\n";
194    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
195    auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
196    const uint32_t expected1[] = {
197       9, 18, 27, 36
198    };
199 
200    run_shader(kernel_source, g1.size(), 1, 1, g1, c1);
201    for (int i = 0; i < g1.size(); ++i)
202       EXPECT_EQ(g1[i], expected1[i]);
203 
204    const uint32_t expected2[] = {
205       8, 17, 26, 35
206    };
207 
208    g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
209    auto c2 = NullShaderArg();
210    run_shader(kernel_source, g1.size(), 1, 1, g1, c2);
211    for (int i = 0; i < g1.size(); ++i)
212       EXPECT_EQ(g1[i], expected2[i]);
213 }
214 
TEST_F(ComputeTest,null_global_ptr)215 TEST_F(ComputeTest, null_global_ptr)
216 {
217    const char *kernel_source =
218    "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
219    {\n\
220        __constant uint fallback[] = {2, 3, 4, 5};\n\
221        uint idx = get_global_id(0);\n\
222        g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\
223    }\n";
224    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
225    auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
226    const uint32_t expected1[] = {
227       9, 18, 27, 36
228    };
229 
230    run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
231    for (int i = 0; i < g1.size(); ++i)
232       EXPECT_EQ(g1[i], expected1[i]);
233 
234    const uint32_t expected2[] = {
235       8, 17, 26, 35
236    };
237 
238    g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
239    auto g2null = NullShaderArg();
240    run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);
241    for (int i = 0; i < g1.size(); ++i)
242       EXPECT_EQ(g1[i], expected2[i]);
243 }
244 
TEST_F(ComputeTest,ret_constant_ptr)245 TEST_F(ComputeTest, ret_constant_ptr)
246 {
247    struct s { uint64_t ptr; uint32_t val; };
248    const char *kernel_source =
249    "struct s { __constant uint *ptr; uint val; };\n\
250    __kernel void main_test(__global struct s *out, __constant uint *in)\n\
251    {\n\
252        __constant uint foo[] = { 1, 2 };\n\
253        uint idx = get_global_id(0);\n\
254        if (idx == 0)\n\
255           out[idx].ptr = foo;\n\
256        else\n\
257           out[idx].ptr = in;\n\
258        out[idx].val = out[idx].ptr[idx];\n\
259    }\n";
260    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
261    auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
262    const uint32_t expected_val[] = {
263       1, 4
264    };
265    const uint64_t expected_ptr[] = {
266       2ull << 32, 1ull << 32
267    };
268 
269    run_shader(kernel_source, out.size(), 1, 1, out, in);
270    for (int i = 0; i < out.size(); ++i) {
271       EXPECT_EQ(out[i].val, expected_val[i]);
272       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
273    }
274 }
275 
TEST_F(ComputeTest,ret_global_ptr)276 TEST_F(ComputeTest, ret_global_ptr)
277 {
278    struct s { uint64_t ptr; uint32_t val; };
279    const char *kernel_source =
280    "struct s { __global uint *ptr; uint val; };\n\
281    __kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\
282    {\n\
283        uint idx = get_global_id(0);\n\
284        out[idx].ptr = idx ? in2 : in1;\n\
285        out[idx].val = out[idx].ptr[idx];\n\
286    }\n";
287    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
288    auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);
289    auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
290    const uint32_t expected_val[] = {
291       1, 4
292    };
293    const uint64_t expected_ptr[] = {
294       1ull << 32, 2ull << 32
295    };
296 
297    run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);
298    for (int i = 0; i < out.size(); ++i) {
299       EXPECT_EQ(out[i].val, expected_val[i]);
300       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
301    }
302 }
303 
TEST_F(ComputeTest,ret_local_ptr)304 TEST_F(ComputeTest, ret_local_ptr)
305 {
306    struct s { uint64_t ptr; };
307    const char *kernel_source =
308    "struct s { __local uint *ptr; };\n\
309    __kernel void main_test(__global struct s *out)\n\
310    {\n\
311        __local uint tmp[2];\n\
312        uint idx = get_global_id(0);\n\
313        tmp[idx] = idx;\n\
314        out[idx].ptr = &tmp[idx];\n\
315    }\n";
316    auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
317    const uint64_t expected_ptr[] = {
318       0, 4,
319    };
320 
321    run_shader(kernel_source, out.size(), 1, 1, out);
322    for (int i = 0; i < out.size(); ++i) {
323       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
324    }
325 }
326 
TEST_F(ComputeTest,ret_private_ptr)327 TEST_F(ComputeTest, ret_private_ptr)
328 {
329    struct s { uint64_t ptr; uint32_t value; };
330    const char *kernel_source =
331    "struct s { __private uint *ptr; uint value; };\n\
332    __kernel void main_test(__global struct s *out)\n\
333    {\n\
334        uint tmp[2] = {1, 2};\n\
335        uint idx = get_global_id(0);\n\
336        out[idx].ptr = &tmp[idx];\n\
337        out[idx].value = *out[idx].ptr;\n\
338    }\n";
339    auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
340    const uint64_t expected_ptr[] = {
341       0, 4,
342    };
343    const uint32_t expected_value[] = {
344       1, 2
345    };
346 
347    run_shader(kernel_source, out.size(), 1, 1, out);
348    for (int i = 0; i < out.size(); ++i) {
349       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
350    }
351 }
352 
TEST_F(ComputeTest,globals_8bit)353 TEST_F(ComputeTest, globals_8bit)
354 {
355    const char *kernel_source =
356    "__kernel void main_test(__global unsigned char *inout)\n\
357    {\n\
358        uint idx = get_global_id(0);\n\
359        inout[idx] = inout[idx] + 1;\n\
360    }\n";
361    auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);
362    const uint8_t expected[] = {
363       101, 111, 121, 131
364    };
365    run_shader(kernel_source, inout.size(), 1, 1, inout);
366    for (int i = 0; i < inout.size(); ++i)
367       EXPECT_EQ(inout[i], expected[i]);
368 }
369 
TEST_F(ComputeTest,globals_16bit)370 TEST_F(ComputeTest, globals_16bit)
371 {
372    const char *kernel_source =
373    "__kernel void main_test(__global unsigned short *inout)\n\
374    {\n\
375        uint idx = get_global_id(0);\n\
376        inout[idx] = inout[idx] + 1;\n\
377    }\n";
378    auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);
379    const uint16_t expected[] = {
380       10001, 10011, 10021, 10031
381    };
382    run_shader(kernel_source, inout.size(), 1, 1, inout);
383    for (int i = 0; i < inout.size(); ++i)
384       EXPECT_EQ(inout[i], expected[i]);
385 }
386 
TEST_F(ComputeTest,globals_64bit)387 TEST_F(ComputeTest, globals_64bit)
388 {
389    const char *kernel_source =
390    "__kernel void main_test(__global unsigned long *inout)\n\
391    {\n\
392        uint idx = get_global_id(0);\n\
393        inout[idx] = inout[idx] + 1;\n\
394    }\n";
395    uint64_t base = 1ull << 50;
396    auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },
397                                     SHADER_ARG_INOUT);
398    const uint64_t expected[] = {
399       base + 1, base + 11, base + 21, base + 31
400    };
401    run_shader(kernel_source, inout.size(), 1, 1, inout);
402    for (int i = 0; i < inout.size(); ++i)
403       EXPECT_EQ(inout[i], expected[i]);
404 }
405 
TEST_F(ComputeTest,built_ins_global_id)406 TEST_F(ComputeTest, built_ins_global_id)
407 {
408    const char *kernel_source =
409    "__kernel void main_test(__global uint *output)\n\
410    {\n\
411        output[get_global_id(0)] = get_global_id(0);\n\
412    }\n";
413    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
414                                      SHADER_ARG_OUTPUT);
415    const uint32_t expected[] = {
416       0, 1, 2, 3
417    };
418 
419    run_shader(kernel_source, output.size(), 1, 1, output);
420    for (int i = 0; i < output.size(); ++i)
421       EXPECT_EQ(output[i], expected[i]);
422 }
423 
TEST_F(ComputeTest,built_ins_global_id_rmw)424 TEST_F(ComputeTest, built_ins_global_id_rmw)
425 {
426    const char *kernel_source =
427    "__kernel void main_test(__global uint *output)\n\
428    {\n\
429        uint id = get_global_id(0);\n\
430        output[id] = output[id] * (id + 1);\n\
431    }\n";
432    auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},
433                                     SHADER_ARG_INOUT);
434    const uint32_t expected[] = {
435       0x00000001, 0x20000002, 0x00060006, 0x1004080c
436    };
437    run_shader(kernel_source, inout.size(), 1, 1, inout);
438    for (int i = 0; i < inout.size(); ++i)
439       EXPECT_EQ(inout[i], expected[i]);
440 }
441 
TEST_F(ComputeTest,types_float_basics)442 TEST_F(ComputeTest, types_float_basics)
443 {
444    const char *kernel_source =
445    "__kernel void main_test(__global uint *output)\n\
446    {\n\
447        output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\
448    }\n";
449    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
450                                      SHADER_ARG_OUTPUT);
451    const uint32_t expected[] = {
452       1, 2, 3, 4
453    };
454    run_shader(kernel_source, output.size(), 1, 1, output);
455    for (int i = 0; i < output.size(); ++i)
456       EXPECT_EQ(output[i], expected[i]);
457 }
458 
TEST_F(ComputeTest,DISABLED_types_double_basics)459 TEST_F(ComputeTest, DISABLED_types_double_basics)
460 {
461    /* Disabled because doubles are unsupported */
462    const char *kernel_source =
463    "__kernel void main_test(__global uint *output)\n\
464    {\n\
465        output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\
466    }\n";
467    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
468                                      SHADER_ARG_OUTPUT);
469    const uint32_t expected[] = {
470       1, 2, 3, 4
471    };
472    run_shader(kernel_source, output.size(), 1, 1, output);
473    for (int i = 0; i < output.size(); ++i)
474       EXPECT_EQ(output[i], expected[i]);
475 }
476 
TEST_F(ComputeTest,types_short_basics)477 TEST_F(ComputeTest, types_short_basics)
478 {
479    const char *kernel_source =
480    "__kernel void main_test(__global uint *output)\n\
481    {\n\
482        output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\
483    }\n";
484    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
485                                      SHADER_ARG_OUTPUT);
486    const uint32_t expected[] = {
487       1, 2, 3, 4
488    };
489    run_shader(kernel_source, output.size(), 1, 1, output);
490    for (int i = 0; i < output.size(); ++i)
491       EXPECT_EQ(output[i], expected[i]);
492 }
493 
TEST_F(ComputeTest,types_char_basics)494 TEST_F(ComputeTest, types_char_basics)
495 {
496    const char *kernel_source =
497    "__kernel void main_test(__global uint *output)\n\
498    {\n\
499        output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\
500    }\n";
501    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
502                                      SHADER_ARG_OUTPUT);
503    const uint32_t expected[] = {
504       1, 2, 3, 4
505    };
506    run_shader(kernel_source, output.size(), 1, 1, output);
507    for (int i = 0; i < output.size(); ++i)
508       EXPECT_EQ(output[i], expected[i]);
509 }
510 
TEST_F(ComputeTest,types_if_statement)511 TEST_F(ComputeTest, types_if_statement)
512 {
513    const char *kernel_source =
514    "__kernel void main_test(__global uint *output)\n\
515    {\n\
516        int idx = get_global_id(0);\n\
517        if (idx > 0)\n\
518            output[idx] = ~idx;\n\
519        else\n\
520            output[0] = 0xff;\n\
521    }\n";
522    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
523                                      SHADER_ARG_OUTPUT);
524    const uint32_t expected[] = {
525       0xff, ~1u, ~2u, ~3u
526    };
527    run_shader(kernel_source, output.size(), 1, 1, output);
528    for (int i = 0; i < output.size(); ++i)
529       EXPECT_EQ(output[i], expected[i]);
530 }
531 
TEST_F(ComputeTest,types_do_while_loop)532 TEST_F(ComputeTest, types_do_while_loop)
533 {
534    const char *kernel_source =
535    "__kernel void main_test(__global uint *output)\n\
536    {\n\
537        int value = 1;\n\
538        int i = 1, n = get_global_id(0);\n\
539        do {\n\
540           value *= i++;\n\
541        } while (i <= n);\n\
542        output[n] = value;\n\
543    }\n";
544    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
545                                      SHADER_ARG_OUTPUT);
546    const uint32_t expected[] = {
547       1, 1, 1*2, 1*2*3, 1*2*3*4
548    };
549    run_shader(kernel_source, output.size(), 1, 1, output);
550    for (int i = 0; i < output.size(); ++i)
551       EXPECT_EQ(output[i], expected[i]);
552 }
553 
TEST_F(ComputeTest,types_for_loop)554 TEST_F(ComputeTest, types_for_loop)
555 {
556    const char *kernel_source =
557    "__kernel void main_test(__global uint *output)\n\
558    {\n\
559        int value = 1;\n\
560        int n = get_global_id(0);\n\
561        for (int i = 1; i <= n; ++i)\n\
562           value *= i;\n\
563        output[n] = value;\n\
564    }\n";
565    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
566                                      SHADER_ARG_OUTPUT);
567    const uint32_t expected[] = {
568       1, 1, 1*2, 1*2*3, 1*2*3*4
569    };
570    run_shader(kernel_source, output.size(), 1, 1, output);
571    for (int i = 0; i < output.size(); ++i)
572       EXPECT_EQ(output[i], expected[i]);
573 }
574 
TEST_F(ComputeTest,complex_types_local_array_long)575 TEST_F(ComputeTest, complex_types_local_array_long)
576 {
577    const char *kernel_source =
578    "__kernel void main_test(__global ulong *inout)\n\
579    {\n\
580       ulong tmp[] = {\n\
581          get_global_id(1) + 0x00000000,\n\
582          get_global_id(1) + 0x10000001,\n\
583          get_global_id(1) + 0x20000020,\n\
584          get_global_id(1) + 0x30000300,\n\
585       };\n\
586       uint idx = get_global_id(0);\n\
587       inout[idx] = tmp[idx];\n\
588    }\n";
589    auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
590    const uint64_t expected[] = {
591       0x00000000, 0x10000001, 0x20000020, 0x30000300,
592    };
593    run_shader(kernel_source, inout.size(), 1, 1, inout);
594    for (int i = 0; i < inout.size(); ++i)
595       EXPECT_EQ(inout[i], expected[i]);
596 }
597 
TEST_F(ComputeTest,complex_types_local_array_short)598 TEST_F(ComputeTest, complex_types_local_array_short)
599 {
600    const char *kernel_source =
601    "__kernel void main_test(__global ushort *inout)\n\
602    {\n\
603       ushort tmp[] = {\n\
604          get_global_id(1) + 0x00,\n\
605          get_global_id(1) + 0x10,\n\
606          get_global_id(1) + 0x20,\n\
607          get_global_id(1) + 0x30,\n\
608       };\n\
609       uint idx = get_global_id(0);\n\
610       inout[idx] = tmp[idx];\n\
611    }\n";
612    auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
613    const uint16_t expected[] = {
614       0x00, 0x10, 0x20, 0x30,
615    };
616    run_shader(kernel_source, inout.size(), 1, 1, inout);
617    for (int i = 0; i < inout.size(); ++i)
618       EXPECT_EQ(inout[i], expected[i]);
619 }
620 
TEST_F(ComputeTest,complex_types_local_array_struct_vec_float_misaligned)621 TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)
622 {
623    const char *kernel_source =
624    "struct has_vecs { uchar c; ushort s; float2 f; };\n\
625    __kernel void main_test(__global uint *inout)\n\
626    {\n\
627       struct has_vecs tmp[] = {\n\
628          { 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\
629          { 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\
630          { 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\
631          { 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\
632       };\n\
633       uint idx = get_global_id(0);\n\
634       uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\
635       inout[idx] = mul + trunc(tmp[idx].f[1]);\n\
636    }\n";
637    auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
638    const uint16_t expected[] = { 101, 404, 909, 1616 };
639    run_shader(kernel_source, inout.size(), 1, 1, inout);
640    for (int i = 0; i < inout.size(); ++i)
641       EXPECT_EQ(inout[i], expected[i]);
642 }
643 
TEST_F(ComputeTest,complex_types_local_array)644 TEST_F(ComputeTest, complex_types_local_array)
645 {
646    const char *kernel_source =
647    "__kernel void main_test(__global uint *inout)\n\
648    {\n\
649       uint tmp[] = {\n\
650          get_global_id(1) + 0x00,\n\
651          get_global_id(1) + 0x10,\n\
652          get_global_id(1) + 0x20,\n\
653          get_global_id(1) + 0x30,\n\
654       };\n\
655       uint idx = get_global_id(0);\n\
656       inout[idx] = tmp[idx];\n\
657    }\n";
658    auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
659    const uint32_t expected[] = {
660       0x00, 0x10, 0x20, 0x30,
661    };
662    run_shader(kernel_source, inout.size(), 1, 1, inout);
663    for (int i = 0; i < inout.size(); ++i)
664       EXPECT_EQ(inout[i], expected[i]);
665 }
666 
TEST_F(ComputeTest,complex_types_global_struct_array)667 TEST_F(ComputeTest, complex_types_global_struct_array)
668 {
669    struct two_vals { uint32_t add; uint32_t mul; };
670    const char *kernel_source =
671    "struct two_vals { uint add; uint mul; };\n\
672    __kernel void main_test(__global struct two_vals *in_out)\n\
673    {\n\
674       uint id = get_global_id(0);\n\
675       in_out[id].add = in_out[id].add + id;\n\
676       in_out[id].mul = in_out[id].mul * id;\n\
677    }\n";
678    auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
679                                            SHADER_ARG_INOUT);
680    const struct two_vals expected[] = {
681       { 8 + 0, 8 * 0 },
682       { 16 + 1, 16 * 1 },
683       { 64 + 2, 64 * 2 },
684       { 65536 + 3, 65536 * 3 }
685    };
686    run_shader(kernel_source, inout.size(), 1, 1, inout);
687    for (int i = 0; i < inout.size(); ++i) {
688       EXPECT_EQ(inout[i].add, expected[i].add);
689       EXPECT_EQ(inout[i].mul, expected[i].mul);
690    }
691 }
692 
TEST_F(ComputeTest,complex_types_global_uint2)693 TEST_F(ComputeTest, complex_types_global_uint2)
694 {
695    struct uint2 { uint32_t x; uint32_t y; };
696    const char *kernel_source =
697    "__kernel void main_test(__global uint2 *inout)\n\
698    {\n\
699       uint id = get_global_id(0);\n\
700       inout[id].x = inout[id].x + id;\n\
701       inout[id].y = inout[id].y * id;\n\
702    }\n";
703    auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
704                                         SHADER_ARG_INOUT);
705    const struct uint2 expected[] = {
706       { 8 + 0, 8 * 0 },
707       { 16 + 1, 16 * 1 },
708       { 64 + 2, 64 * 2 },
709       { 65536 + 3, 65536 * 3 }
710    };
711    run_shader(kernel_source, inout.size(), 1, 1, inout);
712    for (int i = 0; i < inout.size(); ++i) {
713       EXPECT_EQ(inout[i].x, expected[i].x);
714       EXPECT_EQ(inout[i].y, expected[i].y);
715    }
716 }
717 
TEST_F(ComputeTest,complex_types_global_ushort2)718 TEST_F(ComputeTest, complex_types_global_ushort2)
719 {
720    struct ushort2 { uint16_t x; uint16_t y; };
721    const char *kernel_source =
722    "__kernel void main_test(__global ushort2 *inout)\n\
723    {\n\
724       uint id = get_global_id(0);\n\
725       inout[id].x = inout[id].x + id;\n\
726       inout[id].y = inout[id].y * id;\n\
727    }\n";
728    auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },
729                                             { (uint16_t)65536, (uint16_t)65536 } },
730                                           SHADER_ARG_INOUT);
731    const struct ushort2 expected[] = {
732       { 8 + 0, 8 * 0 },
733       { 16 + 1, 16 * 1 },
734       { 64 + 2, 64 * 2 },
735       { (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }
736    };
737    run_shader(kernel_source, inout.size(), 1, 1, inout);
738    for (int i = 0; i < inout.size(); ++i) {
739       EXPECT_EQ(inout[i].x, expected[i].x);
740       EXPECT_EQ(inout[i].y, expected[i].y);
741    }
742 }
743 
TEST_F(ComputeTest,complex_types_global_uchar3)744 TEST_F(ComputeTest, complex_types_global_uchar3)
745 {
746    struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
747    const char *kernel_source =
748    "__kernel void main_test(__global uchar3 *inout)\n\
749    {\n\
750       uint id = get_global_id(0);\n\
751       inout[id].x = inout[id].x + id;\n\
752       inout[id].y = inout[id].y * id;\n\
753       inout[id].z = inout[id].y + inout[id].x;\n\
754    }\n";
755    auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
756                                          SHADER_ARG_INOUT);
757    const struct uchar3 expected[] = {
758       { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
759       { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
760       { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
761       { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
762    };
763    run_shader(kernel_source, inout.size(), 1, 1, inout);
764    for (int i = 0; i < inout.size(); ++i) {
765       EXPECT_EQ(inout[i].x, expected[i].x);
766       EXPECT_EQ(inout[i].y, expected[i].y);
767       EXPECT_EQ(inout[i].z, expected[i].z);
768    }
769 }
770 
TEST_F(ComputeTest,complex_types_constant_uchar3)771 TEST_F(ComputeTest, complex_types_constant_uchar3)
772 {
773    struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
774    const char *kernel_source =
775    "__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\
776    {\n\
777       uint id = get_global_id(0);\n\
778       out[id].x = in[id].x + id;\n\
779       out[id].y = in[id].y * id;\n\
780       out[id].z = out[id].y + out[id].x;\n\
781    }\n";
782    auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
783                                       SHADER_ARG_INPUT);
784    auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),
785                                       SHADER_ARG_OUTPUT);
786    const struct uchar3 expected[] = {
787       { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
788       { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
789       { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
790       { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
791    };
792    run_shader(kernel_source, out.size(), 1, 1, out, in);
793    for (int i = 0; i < out.size(); ++i) {
794       EXPECT_EQ(out[i].x, expected[i].x);
795       EXPECT_EQ(out[i].y, expected[i].y);
796       EXPECT_EQ(out[i].z, expected[i].z);
797    }
798 }
799 
TEST_F(ComputeTest,complex_types_global_uint8)800 TEST_F(ComputeTest, complex_types_global_uint8)
801 {
802    struct uint8 {
803       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
804       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
805    };
806    const char *kernel_source =
807    "__kernel void main_test(__global uint8 *inout)\n\
808    {\n\
809       uint id = get_global_id(0);\n\
810       inout[id].s01234567 = inout[id].s01234567 * 2;\n\
811    }\n";
812    auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
813                                         SHADER_ARG_INOUT);
814    const struct uint8 expected[] = {
815       { 2, 4, 6, 8, 10, 12, 14, 16 }
816    };
817    run_shader(kernel_source, inout.size(), 1, 1, inout);
818    for (int i = 0; i < inout.size(); ++i) {
819       EXPECT_EQ(inout[i].s0, expected[i].s0);
820       EXPECT_EQ(inout[i].s1, expected[i].s1);
821       EXPECT_EQ(inout[i].s2, expected[i].s2);
822       EXPECT_EQ(inout[i].s3, expected[i].s3);
823       EXPECT_EQ(inout[i].s4, expected[i].s4);
824       EXPECT_EQ(inout[i].s5, expected[i].s5);
825       EXPECT_EQ(inout[i].s6, expected[i].s6);
826       EXPECT_EQ(inout[i].s7, expected[i].s7);
827    }
828 }
829 
TEST_F(ComputeTest,complex_types_local_ulong16)830 TEST_F(ComputeTest, complex_types_local_ulong16)
831 {
832    struct ulong16 {
833       uint64_t values[16];
834    };
835    const char *kernel_source =
836    R"(__kernel void main_test(__global ulong16 *inout)
837    {
838       __local ulong16 local_array[2];
839       uint id = get_global_id(0);
840       local_array[id] = inout[id];
841       barrier(CLK_LOCAL_MEM_FENCE);
842       inout[id] = local_array[0] * 2;
843    })";
844    auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
845                                         SHADER_ARG_INOUT);
846    const struct ulong16 expected[] = {
847       { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
848    };
849    run_shader(kernel_source, inout.size(), 1, 1, inout);
850    for (int i = 0; i < inout.size(); ++i) {
851       for (int j = 0; j < 16; ++j) {
852          EXPECT_EQ(inout[i].values[j], expected[i].values[j]);
853       }
854    }
855 }
856 
TEST_F(ComputeTest,complex_types_constant_uint8)857 TEST_F(ComputeTest, complex_types_constant_uint8)
858 {
859    struct uint8 {
860       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
861       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
862    };
863    const char *kernel_source =
864    "__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\
865    {\n\
866       uint id = get_global_id(0);\n\
867       out[id].s01234567 = in[id].s01234567 * 2;\n\
868    }\n";
869    auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
870                                      SHADER_ARG_INPUT);
871    auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
872                                       SHADER_ARG_INOUT);
873    const struct uint8 expected[] = {
874       { 2, 4, 6, 8, 10, 12, 14, 16 }
875    };
876    run_shader(kernel_source, out.size(), 1, 1, out, in);
877    for (int i = 0; i < out.size(); ++i) {
878       EXPECT_EQ(out[i].s0, expected[i].s0);
879       EXPECT_EQ(out[i].s1, expected[i].s1);
880       EXPECT_EQ(out[i].s2, expected[i].s2);
881       EXPECT_EQ(out[i].s3, expected[i].s3);
882       EXPECT_EQ(out[i].s4, expected[i].s4);
883       EXPECT_EQ(out[i].s5, expected[i].s5);
884       EXPECT_EQ(out[i].s6, expected[i].s6);
885       EXPECT_EQ(out[i].s7, expected[i].s7);
886    }
887 }
888 
TEST_F(ComputeTest,complex_types_const_array)889 TEST_F(ComputeTest, complex_types_const_array)
890 {
891    const char *kernel_source =
892    "__kernel void main_test(__global uint *output)\n\
893    {\n\
894        const uint foo[] = { 100, 101, 102, 103 };\n\
895        output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\
896    }\n";
897    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
898                                      SHADER_ARG_OUTPUT);
899    const uint32_t expected[] = {
900       100, 101, 102, 103
901    };
902    run_shader(kernel_source, output.size(), 1, 1, output);
903    for (int i = 0; i < output.size(); ++i)
904       EXPECT_EQ(output[i], expected[i]);
905 }
906 
TEST_F(ComputeTest,mem_access_load_store_ordering)907 TEST_F(ComputeTest, mem_access_load_store_ordering)
908 {
909    const char *kernel_source =
910    "__kernel void main_test(__global uint *output)\n\
911    {\n\
912        uint foo[4];\n\
913        foo[0] = 0x11111111;\n\
914        foo[1] = 0x22222222;\n\
915        foo[2] = 0x44444444;\n\
916        foo[3] = 0x88888888;\n\
917        foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\
918        foo[0] += get_global_id(0); // foo[0] = tid\n\
919        foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\
920        output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\
921    }\n";
922    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
923                                      SHADER_ARG_OUTPUT);
924    const uint16_t expected[] = {
925       0, 1, 2, 3
926    };
927    run_shader(kernel_source, output.size(), 1, 1, output);
928    for (int i = 0; i < output.size(); ++i)
929       EXPECT_EQ(output[i], expected[i]);
930 }
931 
TEST_F(ComputeTest,two_const_arrays)932 TEST_F(ComputeTest, two_const_arrays)
933 {
934    const char *kernel_source =
935    "__kernel void main_test(__global uint *output)\n\
936    {\n\
937       uint id = get_global_id(0);\n\
938       uint foo[4] = {100, 101, 102, 103};\n\
939       uint bar[4] = {1, 2, 3, 4};\n\
940       output[id] = foo[id] * bar[id];\n\
941    }\n";
942    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
943                                      SHADER_ARG_OUTPUT);
944    const uint32_t expected[] = {
945       100, 202, 306, 412
946    };
947    run_shader(kernel_source, output.size(), 1, 1, output);
948    for (int i = 0; i < output.size(); ++i)
949       EXPECT_EQ(output[i], expected[i]);
950 }
951 
TEST_F(ComputeTest,imod_pos)952 TEST_F(ComputeTest, imod_pos)
953 {
954    const char *kernel_source =
955    "__kernel void main_test(__global int *inout)\n\
956    {\n\
957        inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\
958    }\n";
959    auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
960                                    SHADER_ARG_INOUT);
961    const int32_t expected[] = {
962       -1, 0, -2, -1,  0, 1, 2, 0, 1
963    };
964    run_shader(kernel_source, inout.size(), 1, 1, inout);
965    for (int i = 0; i < inout.size(); ++i)
966       EXPECT_EQ(inout[i], expected[i]);
967 }
968 
TEST_F(ComputeTest,imod_neg)969 TEST_F(ComputeTest, imod_neg)
970 {
971    const char *kernel_source =
972    "__kernel void main_test(__global int *inout)\n\
973    {\n\
974        inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\
975    }\n";
976    auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
977                                    SHADER_ARG_INOUT);
978    const int32_t expected[] = {
979       -1, 0, -2, -1,  0, 1, 2, 0, 1
980    };
981    run_shader(kernel_source, inout.size(), 1, 1, inout);
982    for (int i = 0; i < inout.size(); ++i)
983       EXPECT_EQ(inout[i], expected[i]);
984 }
985 
TEST_F(ComputeTest,umod)986 TEST_F(ComputeTest, umod)
987 {
988    const char *kernel_source =
989    "__kernel void main_test(__global uint *inout)\n\
990    {\n\
991        inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\
992    }\n";
993    auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },
994                                     SHADER_ARG_INOUT);
995    const uint32_t expected[] = {
996       0xfffffffa, 0xfffffffb, 0, 1, 2
997    };
998    run_shader(kernel_source, inout.size(), 1, 1, inout);
999    for (int i = 0; i < inout.size(); ++i)
1000       EXPECT_EQ(inout[i], expected[i]);
1001 }
1002 
TEST_F(ComputeTest,rotate)1003 TEST_F(ComputeTest, rotate)
1004 {
1005    const char *kernel_source =
1006    "__kernel void main_test(__global uint *inout)\n\
1007    {\n\
1008        inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\
1009    }\n";
1010    auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1011                                     SHADER_ARG_INOUT);
1012    const uint32_t expected[] = {
1013       0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea
1014    };
1015    run_shader(kernel_source, inout.size(), 1, 1, inout);
1016    for (int i = 0; i < inout.size(); ++i)
1017       EXPECT_EQ(inout[i], expected[i]);
1018 }
1019 
TEST_F(ComputeTest,popcount)1020 TEST_F(ComputeTest, popcount)
1021 {
1022    const char *kernel_source =
1023    "__kernel void main_test(__global uint *inout)\n\
1024    {\n\
1025        inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\
1026    }\n";
1027    auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },
1028                                     SHADER_ARG_INOUT);
1029    const uint32_t expected[] = {
1030       0, 1, 2, 2, 4, 32
1031    };
1032    run_shader(kernel_source, inout.size(), 1, 1, inout);
1033    for (int i = 0; i < inout.size(); ++i)
1034       EXPECT_EQ(inout[i], expected[i]);
1035 }
1036 
TEST_F(ComputeTest,hadd)1037 TEST_F(ComputeTest, hadd)
1038 {
1039    const char *kernel_source =
1040    "__kernel void main_test(__global uint *inout)\n\
1041    {\n\
1042        inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\
1043    }\n";
1044    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1045                                     SHADER_ARG_INOUT);
1046    const uint32_t expected[] = {
1047       (1u << 31) >> 1,
1048       ((1u << 31) + 1) >> 1,
1049       ((1u << 31) + 2) >> 1,
1050       ((1u << 31) + 3) >> 1,
1051       ((1ull << 31) + 0xfffffffc) >> 1,
1052       ((1ull << 31) + 0xfffffffd) >> 1,
1053       ((1ull << 31) + 0xfffffffe) >> 1,
1054       ((1ull << 31) + 0xffffffff) >> 1,
1055    };
1056    run_shader(kernel_source, inout.size(), 1, 1, inout);
1057    for (int i = 0; i < inout.size(); ++i)
1058       EXPECT_EQ(inout[i], expected[i]);
1059 }
1060 
TEST_F(ComputeTest,rhadd)1061 TEST_F(ComputeTest, rhadd)
1062 {
1063    const char *kernel_source =
1064    "__kernel void main_test(__global uint *inout)\n\
1065    {\n\
1066        inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\
1067    }\n";
1068    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1069                                     SHADER_ARG_INOUT);
1070    const uint32_t expected[] = {
1071       ((1u << 31) + 1) >> 1,
1072       ((1u << 31) + 2) >> 1,
1073       ((1u << 31) + 3) >> 1,
1074       ((1u << 31) + 4) >> 1,
1075       ((1ull << 31) + 0xfffffffd) >> 1,
1076       ((1ull << 31) + 0xfffffffe) >> 1,
1077       ((1ull << 31) + 0xffffffff) >> 1,
1078       ((1ull << 31) + (1ull << 32)) >> 1,
1079    };
1080    run_shader(kernel_source, inout.size(), 1, 1, inout);
1081    for (int i = 0; i < inout.size(); ++i)
1082       EXPECT_EQ(inout[i], expected[i]);
1083 }
1084 
TEST_F(ComputeTest,add_sat)1085 TEST_F(ComputeTest, add_sat)
1086 {
1087    const char *kernel_source =
1088    "__kernel void main_test(__global uint *inout)\n\
1089    {\n\
1090        inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\
1091    }\n";
1092    auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },
1093                                     SHADER_ARG_INOUT);
1094    const uint32_t expected[] = {
1095       0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff
1096    };
1097    run_shader(kernel_source, inout.size(), 1, 1, inout);
1098    for (int i = 0; i < inout.size(); ++i)
1099       EXPECT_EQ(inout[i], expected[i]);
1100 }
1101 
TEST_F(ComputeTest,sub_sat)1102 TEST_F(ComputeTest, sub_sat)
1103 {
1104    const char *kernel_source =
1105    "__kernel void main_test(__global uint *inout)\n\
1106    {\n\
1107        inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\
1108    }\n";
1109    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);
1110    const uint32_t expected[] = {
1111       0, 0, 0, 1
1112    };
1113    run_shader(kernel_source, inout.size(), 1, 1, inout);
1114    for (int i = 0; i < inout.size(); ++i)
1115       EXPECT_EQ(inout[i], expected[i]);
1116 }
1117 
TEST_F(ComputeTest,mul_hi)1118 TEST_F(ComputeTest, mul_hi)
1119 {
1120    const char *kernel_source =
1121    "__kernel void main_test(__global uint *inout)\n\
1122    {\n\
1123        inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\
1124    }\n";
1125    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);
1126    const uint32_t expected[] = {
1127       0, 0, 1, 1, (1u << 30)
1128    };
1129    run_shader(kernel_source, inout.size(), 1, 1, inout);
1130    for (int i = 0; i < inout.size(); ++i)
1131       EXPECT_EQ(inout[i], expected[i]);
1132 }
1133 
TEST_F(ComputeTest,ldexp_x)1134 TEST_F(ComputeTest, ldexp_x)
1135 {
1136    const char *kernel_source =
1137    "__kernel void main_test(__global float *inout)\n\
1138    {\n\
1139        inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\
1140    }\n";
1141    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);
1142    const float expected[] = {
1143       ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)
1144    };
1145    run_shader(kernel_source, inout.size(), 1, 1, inout);
1146    for (int i = 0; i < inout.size(); ++i)
1147       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1148 }
1149 
TEST_F(ComputeTest,ldexp_y)1150 TEST_F(ComputeTest, ldexp_y)
1151 {
1152    const char *kernel_source =
1153    "__kernel void main_test(__global float *inout)\n\
1154    {\n\
1155        inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\
1156    }\n";
1157    auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);
1158    const float expected[] = {
1159       ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)
1160    };
1161    run_shader(kernel_source, inout.size(), 1, 1, inout);
1162    for (int i = 0; i < inout.size(); ++i)
1163       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1164 }
1165 
TEST_F(ComputeTest,frexp_ret)1166 TEST_F(ComputeTest, frexp_ret)
1167 {
1168    const char *kernel_source =
1169    "__kernel void main_test(__global float *inout)\n\
1170    {\n\
1171        int exp;\n\
1172        inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\
1173    }\n";
1174    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1175    const float expected[] = {
1176       0.0f, 0.5f, 0.5f, 0.75f
1177    };
1178    run_shader(kernel_source, inout.size(), 1, 1, inout);
1179    for (int i = 0; i < inout.size(); ++i)
1180       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1181 }
1182 
TEST_F(ComputeTest,frexp_exp)1183 TEST_F(ComputeTest, frexp_exp)
1184 {
1185    const char *kernel_source =
1186    "__kernel void main_test(__global float *inout)\n\
1187    {\n\
1188        int exp;\n\
1189        frexp(inout[get_global_id(0)], &exp);\n\
1190        inout[get_global_id(0)] = (float)exp;\n\
1191    }\n";
1192    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1193    const float expected[] = {
1194       0.0f, 0.0f, 1.0f, 2.0f
1195    };
1196    run_shader(kernel_source, inout.size(), 1, 1, inout);
1197    for (int i = 0; i < inout.size(); ++i)
1198       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1199 }
1200 
TEST_F(ComputeTest,clz)1201 TEST_F(ComputeTest, clz)
1202 {
1203    const char *kernel_source =
1204    "__kernel void main_test(__global uint *inout)\n\
1205    {\n\
1206        inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\
1207    }\n";
1208    auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff,  (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);
1209    const uint32_t expected[] = {
1210       32, 31, 16, 1, 0
1211    };
1212    run_shader(kernel_source, inout.size(), 1, 1, inout);
1213    for (int i = 0; i < inout.size(); ++i)
1214       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1215 }
1216 
TEST_F(ComputeTest,sin)1217 TEST_F(ComputeTest, sin)
1218 {
1219    struct sin_vals { float in; float clc; float native; };
1220    const char *kernel_source =
1221    "struct sin_vals { float in; float clc; float native; };\n\
1222    __kernel void main_test(__global struct sin_vals *inout)\n\
1223    {\n\
1224        inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\
1225        inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\
1226    }\n";
1227    const vector<sin_vals> input = {
1228       { 0.0f, 0.0f, 0.0f },
1229       { 1.0f, 0.0f, 0.0f },
1230       { 2.0f, 0.0f, 0.0f },
1231       { 3.0f, 0.0f, 0.0f },
1232    };
1233    auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);
1234    const struct sin_vals expected[] = {
1235       { 0.0f, 0.0f,       0.0f       },
1236       { 1.0f, sin(1.0f), sin(1.0f) },
1237       { 2.0f, sin(2.0f), sin(2.0f) },
1238       { 3.0f, sin(3.0f), sin(3.0f) },
1239    };
1240    run_shader(kernel_source, inout.size(), 1, 1, inout);
1241    for (int i = 0; i < inout.size(); ++i) {
1242       EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);
1243       EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);
1244       EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec
1245    }
1246 }
1247 
TEST_F(ComputeTest,cosh)1248 TEST_F(ComputeTest, cosh)
1249 {
1250    const char *kernel_source =
1251    "__kernel void main_test(__global float *inout)\n\
1252    {\n\
1253        inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\
1254    }\n";
1255    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1256    const float expected[] = {
1257       cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)
1258    };
1259    run_shader(kernel_source, inout.size(), 1, 1, inout);
1260    for (int i = 0; i < inout.size(); ++i)
1261       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1262 }
1263 
TEST_F(ComputeTest,exp)1264 TEST_F(ComputeTest, exp)
1265 {
1266    const char *kernel_source =
1267    "__kernel void main_test(__global float *inout)\n\
1268    {\n\
1269        inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\
1270    }\n";
1271    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1272    const float expected[] = {
1273       exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)
1274    };
1275    run_shader(kernel_source, inout.size(), 1, 1, inout);
1276    for (int i = 0; i < inout.size(); ++i)
1277       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1278 }
1279 
TEST_F(ComputeTest,exp10)1280 TEST_F(ComputeTest, exp10)
1281 {
1282    const char *kernel_source =
1283    "__kernel void main_test(__global float *inout)\n\
1284    {\n\
1285        inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\
1286    }\n";
1287    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1288    const float expected[] = {
1289       pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)
1290    };
1291    run_shader(kernel_source, inout.size(), 1, 1, inout);
1292    for (int i = 0; i < inout.size(); ++i)
1293       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1294 }
1295 
TEST_F(ComputeTest,exp2)1296 TEST_F(ComputeTest, exp2)
1297 {
1298    const char *kernel_source =
1299    "__kernel void main_test(__global float *inout)\n\
1300    {\n\
1301        inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\
1302    }\n";
1303    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1304    const float expected[] = {
1305       pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)
1306    };
1307    run_shader(kernel_source, inout.size(), 1, 1, inout);
1308    for (int i = 0; i < inout.size(); ++i)
1309       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1310 }
1311 
TEST_F(ComputeTest,log)1312 TEST_F(ComputeTest, log)
1313 {
1314    const char *kernel_source =
1315    "__kernel void main_test(__global float *inout)\n\
1316    {\n\
1317        inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\
1318    }\n";
1319    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1320    const float expected[] = {
1321       log(0.0f), log(1.0f), log(2.0f), log(3.0f)
1322    };
1323    run_shader(kernel_source, inout.size(), 1, 1, inout);
1324    for (int i = 0; i < inout.size(); ++i)
1325       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1326 }
1327 
TEST_F(ComputeTest,log10)1328 TEST_F(ComputeTest, log10)
1329 {
1330    const char *kernel_source =
1331    "__kernel void main_test(__global float *inout)\n\
1332    {\n\
1333        inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\
1334    }\n";
1335    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1336    const float expected[] = {
1337       log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)
1338    };
1339    run_shader(kernel_source, inout.size(), 1, 1, inout);
1340    for (int i = 0; i < inout.size(); ++i)
1341       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1342 }
1343 
TEST_F(ComputeTest,log2)1344 TEST_F(ComputeTest, log2)
1345 {
1346    const char *kernel_source =
1347    "__kernel void main_test(__global float *inout)\n\
1348    {\n\
1349        inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\
1350    }\n";
1351    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1352    const float expected[] = {
1353       log(0.0f) / log(2.0f), log(1.0f) / log(2.0f), log(2.0f) / log(2.0f), log(3.0f) / log(2.0f)
1354    };
1355    run_shader(kernel_source, inout.size(), 1, 1, inout);
1356    for (int i = 0; i < inout.size(); ++i)
1357       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1358 }
1359 
TEST_F(ComputeTest,rint)1360 TEST_F(ComputeTest, rint)
1361 {
1362    const char *kernel_source =
1363    "__kernel void main_test(__global float *inout)\n\
1364    {\n\
1365       inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\
1366    }\n";
1367 
1368    auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);
1369    const float expected[] = {
1370       0.0f, 2.0f, 0.0f, -2.0f, 1.0f,
1371    };
1372    run_shader(kernel_source, inout.size(), 1, 1, inout);
1373    for (int i = 0; i < inout.size(); ++i)
1374       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1375 }
1376 
TEST_F(ComputeTest,round)1377 TEST_F(ComputeTest, round)
1378 {
1379    const char *kernel_source =
1380    "__kernel void main_test(__global float *inout)\n\
1381    {\n\
1382        inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\
1383    }\n";
1384    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1385                                  SHADER_ARG_INOUT);
1386    const float expected[] = {
1387       0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f
1388    };
1389    run_shader(kernel_source, inout.size(), 1, 1, inout);
1390    for (int i = 0; i < inout.size(); ++i)
1391       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1392 }
1393 
TEST_F(ComputeTest,arg_by_val)1394 TEST_F(ComputeTest, arg_by_val)
1395 {
1396    const char *kernel_source =
1397    "__kernel void main_test(__global float *inout, float mul)\n\
1398    {\n\
1399        inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1400    }\n";
1401    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1402                                  SHADER_ARG_INOUT);
1403    auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1404    const float expected[] = {
1405       0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1406    };
1407    run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1408    for (int i = 0; i < inout.size(); ++i)
1409       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1410 }
1411 
TEST_F(ComputeTest,uint8_by_val)1412 TEST_F(ComputeTest, uint8_by_val)
1413 {
1414    struct uint8 {
1415       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
1416       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
1417    };
1418    const char *kernel_source =
1419    "__kernel void main_test(__global uint *out, uint8 val)\n\
1420    {\n\
1421        out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\
1422                                val.s4 + val.s5 + val.s6 + val.s7;\n\
1423    }\n";
1424    auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);
1425    auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);
1426    const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };
1427    run_shader(kernel_source, out.size(), 1, 1, out, val);
1428    for (int i = 0; i < out.size(); ++i)
1429       EXPECT_EQ(out[i], expected[i]);
1430 }
1431 
TEST_F(ComputeTest,link)1432 TEST_F(ComputeTest, link)
1433 {
1434    const char *foo_src =
1435    "float foo(float in)\n\
1436    {\n\
1437        return in * in;\n\
1438    }\n";
1439    const char *kernel_source =
1440    "float foo(float in);\n\
1441    __kernel void main_test(__global float *inout)\n\
1442    {\n\
1443        inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1444    }\n";
1445    std::vector<const char *> srcs = { foo_src, kernel_source };
1446    auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1447    const float expected[] = {
1448       4.0f,
1449    };
1450    run_shader(srcs, inout.size(), 1, 1, inout);
1451    for (int i = 0; i < inout.size(); ++i)
1452       EXPECT_EQ(inout[i], expected[i]);
1453 }
1454 
TEST_F(ComputeTest,link_library)1455 TEST_F(ComputeTest, link_library)
1456 {
1457    const char *bar_src =
1458    "float bar(float in)\n\
1459    {\n\
1460       return in * 5;\n\
1461    }\n";
1462    const char *foo_src =
1463    "float bar(float in);\n\
1464    float foo(float in)\n\
1465    {\n\
1466        return in * bar(in);\n\
1467    }\n";
1468    const char *kernel_source =
1469    "float foo(float in);\n\
1470    __kernel void main_test(__global float *inout)\n\
1471    {\n\
1472        inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1473    }\n";
1474    std::vector<Shader> libraries = {
1475       compile({ bar_src, kernel_source }, {}, true),
1476       compile({ foo_src }, {}, true)
1477    };
1478    Shader exe = link(libraries);
1479    auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1480    const float expected[] = {
1481       20.0f,
1482    };
1483    run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);
1484    for (int i = 0; i < inout.size(); ++i)
1485       EXPECT_EQ(inout[i], expected[i]);
1486 }
1487 
TEST_F(ComputeTest,localvar)1488 TEST_F(ComputeTest, localvar)
1489 {
1490    const char *kernel_source =
1491    "__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\
1492    void main_test(__global float *inout)\n\
1493    {\n\
1494       __local float2 tmp[2];\n\
1495       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1496       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1497       barrier(CLK_LOCAL_MEM_FENCE);\n\
1498       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1499    }\n";
1500 
1501    auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);
1502    const float expected[] = {
1503       9.0f, 5.0f
1504    };
1505    run_shader(kernel_source, inout.size(), 1, 1, inout);
1506    for (int i = 0; i < inout.size(); ++i)
1507       EXPECT_EQ(inout[i], expected[i]);
1508 }
1509 
TEST_F(ComputeTest,localvar_uchar2)1510 TEST_F(ComputeTest, localvar_uchar2)
1511 {
1512    const char *kernel_source =
1513    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1514    __kernel void main_test(__global uchar *inout)\n\
1515    {\n\
1516       __local uchar2 tmp[2];\n\
1517       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1518       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1519       barrier(CLK_LOCAL_MEM_FENCE);\n\
1520       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1521    }\n";
1522 
1523    auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);
1524    const uint8_t expected[] = { 9, 5 };
1525    run_shader(kernel_source, inout.size(), 1, 1, inout);
1526    for (int i = 0; i < inout.size(); ++i)
1527       EXPECT_EQ(inout[i], expected[i]);
1528 }
1529 
TEST_F(ComputeTest,work_group_size_hint)1530 TEST_F(ComputeTest, work_group_size_hint)
1531 {
1532    const char *kernel_source =
1533    "__attribute__((work_group_size_hint(2, 1, 1)))\n\
1534    __kernel void main_test(__global uint *output)\n\
1535    {\n\
1536        output[get_global_id(0)] = get_local_id(0);\n\
1537    }\n";
1538    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1539                                      SHADER_ARG_OUTPUT);
1540    const uint32_t expected[] = {
1541       0, 1, 2, 3
1542    };
1543    run_shader(kernel_source, output.size(), 1, 1, output);
1544    for (int i = 0; i < output.size(); ++i)
1545       EXPECT_EQ(output[i], expected[i]);
1546 }
1547 
TEST_F(ComputeTest,reqd_work_group_size)1548 TEST_F(ComputeTest, reqd_work_group_size)
1549 {
1550    const char *kernel_source =
1551    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1552    __kernel void main_test(__global uint *output)\n\
1553    {\n\
1554        output[get_global_id(0)] = get_local_id(0);\n\
1555    }\n";
1556    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1557                                      SHADER_ARG_OUTPUT);
1558    const uint32_t expected[] = {
1559       0, 1, 0, 1
1560    };
1561    run_shader(kernel_source, output.size(), 1, 1, output);
1562    for (int i = 0; i < output.size(); ++i)
1563       EXPECT_EQ(output[i], expected[i]);
1564 }
1565 
TEST_F(ComputeTest,image)1566 TEST_F(ComputeTest, image)
1567 {
1568    const char* kernel_source =
1569    "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1570    {\n\
1571       int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\
1572       write_imagef(output, coords, read_imagef(input, coords));\n\
1573    }\n";
1574    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1575    validate(shader);
1576 }
1577 
TEST_F(ComputeTest,image_two_reads)1578 TEST_F(ComputeTest, image_two_reads)
1579 {
1580    // Note: unnecessary control flow is present so that nir_opt_dead_cf kicks in, causing
1581    // nir_rematerialize_derefs_in_use_blocks to run. The duplicated uses ensure that the
1582    // per-var-deref processing works correctly.
1583    const char* kernel_source =
1584    R"(__kernel void main_test(image2d_t image, int is_float, __global float* output)
1585    {
1586       int x = get_global_id(0);
1587       if (is_float)
1588          x = get_global_id(0);
1589       if (is_float)
1590          output[x] = read_imagef(image, (int2)(0, 0)).x;
1591       else
1592          output[x] = (float)read_imagei(image, (int2)(0, 0)).x;
1593       if (is_float)
1594          output[x] = read_imagef(image, (int2)(0, 0)).x;
1595       else
1596          output[x] = (float)read_imagei(image, (int2)(0, 0)).x;
1597    })";
1598    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1599    validate(shader);
1600 }
1601 
TEST_F(ComputeTest,image_unused)1602 TEST_F(ComputeTest, image_unused)
1603 {
1604    const char* kernel_source =
1605    "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1606    {\n\
1607    }\n";
1608    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1609    validate(shader);
1610 }
1611 
TEST_F(ComputeTest,image_read_write)1612 TEST_F(ComputeTest, image_read_write)
1613 {
1614    const char *kernel_source =
1615    R"(__kernel void main_test(read_write image2d_t image)
1616    {
1617       int2 coords = (int2)(get_global_id(0), get_global_id(1));
1618       write_imagef(image, coords, read_imagef(image, coords) + (float4)(1.0f, 1.0f, 1.0f, 1.0f));
1619    })";
1620    Shader shader = compile(std::vector<const char*>({ kernel_source }), { "-cl-std=cl3.0" });
1621    validate(shader);
1622 }
1623 
TEST_F(ComputeTest,sampler)1624 TEST_F(ComputeTest, sampler)
1625 {
1626    const char* kernel_source =
1627    "__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\
1628    {\n\
1629       output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\
1630    }\n";
1631    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1632    validate(shader);
1633 }
1634 
TEST_F(ComputeTest,image_dims)1635 TEST_F(ComputeTest, image_dims)
1636 {
1637    const char* kernel_source =
1638    "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1639    {\n\
1640       output[get_global_id(0)] = get_image_width(roimage);\n\
1641       output[get_global_id(0) + 1] = get_image_width(woimage);\n\
1642    }\n";
1643    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1644    validate(shader);
1645 }
1646 
TEST_F(ComputeTest,image_format)1647 TEST_F(ComputeTest, image_format)
1648 {
1649    const char* kernel_source =
1650    "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1651    {\n\
1652       output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\
1653       output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\
1654    }\n";
1655    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1656    validate(shader);
1657 }
1658 
TEST_F(ComputeTest,image1d_buffer_t)1659 TEST_F(ComputeTest, image1d_buffer_t)
1660 {
1661    const char* kernel_source =
1662    "__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\
1663    {\n\
1664       write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\
1665    }\n";
1666    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1667    validate(shader);
1668 }
1669 
TEST_F(ComputeTest,local_ptr)1670 TEST_F(ComputeTest, local_ptr)
1671 {
1672    struct uint2 { uint32_t x, y; };
1673    const char *kernel_source =
1674    "__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\
1675    {\n\
1676       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1677       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1678       barrier(CLK_LOCAL_MEM_FENCE);\n\
1679       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1680    }\n";
1681    auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1682    auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);
1683    const uint8_t expected[] = { 9, 5 };
1684    run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);
1685    for (int i = 0; i < inout.size(); ++i)
1686       EXPECT_EQ(inout[i], expected[i]);
1687 }
1688 
TEST_F(ComputeTest,two_local_ptrs)1689 TEST_F(ComputeTest, two_local_ptrs)
1690 {
1691    struct uint2 { uint32_t x, y; };
1692    const char *kernel_source =
1693    "__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\
1694    {\n\
1695       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1696       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1697       tmp2[get_local_id(0)] = get_global_id(0);\n\
1698       barrier(CLK_LOCAL_MEM_FENCE);\n\
1699       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\
1700    }\n";
1701    auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1702    auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);
1703    auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);
1704    const uint8_t expected[] = { 9, 6 };
1705    run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);
1706    for (int i = 0; i < inout.size(); ++i)
1707       EXPECT_EQ(inout[i], expected[i]);
1708 }
1709 
TEST_F(ComputeTest,int8_to_float)1710 TEST_F(ComputeTest, int8_to_float)
1711 {
1712    const char *kernel_source =
1713    "__kernel void main_test(__global char* in, __global float* out)\n\
1714    {\n\
1715       uint pos = get_global_id(0);\n\
1716       out[pos] = in[pos] / 100.0f;\n\
1717    }";
1718    auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);
1719    auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);
1720    const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };
1721    run_shader(kernel_source, in.size(), 1, 1, in, out);
1722    for (int i = 0; i < in.size(); ++i)
1723       EXPECT_FLOAT_EQ(out[i], expected[i]);
1724 }
1725 
TEST_F(ComputeTest,vec_hint_float4)1726 TEST_F(ComputeTest, vec_hint_float4)
1727 {
1728    const char *kernel_source =
1729    "__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\
1730    {\n\
1731       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1732    }";
1733    Shader shader = compile({ kernel_source });
1734    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4);
1735    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
1736 }
1737 
TEST_F(ComputeTest,vec_hint_uchar2)1738 TEST_F(ComputeTest, vec_hint_uchar2)
1739 {
1740    const char *kernel_source =
1741    "__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\
1742    {\n\
1743       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1744    }";
1745    Shader shader = compile({ kernel_source });
1746    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2);
1747    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
1748 }
1749 
TEST_F(ComputeTest,vec_hint_none)1750 TEST_F(ComputeTest, vec_hint_none)
1751 {
1752    const char *kernel_source =
1753    "__kernel void main_test(__global float *inout)\n\
1754    {\n\
1755       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1756    }";
1757    Shader shader = compile({ kernel_source });
1758    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0);
1759 }
1760 
TEST_F(ComputeTest,DISABLED_debug_layer_failure)1761 TEST_F(ComputeTest, DISABLED_debug_layer_failure)
1762 {
1763    /* This is a negative test case, it intentionally triggers a failure to validate the mechanism
1764     * is in place, so other tests will fail if they produce debug messages
1765     */
1766    const char *kernel_source =
1767    "__kernel void main_test(__global float *inout, float mul)\n\
1768    {\n\
1769        inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1770    }\n";
1771    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1772                                  SHADER_ARG_INOUT);
1773    auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1774    const float expected[] = {
1775       0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1776    };
1777    ComPtr<ID3D12InfoQueue> info_queue;
1778    dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
1779    if (!info_queue) {
1780       GTEST_SKIP() << "No info queue";
1781       return;
1782    }
1783 
1784    info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");
1785    run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1786    for (int i = 0; i < inout.size(); ++i)
1787       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1788 }
1789 
TEST_F(ComputeTest,compiler_defines)1790 TEST_F(ComputeTest, compiler_defines)
1791 {
1792    const char *kernel_source =
1793       "__kernel void main_test(__global int* out)\n\
1794    {\n\
1795       out[0] = OUT_VAL0;\n\
1796       out[1] = __OPENCL_C_VERSION__;\n\
1797    }";
1798    auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);
1799    CompileArgs compile_args = { 1, 1, 1 };
1800    compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };
1801    std::vector<RawShaderArg *> raw_args = { &out };
1802    run_shader({ kernel_source }, compile_args, out);
1803    EXPECT_EQ(out[0], 5);
1804    EXPECT_EQ(out[1], 100);
1805 }
1806 
TEST_F(ComputeTest,global_atomic_add)1807 TEST_F(ComputeTest, global_atomic_add)
1808 {
1809    const char *kernel_source =
1810    "__kernel void main_test(__global int *inout, __global int *old)\n\
1811    {\n\
1812       old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\
1813    }\n";
1814    auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1815    auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);
1816    const int32_t expected_inout[] = { 5, 7 };
1817    const int32_t expected_old[] = { 2, 4 };
1818    run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1819    for (int i = 0; i < inout.size(); ++i) {
1820       EXPECT_EQ(inout[i], expected_inout[i]);
1821       EXPECT_EQ(old[i], expected_old[i]);
1822    }
1823 }
1824 
TEST_F(ComputeTest,global_atomic_imin)1825 TEST_F(ComputeTest, global_atomic_imin)
1826 {
1827    const char *kernel_source =
1828    "__kernel void main_test(__global int *inout, __global int *old)\n\
1829    {\n\
1830       old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\
1831    }\n";
1832    auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);
1833    auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);
1834    const int32_t expected_inout[] = { 0, 1, -1 };
1835    const int32_t expected_old[] = { 0, 2, -1 };
1836    run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1837    for (int i = 0; i < inout.size(); ++i) {
1838       EXPECT_EQ(inout[i], expected_inout[i]);
1839       EXPECT_EQ(old[i], expected_old[i]);
1840    }
1841 }
1842 
TEST_F(ComputeTest,global_atomic_and_or)1843 TEST_F(ComputeTest, global_atomic_and_or)
1844 {
1845    const char *kernel_source =
1846    "__attribute__((reqd_work_group_size(3, 1, 1)))\n\
1847    __kernel void main_test(__global int *inout)\n\
1848    {\n\
1849       atomic_and(inout, ~(1 << get_global_id(0)));\n\
1850       atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\
1851    }\n";
1852    auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);
1853    const int32_t expected[] = { 0x78 };
1854    run_shader(kernel_source, 3, 1, 1, inout);
1855    for (int i = 0; i < inout.size(); ++i)
1856       EXPECT_EQ(inout[i], expected[i]);
1857 }
1858 
TEST_F(ComputeTest,global_atomic_cmpxchg)1859 TEST_F(ComputeTest, global_atomic_cmpxchg)
1860 {
1861    const char *kernel_source =
1862    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1863    __kernel void main_test(__global int *inout)\n\
1864    {\n\
1865       while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1866          ;\n\
1867    }\n";
1868    auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);
1869    const int32_t expected_inout[] = { 2 };
1870    run_shader(kernel_source, 2, 1, 1, inout);
1871    for (int i = 0; i < inout.size(); ++i)
1872       EXPECT_EQ(inout[i], expected_inout[i]);
1873 }
1874 
TEST_F(ComputeTest,local_atomic_and_or)1875 TEST_F(ComputeTest, local_atomic_and_or)
1876 {
1877    const char *kernel_source =
1878    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1879    __kernel void main_test(__global ushort *inout)\n\
1880    {\n\
1881       __local ushort tmp;\n\
1882       atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\
1883       atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\
1884       barrier(CLK_LOCAL_MEM_FENCE);\n\
1885       inout[get_global_id(0)] = tmp;\n\
1886    }\n";
1887    auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);
1888    const uint16_t expected[] = { 0x402, 0x402 };
1889    run_shader(kernel_source, inout.size(), 1, 1, inout);
1890    for (int i = 0; i < inout.size(); ++i)
1891       EXPECT_EQ(inout[i], expected[i]);
1892 }
1893 
TEST_F(ComputeTest,local_atomic_cmpxchg)1894 TEST_F(ComputeTest, local_atomic_cmpxchg)
1895 {
1896    const char *kernel_source =
1897    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1898    __kernel void main_test(__global int *out)\n\
1899    {\n\
1900       __local uint tmp;\n\
1901       tmp = 0;\n\
1902       barrier(CLK_LOCAL_MEM_FENCE);\n\
1903       while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1904          ;\n\
1905       barrier(CLK_LOCAL_MEM_FENCE);\n\
1906       out[0] = tmp;\n\
1907    }\n";
1908 
1909    auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);
1910    const uint16_t expected[] = { 2 };
1911    run_shader(kernel_source, 2, 1, 1, out);
1912    for (int i = 0; i < out.size(); ++i)
1913       EXPECT_EQ(out[i], expected[i]);
1914 }
1915 
TEST_F(ComputeTest,constant_sampler)1916 TEST_F(ComputeTest, constant_sampler)
1917 {
1918    const char* kernel_source =
1919    "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\
1920    __kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1921    {\n\
1922       int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\
1923       float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\
1924       write_imagef(output, coordsi, \n\
1925          read_imagef(input, sampler, coordsf) + \n\
1926          read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\
1927    }\n";
1928    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1929    validate(shader);
1930    EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);
1931 }
1932 
TEST_F(ComputeTest,hi)1933 TEST_F(ComputeTest, hi)
1934 {
1935    const char *kernel_source = R"(
1936    __kernel void main_test(__global char3 *srcA, __global char2 *dst)
1937    {
1938        int  tid = get_global_id(0);
1939 
1940        char2 tmp = srcA[tid].hi;
1941        dst[tid] = tmp;
1942    })";
1943    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1944    validate(shader);
1945 }
1946 
TEST_F(ComputeTest,system_values)1947 TEST_F(ComputeTest, system_values)
1948 {
1949    const char *kernel_source =
1950    "__kernel void main_test(__global uint* outputs)\n\
1951    {\n\
1952       outputs[0] = get_work_dim();\n\
1953       outputs[1] = get_global_size(0);\n\
1954       outputs[2] = get_local_size(0);\n\
1955       outputs[3] = get_num_groups(0);\n\
1956       outputs[4] = get_group_id(0);\n\
1957       outputs[5] = get_global_offset(0);\n\
1958       outputs[6] = get_global_id(0);\n\
1959    }\n";
1960    auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);
1961    const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };
1962    CompileArgs args = { 1, 1, 1 };
1963    Shader shader = compile({ kernel_source });
1964    run_shader(shader, args, out);
1965    for (int i = 0; i < out.size(); ++i)
1966       EXPECT_EQ(out[i], expected[i]);
1967 
1968    args.work_props.work_dim = 2;
1969    args.work_props.global_offset_x = 100;
1970    args.work_props.group_id_offset_x = 2;
1971    args.work_props.group_count_total_x = 5;
1972    const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };
1973    run_shader(shader, args, out);
1974    for (int i = 0; i < out.size(); ++i)
1975       EXPECT_EQ(out[i], expected_withoffsets[i]);
1976 }
1977 
TEST_F(ComputeTest,convert_round_sat)1978 TEST_F(ComputeTest, convert_round_sat)
1979 {
1980    const char *kernel_source =
1981    "__kernel void main_test(__global float *f, __global uchar *u)\n\
1982    {\n\
1983        uint idx = get_global_id(0);\n\
1984        u[idx] = convert_uchar_sat_rtp(f[idx]);\n\
1985    }\n";
1986    auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);
1987    auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);
1988    const uint8_t expected[] = {
1989       0, 2, 20, 255
1990    };
1991 
1992    run_shader(kernel_source, f.size(), 1, 1, f, u);
1993    for (int i = 0; i < u.size(); ++i)
1994       EXPECT_EQ(u[i], expected[i]);
1995 }
1996 
TEST_F(ComputeTest,convert_round_sat_vec)1997 TEST_F(ComputeTest, convert_round_sat_vec)
1998 {
1999    const char *kernel_source =
2000    "__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\
2001    {\n\
2002        uint idx = get_global_id(0);\n\
2003        u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\
2004    }\n";
2005    auto f = ShaderArg<float>({
2006       -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
2007       -0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
2008        0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
2009       -0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
2010    }, SHADER_ARG_INPUT);
2011    auto u = ShaderArg<uint8_t>({
2012       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
2013       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
2014       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
2015       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
2016    }, SHADER_ARG_OUTPUT);
2017    const uint8_t expected[] = {
2018       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
2019       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
2020       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
2021       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
2022    };
2023 
2024    run_shader(kernel_source, 4, 1, 1, f, u);
2025    for (int i = 0; i < u.size(); ++i)
2026       EXPECT_EQ(u[i], expected[i]);
2027 }
2028 
TEST_F(ComputeTest,convert_char2_uchar2)2029 TEST_F(ComputeTest, convert_char2_uchar2)
2030 {
2031    const char *kernel_source =
2032    "__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\
2033    {\n\
2034       size_t i = get_global_id(0);\n\
2035       dest[i] = convert_uchar2_sat( src[i] );\n\
2036    }\n";
2037 
2038    auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);
2039    auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);
2040    const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };
2041    run_shader(kernel_source, 4, 1, 1, c, u);
2042    for (int i = 0; i < u.size(); i++)
2043       EXPECT_EQ(u[i], expected[i]);
2044 }
2045 
TEST_F(ComputeTest,async_copy)2046 TEST_F(ComputeTest, async_copy)
2047 {
2048    const char *kernel_source = R"(
2049    __kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )
2050    {
2051     int i;
2052     for(i=0; i<copiesPerWorkItem; i++)
2053         localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;
2054        barrier( CLK_LOCAL_MEM_FENCE );
2055        event_t event;
2056        event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );
2057        wait_group_events( 1, &event );
2058     for(i=0; i<copiesPerWorkItem; i++)
2059      dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];
2060    })";
2061    Shader shader = compile({ kernel_source });
2062    validate(shader);
2063 }
2064 
TEST_F(ComputeTest,packed_struct_global)2065 TEST_F(ComputeTest, packed_struct_global)
2066 {
2067 #pragma pack(push, 1)
2068    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2069 #pragma pack(pop)
2070 
2071    const char *kernel_source =
2072    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2073    __kernel void main_test(__global struct s *inout, global uint *size)\n\
2074    {\n\
2075        uint idx = get_global_id(0);\n\
2076        inout[idx].uc = idx + 1;\n\
2077        inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\
2078        inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\
2079        *size = sizeof(struct s);\n\
2080    }\n";
2081    auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2082    auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);
2083    const struct s expected[] = {
2084       { 1, 0xfbfcfdff12345678, 0xa112 }
2085    };
2086 
2087    run_shader(kernel_source, inout.size(), 1, 1, inout, size);
2088    for (int i = 0; i < inout.size(); ++i) {
2089       EXPECT_EQ(inout[i].uc, expected[i].uc);
2090       EXPECT_EQ(inout[i].ul, expected[i].ul);
2091       EXPECT_EQ(inout[i].us, expected[i].us);
2092    }
2093    EXPECT_EQ(size, sizeof(struct s));
2094 }
2095 
TEST_F(ComputeTest,packed_struct_arg)2096 TEST_F(ComputeTest, packed_struct_arg)
2097 {
2098 #pragma pack(push, 1)
2099    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2100 #pragma pack(pop)
2101 
2102    const char *kernel_source =
2103    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2104    __kernel void main_test(__global struct s *out, struct s in)\n\
2105    {\n\
2106        uint idx = get_global_id(0);\n\
2107        out[idx].uc = in.uc + 0x12;\n\
2108        out[idx].ul = in.ul + 0x123456789abcdef;\n\
2109        out[idx].us = in.us + 0x1234;\n\
2110    }\n";
2111    auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2112    auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2113    const struct s expected[] = {
2114       { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }
2115    };
2116 
2117    run_shader(kernel_source, out.size(), 1, 1, out, in);
2118    for (int i = 0; i < out.size(); ++i) {
2119       EXPECT_EQ(out[i].uc, expected[i].uc);
2120       EXPECT_EQ(out[i].ul, expected[i].ul);
2121       EXPECT_EQ(out[i].us, expected[i].us);
2122    }
2123 }
2124 
TEST_F(ComputeTest,packed_struct_local)2125 TEST_F(ComputeTest, packed_struct_local)
2126 {
2127 #pragma pack(push, 1)
2128    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2129 #pragma pack(pop)
2130 
2131    const char *kernel_source =
2132    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2133    __kernel void main_test(__global struct s *out, __constant struct s *in)\n\
2134    {\n\
2135        uint idx = get_global_id(0);\n\
2136        __local struct s tmp[2];\n\
2137        tmp[get_local_id(0)] = in[idx];\n\
2138        barrier(CLK_LOCAL_MEM_FENCE);\n\
2139        out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\
2140    }\n";
2141    auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);
2142    auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);
2143    const struct s expected[] = {
2144       { 0x12, 0x123456789abcdef, 0x1234 },
2145       { 1, 2, 3 },
2146    };
2147 
2148    run_shader(kernel_source, out.size(), 1, 1, out, in);
2149    for (int i = 0; i < out.size(); ++i) {
2150       EXPECT_EQ(out[i].uc, expected[i].uc);
2151       EXPECT_EQ(out[i].ul, expected[i].ul);
2152       EXPECT_EQ(out[i].us, expected[i].us);
2153    }
2154 }
2155 
TEST_F(ComputeTest,packed_struct_const)2156 TEST_F(ComputeTest, packed_struct_const)
2157 {
2158 #pragma pack(push, 1)
2159    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2160 #pragma pack(pop)
2161 
2162    const char *kernel_source =
2163    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2164    __kernel void main_test(__global struct s *out, struct s in)\n\
2165    {\n\
2166        __constant struct s base[] = {\n\
2167           {0x12, 0x123456789abcdef, 0x1234},\n\
2168           {0x11, 0x123456789abcdee, 0x1233},\n\
2169        };\n\
2170        uint idx = get_global_id(0);\n\
2171        out[idx].uc = base[idx % 2].uc + in.uc;\n\
2172        out[idx].ul = base[idx % 2].ul + in.ul;\n\
2173        out[idx].us = base[idx % 2].us + in.us;\n\
2174    }\n";
2175    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);
2176    auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2177    const struct s expected[] = {
2178       { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },
2179       { 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },
2180    };
2181 
2182    run_shader(kernel_source, out.size(), 1, 1, out, in);
2183    for (int i = 0; i < out.size(); ++i) {
2184       EXPECT_EQ(out[i].uc, expected[i].uc);
2185       EXPECT_EQ(out[i].ul, expected[i].ul);
2186       EXPECT_EQ(out[i].us, expected[i].us);
2187    }
2188 }
2189 
TEST_F(ComputeTest,printf)2190 TEST_F(ComputeTest, printf)
2191 {
2192    const char *kernel_source = R"(
2193    __kernel void main_test(__global float *src, __global uint *dest)
2194    {
2195       *dest = printf("%s: %f", "Test", src[0]);
2196    })";
2197 
2198    auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2199    auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);
2200    run_shader(kernel_source, 1, 1, 1, src, dest);
2201    EXPECT_EQ(dest[0], 0);
2202 }
2203 
TEST_F(ComputeTest,vload_half)2204 TEST_F(ComputeTest, vload_half)
2205 {
2206    const char *kernel_source = R"(
2207    __kernel void main_test(__global half *src, __global float4 *dest)
2208    {
2209       int offset = get_global_id(0);
2210       dest[offset] = vload_half4(offset, src);
2211    })";
2212    auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,
2213                                     0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);
2214    auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
2215                                   FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);
2216    run_shader(kernel_source, 2, 1, 1, src, dest);
2217    for (unsigned i = 0; i < 8; ++i)
2218       EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));
2219 }
2220 
TEST_F(ComputeTest,vstore_half)2221 TEST_F(ComputeTest, vstore_half)
2222 {
2223    const char *kernel_source = R"(
2224    __kernel void main_test(__global half *dst, __global float4 *src)
2225    {
2226       int offset = get_global_id(0);
2227       vstore_half4(src[offset], offset, dst);
2228    })";
2229    auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,
2230                                    0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);
2231    auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,
2232                                   5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);
2233    run_shader(kernel_source, 2, 1, 1, dest, src);
2234    const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,
2235                                  0x4500, 0x4600, 0x4700, 0x4800 };
2236    for (unsigned i = 0; i < 8; ++i)
2237       EXPECT_EQ(dest[i], expected[i]);
2238 }
2239 
TEST_F(ComputeTest,inline_function)2240 TEST_F(ComputeTest, inline_function)
2241 {
2242    const char *kernel_source = R"(
2243    inline float helper(float foo)
2244    {
2245       return foo * 2;
2246    }
2247 
2248    __kernel void main_test(__global float *dst, __global float *src)
2249    {
2250       *dst = helper(*src);
2251    })";
2252    auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);
2253    auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2254    run_shader(kernel_source, 1, 1, 1, dest, src);
2255    EXPECT_EQ(dest[0], 2.0f);
2256 }
2257 
TEST_F(ComputeTest,unused_arg)2258 TEST_F(ComputeTest, unused_arg)
2259 {
2260    const char *kernel_source = R"(
2261    __kernel void main_test(__global int *dst, __global int *unused, __global int *src)
2262    {
2263       int i = get_global_id(0);
2264       dst[i] = src[i];
2265    })";
2266    auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);
2267    auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
2268    auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);
2269    run_shader(kernel_source, 4, 1, 1, dest, unused, src);
2270    for (int i = 0; i < 4; ++i)
2271       EXPECT_EQ(dest[i], i + 1);
2272 }
2273 
TEST_F(ComputeTest,spec_constant)2274 TEST_F(ComputeTest, spec_constant)
2275 {
2276    const char *spirv_asm = R"(
2277                OpCapability Addresses
2278                OpCapability Kernel
2279                OpCapability Int64
2280           %1 = OpExtInstImport "OpenCL.std"
2281                OpMemoryModel Physical64 OpenCL
2282                OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId
2283           %4 = OpString "kernel_arg_type.main_test.uint*,"
2284                OpSource OpenCL_C 102000
2285                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
2286                OpName %output "output"
2287                OpName %entry "entry"
2288                OpName %output_addr "output.addr"
2289                OpName %id "id"
2290                OpName %call "call"
2291                OpName %conv "conv"
2292                OpName %idxprom "idxprom"
2293                OpName %arrayidx "arrayidx"
2294                OpName %add "add"
2295                OpName %mul "mul"
2296                OpName %idxprom1 "idxprom1"
2297                OpName %arrayidx2 "arrayidx2"
2298                OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
2299                OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
2300                OpDecorate %id Alignment 4
2301                OpDecorate %output_addr Alignment 8
2302                OpDecorate %uint_1 SpecId 1
2303       %ulong = OpTypeInt 64 0
2304        %uint = OpTypeInt 32 0
2305      %uint_1 = OpSpecConstant %uint 1
2306     %v3ulong = OpTypeVector %ulong 3
2307 %_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
2308        %void = OpTypeVoid
2309 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
2310          %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
2311 %_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
2312 %_ptr_Function_uint = OpTypePointer Function %uint
2313 %__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
2314           %2 = OpFunction %void DontInline %24
2315      %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint
2316       %entry = OpLabel
2317 %output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function
2318          %id = OpVariable %_ptr_Function_uint Function
2319                OpStore %output_addr %output Aligned 8
2320          %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32
2321        %call = OpCompositeExtract %ulong %27 0
2322        %conv = OpUConvert %uint %call
2323                OpStore %id %conv Aligned 4
2324          %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2325          %29 = OpLoad %uint %id Aligned 4
2326     %idxprom = OpUConvert %ulong %29
2327    %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom
2328          %30 = OpLoad %uint %arrayidx Aligned 4
2329          %31 = OpLoad %uint %id Aligned 4
2330         %add = OpIAdd %uint %31 %uint_1
2331         %mul = OpIMul %uint %30 %add
2332          %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2333          %33 = OpLoad %uint %id Aligned 4
2334    %idxprom1 = OpUConvert %ulong %33
2335   %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1
2336                OpStore %arrayidx2 %mul Aligned 4
2337                OpReturn
2338                OpFunctionEnd)";
2339    Shader shader = assemble(spirv_asm);
2340    Shader spec_shader = specialize(shader, 1, 5);
2341 
2342    auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 },
2343       SHADER_ARG_INOUT);
2344    const uint32_t expected[] = {
2345       0x00000005, 0x60000006, 0x000e000e, 0x20081018
2346    };
2347    CompileArgs args = { (unsigned)inout.size(), 1, 1 };
2348    run_shader(spec_shader, args, inout);
2349    for (int i = 0; i < inout.size(); ++i)
2350       EXPECT_EQ(inout[i], expected[i]);
2351 }
2352 
TEST_F(ComputeTest,arg_metadata)2353 TEST_F(ComputeTest, arg_metadata)
2354 {
2355    const char *kernel_source = R"(
2356    __kernel void main_test(
2357       __global int *undec_ptr,
2358       __global volatile int *vol_ptr,
2359       __global const int *const_ptr,
2360       __global int *restrict restr_ptr,
2361       __global const int *restrict const_restr_ptr,
2362       __constant int *const_ptr2)
2363    {
2364    })";
2365    Shader shader = compile({ kernel_source });
2366    EXPECT_EQ(shader.metadata->kernels[0].args[0].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2367    EXPECT_EQ(shader.metadata->kernels[0].args[0].type_qualifier, 0);
2368    EXPECT_EQ(shader.metadata->kernels[0].args[1].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2369    EXPECT_EQ(shader.metadata->kernels[0].args[1].type_qualifier, CLC_KERNEL_ARG_TYPE_VOLATILE);
2370    EXPECT_EQ(shader.metadata->kernels[0].args[2].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2371    EXPECT_EQ(shader.metadata->kernels[0].args[2].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
2372    EXPECT_EQ(shader.metadata->kernels[0].args[3].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2373    EXPECT_EQ(shader.metadata->kernels[0].args[3].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT);
2374    EXPECT_EQ(shader.metadata->kernels[0].args[4].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2375    EXPECT_EQ(shader.metadata->kernels[0].args[4].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT | CLC_KERNEL_ARG_TYPE_CONST);
2376    EXPECT_EQ(shader.metadata->kernels[0].args[5].address_qualifier, CLC_KERNEL_ARG_ADDRESS_CONSTANT);
2377    EXPECT_EQ(shader.metadata->kernels[0].args[5].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
2378 }
2379